From dd3bee7325cb919ecac8725069f8b62a111c4b7a Mon Sep 17 00:00:00 2001 From: Darwin Date: Sun, 6 Dec 2015 00:33:28 +0000 Subject: [PATCH] xnu-3247.1.106 Imported from https://opensource.apple.com/tarballs/xnu/xnu-3247.1.106.tar.gz --- .DS_Store | Bin 6148 -> 0 bytes .clang-format | 121 + .gitignore | 50 + EXTERNAL_HEADERS/AssertMacros.h | 2 +- EXTERNAL_HEADERS/Availability.h | 185 +- EXTERNAL_HEADERS/AvailabilityInternal.h | 5303 ++++++++++++- EXTERNAL_HEADERS/AvailabilityMacros.h | 652 +- EXTERNAL_HEADERS/Makefile | 2 + EXTERNAL_HEADERS/architecture/Makefile | 2 + EXTERNAL_HEADERS/corecrypto/cc.h | 41 +- EXTERNAL_HEADERS/corecrypto/cc_config.h | 81 +- EXTERNAL_HEADERS/corecrypto/cc_debug.h | 66 + EXTERNAL_HEADERS/corecrypto/cc_macros.h | 80 + EXTERNAL_HEADERS/corecrypto/cc_priv.h | 113 +- EXTERNAL_HEADERS/corecrypto/ccaes.h | 18 +- EXTERNAL_HEADERS/corecrypto/ccasn1.h | 9 +- EXTERNAL_HEADERS/corecrypto/ccder.h | 11 +- EXTERNAL_HEADERS/corecrypto/ccdes.h | 5 +- EXTERNAL_HEADERS/corecrypto/ccdigest.h | 50 +- EXTERNAL_HEADERS/corecrypto/ccdigest_priv.h | 5 +- EXTERNAL_HEADERS/corecrypto/ccdrbg.h | 93 +- EXTERNAL_HEADERS/corecrypto/ccdrbg_impl.h | 13 +- EXTERNAL_HEADERS/corecrypto/cchmac.h | 7 +- EXTERNAL_HEADERS/corecrypto/ccmd5.h | 5 +- EXTERNAL_HEADERS/corecrypto/ccmode.h | 31 +- EXTERNAL_HEADERS/corecrypto/ccmode_factory.h | 103 +- EXTERNAL_HEADERS/corecrypto/ccmode_impl.h | 5 +- EXTERNAL_HEADERS/corecrypto/ccn.h | 137 +- EXTERNAL_HEADERS/corecrypto/ccpad.h | 8 +- EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h | 6 +- EXTERNAL_HEADERS/corecrypto/ccrc4.h | 7 +- EXTERNAL_HEADERS/corecrypto/ccrng.h | 19 +- EXTERNAL_HEADERS/corecrypto/ccrng_system.h | 8 +- EXTERNAL_HEADERS/corecrypto/ccsha1.h | 9 +- EXTERNAL_HEADERS/corecrypto/ccsha2.h | 13 +- EXTERNAL_HEADERS/mach-o/loader.h | 26 +- EXTERNAL_HEADERS/mach-o/nlist.h | 12 +- EXTERNAL_HEADERS/mach-o/stab.h | 1 + Makefile | 48 +- README | 31 +- SETUP/Makefile | 4 + SETUP/json_compilation_db/Makefile | 30 + 
.../json_compilation_db/json_compilation_db.c | 188 + SETUP/kextsymboltool/Makefile | 12 +- SETUP/kextsymboltool/kextsymboltool.c | 28 +- bsd/Makefile | 4 + bsd/bsm/audit_fcntl.h | 1 + bsd/conf/Makefile.template | 9 +- bsd/conf/files | 18 +- bsd/conf/param.c | 5 +- bsd/dev/dtrace/dtrace.c | 149 +- bsd/dev/dtrace/dtrace_glue.c | 4 - bsd/dev/dtrace/dtrace_subr.c | 111 +- bsd/dev/dtrace/lockstat.c | 1 + bsd/dev/dtrace/scripts/Makefile | 2 +- bsd/dev/dtrace/scripts/mptcp.d | 4 +- bsd/dev/dtrace/scripts/sched.d | 6 +- bsd/dev/dtrace/sdt.c | 4 + bsd/dev/dtrace/sdt_subr.c | 8 +- bsd/dev/i386/dis_tables.c | 1213 ++- bsd/dev/i386/kern_machdep.c | 10 - bsd/dev/i386/sysctl.c | 39 +- bsd/dev/i386/systemcalls.c | 24 +- bsd/dev/memdev.c | 13 +- bsd/dev/munge.c | 123 +- bsd/dev/unix_startup.c | 22 +- bsd/dev/vn/vn.c | 4 - bsd/hfs/Makefile | 6 +- bsd/hfs/hfs.h | 196 +- bsd/hfs/hfs_attrlist.c | 2 +- bsd/hfs/hfs_btreeio.c | 8 +- bsd/hfs/hfs_catalog.c | 15 +- bsd/hfs/hfs_catalog.h | 9 +- bsd/hfs/hfs_cnode.c | 1074 +-- bsd/hfs/hfs_cnode.h | 91 +- bsd/hfs/hfs_cprotect.c | 1732 +++-- bsd/hfs/hfs_cprotect.h | 423 + bsd/hfs/hfs_endian.c | 18 + bsd/hfs/hfs_extents.c | 770 ++ bsd/hfs/hfs_extents.h | 54 + bsd/hfs/hfs_format.h | 19 +- bsd/hfs/hfs_fsctl.h | 23 +- bsd/hfs/hfs_fsinfo.c | 10 +- bsd/hfs/hfs_hotfiles.c | 2162 +++++- bsd/hfs/hfs_hotfiles.h | 22 +- bsd/hfs/hfs_kdebug.h | 44 +- bsd/hfs/hfs_link.c | 110 +- bsd/hfs/hfs_lookup.c | 47 +- bsd/hfs/hfs_readwrite.c | 1303 ++-- bsd/hfs/hfs_resize.c | 241 +- bsd/hfs/hfs_search.c | 3 +- bsd/hfs/hfs_vfsops.c | 297 +- bsd/hfs/hfs_vfsutils.c | 784 +- bsd/hfs/hfs_vnops.c | 553 +- bsd/hfs/hfs_xattr.c | 101 +- bsd/hfs/hfscommon/BTree/BTree.c | 16 +- bsd/hfs/hfscommon/BTree/BTreeAllocate.c | 8 +- bsd/hfs/hfscommon/BTree/BTreeTreeOps.c | 8 +- bsd/hfs/hfscommon/Catalog/FileIDsServices.c | 29 +- bsd/hfs/hfscommon/Misc/BTreeWrapper.c | 16 +- bsd/hfs/hfscommon/Misc/FileExtentMapping.c | 123 +- bsd/hfs/hfscommon/Misc/VolumeAllocation.c | 1253 ++- 
bsd/hfs/hfscommon/headers/BTreesInternal.h | 2 +- bsd/hfs/hfscommon/headers/BTreesPrivate.h | 15 +- bsd/hfs/hfscommon/headers/FileMgrInternal.h | 107 +- bsd/hfs/rangelist.c | 172 +- bsd/hfs/rangelist.h | 18 +- bsd/i386/Makefile | 5 +- bsd/kern/ast.h | 1 - bsd/kern/bsd_init.c | 75 +- bsd/kern/bsd_stubs.c | 213 +- bsd/kern/decmpfs.c | 186 +- bsd/kern/kdebug.c | 1657 ++-- bsd/kern/kern_aio.c | 10 +- bsd/kern/kern_control.c | 469 +- bsd/kern/kern_core.c | 16 +- bsd/kern/kern_credential.c | 21 +- bsd/kern/kern_cs.c | 503 +- bsd/kern/kern_csr.c | 167 +- bsd/kern/kern_descrip.c | 552 +- bsd/kern/kern_event.c | 910 ++- bsd/kern/kern_exec.c | 570 +- bsd/kern/kern_exit.c | 251 +- bsd/kern/kern_fork.c | 124 +- bsd/kern/kern_guarded.c | 66 +- bsd/kern/kern_kpc.c | 21 +- bsd/kern/kern_lockf.c | 246 +- bsd/kern/kern_malloc.c | 61 +- bsd/kern/kern_memorystatus.c | 2746 ++++++- bsd/kern/kern_mib.c | 11 + bsd/kern/kern_mman.c | 85 +- bsd/kern/kern_newsysctl.c | 2 +- bsd/kern/kern_proc.c | 369 +- bsd/kern/kern_prot.c | 197 +- bsd/kern/kern_resource.c | 45 +- bsd/kern/kern_shutdown.c | 65 +- bsd/kern/kern_sig.c | 145 +- bsd/kern/kern_subr.c | 6 - bsd/kern/kern_symfile.c | 228 +- bsd/kern/kern_sysctl.c | 110 +- bsd/kern/kern_tests.c | 48 - bsd/kern/kern_xxx.c | 19 +- bsd/kern/kpi_mbuf.c | 17 +- bsd/kern/kpi_socket.c | 12 +- bsd/kern/kpi_socketfilter.c | 6 +- bsd/kern/mach_loader.c | 529 +- bsd/kern/mach_loader.h | 1 + bsd/kern/mach_process.c | 54 +- bsd/kern/makesyscalls.sh | 6 +- bsd/kern/netboot.c | 43 +- bsd/kern/policy_check.c | 41 +- bsd/kern/posix_shm.c | 3 +- bsd/kern/proc_info.c | 301 +- bsd/kern/qsort.c | 7 + bsd/kern/socket_info.c | 20 +- bsd/kern/subr_prf.c | 23 +- bsd/kern/sys_coalition.c | 266 +- bsd/kern/sys_generic.c | 884 ++- bsd/kern/sys_pipe.c | 8 +- bsd/kern/sys_work_interval.c | 128 + bsd/kern/syscalls.master | 43 +- bsd/kern/sysv_shm.c | 53 +- bsd/kern/trace.codes | 39 +- bsd/kern/tty.c | 115 +- bsd/kern/tty_pty.c | 4 +- bsd/kern/ubc_subr.c | 602 +- 
bsd/kern/uipc_domain.c | 34 + bsd/kern/uipc_mbuf.c | 690 +- bsd/kern/uipc_socket.c | 1944 +++-- bsd/kern/uipc_socket2.c | 153 +- bsd/kern/uipc_syscalls.c | 1088 ++- bsd/kern/uipc_usrreq.c | 118 +- bsd/kern/vm_pressure.c | 56 +- bsd/libkern/Makefile | 4 +- bsd/libkern/memchr.c | 2 +- bsd/machine/Makefile | 5 +- bsd/man/man2/Makefile | 5 +- bsd/man/man2/accept.2 | 3 +- bsd/man/man2/bind.2 | 7 +- bsd/man/man2/chflags.2 | 4 +- bsd/man/man2/connect.2 | 8 +- bsd/man/man2/connectx.2 | 398 +- bsd/man/man2/disconnectx.2 | 90 +- bsd/man/man2/getattrlistbulk.2 | 3 + bsd/man/man2/getlcid.2 | 87 - bsd/man/man2/getsockname.2 | 2 +- bsd/man/man2/gettimeofday.2 | 2 +- bsd/man/man2/intro.2 | 16 +- bsd/man/man2/kevent_qos.2 | 1 + bsd/man/man2/kqueue.2 | 175 +- bsd/man/man2/listen.2 | 9 +- bsd/man/man2/mmap.2 | 9 +- bsd/man/man2/mount.2 | 14 +- bsd/man/man2/peeloff.2 | 4 +- bsd/man/man2/poll.2 | 9 +- bsd/man/man2/ptrace.2 | 21 +- bsd/man/man2/reboot.2 | 6 +- bsd/man/man2/recv.2 | 14 +- bsd/man/man2/searchfs.2 | 109 +- bsd/man/man2/select.2 | 11 +- bsd/man/man2/sem_open.2 | 2 +- bsd/man/man2/sem_unlink.2 | 2 +- bsd/man/man2/setattrlist.2 | 56 +- bsd/man/man2/setlcid.2 | 90 - bsd/man/man2/setpgid.2 | 5 - bsd/man/man2/shutdown.2 | 4 +- bsd/man/man2/sigaction.2 | 2 +- bsd/man/man2/socket.2 | 61 +- bsd/man/man2/stat.2 | 9 +- bsd/man/man2/write.2 | 12 +- bsd/man/man4/inet.4 | 5 +- bsd/man/man4/route.4 | 1 - bsd/man/man4/tcp.4 | 32 +- bsd/man/man4/udp.4 | 6 +- bsd/miscfs/devfs/Makefile | 10 +- bsd/miscfs/devfs/devfs_tree.c | 38 +- bsd/miscfs/devfs/devfs_vfsops.c | 2 +- bsd/miscfs/fifofs/Makefile | 4 +- bsd/miscfs/fifofs/fifo_vnops.c | 4 + bsd/miscfs/specfs/Makefile | 5 +- bsd/miscfs/specfs/spec_vnops.c | 201 +- bsd/miscfs/union/Makefile | 5 +- bsd/net/Makefile | 67 +- bsd/net/altq/Makefile | 4 +- bsd/net/bpf.c | 388 +- bsd/net/bpf.h | 6 + bsd/net/bpfdesc.h | 25 +- bsd/net/classq/Makefile | 4 +- bsd/net/classq/classq.c | 2 +- bsd/net/classq/classq_sfb.c | 73 +- bsd/net/classq/classq_sfb.h | 
1 + bsd/net/classq/classq_subr.c | 15 +- bsd/net/classq/if_classq.h | 6 +- bsd/net/content_filter.c | 11 +- bsd/net/content_filter.h | 9 +- bsd/net/devtimer.c | 3 +- bsd/net/dlil.c | 725 +- bsd/net/dlil.h | 34 +- bsd/net/ether_if_module.c | 2 +- bsd/net/if.c | 305 +- bsd/net/if.h | 81 +- bsd/net/if_bond.c | 13 +- bsd/net/if_bridge.c | 10 +- bsd/net/if_gif.c | 3 +- bsd/net/if_ipsec.c | 20 +- bsd/net/if_llreach.h | 4 +- bsd/net/if_loop.c | 2 +- bsd/net/if_media.h | 4 + bsd/net/if_stf.c | 10 +- bsd/net/if_utun.c | 38 +- bsd/net/if_utun.h | 1 - bsd/net/if_utun_crypto_dtls.c | 6 +- bsd/net/if_var.h | 248 +- bsd/net/if_vlan.c | 8 +- bsd/net/kpi_interface.c | 378 +- bsd/net/kpi_interface.h | 187 +- bsd/net/kpi_protocol.c | 7 +- bsd/net/lacp.h | 4 +- bsd/net/necp.c | 1658 +++- bsd/net/necp.h | 87 +- bsd/net/net_perf.c | 104 + bsd/net/net_perf.h | 62 + bsd/net/net_stubs.c | 9 +- bsd/net/network_agent.c | 1150 +++ bsd/net/network_agent.h | 163 + bsd/net/ntstat.c | 1869 +++-- bsd/net/ntstat.h | 369 +- bsd/net/packet_mangler.c | 102 +- bsd/net/packet_mangler.h | 45 +- bsd/net/pf.c | 2501 ++++-- bsd/net/pf_ioctl.c | 67 +- bsd/net/pf_ruleset.c | 7 +- bsd/net/pf_table.c | 5 + bsd/net/pfkeyv2.h | 19 +- bsd/net/pfvar.h | 39 +- bsd/net/pktap.c | 30 +- bsd/net/pktsched/Makefile | 4 +- bsd/net/pktsched/pktsched.c | 1 - bsd/net/pktsched/pktsched_cbq.c | 2 + bsd/net/pktsched/pktsched_fairq.c | 3 + bsd/net/pktsched/pktsched_hfsc.c | 2 + bsd/net/pktsched/pktsched_priq.c | 2 + bsd/net/pktsched/pktsched_qfq.c | 2 + bsd/net/pktsched/pktsched_tcq.c | 2 + bsd/net/radix.h | 1 + bsd/net/raw_usrreq.c | 2 +- bsd/net/route.c | 204 +- bsd/net/route.h | 12 +- bsd/net/rtsock.c | 107 +- bsd/netinet/Makefile | 26 +- bsd/netinet/flow_divert.c | 817 +- bsd/netinet/flow_divert.h | 3 +- bsd/netinet/flow_divert_proto.h | 8 +- bsd/netinet/icmp6.h | 4 + bsd/netinet/igmp.c | 2 +- bsd/netinet/in.c | 65 +- bsd/netinet/in.h | 9 +- bsd/netinet/in_arp.c | 27 +- bsd/netinet/in_cksum.c | 132 +- bsd/netinet/in_dhcp.c | 
935 --- bsd/netinet/in_gif.c | 20 +- bsd/netinet/in_mcast.c | 11 +- bsd/netinet/in_pcb.c | 37 +- bsd/netinet/in_pcb.h | 16 +- bsd/netinet/in_pcblist.c | 65 +- bsd/netinet/in_proto.c | 21 +- bsd/netinet/in_systm.h | 2 +- bsd/netinet/in_tclass.c | 92 +- bsd/netinet/in_var.h | 13 +- bsd/netinet/ip_dummynet.c | 8 +- bsd/netinet/ip_ecn.c | 121 +- bsd/netinet/ip_ecn.h | 10 +- bsd/netinet/ip_encap.c | 6 +- bsd/netinet/ip_flowid.h | 8 + bsd/netinet/ip_fw2.c | 24 +- bsd/netinet/ip_fw2_compat.c | 4 +- bsd/netinet/ip_icmp.c | 4 +- bsd/netinet/ip_input.c | 1492 +++- bsd/netinet/ip_output.c | 109 +- bsd/netinet/ip_var.h | 7 + bsd/netinet/mp_pcb.c | 14 +- bsd/netinet/mp_pcb.h | 2 + bsd/netinet/mptcp.c | 415 +- bsd/netinet/mptcp.h | 4 +- bsd/netinet/mptcp_opt.c | 459 +- bsd/netinet/mptcp_opt.h | 2 +- bsd/netinet/mptcp_subr.c | 1238 ++- bsd/netinet/mptcp_timer.c | 23 +- bsd/netinet/mptcp_timer.h | 2 +- bsd/netinet/mptcp_usrreq.c | 203 +- bsd/netinet/mptcp_var.h | 144 +- bsd/netinet/raw_ip.c | 23 +- bsd/netinet/tcp.h | 71 +- bsd/netinet/tcp_cache.c | 743 ++ bsd/netinet/tcp_cache.h | 52 + bsd/netinet/tcp_cc.c | 68 + bsd/netinet/tcp_cc.h | 9 + bsd/netinet/tcp_cubic.c | 56 +- bsd/netinet/tcp_debug.h | 2 +- bsd/netinet/tcp_input.c | 1050 ++- bsd/netinet/tcp_ledbat.c | 3 - bsd/netinet/tcp_newreno.c | 3 - bsd/netinet/tcp_output.c | 501 +- bsd/netinet/tcp_sack.c | 180 +- bsd/netinet/tcp_subr.c | 467 +- bsd/netinet/tcp_timer.c | 624 +- bsd/netinet/tcp_timer.h | 1 + bsd/netinet/tcp_usrreq.c | 275 +- bsd/netinet/tcp_var.h | 327 +- bsd/netinet/udp.h | 34 +- bsd/netinet/udp_usrreq.c | 477 +- bsd/netinet/udp_var.h | 4 +- bsd/netinet6/Makefile | 19 +- bsd/netinet6/ah_core.c | 6 +- bsd/netinet6/ah_input.c | 20 +- bsd/netinet6/esp.h | 4 + bsd/netinet6/esp_core.c | 85 +- bsd/netinet6/esp_input.c | 234 +- bsd/netinet6/esp_output.c | 56 +- bsd/netinet6/esp_rijndael.c | 454 ++ bsd/netinet6/esp_rijndael.h | 6 + bsd/netinet6/frag6.c | 4 +- bsd/netinet6/icmp6.c | 21 +- bsd/netinet6/in6.c | 289 +- 
bsd/netinet6/in6.h | 2 +- bsd/netinet6/in6_gif.c | 19 +- bsd/netinet6/in6_ifattach.c | 65 +- bsd/netinet6/in6_mcast.c | 7 +- bsd/netinet6/in6_pcb.c | 9 +- bsd/netinet6/in6_proto.c | 5 +- bsd/netinet6/in6_src.c | 181 +- bsd/netinet6/in6_var.h | 34 +- bsd/netinet6/ip6_ecn.h | 8 +- bsd/netinet6/ip6_forward.c | 4 +- bsd/netinet6/ip6_input.c | 117 +- bsd/netinet6/ip6_output.c | 860 +- bsd/netinet6/ip6_var.h | 23 +- bsd/netinet6/ipsec.c | 459 +- bsd/netinet6/ipsec.h | 50 +- bsd/netinet6/ipsec6.h | 2 +- bsd/netinet6/mld6.c | 16 +- bsd/netinet6/nd6.c | 401 +- bsd/netinet6/nd6.h | 70 +- bsd/netinet6/nd6_nbr.c | 88 +- bsd/netinet6/nd6_rtr.c | 160 +- bsd/netinet6/nd6_var.h | 83 + bsd/netinet6/raw_ip6.c | 20 +- bsd/netinet6/scope6_var.h | 7 +- bsd/netinet6/udp6_output.c | 33 +- bsd/netinet6/udp6_usrreq.c | 150 +- bsd/netkey/Makefile | 2 +- bsd/netkey/key.c | 321 +- bsd/netkey/keydb.c | 25 +- bsd/netkey/keydb.h | 1 + bsd/netkey/keysock.c | 6 +- bsd/nfs/nfs.h | 23 +- bsd/nfs/nfs4_subs.c | 12 +- bsd/nfs/nfs4_vnops.c | 180 +- bsd/nfs/nfs_bio.c | 8 +- bsd/nfs/nfs_gss.c | 801 +- bsd/nfs/nfs_gss.h | 19 +- bsd/nfs/nfs_ioctl.h | 37 +- bsd/nfs/nfs_serv.c | 9 +- bsd/nfs/nfs_socket.c | 164 +- bsd/nfs/nfs_subs.c | 38 + bsd/nfs/nfs_syscalls.c | 231 +- bsd/nfs/nfs_upcall.c | 10 +- bsd/nfs/nfs_vfsops.c | 159 +- bsd/nfs/nfs_vnops.c | 81 +- bsd/nfs/nfsm_subs.h | 4 +- bsd/nfs/nfsmount.h | 9 +- bsd/pgo/profile_runtime.c | 284 + bsd/security/audit/audit.c | 1 - bsd/security/audit/audit_arg.c | 1 - bsd/security/audit/audit_bsm.c | 2 - bsd/security/audit/audit_bsm_fcntl.c | 3 + bsd/security/audit/audit_mac.c | 1 - bsd/security/audit/audit_syscalls.c | 1 - bsd/security/audit/audit_worker.c | 1 - bsd/sys/Makefile | 52 +- bsd/sys/_types/Makefile | 3 +- bsd/sys/_types/_timeval64.h | 36 + bsd/sys/attr.h | 31 + bsd/sys/bsdtask_info.h | 4 +- bsd/sys/buf.h | 29 +- bsd/sys/buf_internal.h | 9 +- bsd/sys/cdefs.h | 109 +- bsd/sys/coalition.h | 190 +- bsd/sys/codedir_internal.h | 4 +- bsd/sys/codesign.h | 70 +- 
bsd/sys/content_protection.h | 6 + bsd/sys/cprotect.h | 224 +- bsd/sys/csr.h | 20 +- bsd/sys/disk.h | 52 +- bsd/sys/domain.h | 4 +- bsd/sys/dtrace.h | 17 +- bsd/sys/dtrace_impl.h | 3 + bsd/sys/event.h | 178 +- bsd/sys/eventvar.h | 9 +- bsd/sys/fcntl.h | 30 +- bsd/sys/file_internal.h | 8 +- bsd/sys/filedesc.h | 10 + bsd/sys/fsevents.h | 21 +- bsd/sys/guarded.h | 9 +- bsd/sys/imgact.h | 2 +- bsd/sys/kdebug.h | 540 +- bsd/sys/kern_control.h | 56 +- bsd/sys/kern_memorystatus.h | 126 +- bsd/sys/kern_tests.h | 5 - bsd/sys/kpi_mbuf.h | 18 +- bsd/{netinet/in_dhcp.h => sys/kpi_private.h} | 37 +- bsd/sys/kpi_socket.h | 8 +- bsd/sys/kpi_socketfilter.h | 1 + bsd/sys/lctx.h | 11 +- bsd/sys/loadable_fs.h | 14 - bsd/sys/lockf.h | 3 +- bsd/sys/malloc.h | 71 +- bsd/sys/mbuf.h | 46 +- bsd/sys/memory_maintenance.h | 64 + bsd/sys/mman.h | 32 +- bsd/sys/mount_internal.h | 4 + bsd/sys/munge.h | 3 +- bsd/sys/pgo.h | 93 + bsd/sys/priv.h | 2 + bsd/sys/proc.h | 16 +- bsd/sys/proc_info.h | 52 +- bsd/sys/proc_internal.h | 58 +- bsd/sys/protosw.h | 41 +- bsd/sys/pthread_shims.h | 10 +- bsd/sys/ptrace.h | 7 +- bsd/sys/reboot.h | 5 +- bsd/sys/resource.h | 6 +- bsd/sys/select.h | 9 +- bsd/sys/signal.h | 1 + bsd/sys/signalvar.h | 25 +- bsd/sys/socket.h | 114 +- bsd/sys/socketvar.h | 142 +- bsd/sys/sockio.h | 28 +- bsd/sys/spawn_internal.h | 60 +- bsd/sys/stackshot.h | 69 + bsd/sys/stat.h | 3 +- bsd/sys/sysctl.h | 35 +- bsd/sys/sysent.h | 4 +- bsd/sys/systm.h | 3 +- bsd/sys/time.h | 5 + bsd/sys/ubc.h | 6 + bsd/sys/ubc_internal.h | 13 +- bsd/sys/uio_internal.h | 2 +- bsd/sys/user.h | 28 +- bsd/sys/vnode.h | 145 +- bsd/sys/vnode_internal.h | 24 + bsd/sys/work_interval.h | 141 + bsd/uuid/Makefile | 3 - bsd/uxkern/ux_exception.c | 5 +- bsd/vfs/kpi_vfs.c | 69 +- bsd/vfs/vfs_attrlist.c | 49 +- bsd/vfs/vfs_bio.c | 93 +- bsd/vfs/vfs_cache.c | 33 +- bsd/vfs/vfs_cluster.c | 438 +- bsd/vfs/vfs_fsevents.c | 158 +- bsd/vfs/vfs_journal.c | 264 +- bsd/vfs/vfs_journal.h | 12 +- bsd/vfs/vfs_lookup.c | 125 +- 
bsd/vfs/vfs_subr.c | 365 +- bsd/vfs/vfs_syscalls.c | 268 +- bsd/vfs/vfs_utfconv.c | 101 +- bsd/vfs/vfs_vnops.c | 125 +- bsd/vfs/vfs_xattr.c | 8 +- bsd/vm/dp_backing_file.c | 2 +- bsd/vm/vm_compressor_backing_file.c | 47 +- bsd/vm/vm_unix.c | 36 +- config/BSDKernel.exports | 7 +- config/IOKit.exports | 33 +- config/IOKit.x86_64.exports | 20 +- config/Libkern.exports | 3 + config/MACFramework.exports | 6 +- config/MASTER | 222 +- config/MASTER.x86_64 | 26 +- config/Mach.exports | 2 + config/MasterVersion | 2 +- config/Private.exports | 69 +- config/Private.x86_64.exports | 10 +- config/Unsupported.exports | 12 +- config/Unsupported.x86_64.exports | 1 + config/Unused.exports | 1 + iokit/.clang-format | 30 + iokit/Families/IONVRAM/IONVRAMController.cpp | 10 +- iokit/IOKit/IOBSD.h | 31 + iokit/IOKit/IOBufferMemoryDescriptor.h | 4 +- iokit/IOKit/IOCPU.h | 25 +- iokit/IOKit/IOCatalogue.h | 4 +- iokit/IOKit/IOCommand.h | 2 +- iokit/IOKit/IOCommandGate.h | 56 +- iokit/IOKit/IOCommandPool.h | 2 +- iokit/IOKit/IOCommandQueue.h | 4 +- iokit/IOKit/IOConditionLock.h | 2 +- iokit/IOKit/IODMACommand.h | 153 +- iokit/IOKit/IODMAController.h | 3 +- iokit/IOKit/IODMAEventSource.h | 6 +- iokit/IOKit/IODataQueue.h | 2 +- iokit/IOKit/IODeviceTreeSupport.h | 1 + iokit/IOKit/IOEventSource.h | 2 +- iokit/IOKit/IOFilterInterruptEventSource.h | 6 +- iokit/IOKit/IOHibernatePrivate.h | 32 +- iokit/IOKit/IOInterleavedMemoryDescriptor.h | 8 +- iokit/IOKit/IOInterruptAccounting.h | 7 + iokit/IOKit/IOInterruptAccountingPrivate.h | 22 - iokit/IOKit/IOInterruptController.h | 14 +- iokit/IOKit/IOInterruptEventSource.h | 10 +- iokit/IOKit/IOKernelReporters.h | 30 +- iokit/IOKit/IOKitDebug.h | 106 +- iokit/IOKit/IOKitDiagnosticsUserClient.h | 17 + iokit/IOKit/IOKitKeysPrivate.h | 25 +- iokit/IOKit/IOLib.h | 20 +- iokit/IOKit/IOMapper.h | 99 +- iokit/IOKit/IOMemoryDescriptor.h | 123 +- iokit/IOKit/IOMultiMemoryDescriptor.h | 29 +- iokit/IOKit/IONVRAM.h | 23 +- iokit/IOKit/IOPlatformExpert.h | 47 +- 
iokit/IOKit/IOPolledInterface.h | 169 +- iokit/IOKit/IORangeAllocator.h | 4 +- iokit/IOKit/IORegistryEntry.h | 12 +- iokit/IOKit/IOReportMacros.h | 142 + iokit/IOKit/IOReturn.h | 5 +- iokit/IOKit/IOService.h | 28 +- iokit/IOKit/IOServicePM.h | 2 + iokit/IOKit/IOSharedDataQueue.h | 8 +- iokit/IOKit/IOSubMemoryDescriptor.h | 28 +- iokit/IOKit/IOSyncer.h | 2 +- iokit/IOKit/IOTimeStamp.h | 12 +- iokit/IOKit/IOTimerEventSource.h | 8 +- iokit/IOKit/IOTypes.h | 1 + iokit/IOKit/IOUserClient.h | 6 +- iokit/IOKit/IOWorkLoop.h | 5 +- iokit/IOKit/nvram/IONVRAMController.h | 2 +- iokit/IOKit/platform/AppleMacIO.h | 2 +- iokit/IOKit/platform/AppleMacIODevice.h | 6 +- iokit/IOKit/platform/AppleNMI.h | 4 +- iokit/IOKit/platform/ApplePlatformExpert.h | 16 +- iokit/IOKit/pwr_mgt/IOPMPowerSource.h | 4 +- iokit/IOKit/pwr_mgt/IOPMPowerSourceList.h | 2 +- iokit/IOKit/pwr_mgt/IOPMPrivate.h | 25 + iokit/IOKit/pwr_mgt/IOPMinformee.h | 2 +- iokit/IOKit/pwr_mgt/IOPMinformeeList.h | 3 +- iokit/IOKit/pwr_mgt/IOPMlog.h | 1 + iokit/IOKit/pwr_mgt/IOPMpowerState.h | 6 + iokit/IOKit/pwr_mgt/RootDomain.h | 70 +- .../IOKit/system_management/IOWatchDogTimer.h | 6 +- iokit/Kernel/IOBufferMemoryDescriptor.cpp | 12 +- iokit/Kernel/IOCPU.cpp | 294 +- iokit/Kernel/IOCommandGate.cpp | 99 +- iokit/Kernel/IODMACommand.cpp | 544 +- iokit/Kernel/IODMAEventSource.cpp | 7 + iokit/Kernel/IODataQueue.cpp | 67 +- iokit/Kernel/IODeviceTreeSupport.cpp | 4 +- iokit/Kernel/IOEventSource.cpp | 4 +- iokit/Kernel/IOHibernateIO.cpp | 1805 +---- iokit/Kernel/IOHibernateInternal.h | 32 - iokit/Kernel/IOHibernateRestoreKernel.c | 19 +- iokit/Kernel/IOHistogramReporter.cpp | 3 +- iokit/Kernel/IOKitDebug.cpp | 938 ++- iokit/Kernel/IOKitKernelInternal.h | 81 +- iokit/Kernel/IOLib.cpp | 336 +- iokit/Kernel/IOMapper.cpp | 175 +- iokit/Kernel/IOMemoryDescriptor.cpp | 582 +- iokit/Kernel/IOMultiMemoryDescriptor.cpp | 162 +- iokit/Kernel/IONVRAM.cpp | 104 +- iokit/Kernel/IOPMPowerStateQueue.h | 4 +- iokit/Kernel/IOPMrootDomain.cpp | 
967 ++- iokit/Kernel/IOPlatformExpert.cpp | 77 +- iokit/Kernel/IOPolledInterface.cpp | 939 ++- iokit/Kernel/IORegistryEntry.cpp | 2 +- iokit/Kernel/IOReportLegend.cpp | 44 +- iokit/Kernel/IOReporterDefs.h | 4 +- iokit/Kernel/IOService.cpp | 391 +- iokit/Kernel/IOServicePM.cpp | 603 +- iokit/Kernel/IOServicePMPrivate.h | 64 +- iokit/Kernel/IOServicePrivate.h | 39 +- iokit/Kernel/IOStartIOKit.cpp | 28 +- iokit/Kernel/IOSubMemoryDescriptor.cpp | 8 + iokit/Kernel/IOUserClient.cpp | 194 +- iokit/Kernel/IOWorkLoop.cpp | 9 +- iokit/Kernel/RootDomainUserClient.h | 10 +- iokit/Tests/TestIOMemoryDescriptor.cpp | 446 ++ iokit/Tests/Tests.cpp | 290 +- iokit/Tests/Tests.h | 77 +- iokit/bsddev/IOKitBSDInit.cpp | 177 + iokit/conf/Makefile.template | 8 +- iokit/conf/files | 9 +- libkdd/kcdata/KCDBasicTypeDescription.h | 43 + libkdd/kcdata/KCDBasicTypeDescription.m | 131 + libkdd/kcdata/KCDStructTypeDescription.h | 38 + libkdd/kcdata/KCDStructTypeDescription.m | 106 + libkdd/kcdata/kcdata_core.m | 207 + libkdd/kcdata/kcdtypes.c | 552 ++ libkdd/kcdata/kdd.h | 136 + libkdd/kcdata/kdd.m | 43 + libkdd/kdd.xcodeproj/project.pbxproj | 269 + libkern/.clang-format | 30 + libkern/Makefile | 2 + libkern/OSKextVersion.c | 2 +- libkern/c++/OSArray.cpp | 19 +- libkern/c++/OSCollectionIterator.cpp | 21 +- libkern/c++/OSData.cpp | 40 +- libkern/c++/OSDictionary.cpp | 19 +- libkern/c++/OSKext.cpp | 791 +- libkern/c++/OSMetaClass.cpp | 95 +- libkern/c++/OSObject.cpp | 134 +- libkern/c++/OSOrderedSet.cpp | 20 +- libkern/c++/OSRuntime.cpp | 23 +- libkern/c++/OSSerialize.cpp | 22 +- libkern/c++/OSSerializeBinary.cpp | 9 +- libkern/c++/OSString.cpp | 64 +- libkern/c++/OSSymbol.cpp | 54 +- libkern/conf/Makefile.template | 8 +- libkern/conf/files | 1 + libkern/conf/files.x86_64 | 1 - libkern/crypto/corecrypto_aesxts.c | 3 +- libkern/crypto/corecrypto_sha2.c | 2 +- libkern/gen/OSAtomicOperations.c | 252 +- libkern/gen/OSDebug.cpp | 7 + libkern/kxld/Makefile | 8 +- libkern/kxld/kxld.c | 17 +- 
libkern/kxld/kxld_demangle.c | 2 - libkern/kxld/kxld_demangle.h | 2 +- libkern/kxld/kxld_object.c | 11 +- libkern/kxld/kxld_reloc.c | 4 +- libkern/kxld/kxld_seg.c | 38 +- libkern/kxld/kxld_stubs.c | 2 +- libkern/kxld/kxld_sym.c | 2 +- libkern/kxld/kxld_util.c | 60 +- libkern/kxld/kxld_util.h | 10 + libkern/kxld/kxld_versionmin.c | 8 +- libkern/kxld/kxld_versionmin.h | 3 +- libkern/kxld/tests/kextcopyright.c | 2 +- libkern/libkern/Makefile | 73 +- libkern/libkern/OSAtomic.h | 50 +- libkern/libkern/OSKextLib.h | 33 +- libkern/libkern/OSKextLibPrivate.h | 10 +- libkern/libkern/c++/OSArray.h | 28 +- libkern/libkern/c++/OSBoolean.h | 10 +- libkern/libkern/c++/OSCollection.h | 4 +- libkern/libkern/c++/OSCollectionIterator.h | 8 +- libkern/libkern/c++/OSData.h | 6 +- libkern/libkern/c++/OSDictionary.h | 28 +- libkern/libkern/c++/OSKext.h | 58 +- libkern/libkern/c++/OSLib.h | 27 + libkern/libkern/c++/OSMetaClass.h | 27 +- libkern/libkern/c++/OSNumber.h | 6 +- libkern/libkern/c++/OSObject.h | 22 +- libkern/libkern/c++/OSOrderedSet.h | 26 +- libkern/libkern/c++/OSSerialize.h | 4 +- libkern/libkern/c++/OSSet.h | 28 +- libkern/libkern/c++/OSString.h | 8 +- libkern/libkern/c++/OSSymbol.h | 16 +- libkern/libkern/crypto/sha2.h | 2 +- libkern/libkern/kxld.h | 4 +- libkern/libkern/zlib.h | 15 + libkern/x86_64/OSAtomic.s | 105 - libkern/zlib/deflate.c | 41 +- libkern/zlib/deflate.h | 2 + libsa/conf/Makefile.template | 19 +- libsa/lastkerneldataconst.c | 45 + libsyscall/Libsyscall.xcconfig | 20 +- .../Libsyscall.xcodeproj/project.pbxproj | 19 +- libsyscall/mach/host.c | 42 + libsyscall/mach/mach/mach.h | 7 + libsyscall/mach/mach_init.c | 1 + libsyscall/mach/mach_msg.c | 136 +- libsyscall/mach/watchos_prohibited_mig.txt | 53 + libsyscall/wrappers/cancelable/fcntl-base.c | 7 + libsyscall/wrappers/csr.c | 9 +- libsyscall/wrappers/kdebug_trace.c | 90 +- libsyscall/wrappers/libproc/libproc.c | 85 + libsyscall/wrappers/libproc/libproc.h | 14 + .../wrappers/libproc/libproc_internal.h | 2 + 
libsyscall/wrappers/spawn/posix_spawn.c | 101 +- libsyscall/wrappers/spawn/spawn.h | 49 +- libsyscall/wrappers/spawn/spawn_private.h | 6 +- libsyscall/wrappers/stackshot.c | 272 + libsyscall/wrappers/work_interval.c | 113 + libsyscall/xcodescripts/create-syscalls.pl | 4 +- libsyscall/xcodescripts/filter_mig.awk | 37 + libsyscall/xcodescripts/mach_install_mig.sh | 16 +- makedefs/MakeInc.cmd | 19 +- makedefs/MakeInc.def | 74 +- makedefs/MakeInc.kernel | 29 +- makedefs/MakeInc.rule | 34 +- makedefs/MakeInc.top | 25 +- osfmk/.DS_Store | Bin 8196 -> 0 bytes osfmk/Makefile | 24 +- osfmk/UserNotification/Makefile | 13 +- osfmk/atm/atm.c | 639 +- osfmk/atm/atm_internal.h | 63 +- osfmk/atm/atm_types.h | 7 +- osfmk/bank/bank.c | 54 +- osfmk/chud/chud_thread.c | 146 +- osfmk/chud/i386/chud_thread_i386.c | 16 +- osfmk/conf/Makefile.template | 24 +- osfmk/conf/files | 12 +- osfmk/conf/files.x86_64 | 1 + osfmk/console/i386/serial_console.c | 4 +- osfmk/console/video_console.c | 57 +- osfmk/console/video_console.h | 9 + osfmk/corecrypto/cc/src/cc_clear.c | 27 + osfmk/corecrypto/ccdbrg/src/ccdrbg_nisthmac.c | 385 +- osfmk/corecrypto/ccdigest/src/ccdigest_init.c | 7 +- .../corecrypto/ccdigest/src/ccdigest_update.c | 7 +- osfmk/corecrypto/cchmac/src/cchmac.c | 5 +- osfmk/corecrypto/cchmac/src/cchmac_final.c | 6 +- osfmk/corecrypto/cchmac/src/cchmac_init.c | 7 +- osfmk/corecrypto/cchmac/src/cchmac_update.c | 5 +- osfmk/corecrypto/ccn/src/ccn_set.c | 19 +- .../ccsha1/src/ccdigest_final_64be.c | 5 +- osfmk/corecrypto/ccsha1/src/ccsha1_eay.c | 5 +- .../ccsha1/src/ccsha1_initial_state.c | 5 +- osfmk/{pmc => corpses}/Makefile | 15 +- osfmk/corpses/corpse.c | 228 + osfmk/corpses/task_corpse.h | 115 + osfmk/default_pager/default_pager.c | 4 +- osfmk/default_pager/dp_backing_store.c | 12 +- osfmk/default_pager/dp_memory_object.c | 4 +- osfmk/device/Makefile | 5 + osfmk/device/device.defs | 20 + osfmk/device/device_init.c | 6 +- osfmk/device/device_types.h | 3 +- osfmk/gssd/Makefile | 6 +- 
osfmk/i386/AT386/model_dep.c | 38 +- osfmk/i386/Diagnostics.c | 13 +- osfmk/i386/Makefile | 2 + osfmk/i386/acpi.c | 21 +- osfmk/i386/ast.h | 67 - osfmk/i386/ast_types.h | 69 - osfmk/i386/atomic.h | 53 + osfmk/i386/bsd_i386.c | 24 + osfmk/i386/commpage/commpage.c | 48 +- osfmk/i386/commpage/commpage.h | 1 + osfmk/i386/cpu.c | 2 +- osfmk/i386/cpu_capabilities.h | 4 +- osfmk/i386/cpu_data.h | 4 - osfmk/i386/cpuid.c | 25 +- osfmk/i386/cpuid.h | 3 +- osfmk/i386/fpu.c | 22 +- osfmk/i386/genassym.c | 26 +- osfmk/i386/i386_init.c | 25 +- osfmk/i386/i386_lock.s | 26 +- osfmk/i386/i386_vm_init.c | 35 +- osfmk/i386/io_map.c | 2 +- osfmk/i386/lapic_native.c | 17 +- osfmk/i386/locks.h | 2 + osfmk/i386/locks_i386.c | 202 +- osfmk/i386/machine_routines.c | 41 +- osfmk/i386/machine_routines.h | 8 +- osfmk/i386/misc_protos.h | 5 + osfmk/i386/mp.c | 2 +- osfmk/i386/mp.h | 8 +- osfmk/i386/mp_desc.c | 20 +- osfmk/i386/pal_routines.c | 5 - osfmk/i386/pal_routines.h | 3 - osfmk/i386/panic_hooks.c | 6 +- osfmk/i386/pcb.c | 150 +- osfmk/i386/pmCPU.c | 38 +- osfmk/i386/pmap.h | 125 +- osfmk/i386/pmap_common.c | 47 +- osfmk/i386/pmap_internal.h | 40 +- osfmk/i386/pmap_x86_common.c | 340 +- osfmk/i386/proc_reg.h | 6 +- osfmk/i386/rtclock.c | 6 + osfmk/{machine/ast_types.h => i386/smp.h} | 16 +- osfmk/i386/thread.h | 1 + osfmk/i386/trap.c | 21 +- osfmk/i386/trap.h | 1 + osfmk/i386/tsc.c | 83 +- osfmk/i386/ucode.c | 2 +- osfmk/i386/vmx/vmx_shims.c | 2 +- osfmk/ipc/ipc_importance.c | 162 +- osfmk/ipc/ipc_importance.h | 2 +- osfmk/ipc/ipc_init.c | 8 +- osfmk/ipc/ipc_kmsg.c | 298 +- osfmk/ipc/ipc_kmsg.h | 12 - osfmk/ipc/ipc_mqueue.c | 602 +- osfmk/ipc/ipc_mqueue.h | 75 +- osfmk/ipc/ipc_object.c | 18 +- osfmk/ipc/ipc_object.h | 2 +- osfmk/ipc/ipc_port.c | 31 +- osfmk/ipc/ipc_port.h | 32 +- osfmk/ipc/ipc_pset.c | 86 +- osfmk/ipc/ipc_pset.h | 9 +- osfmk/ipc/ipc_right.c | 94 +- osfmk/ipc/ipc_right.h | 5 +- osfmk/ipc/ipc_table.c | 44 +- osfmk/ipc/ipc_voucher.c | 14 +- osfmk/ipc/mach_debug.c | 25 +- 
osfmk/ipc/mach_msg.c | 32 +- osfmk/ipc/mach_port.c | 105 +- osfmk/kdp/kdp_core.c | 813 ++ osfmk/kdp/kdp_core.h | 15 +- osfmk/kdp/kdp_udp.c | 73 +- osfmk/kdp/ml/i386/kdp_x86_common.c | 295 +- osfmk/kdp/ml/i386/kdp_x86_common.h | 21 - osfmk/kdp/ml/x86_64/kdp_machdep.c | 57 +- osfmk/kdp/ml/x86_64/kdp_vm.c | 18 +- osfmk/kern/Makefile | 16 +- osfmk/kern/assert.h | 4 +- osfmk/kern/ast.c | 68 +- osfmk/kern/ast.h | 91 +- osfmk/kern/bsd_kern.c | 91 +- osfmk/kern/btlog.c | 2 +- osfmk/kern/call_entry.h | 1 + osfmk/kern/clock.c | 90 +- osfmk/kern/clock.h | 8 + osfmk/kern/coalition.c | 1488 +++- osfmk/kern/coalition.h | 107 +- osfmk/kern/debug.c | 44 +- osfmk/kern/debug.h | 155 +- osfmk/kern/ecc.h | 1 - osfmk/kern/energy_perf.c | 25 + osfmk/kern/energy_perf.h | 21 + osfmk/kern/exception.c | 55 +- osfmk/kern/exception.h | 2 +- osfmk/kern/gzalloc.c | 7 +- osfmk/kern/hibernate.c | 12 +- osfmk/kern/host.c | 719 +- osfmk/kern/hv_support.c | 64 - osfmk/kern/hv_support.h | 2 - osfmk/kern/ipc_host.c | 22 +- osfmk/kern/ipc_kobject.c | 80 +- osfmk/kern/ipc_mig.c | 6 +- osfmk/kern/ipc_sync.c | 130 +- osfmk/kern/ipc_sync.h | 11 +- osfmk/kern/ipc_tt.c | 21 +- osfmk/kern/kalloc.c | 207 +- osfmk/kern/kalloc.h | 40 + osfmk/kern/kern_cdata.c | 469 ++ osfmk/kern/kern_cdata.h | 246 + osfmk/kern/kern_ecc.c | 9 - osfmk/kern/kern_stackshot.c | 1372 +++- osfmk/kern/kern_types.h | 15 +- osfmk/kern/kext_alloc.c | 4 +- osfmk/kern/kpc.h | 153 +- osfmk/kern/kpc_common.c | 551 +- osfmk/kern/kpc_thread.c | 16 +- osfmk/kern/ledger.c | 28 +- osfmk/kern/ledger.h | 2 +- osfmk/kern/locks.c | 161 +- osfmk/kern/locks.h | 11 +- osfmk/kern/machine.h | 33 + osfmk/kern/misc_protos.h | 11 +- osfmk/kern/page_decrypt.h | 1 + osfmk/kern/printf.c | 43 +- osfmk/kern/priority.c | 274 +- osfmk/kern/processor.c | 325 +- osfmk/kern/processor.h | 64 +- osfmk/kern/processor_data.h | 8 + osfmk/kern/queue.h | 371 +- osfmk/kern/sched.h | 17 +- osfmk/kern/sched_average.c | 8 +- osfmk/kern/sched_dualq.c | 45 +- osfmk/kern/sched_grrr.c 
| 132 +- osfmk/kern/sched_multiq.c | 143 +- osfmk/kern/sched_prim.c | 2298 +++--- osfmk/kern/sched_prim.h | 217 +- osfmk/kern/sched_proto.c | 50 +- osfmk/kern/sched_traditional.c | 740 ++ osfmk/kern/sfi.c | 117 +- osfmk/{i386/flipc_page.h => kern/smp.h} | 23 +- osfmk/kern/stack.c | 3 +- osfmk/kern/startup.c | 66 +- osfmk/kern/sync_lock.h | 1 - osfmk/kern/sync_sema.c | 182 +- osfmk/kern/sync_sema.h | 13 +- osfmk/kern/syscall_subr.c | 86 +- osfmk/kern/syscall_sw.c | 2 +- osfmk/kern/syscall_sw.h | 6 +- osfmk/kern/sysdiagnose.c | 58 + osfmk/kern/task.c | 631 +- osfmk/kern/task.h | 78 +- osfmk/kern/task_policy.c | 237 +- osfmk/kern/telemetry.c | 17 +- osfmk/kern/thread.c | 462 +- osfmk/kern/thread.h | 158 +- osfmk/kern/thread_act.c | 105 +- osfmk/kern/thread_call.c | 27 +- osfmk/kern/thread_policy.c | 100 +- osfmk/kern/timer_call.c | 9 +- osfmk/kern/wait_queue.c | 2172 ------ osfmk/kern/wait_queue.h | 467 -- osfmk/kern/waitq.c | 5846 ++++++++++++++ osfmk/kern/waitq.h | 452 ++ osfmk/kern/zalloc.c | 228 +- osfmk/kern/zalloc.h | 5 +- osfmk/kextd/Makefile | 10 +- osfmk/kperf/action.c | 547 +- osfmk/kperf/action.h | 36 +- osfmk/kperf/buffer.h | 33 +- osfmk/kperf/callstack.c | 4 +- osfmk/kperf/context.h | 2 + osfmk/kperf/kperf.h | 26 +- osfmk/kperf/kperf_arch.h | 1 + osfmk/kperf/kperf_kpc.c | 153 +- osfmk/kperf/kperf_kpc.h | 24 +- osfmk/kperf/kperfbsd.c | 70 +- osfmk/kperf/meminfo.c | 91 + osfmk/kperf/meminfo.h | 45 + osfmk/kperf/pet.c | 3 +- osfmk/kperf/sample.h | 2 + osfmk/kperf/threadinfo.c | 4 +- osfmk/kperf/x86_64/kperf_meminfo.c | 62 + osfmk/lockd/Makefile | 9 +- osfmk/mach/Makefile | 37 +- osfmk/mach/coalition.h | 123 + osfmk/mach/exception_types.h | 5 +- osfmk/mach/flipc_cb.h | 1220 --- osfmk/mach/flipc_debug.h | 248 - osfmk/mach/flipc_device.h | 104 - osfmk/mach/flipc_locks.h | 180 - osfmk/mach/flipc_types.h | 260 - osfmk/mach/host_info.h | 48 +- osfmk/mach/host_special_ports.h | 21 +- osfmk/mach/i386/Makefile | 5 + osfmk/mach/i386/exception.h | 2 +- 
osfmk/mach/i386/flipc_dep.h | 104 - osfmk/mach/mach_host.defs | 25 + osfmk/mach/mach_types.defs | 9 +- osfmk/mach/machine.h | 1 + osfmk/mach/machine/Makefile | 5 + osfmk/mach/machine/sdt.h | 8 + osfmk/mach/memory_object_types.h | 80 +- osfmk/mach/message.h | 12 +- osfmk/mach/port.h | 10 +- osfmk/mach/shared_region.h | 4 +- osfmk/mach/sync_policy.h | 1 + osfmk/mach/sysdiagnose_notification.defs | 50 + osfmk/mach/task_info.h | 51 +- osfmk/mach/thread_info.h | 46 +- osfmk/mach/vm_behavior.h | 1 + osfmk/mach/vm_param.h | 24 +- osfmk/mach/vm_prot.h | 6 + osfmk/mach/vm_statistics.h | 81 +- osfmk/mach/vm_types.h | 18 + osfmk/mach_debug/mach_debug_types.defs | 3 + osfmk/mach_debug/mach_debug_types.h | 10 + osfmk/mach_debug/zone_info.h | 12 + osfmk/machine/Makefile | 9 +- osfmk/machine/atomic.h | 48 + osfmk/machine/{ast.h => smp.h} | 13 +- osfmk/pmc/pmc.c | 2953 ------- osfmk/pmc/pmc.h | 772 -- osfmk/prng/prng_yarrow.c | 8 +- osfmk/prng/random.c | 35 +- osfmk/profiling/Makefile | 10 + osfmk/vm/WKdm_new.h | 2 +- osfmk/vm/bsd_vm.c | 19 +- osfmk/vm/device_vm.c | 12 +- osfmk/vm/memory_object.c | 25 +- osfmk/vm/memory_object.h | 10 - osfmk/vm/pmap.h | 35 +- osfmk/vm/vm_apple_protect.c | 515 +- osfmk/vm/vm_compressor.c | 1737 +++-- osfmk/vm/vm_compressor.h | 112 +- osfmk/vm/vm_compressor_backing_store.c | 271 +- osfmk/vm/vm_compressor_backing_store.h | 5 +- osfmk/vm/vm_compressor_pager.c | 117 +- osfmk/vm/vm_compressor_pager.h | 9 +- osfmk/vm/vm_debug.c | 20 +- osfmk/vm/vm_fault.c | 469 +- osfmk/vm/vm_fault.h | 17 +- osfmk/vm/vm_fourk_pager.c | 1348 ++++ osfmk/vm/vm_init.c | 4 +- osfmk/vm/vm_kern.c | 276 +- osfmk/vm/vm_kern.h | 105 +- osfmk/vm/vm_map.c | 2168 ++++-- osfmk/vm/vm_map.h | 217 +- osfmk/vm/vm_map_store.c | 53 +- osfmk/vm/vm_map_store.h | 7 +- osfmk/vm/vm_map_store_ll.c | 3 + osfmk/vm/vm_map_store_rb.c | 555 +- osfmk/vm/vm_map_store_rb.h | 2 +- osfmk/vm/vm_object.c | 647 +- osfmk/vm/vm_object.h | 78 +- osfmk/vm/vm_page.h | 218 +- osfmk/vm/vm_pageout.c | 1956 +++-- 
osfmk/vm/vm_pageout.h | 39 +- osfmk/vm/vm_phantom_cache.c | 4 +- osfmk/vm/vm_protos.h | 36 +- osfmk/vm/vm_purgeable.c | 98 +- osfmk/vm/vm_purgeable_internal.h | 5 + osfmk/vm/vm_resident.c | 1103 ++- osfmk/vm/vm_shared_region.c | 16 +- osfmk/vm/vm_swapfile_pager.c | 6 +- osfmk/vm/vm_user.c | 212 +- osfmk/x86_64/WKdmCompress_new.s | 247 +- osfmk/x86_64/WKdmDecompress_new.s | 73 +- osfmk/x86_64/bcopy.s | 1 + osfmk/x86_64/bzero.s | 22 + osfmk/x86_64/copyio.c | 12 + osfmk/x86_64/cswitch.s | 1 - osfmk/x86_64/idt64.s | 9 +- osfmk/x86_64/kpc_x86.c | 241 +- osfmk/x86_64/locore.s | 15 +- osfmk/x86_64/loose_ends.c | 32 +- osfmk/x86_64/machine_kpc.h | 9 - osfmk/x86_64/machine_routines_asm.s | 5 + osfmk/x86_64/pmap.c | 329 +- osfmk/x86_64/start.s | 1 - pexpert/Makefile | 2 + pexpert/conf/Makefile.template | 4 +- pexpert/gen/bootargs.c | 109 +- pexpert/gen/pe_gen.c | 45 +- pexpert/i386/pe_init.c | 30 +- pexpert/i386/pe_kprintf.c | 4 +- pexpert/pexpert/Makefile | 12 +- pexpert/pexpert/i386/boot.h | 9 +- pexpert/pexpert/pexpert.h | 30 + pexpert/pexpert/protos.h | 9 +- security/Makefile | 9 +- security/conf/Makefile.template | 4 +- security/conf/files | 1 + security/mac.h | 11 +- security/mac_audit.c | 39 +- security/mac_base.c | 243 +- security/mac_file.c | 4 +- security/mac_framework.h | 23 +- security/mac_internal.h | 14 +- security/mac_iokit.c | 29 + security/mac_kext.c | 10 + security/mac_mach.c | 139 + security/mac_mach_internal.h | 12 +- security/mac_pipe.c | 50 +- security/mac_policy.h | 320 +- security/mac_posix_sem.c | 35 +- security/mac_posix_shm.c | 42 +- security/mac_process.c | 318 +- security/mac_socket.c | 147 +- security/mac_system.c | 36 +- security/mac_sysv_msg.c | 49 +- security/mac_sysv_sem.c | 21 +- security/mac_sysv_shm.c | 28 +- security/mac_vfs.c | 613 +- tools/lldbmacros/Makefile | 10 +- tools/lldbmacros/README | 12 +- tools/lldbmacros/atm.py | 10 +- tools/lldbmacros/core/kernelcore.py | 95 +- tools/lldbmacros/core/operating_system.py | 99 +- 
tools/lldbmacros/core/xnu_lldb_init.py | 5 + tools/lldbmacros/ioreg.py | 78 +- tools/lldbmacros/ipc.py | 448 +- tools/lldbmacros/kauth.py | 35 + tools/lldbmacros/kcdata.py | 993 +++ tools/lldbmacros/mbufdefines.py | 4 +- tools/lldbmacros/mbufs.py | 110 +- tools/lldbmacros/memory.py | 646 +- tools/lldbmacros/misc.py | 309 + tools/lldbmacros/net.py | 11 + tools/lldbmacros/pmap.py | 139 +- tools/lldbmacros/process.py | 572 +- tools/lldbmacros/scheduler.py | 181 +- tools/lldbmacros/userspace.py | 151 +- tools/lldbmacros/usertaskgdbserver.py | 29 + tools/lldbmacros/utils.py | 28 + tools/lldbmacros/waitq.py | 1064 +++ tools/lldbmacros/xnu.py | 65 +- tools/lldbmacros/xnudefines.py | 6 +- tools/reindent.sh | 27 + tools/remote_build.sh | 19 +- tools/tests/MPMMTest/MPMMtest.c | 322 +- tools/tests/MPMMTest/Makefile | 12 +- tools/tests/Makefile | 52 +- tools/tests/Makefile.common | 48 + tools/tests/affinity/Makefile | 9 +- tools/tests/execperf/Makefile | 7 +- tools/tests/jitter/Makefile | 11 +- tools/tests/kqueue_tests/Makefile | 9 +- tools/tests/libMicro/bench.sh | 8 +- tools/tests/libMicro/benchDS.sh | 8 +- tools/tests/libMicro/coreos_bench.sh | 8 +- tools/tests/libMicro/embd_bench.sh | 8 +- tools/tests/libMicro/od_account_create.sh | 2 +- tools/tests/libMicro/od_account_delete.sh | 2 +- tools/tests/memorystatus/Makefile | 47 - tools/tests/memorystatus/memorystatus.c | 822 -- .../tests/memorystatus/memorystatus_groups.c | 653 -- tools/tests/perf_index/Makefile | 10 +- tools/tests/superpages/Makefile | 11 +- .../tests/testkext/testthreadcall-Info.plist | 8 +- tools/tests/testkext/testthreadcall.cpp | 28 +- .../tests/xnu_quick_test/32bit_inode_tests.c | 293 - tools/tests/xnu_quick_test/README | 199 - .../xnu_quick_test/atomic_fifo_queue_test.c | 33 - tools/tests/xnu_quick_test/commpage_tests.c | 362 - .../xnu_quick_test/content_protection_test.c | 963 --- tools/tests/xnu_quick_test/helpers/arch.c | 17 - .../tests/xnu_quick_test/helpers/data_exec.c | 214 - 
tools/tests/xnu_quick_test/helpers/launch.c | 87 - tools/tests/xnu_quick_test/helpers/sleep.c | 6 - tools/tests/xnu_quick_test/kqueue_tests.c | 530 -- tools/tests/xnu_quick_test/machvm_tests.c | 364 - tools/tests/xnu_quick_test/main.c | 639 -- tools/tests/xnu_quick_test/makefile | 199 - tools/tests/xnu_quick_test/memory_tests.c | 333 - tools/tests/xnu_quick_test/misc.c | 436 -- tools/tests/xnu_quick_test/pipes_tests.c | 882 --- tools/tests/xnu_quick_test/sched_tests.c | 231 - tools/tests/xnu_quick_test/sema_tests.c | 165 - .../xnu_quick_test/shared_memory_tests.c | 163 - tools/tests/xnu_quick_test/socket_tests.c | 603 -- tools/tests/xnu_quick_test/tests.c | 6915 ----------------- tools/tests/xnu_quick_test/tests.h | 158 - tools/tests/xnu_quick_test/xattr_tests.c | 179 - .../xnu_quick_test.entitlements | 8 - tools/tests/zero-to-n/Makefile | 11 +- tools/tests/zero-to-n/zero-to-n.c | 778 +- 1152 files changed, 107761 insertions(+), 59764 deletions(-) delete mode 100644 .DS_Store create mode 100644 .clang-format create mode 100644 .gitignore create mode 100644 EXTERNAL_HEADERS/corecrypto/cc_debug.h create mode 100644 EXTERNAL_HEADERS/corecrypto/cc_macros.h create mode 100644 SETUP/json_compilation_db/Makefile create mode 100644 SETUP/json_compilation_db/json_compilation_db.c create mode 100644 bsd/hfs/hfs_cprotect.h create mode 100644 bsd/hfs/hfs_extents.c create mode 100644 bsd/hfs/hfs_extents.h delete mode 100644 bsd/kern/kern_tests.c create mode 100644 bsd/kern/sys_work_interval.c delete mode 100644 bsd/man/man2/getlcid.2 create mode 100644 bsd/man/man2/kevent_qos.2 delete mode 100644 bsd/man/man2/setlcid.2 create mode 100644 bsd/net/net_perf.c create mode 100644 bsd/net/net_perf.h create mode 100644 bsd/net/network_agent.c create mode 100644 bsd/net/network_agent.h delete mode 100644 bsd/netinet/in_dhcp.c create mode 100644 bsd/netinet/tcp_cache.c create mode 100644 bsd/netinet/tcp_cache.h create mode 100644 bsd/netinet6/nd6_var.h create mode 100644 
bsd/pgo/profile_runtime.c create mode 100644 bsd/sys/_types/_timeval64.h delete mode 100644 bsd/sys/kern_tests.h rename bsd/{netinet/in_dhcp.h => sys/kpi_private.h} (69%) create mode 100644 bsd/sys/memory_maintenance.h create mode 100644 bsd/sys/pgo.h create mode 100644 bsd/sys/stackshot.h create mode 100644 bsd/sys/work_interval.h create mode 100644 iokit/.clang-format create mode 100644 iokit/IOKit/IOKitDiagnosticsUserClient.h create mode 100644 iokit/Tests/TestIOMemoryDescriptor.cpp create mode 100644 libkdd/kcdata/KCDBasicTypeDescription.h create mode 100644 libkdd/kcdata/KCDBasicTypeDescription.m create mode 100644 libkdd/kcdata/KCDStructTypeDescription.h create mode 100644 libkdd/kcdata/KCDStructTypeDescription.m create mode 100644 libkdd/kcdata/kcdata_core.m create mode 100644 libkdd/kcdata/kcdtypes.c create mode 100644 libkdd/kcdata/kdd.h create mode 100644 libkdd/kcdata/kdd.m create mode 100644 libkdd/kdd.xcodeproj/project.pbxproj create mode 100644 libkern/.clang-format delete mode 100644 libkern/x86_64/OSAtomic.s create mode 100644 libsa/lastkerneldataconst.c create mode 100644 libsyscall/mach/host.c create mode 100644 libsyscall/mach/watchos_prohibited_mig.txt create mode 100644 libsyscall/wrappers/stackshot.c create mode 100644 libsyscall/wrappers/work_interval.c create mode 100755 libsyscall/xcodescripts/filter_mig.awk delete mode 100644 osfmk/.DS_Store create mode 100644 osfmk/corecrypto/cc/src/cc_clear.c rename osfmk/{pmc => corpses}/Makefile (64%) create mode 100644 osfmk/corpses/corpse.c create mode 100644 osfmk/corpses/task_corpse.h delete mode 100644 osfmk/i386/ast.h delete mode 100644 osfmk/i386/ast_types.h create mode 100644 osfmk/i386/atomic.h rename osfmk/{machine/ast_types.h => i386/smp.h} (82%) create mode 100644 osfmk/kdp/kdp_core.c create mode 100644 osfmk/kern/kern_cdata.c create mode 100644 osfmk/kern/kern_cdata.h create mode 100644 osfmk/kern/sched_traditional.c rename osfmk/{i386/flipc_page.h => kern/smp.h} (78%) create mode 100644 
osfmk/kern/sysdiagnose.c delete mode 100644 osfmk/kern/wait_queue.c delete mode 100644 osfmk/kern/wait_queue.h create mode 100644 osfmk/kern/waitq.c create mode 100644 osfmk/kern/waitq.h create mode 100644 osfmk/kperf/meminfo.c create mode 100644 osfmk/kperf/meminfo.h create mode 100644 osfmk/kperf/x86_64/kperf_meminfo.c create mode 100644 osfmk/mach/coalition.h delete mode 100644 osfmk/mach/flipc_cb.h delete mode 100644 osfmk/mach/flipc_debug.h delete mode 100644 osfmk/mach/flipc_device.h delete mode 100644 osfmk/mach/flipc_locks.h delete mode 100644 osfmk/mach/flipc_types.h delete mode 100644 osfmk/mach/i386/flipc_dep.h create mode 100644 osfmk/mach/sysdiagnose_notification.defs create mode 100644 osfmk/machine/atomic.h rename osfmk/machine/{ast.h => smp.h} (86%) delete mode 100644 osfmk/pmc/pmc.c delete mode 100644 osfmk/pmc/pmc.h create mode 100644 osfmk/vm/vm_fourk_pager.c create mode 100644 security/mac_mach.c create mode 100644 tools/lldbmacros/kauth.py create mode 100644 tools/lldbmacros/kcdata.py create mode 100644 tools/lldbmacros/usertaskgdbserver.py create mode 100644 tools/lldbmacros/waitq.py create mode 100755 tools/reindent.sh create mode 100644 tools/tests/Makefile.common delete mode 100644 tools/tests/memorystatus/Makefile delete mode 100644 tools/tests/memorystatus/memorystatus.c delete mode 100644 tools/tests/memorystatus/memorystatus_groups.c delete mode 100644 tools/tests/xnu_quick_test/32bit_inode_tests.c delete mode 100644 tools/tests/xnu_quick_test/README delete mode 100644 tools/tests/xnu_quick_test/atomic_fifo_queue_test.c delete mode 100644 tools/tests/xnu_quick_test/commpage_tests.c delete mode 100644 tools/tests/xnu_quick_test/content_protection_test.c delete mode 100644 tools/tests/xnu_quick_test/helpers/arch.c delete mode 100644 tools/tests/xnu_quick_test/helpers/data_exec.c delete mode 100644 tools/tests/xnu_quick_test/helpers/launch.c delete mode 100644 tools/tests/xnu_quick_test/helpers/sleep.c delete mode 100644 
tools/tests/xnu_quick_test/kqueue_tests.c delete mode 100644 tools/tests/xnu_quick_test/machvm_tests.c delete mode 100644 tools/tests/xnu_quick_test/main.c delete mode 100644 tools/tests/xnu_quick_test/makefile delete mode 100644 tools/tests/xnu_quick_test/memory_tests.c delete mode 100644 tools/tests/xnu_quick_test/misc.c delete mode 100644 tools/tests/xnu_quick_test/pipes_tests.c delete mode 100644 tools/tests/xnu_quick_test/sched_tests.c delete mode 100644 tools/tests/xnu_quick_test/sema_tests.c delete mode 100644 tools/tests/xnu_quick_test/shared_memory_tests.c delete mode 100644 tools/tests/xnu_quick_test/socket_tests.c delete mode 100644 tools/tests/xnu_quick_test/tests.c delete mode 100644 tools/tests/xnu_quick_test/tests.h delete mode 100644 tools/tests/xnu_quick_test/xattr_tests.c delete mode 100644 tools/tests/xnu_quick_test/xnu_quick_test.entitlements diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 638852157c82519a3080b94aefac3897f6280f8c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}T>S5Zp@)hFdyG|&LZQ+kR%${bU93rIQj3&A&OV0!k0FS@ zgOB3P*&mCgdhsAqW?<%J$|PRaSm`MrDX7LSuc;f-ddvePrOhB*haFlQ|;99NoA zuadY)H!iuIBRS%J;Ay|?Ce2U(Ac|T;-}S0I=rye6ohVFP9tNE+sr4EU{is)px}hJ} zD{8+Q_mgJBS}Ave3rH=yVNm5U_jrpZmnMi|HmqE~zg4vJY`tg?@~mI9xAJVWurU}| zPlmC&W^eBwwg*?&H@A2950B3;>LjQrUC5Gxp?w{C=pX~g05UM44Co8cn4i%4uqP~Ch`{bi~<6e_ +#define __TVOS_9_0 90000 + +#define __WATCHOS_1_0 10000 +#define __WATCHOS_2_0 20000 +#include #ifdef __IPHONE_OS_VERSION_MIN_REQUIRED #define __OSX_AVAILABLE_STARTING(_osx, _ios) __AVAILABILITY_INTERNAL##_ios @@ -172,4 +184,139 @@ #endif +#if defined(__has_feature) + #if __has_feature(attribute_availability_with_message) + #define __OS_AVAILABILITY(_target, _availability) __attribute__((availability(_target,_availability))) + #define __OS_AVAILABILITY_MSG(_target, _availability, _msg) __attribute__((availability(_target,_availability,message=_msg))) + #else + #define 
__OS_AVAILABILITY(_target, _availability) + #define __OS_AVAILABILITY_MSG(_target, _availability, _msg) + #endif +#else + #define __OS_AVAILABILITY(_target, _availability) + #define __OS_AVAILABILITY_MSG(_target, _availability, _msg) +#endif + + +/* for use to document app extension usage */ +#if defined(__has_feature) + #if __has_feature(attribute_availability_app_extension) + #define __OSX_EXTENSION_UNAVAILABLE(_msg) __OS_AVAILABILITY_MSG(macosx_app_extension,unavailable,_msg) + #define __IOS_EXTENSION_UNAVAILABLE(_msg) __OS_AVAILABILITY_MSG(ios_app_extension,unavailable,_msg) + #else + #define __OSX_EXTENSION_UNAVAILABLE(_msg) + #define __IOS_EXTENSION_UNAVAILABLE(_msg) + #endif +#else + #define __OSX_EXTENSION_UNAVAILABLE(_msg) + #define __IOS_EXTENSION_UNAVAILABLE(_msg) +#endif + +#define __OS_EXTENSION_UNAVAILABLE(_msg) __OSX_EXTENSION_UNAVAILABLE(_msg) __IOS_EXTENSION_UNAVAILABLE(_msg) + + + +/* for use marking APIs available info for Mac OSX */ +#if defined(__has_feature) + #if __has_attribute(availability) + #define __OSX_UNAVAILABLE __OS_AVAILABILITY(macosx,unavailable) + #define __OSX_AVAILABLE(_vers) __OS_AVAILABILITY(macosx,introduced=_vers) + #define __OSX_DEPRECATED(_start, _dep, _msg) __OSX_AVAILABLE(_start) __OS_AVAILABILITY_MSG(macosx,deprecated=_dep,_msg) + #endif +#endif + +#ifndef __OSX_UNAVAILABLE + #define __OSX_UNAVAILABLE +#endif + +#ifndef __OSX_AVAILABLE + #define __OSX_AVAILABLE(_vers) +#endif + +#ifndef __OSX_DEPRECATED + #define __OSX_DEPRECATED(_start, _dep, _msg) +#endif + + +/* for use marking APIs available info for iOS */ +#if defined(__has_feature) + #if __has_attribute(availability) + #define __IOS_UNAVAILABLE __OS_AVAILABILITY(ios,unavailable) + #define __IOS_PROHIBITED __OS_AVAILABILITY(ios,unavailable) + #define __IOS_AVAILABLE(_vers) __OS_AVAILABILITY(ios,introduced=_vers) + #define __IOS_DEPRECATED(_start, _dep, _msg) __IOS_AVAILABLE(_start) __OS_AVAILABILITY_MSG(ios,deprecated=_dep,_msg) + #endif +#endif + +#ifndef 
__IOS_UNAVAILABLE + #define __IOS_UNAVAILABLE +#endif + +#ifndef __IOS_PROHIBITED + #define __IOS_PROHIBITED +#endif + +#ifndef __IOS_AVAILABLE + #define __IOS_AVAILABLE(_vers) +#endif + +#ifndef __IOS_DEPRECATED + #define __IOS_DEPRECATED(_start, _dep, _msg) +#endif + + +/* for use marking APIs available info for tvOS */ +#if defined(__has_feature) + #if __has_feature(attribute_availability_tvos) + #define __TVOS_UNAVAILABLE __OS_AVAILABILITY(tvos,unavailable) + #define __TVOS_PROHIBITED __OS_AVAILABILITY(tvos,unavailable) + #define __TVOS_AVAILABLE(_vers) __OS_AVAILABILITY(tvos,introduced=_vers) + #define __TVOS_DEPRECATED(_start, _dep, _msg) __TVOS_AVAILABLE(_start) __OS_AVAILABILITY_MSG(tvos,deprecated=_dep,_msg) + #endif +#endif + +#ifndef __TVOS_UNAVAILABLE + #define __TVOS_UNAVAILABLE +#endif + +#ifndef __TVOS_PROHIBITED + #define __TVOS_PROHIBITED +#endif + +#ifndef __TVOS_AVAILABLE + #define __TVOS_AVAILABLE(_vers) +#endif + +#ifndef __TVOS_DEPRECATED + #define __TVOS_DEPRECATED(_start, _dep, _msg) +#endif + + +/* for use marking APIs available info for Watch OS */ +#if defined(__has_feature) + #if __has_feature(attribute_availability_watchos) + #define __WATCHOS_UNAVAILABLE __OS_AVAILABILITY(watchos,unavailable) + #define __WATCHOS_PROHIBITED __OS_AVAILABILITY(watchos,unavailable) + #define __WATCHOS_AVAILABLE(_vers) __OS_AVAILABILITY(watchos,introduced=_vers) + #define __WATCHOS_DEPRECATED(_start, _dep, _msg) __WATCHOS_AVAILABLE(_start) __OS_AVAILABILITY_MSG(watchos,deprecated=_dep,_msg) + #endif +#endif + +#ifndef __WATCHOS_UNAVAILABLE + #define __WATCHOS_UNAVAILABLE +#endif + +#ifndef __WATCHOS_PROHIBITED + #define __WATCHOS_PROHIBITED +#endif + +#ifndef __WATCHOS_AVAILABLE + #define __WATCHOS_AVAILABLE(_vers) +#endif + +#ifndef __WATCHOS_DEPRECATED + #define __WATCHOS_DEPRECATED(_start, _dep, _msg) +#endif + + #endif /* __AVAILABILITY__ */ diff --git a/EXTERNAL_HEADERS/AvailabilityInternal.h b/EXTERNAL_HEADERS/AvailabilityInternal.h index 
e8b7b3de7..81bbd59b7 100644 --- a/EXTERNAL_HEADERS/AvailabilityInternal.h +++ b/EXTERNAL_HEADERS/AvailabilityInternal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 by Apple Inc.. All rights reserved. + * Copyright (c) 2007-2015 by Apple Inc.. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -39,6 +39,28 @@ #endif #endif +#ifndef __TV_OS_VERSION_MIN_REQUIRED + #ifdef __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ + /* compiler sets __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ when -mtvos-version-min is used */ + #define __TV_OS_VERSION_MIN_REQUIRED __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ + #define __TV_OS_VERSION_MAX_ALLOWED __IPHONE_9_0 + /* for compatibility with existing code. New code should use platform specific checks */ + #define __IPHONE_OS_VERSION_MIN_REQUIRED 90000 + #endif +#endif + +#ifndef __WATCH_OS_VERSION_MIN_REQUIRED + #ifdef __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ + /* compiler sets __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ when -mwatchos-version-min is used */ + #define __WATCH_OS_VERSION_MIN_REQUIRED __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ + #define __WATCH_OS_VERSION_MAX_ALLOWED 20000 + /* for compatibility with existing code. 
New code should use platform specific checks */ + #define __IPHONE_OS_VERSION_MIN_REQUIRED 90000 + #endif +#endif + + + #define __AVAILABILITY_INTERNAL_DEPRECATED __attribute__((deprecated)) #ifdef __has_feature #if __has_feature(attribute_deprecated_with_message) @@ -58,7 +80,7 @@ #ifdef __IPHONE_OS_VERSION_MIN_REQUIRED /* make sure a default max version is set */ #ifndef __IPHONE_OS_VERSION_MAX_ALLOWED - #define __IPHONE_OS_VERSION_MAX_ALLOWED __IPHONE_8_0 + #define __IPHONE_OS_VERSION_MAX_ALLOWED __IPHONE_9_0 #endif /* make sure a valid min is set */ #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_0 @@ -172,6 +194,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=2.0,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=2.0,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=2.0,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=2.0,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=2.0,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=2.0))) #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=2.0))) #define __AVAILABILITY_INTERNAL__IPHONE_2_1 __attribute__((availability(ios,introduced=2.1))) @@ -271,6 +323,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=2.1,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=2.1,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=2.1,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=2.1,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=2.1,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=2.1))) #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=2.1))) #define __AVAILABILITY_INTERNAL__IPHONE_2_2 __attribute__((availability(ios,introduced=2.2))) @@ -364,6 +446,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=2.2,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=2.2,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=2.2,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=2.2,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=2.2,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=2.2))) #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=2.2))) #define __AVAILABILITY_INTERNAL__IPHONE_3_0 __attribute__((availability(ios,introduced=3.0))) @@ -451,6 +563,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=3.0,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=3.0,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=3.0,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=3.0,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=3.0,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=3.0))) #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=3.0))) #define __AVAILABILITY_INTERNAL__IPHONE_3_1 __attribute__((availability(ios,introduced=3.1))) @@ -532,6 +674,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=3.1,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=3.1,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=3.1,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=3.1,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=3.1,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=3.1))) #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=3.1))) #define __AVAILABILITY_INTERNAL__IPHONE_3_2 __attribute__((availability(ios,introduced=3.2))) @@ -607,6 +779,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=3.2,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=3.2,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=3.2,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=3.2,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=3.2,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=3.2))) #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=3.2))) #define __AVAILABILITY_INTERNAL__IPHONE_4_0 __attribute__((availability(ios,introduced=4.0))) @@ -676,6 +878,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=4.0,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=4.0,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=4.0,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=4.0,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=4.0,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.0))) #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=4.0))) #define __AVAILABILITY_INTERNAL__IPHONE_4_1 __attribute__((availability(ios,introduced=4.1))) @@ -739,6 +971,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=4.1,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=4.1,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=4.1,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=4.1,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=4.1,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.1))) #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=4.1))) #define __AVAILABILITY_INTERNAL__IPHONE_4_2 __attribute__((availability(ios,introduced=4.2))) @@ -796,6 +1058,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=4.2,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=4.2,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=4.2,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=4.2,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=4.2,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.2))) #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=4.2))) #define __AVAILABILITY_INTERNAL__IPHONE_4_3 __attribute__((availability(ios,introduced=4.3))) @@ -847,6 +1139,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=4.3,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=4.3,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=4.3,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=4.3,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=4.3,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.3))) #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=4.3))) #define __AVAILABILITY_INTERNAL__IPHONE_5_0 __attribute__((availability(ios,introduced=5.0))) @@ -892,6 +1214,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=5.0,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=5.0,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=5.0,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=5.0,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=5.0,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=5.0))) #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=5.0))) #define __AVAILABILITY_INTERNAL__IPHONE_5_1 __attribute__((availability(ios,introduced=5.1))) @@ -931,6 +1283,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=5.1,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=5.1,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=5.1,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=5.1,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=5.1,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=5.1))) #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=5.1))) #define __AVAILABILITY_INTERNAL__IPHONE_6_0 __attribute__((availability(ios,introduced=6.0))) @@ -964,6 +1346,36 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=6.0,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=6.0,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=6.0,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=6.0,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=6.0,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=6.0))) #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=6.0))) #define __AVAILABILITY_INTERNAL__IPHONE_6_1 __attribute__((availability(ios,introduced=6.1))) @@ -991,53 +1403,278 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=8.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=6.1,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=6.1,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=6.1,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=6.1,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=6.1,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=9.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=6.1))) #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=6.1))) #define __AVAILABILITY_INTERNAL__IPHONE_7_0 __attribute__((availability(ios,introduced=7.0))) #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=7.0,deprecated=7.0))) #if __has_feature(attribute_availability_with_message) - #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.0,message=_msg))) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=7.0,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=7.0,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) 
__attribute__((availability(ios,introduced=7.0,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=8.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=7.0,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=7.0,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=7.0,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=7.0,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) 
__attribute__((availability(ios,introduced=7.0,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=7.0,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=9.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=7.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=7.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1 __attribute__((availability(ios,introduced=7.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=7.1,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=7.1,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) 
__attribute__((availability(ios,introduced=7.1,deprecated=8.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=7.1,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=7.1,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=7.1,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=7.1,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) 
__attribute__((availability(ios,introduced=7.1,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=7.1,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=9.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=7.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=7.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0 __attribute__((availability(ios,introduced=8.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=8.0,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=8.0,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=8.0,deprecated=8.2))) + #if 
__has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=8.0,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=8.0,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=8.0,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=9.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=8.0))) + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=8.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1 __attribute__((availability(ios,introduced=8.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __attribute__((availability(ios,introduced=8.1,deprecated=8.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=8.1,deprecated=8.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __attribute__((availability(ios,introduced=8.1,deprecated=8.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=8.1,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=8.1,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=8.1,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=8.1,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=8.1,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=8.1,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=8.1,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) 
__attribute__((availability(ios,introduced=8.1,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=8.1,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=8.1,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=8.1,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=8.1,deprecated=9.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=8.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=8.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_2 __attribute__((availability(ios,introduced=8.2))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __attribute__((availability(ios,introduced=8.2,deprecated=8.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=8.2,deprecated=8.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __attribute__((availability(ios,introduced=8.2,deprecated=8.2))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=8.2,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=8.2,deprecated=8.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) 
__attribute__((availability(ios,introduced=8.2,deprecated=8.3))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=8.2,deprecated=8.4))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=8.2,deprecated=8.4,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=8.2,deprecated=8.4))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=8.2,deprecated=9.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=8.2,deprecated=9.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=8.2,deprecated=9.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=8.2))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=8.2))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3 __attribute__((availability(ios,introduced=8.3))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __attribute__((availability(ios,introduced=8.3,deprecated=8.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=8.3,deprecated=8.3,message=_msg))) #else - #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __attribute__((availability(ios,introduced=8.3,deprecated=8.3))) #endif 
- #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=7.0,deprecated=7.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=8.3,deprecated=8.4))) #if __has_feature(attribute_availability_with_message) - #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.1,message=_msg))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=8.3,deprecated=8.4,message=_msg))) #else - #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=8.3,deprecated=8.4))) #endif - #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=7.0,deprecated=8.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=8.3,deprecated=9.0))) #if __has_feature(attribute_availability_with_message) - #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=8.0,message=_msg))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=8.3,deprecated=9.0,message=_msg))) #else - #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=8.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=8.3,deprecated=9.0))) #endif - #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=7.0))) - #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA_MSG(_msg) 
__attribute__((availability(ios,introduced=7.0))) - #define __AVAILABILITY_INTERNAL__IPHONE_7_1 __attribute__((availability(ios,introduced=7.1))) - #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=7.1,deprecated=7.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_NA __attribute__((availability(ios,introduced=8.3))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=8.3))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_4 __attribute__((availability(ios,introduced=8.4))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __attribute__((availability(ios,introduced=8.4,deprecated=8.4))) #if __has_feature(attribute_availability_with_message) - #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=7.1,message=_msg))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=8.4,deprecated=8.4,message=_msg))) #else - #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=7.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __attribute__((availability(ios,introduced=8.4,deprecated=8.4))) #endif - #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=7.1,deprecated=8.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=8.4,deprecated=9.0))) #if __has_feature(attribute_availability_with_message) - #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=8.0,message=_msg))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=8.4,deprecated=9.0,message=_msg))) #else - #define 
__AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=8.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=8.4,deprecated=9.0))) #endif - #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=7.1))) - #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=7.1))) - #define __AVAILABILITY_INTERNAL__IPHONE_8_0 __attribute__((availability(ios,introduced=8.0))) - #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=8.0,deprecated=8.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_NA __attribute__((availability(ios,introduced=8.4))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=8.4))) + #define __AVAILABILITY_INTERNAL__IPHONE_9_0 __attribute__((availability(ios,introduced=9.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __attribute__((availability(ios,introduced=9.0,deprecated=9.0))) #if __has_feature(attribute_availability_with_message) - #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.0,message=_msg))) + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=9.0,deprecated=9.0,message=_msg))) #else - #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __attribute__((availability(ios,introduced=9.0,deprecated=9.0))) #endif - #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=8.0))) - #define 
__AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=8.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=9.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=9.0))) #define __AVAILABILITY_INTERNAL__IPHONE_NA __attribute__((availability(ios,unavailable))) #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA __attribute__((availability(ios,unavailable))) #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,unavailable))) @@ -4953,6 +5590,4181 @@ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) #endif + /* set up old style internal macros (up to 8.1) */ + #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1 __AVAILABILITY_INTERNAL_UNAVAILABLE + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + 
#define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + 
#define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + 
#define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + 
#define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + 
#define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_6_1 + 
#define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) 
__AVAILABILITY_INTERNAL__IPHONE_7_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1 
__AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #endif + /* set up old style internal macros (up to 8.2) */ + #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2 __AVAILABILITY_INTERNAL_UNAVAILABLE + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_2 + #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + 
#define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 
__AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 
__AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 
__AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 
__AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_2 + 
#define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR 
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) 
__AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 
__AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) 
__AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 
__AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) 
__AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 
__AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + 
#define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_2 + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg) 
__AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #endif + /* set up old style internal macros (up to 8.3): __AVAILABILITY_INTERNAL__IPHONE_8_3 expands to __AVAILABILITY_INTERNAL_UNAVAILABLE when __IPHONE_OS_VERSION_MAX_ALLOWED is below __IPHONE_8_3, to __AVAILABILITY_INTERNAL_WEAK_IMPORT when only __IPHONE_OS_VERSION_MIN_REQUIRED is below __IPHONE_8_3, and to __AVAILABILITY_INTERNAL_REGULAR otherwise; the two 8_3_DEP__IPHONE_NA forms that follow alias it, and the _MSG form ignores its _msg argument */ + #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3 __AVAILABILITY_INTERNAL_UNAVAILABLE + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_3 + #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3
__AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + 
#define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + 
#define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 
__AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + 
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 
__AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + 
#define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_3 + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL__IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_3 + #else + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3 
__AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #endif + /* set up old style internal macros (up to 8.4): __AVAILABILITY_INTERNAL__IPHONE_8_4 expands to the UNAVAILABLE marker when __IPHONE_OS_VERSION_MAX_ALLOWED is below __IPHONE_8_4, to the WEAK_IMPORT marker when the max allowed version is at least 8.4 but __IPHONE_OS_VERSION_MIN_REQUIRED is below __IPHONE_8_4, and to the REGULAR marker otherwise; the two _DEP__IPHONE_NA forms that follow reuse that same expansion, and the _MSG variant ignores its _msg argument */ + #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_8_4 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4 __AVAILABILITY_INTERNAL_UNAVAILABLE + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_4 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_8_4 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_4 + #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) 
__AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + 
#define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR 
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 
__AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 
__AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0 + 
#define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) 
__AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_1 + 
#define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_3 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_4 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL__IPHONE_8_4 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_4 + #else + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4 
__AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #endif + /* set up old style internal macros (up to 9.0) */ + #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_9_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0 __AVAILABILITY_INTERNAL_UNAVAILABLE + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #else + #define __AVAILABILITY_INTERNAL__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_9_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_9_0 + #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + 
#define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR 
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) 
__AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) 
__AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 
__AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + 
#define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 
__AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) 
__AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 
__AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) 
__AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_1 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_2 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_3 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_3 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_4 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_4 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_4 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_8_4 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_4 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL__IPHONE_9_0 + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_9_0 + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg) 
__AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #endif /* set up internal macros (n/a) */ #define __AVAILABILITY_INTERNAL__IPHONE_NA __AVAILABILITY_INTERNAL_UNAVAILABLE #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA __AVAILABILITY_INTERNAL_UNAVAILABLE @@ -4964,7 +9776,7 @@ #define __MAC_OS_X_VERSION_MIN_REQUIRED __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ /* make sure a default max version is set */ #ifndef __MAC_OS_X_VERSION_MAX_ALLOWED - #define __MAC_OS_X_VERSION_MAX_ALLOWED 
__MAC_10_10 + #define __MAC_OS_X_VERSION_MAX_ALLOWED __MAC_10_11 #endif #if defined(__has_attribute) && defined(__has_feature) @@ -5037,6 +9849,24 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.10))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.0,deprecated=10.10.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.10.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.10.2))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.0,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.0,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.11,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.11))) + #endif #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.0))) #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA 
__attribute__((availability(macosx,introduced=10.0))) #define __AVAILABILITY_INTERNAL__MAC_10_1 __attribute__((availability(macosx,introduced=10.1))) @@ -5100,6 +9930,24 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.10))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.1,deprecated=10.10.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.10.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.10.2))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.1,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.1,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.11,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.11))) + #endif #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.1))) #define 
__AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.1))) #define __AVAILABILITY_INTERNAL__MAC_10_2 __attribute__((availability(macosx,introduced=10.2))) @@ -5157,6 +10005,24 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.10))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.2,deprecated=10.10.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.10.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.10.2))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.2,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.2,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.11,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.11))) + #endif #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA_MSG(_msg) 
__attribute__((availability(macosx,introduced=10.2))) #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.2))) #define __AVAILABILITY_INTERNAL__MAC_10_3 __attribute__((availability(macosx,introduced=10.3))) @@ -5208,6 +10074,24 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.10))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.3,deprecated=10.10.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.10.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.10.2))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.3,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.3,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.11,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.11))) + #endif #define 
__AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.3))) #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.3))) #define __AVAILABILITY_INTERNAL__MAC_10_4 __attribute__((availability(macosx,introduced=10.4))) @@ -5253,6 +10137,24 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.10))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.4,deprecated=10.10.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.10.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.10.2))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.4,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.4,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.11,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.11))) + 
#endif #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.4))) #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.4))) #define __AVAILABILITY_INTERNAL__MAC_10_5 __attribute__((availability(macosx,introduced=10.5))) @@ -5292,6 +10194,24 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.10))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.5,deprecated=10.10.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.10.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.10.2))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.5,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.5,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.11,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11_MSG(_msg) 
__attribute__((availability(macosx,introduced=10.5,deprecated=10.11))) + #endif #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.5))) #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.5))) #define __AVAILABILITY_INTERNAL__MAC_10_6 __attribute__((availability(macosx,introduced=10.6))) @@ -5325,6 +10245,24 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.10))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.6,deprecated=10.10.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.10.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.10.2))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.6,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.6,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.11,message=_msg))) + #else + #define 
__AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.11))) + #endif #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.6))) #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.6))) #define __AVAILABILITY_INTERNAL__MAC_10_7 __attribute__((availability(macosx,introduced=10.7))) @@ -5352,6 +10290,24 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.10))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.7,deprecated=10.10.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.10.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.10.2))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.7,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.7,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.11,message=_msg))) + 
#else + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.11))) + #endif #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.7))) #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.7))) #define __AVAILABILITY_INTERNAL__MAC_10_8 __attribute__((availability(macosx,introduced=10.8))) @@ -5373,6 +10329,24 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.10))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.8,deprecated=10.10.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.10.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.10.2))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.8,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.8,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11_MSG(_msg) 
__attribute__((availability(macosx,introduced=10.8,deprecated=10.11,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.11))) + #endif #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.8))) #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.8))) #define __AVAILABILITY_INTERNAL__MAC_10_9 __attribute__((availability(macosx,introduced=10.9))) @@ -5388,6 +10362,24 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.9,deprecated=10.10))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.9,deprecated=10.10.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.9,deprecated=10.10.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.9,deprecated=10.10.2))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.9,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.9,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.9,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.9,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.9,deprecated=10.11,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.9,deprecated=10.11))) + #endif #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.9))) #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.9))) #define __AVAILABILITY_INTERNAL__MAC_10_10 __attribute__((availability(macosx,introduced=10.10))) @@ -5397,8 +10389,71 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.10,deprecated=10.10))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.10,deprecated=10.10.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.10,deprecated=10.10.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.10,deprecated=10.10.2))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.10,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.10,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.10,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.10,deprecated=10.11))) + #if 
__has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.10,deprecated=10.11,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.10,deprecated=10.11))) + #endif #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.10))) #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.10))) + #define __AVAILABILITY_INTERNAL__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.10.2))) + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2 __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.10.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.10.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.10.2))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.11,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.11))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.10.2))) + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.10.2))) + #define __AVAILABILITY_INTERNAL__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.10.3))) + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3 __attribute__((availability(macosx,introduced=10.10.3,deprecated=10.10.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.10.3,deprecated=10.10.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.10.3,deprecated=10.10.3))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.10.3,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.10.3,deprecated=10.11,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.10.3,deprecated=10.11))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.10.3))) + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.10.3))) + #define __AVAILABILITY_INTERNAL__MAC_10_11 
__attribute__((availability(macosx,introduced=10.11))) + #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11 __attribute__((availability(macosx,introduced=10.11,deprecated=10.11))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.11,deprecated=10.11,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11_MSG(_msg) __attribute__((availability(macosx,introduced=10.11,deprecated=10.11))) + #endif + #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.11))) + #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.11))) #define __AVAILABILITY_INTERNAL__MAC_NA __attribute__((availability(macosx,unavailable))) #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA __attribute__((availability(macosx,unavailable))) #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,unavailable))) @@ -5407,6 +10462,27 @@ #ifndef __AVAILABILITY_INTERNAL__MAC_10_0 /* use old style attributes */ + #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_11 + #define __AVAILABILITY_INTERNAL__MAC_10_11 __AVAILABILITY_INTERNAL_UNAVAILABLE + #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_11 + #define __AVAILABILITY_INTERNAL__MAC_10_11 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #else + #define __AVAILABILITY_INTERNAL__MAC_10_11 __AVAILABILITY_INTERNAL_REGULAR + #endif + #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_10_3 __AVAILABILITY_INTERNAL_UNAVAILABLE + #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_10_3 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #else + #define __AVAILABILITY_INTERNAL__MAC_10_10_3 __AVAILABILITY_INTERNAL_REGULAR + #endif + #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_10_2 + #define 
__AVAILABILITY_INTERNAL__MAC_10_10_2 __AVAILABILITY_INTERNAL_UNAVAILABLE + #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_10_2 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #else + #define __AVAILABILITY_INTERNAL__MAC_10_10_2 __AVAILABILITY_INTERNAL_REGULAR + #endif #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_10 #define __AVAILABILITY_INTERNAL__MAC_10_10 __AVAILABILITY_INTERNAL_UNAVAILABLE #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_10 @@ -5775,6 +10851,171 @@ #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10 __AVAILABILITY_INTERNAL__MAC_10_10 #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10 #endif + #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2_MSG(_msg) 
__AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2 
__AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL__MAC_10_5 + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_5 + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL__MAC_10_6 + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_6 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL__MAC_10_8 + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_8 + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL__MAC_10_9 + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_9 + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL__MAC_10_10 + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10 + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2 __AVAILABILITY_INTERNAL__MAC_10_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10_2 + #endif + #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3_MSG(_msg) 
__AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define 
__AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_5 + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_5 + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_6 + #define 
__AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_6 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_8 + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_8 + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_9 + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_9 + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_10 + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10 + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3 __AVAILABILITY_INTERNAL__MAC_10_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10_3 + #endif + #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_11 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11_MSG(_msg) 
__AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11 
__AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_5 + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_5 + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_6 + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_6 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11 
__AVAILABILITY_INTERNAL__MAC_10_8 + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_8 + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_9 + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_9 + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_10 + #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10 + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11 __AVAILABILITY_INTERNAL__MAC_10_11 + #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_11 + #endif #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_0 #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0 #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_1 @@ -5797,6 +11038,12 @@ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_9 #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_10 #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10 + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10_2 + #define 
__AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_11 + #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_11 #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA __AVAILABILITY_INTERNAL_UNAVAILABLE #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL_UNAVAILABLE #endif diff --git a/EXTERNAL_HEADERS/AvailabilityMacros.h b/EXTERNAL_HEADERS/AvailabilityMacros.h index 629697908..9ff820a80 100644 --- a/EXTERNAL_HEADERS/AvailabilityMacros.h +++ b/EXTERNAL_HEADERS/AvailabilityMacros.h @@ -100,36 +100,37 @@ #define MAC_OS_X_VERSION_10_8 1080 #define MAC_OS_X_VERSION_10_9 1090 #define MAC_OS_X_VERSION_10_10 101000 +#define MAC_OS_X_VERSION_10_10_2 101002 +#define MAC_OS_X_VERSION_10_10_3 101003 +#define MAC_OS_X_VERSION_10_11 101100 /* - * If min OS not specified, assume 10.1 for ppc and 10.4 for all others - * Note: gcc driver may set _ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED_ based on MACOSX_DEPLOYMENT_TARGET environment variable + * If min OS not specified, assume 10.4 for intel + * Note: compiler driver may set _ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED_ based on MACOSX_DEPLOYMENT_TARGET environment variable */ #ifndef MAC_OS_X_VERSION_MIN_REQUIRED #ifdef __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ #if (__i386__ || __x86_64__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < MAC_OS_X_VERSION_10_4) #warning Building for Intel with Mac OS X Deployment Target < 10.4 is invalid. - #elif __ppc64__ && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < MAC_OS_X_VERSION_10_4) - #warning Building for ppc64 with Mac OS X Deployment Target < 10.4 is invalid. 
#endif #define MAC_OS_X_VERSION_MIN_REQUIRED __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ #else - #if __ppc64__ || __i386__ || __x86_64__ + #if __i386__ || __x86_64__ #define MAC_OS_X_VERSION_MIN_REQUIRED MAC_OS_X_VERSION_10_4 #else #define MAC_OS_X_VERSION_MIN_REQUIRED MAC_OS_X_VERSION_10_1 #endif - #endif + #endif #endif /* - * if max OS not specified, assume larger of (10.10, min) + * if max OS not specified, assume larger of (10.11, min) */ #ifndef MAC_OS_X_VERSION_MAX_ALLOWED - #if MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_10 + #if MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_11 #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_MIN_REQUIRED #else - #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_10_10 + #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_10_11 #endif #endif @@ -1422,6 +1423,639 @@ #endif +/* + * AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER + * + * Used on declarations introduced in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_10_2, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER UNAVAILABLE_ATTRIBUTE +#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER WEAK_IMPORT_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED + * + * Used on declarations introduced in Mac OS X 10.10.2, + * and deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10_2, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED 
DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 + * + * Used on declarations introduced in Mac OS X 10.0, + * but later deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 + * + * Used on declarations introduced in Mac OS X 10.1, + * but later deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 + * + * Used on declarations introduced in Mac OS X 10.2, + * but later deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define 
AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 + * + * Used on declarations introduced in Mac OS X 10.3, + * but later deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 + * + * Used on declarations introduced in Mac OS X 10.4, + * but later deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 
AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 + * + * Used on declarations introduced in Mac OS X 10.5, + * but later deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 + * + * Used on declarations introduced in Mac OS X 10.6, + * but later deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 + * + * Used on declarations introduced in Mac OS X 10.7, + * but later deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) 
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 + * + * Used on declarations introduced in Mac OS X 10.8, + * but later deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 + * + * Used on declarations introduced in Mac OS X 10.9, + * but later deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_9, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 + * + * Used on declarations introduced in Mac OS 
X 10.10, + * but later deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2 AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER +#endif + +/* + * DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2_AND_LATER + * + * Used on types deprecated in Mac OS X 10.10.2 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2 + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2_AND_LATER DEPRECATED_ATTRIBUTE +#else + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2_AND_LATER +#endif + + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER + * + * Used on declarations introduced in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_10_3, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER UNAVAILABLE_ATTRIBUTE +#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER WEAK_IMPORT_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED + * + * Used on declarations introduced in Mac OS X 10.10.3, + * and deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + 
#define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10_3, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 + * + * Used on declarations introduced in Mac OS X 10.0, + * but later deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 + * + * Used on declarations introduced in Mac OS X 10.1, + * but later deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER +#endif + +/* + * 
AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 + * + * Used on declarations introduced in Mac OS X 10.2, + * but later deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 + * + * Used on declarations introduced in Mac OS X 10.3, + * but later deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 + * + * Used on declarations introduced in Mac OS X 10.4, + * but later deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= 
MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 + * + * Used on declarations introduced in Mac OS X 10.5, + * but later deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 + * + * Used on declarations introduced in Mac OS X 10.6, + * but later deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 + * + * Used on declarations introduced in Mac OS X 10.7, + * but later deprecated in Mac 
OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 + * + * Used on declarations introduced in Mac OS X 10.8, + * but later deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 + * + * Used on declarations introduced in Mac OS X 10.9, + * but later deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_9, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 DEPRECATED_ATTRIBUTE +#else + #define 
AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 + * + * Used on declarations introduced in Mac OS X 10.10, + * but later deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 + * + * Used on declarations introduced in Mac OS X 10.10.2, + * but later deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10_2, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3 AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER +#endif + +/* + * DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3_AND_LATER + * + * Used on types deprecated in Mac OS X 10.10.3 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA) +#elif 
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3 + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3_AND_LATER DEPRECATED_ATTRIBUTE +#else + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3_AND_LATER +#endif + + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER + * + * Used on declarations introduced in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER UNAVAILABLE_ATTRIBUTE +#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER WEAK_IMPORT_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER_BUT_DEPRECATED + * + * Used on declarations introduced in Mac OS X 10.11, + * and deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_11, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.0, + * but later deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define 
AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.1, + * but later deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.2, + * but later deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.3, + * but later deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define 
AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.4, + * but later deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.5, + * but later deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER +#endif + +/* + * 
AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.6, + * but later deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.7, + * but later deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.8, + * but later deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define 
AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.9, + * but later deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_9, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.10, + * but later deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.10.2, + * but later deprecated in Mac OS X 10.11 + */ +#if 
__AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10_2, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 + * + * Used on declarations introduced in Mac OS X 10.10.3, + * but later deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10_3, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11 AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER +#endif + +/* + * DEPRECATED_IN_MAC_OS_X_VERSION_10_11_AND_LATER + * + * Used on types deprecated in Mac OS X 10.11 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_11_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_11, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11 + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_11_AND_LATER DEPRECATED_ATTRIBUTE +#else + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_11_AND_LATER +#endif + + #endif /* __AVAILABILITYMACROS__ */ diff --git a/EXTERNAL_HEADERS/Makefile b/EXTERNAL_HEADERS/Makefile index 39cb58f90..edf9fe869 100644 --- 
a/EXTERNAL_HEADERS/Makefile +++ b/EXTERNAL_HEADERS/Makefile @@ -20,6 +20,8 @@ INSTINC_SUBDIRS_X86_64H = \ INSTINC_SUBDIRS_ARM = \ architecture +INSTINC_SUBDIRS_ARM64 = \ + architecture EXPORT_FILES = \ Availability.h \ diff --git a/EXTERNAL_HEADERS/architecture/Makefile b/EXTERNAL_HEADERS/architecture/Makefile index 1054ba8f1..ea393a5bf 100644 --- a/EXTERNAL_HEADERS/architecture/Makefile +++ b/EXTERNAL_HEADERS/architecture/Makefile @@ -18,6 +18,8 @@ INSTINC_SUBDIRS_X86_64H = \ INSTINC_SUBDIRS_ARM = \ arm +INSTINC_SUBDIRS_ARM64 = \ + arm EXPORT_FILES = diff --git a/EXTERNAL_HEADERS/corecrypto/cc.h b/EXTERNAL_HEADERS/corecrypto/cc.h index f44384513..6a05f106c 100644 --- a/EXTERNAL_HEADERS/corecrypto/cc.h +++ b/EXTERNAL_HEADERS/corecrypto/cc.h @@ -2,8 +2,9 @@ * cc.h * corecrypto * - * Created by Michael Brouwer on 12/16/10. - * Copyright 2010,2011 Apple Inc. All rights reserved. + * Created on 12/16/2010 + * + * Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved. * */ @@ -14,11 +15,14 @@ #include #include +/* Manage asserts here because a few functions in header public files do use asserts */ +#define cc_assert(x) assert(x) #if CC_KERNEL #include +#elif CC_USE_S3 +#define assert(args) // No assert in S3 #else #include -#include #endif /* Declare a struct element with a guarenteed alignment of _alignment_. @@ -45,26 +49,39 @@ #define cc_zero(_size_,_data_) memset((_data_),0 ,(_size_)) #endif -#if CC_KERNEL -#define cc_printf(x...) printf(x) -#else -#define cc_printf(x...) fprintf(stderr, x) -#endif - -#define cc_assert(x) assert(x) +/* cc_clear: + Set "len" bytes of memory to zero at address "dst". + cc_clear has been developed so that it won't be optimized out. + To be used to clear key buffers or sensitive data. 
+*/ +CC_NONNULL2 +void cc_clear(size_t len, void *dst); #define cc_copy(_size_, _dst_, _src_) memcpy(_dst_, _src_, _size_) CC_INLINE CC_NONNULL2 CC_NONNULL3 CC_NONNULL4 void cc_xor(size_t size, void *r, const void *s, const void *t) { uint8_t *_r=(uint8_t *)r; - const uint8_t *_s=(uint8_t *)s; - const uint8_t *_t=(uint8_t *)t; + const uint8_t *_s=(const uint8_t *)s; + const uint8_t *_t=(const uint8_t *)t; while (size--) { _r[size] = _s[size] ^ _t[size]; } } +/* cc_cmp_safe: + Compare "num" bytes pointed to by ptr1 and ptr2, arrays of identical size. + Functional behavior: Returns 0 if the "num" bytes starting at ptr1 are identical to the "num" + bytes starting at ptr2. + Return !=0 if they are different or if "num" is 0 (empty arrays) + Security: The execution time/cycles is *independent* of the data and therefore guarantees + no leak of information about the data. + However, the execution time depends on "num". +*/ +CC_NONNULL2 CC_NONNULL3 +int cc_cmp_safe (size_t num, const void * ptr1, const void * ptr2); + + /* Exchange S and T of any type. NOTE: Both and S and T are evaluated mutliple times and MUST NOT be expressions. */ #define CC_SWAP(S,T) do { \ diff --git a/EXTERNAL_HEADERS/corecrypto/cc_config.h b/EXTERNAL_HEADERS/corecrypto/cc_config.h index 9149edb00..45979d8cf 100644 --- a/EXTERNAL_HEADERS/corecrypto/cc_config.h +++ b/EXTERNAL_HEADERS/corecrypto/cc_config.h @@ -2,8 +2,9 @@ * cc_config.h * corecrypto * - * Created by Michael Brouwer on 10/18/10. - * Copyright 2010,2011 Apple Inc. All rights reserved. + * Created on 11/16/2010 + * + * Copyright (c) 2010,2011,2012,2013,2014,2015 Apple Inc. All rights reserved.
* */ @@ -44,7 +45,7 @@ */ -#if defined(DEBUG) && (DEBUG) +#if (defined(DEBUG) && (DEBUG)) /* CC_DEBUG is already used in CommonCrypto */ #define CORECRYPTO_DEBUG 1 #else @@ -52,17 +53,37 @@ #endif #if defined(KERNEL) && (KERNEL) -#define CC_KERNEL 1 +#define CC_KERNEL 1 // KEXT, XNU repo or kernel components such as AppleKeyStore #else #define CC_KERNEL 0 #endif +// LINUX_BUILD_TEST is for sanity check of the configuration +// > xcodebuild -scheme "corecrypto_test" OTHER_CFLAGS="$(values) -DLINUX_BUILD_TEST" +#if defined(__linux__) || defined(LINUX_BUILD_TEST) +#define CC_LINUX 1 +#else +#define CC_LINUX 0 +#endif + #if defined(USE_L4) && (USE_L4) #define CC_USE_L4 1 #else #define CC_USE_L4 0 #endif +#if defined(USE_SEPROM) && (USE_SEPROM) +#define CC_USE_SEPROM 1 +#else +#define CC_USE_SEPROM 0 +#endif + +#if defined(USE_S3) && (USE_S3) +#define CC_USE_S3 1 +#else +#define CC_USE_S3 0 +#endif + #if defined(MAVERICK) && (MAVERICK) #define CC_MAVERICK 1 #else @@ -121,12 +142,20 @@ #define CCN_OSX 1 #endif +#if CC_USE_L4 || CC_USE_S3 /* No dynamic linking allowed in L4, e.g. avoid nonlazy symbols */ -/* For corecrypto kext, CC_STATIC should be 0 */ -#if CC_USE_L4 +/* For corecrypto kext, CC_STATIC should be undefined */ #define CC_STATIC 1 #endif +#if CC_USE_L4 || CC_IBOOT +/* For L4, stack is too short, need to use HEAP for some computations */ +/* CC_USE_HEAP_FOR_WORKSPACE not supported for KERNEL! 
*/ +#define CC_USE_HEAP_FOR_WORKSPACE 1 +#else +#define CC_USE_HEAP_FOR_WORKSPACE 0 +#endif + /* L4 do not have bzero, neither does hexagon of ARMCC even with gnu compatibility mode */ #if CC_USE_L4 || defined(__CC_ARM) || defined(__hexagon__) #define CC_HAS_BZERO 0 @@ -134,9 +163,18 @@ #define CC_HAS_BZERO 1 #endif -#if defined(__CC_ARM) || defined(__hexagon__) -// ARMASM.exe does not to like the file syntax of the asm implementation +/* memset_s is only available on a few targets */ +#if CC_USE_L4 || CC_KERNEL || CC_IBOOT || CC_USE_SEPROM || defined(__CC_ARM) || defined(__hexagon__) +#define CC_HAS_MEMSET_S 0 +#else +#define CC_HAS_MEMSET_S 1 +#endif + +#if defined(__CC_ARM) || defined(__hexagon__) || CC_LINUX || defined(__NO_ASM__) +// ARMASM.exe does not like the file syntax of the asm implementation +#define CCN_DEDICATED_SQR 1 +#define CCN_MUL_KARATSUBA 1 // 4*n CCN_UNIT extra memory required. #define CCN_ADD_ASM 0 #define CCN_SUB_ASM 0 #define CCN_MUL_ASM 0 @@ -150,28 +188,40 @@ #define CCAES_ARM 0 #define CCAES_INTEL 0 #define CCN_USE_BUILTIN_CLZ 0 +#if !defined(__NO_ASM__) #define CCSHA1_VNG_INTEL 0 #define CCSHA2_VNG_INTEL 0 +#define CCSHA1_VNG_ARMV7NEON 0 +#define CCSHA2_VNG_ARMV7NEON 0 +#endif +#define CCAES_MUX 0 #elif defined(__x86_64__) || defined(__i386__) - +#define CCN_DEDICATED_SQR 1 +#define CCN_MUL_KARATSUBA 1 // 4*n CCN_UNIT extra memory required. /* These assembly routines only work for a single CCN_UNIT_SIZE.
*/ #if (defined(__x86_64__) && CCN_UNIT_SIZE == 8) || (defined(__i386__) && CCN_UNIT_SIZE == 4) #define CCN_ADD_ASM 1 #define CCN_SUB_ASM 1 -#define CCN_MUL_ASM 1 +#define CCN_MUL_ASM 0 #else #define CCN_ADD_ASM 0 #define CCN_SUB_ASM 0 #define CCN_MUL_ASM 0 #endif +#if (defined(__x86_64__) && CCN_UNIT_SIZE == 8) +#define CCN_CMP_ASM 1 +#define CCN_N_ASM 1 +#else +#define CCN_CMP_ASM 0 +#define CCN_N_ASM 0 +#endif + #define CCN_ADDMUL1_ASM 0 #define CCN_MUL1_ASM 0 -#define CCN_CMP_ASM 0 #define CCN_ADD1_ASM 0 #define CCN_SUB1_ASM 0 -#define CCN_N_ASM 0 #define CCN_SET_ASM 0 #define CCAES_ARM 0 #define CCAES_INTEL 1 @@ -183,7 +233,8 @@ #define CCSHA2_VNG_ARMV7NEON 0 #else - +#define CCN_DEDICATED_SQR 1 +#define CCN_MUL_KARATSUBA 1 // 4*n CCN_UNIT extra memory required. #define CCN_ADD_ASM 0 #define CCN_SUB_ASM 0 #define CCN_MUL_ASM 0 @@ -205,9 +256,6 @@ #endif /* !defined(__i386__) */ -#define CCN_N_INLINE 0 -#define CCN_CMP_INLINE 0 - #define CC_INLINE static inline #ifdef __GNUC__ @@ -273,4 +321,5 @@ #define CC_MALLOC #endif /* !__GNUC__ */ + #endif /* _CORECRYPTO_CC_CONFIG_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/cc_debug.h b/EXTERNAL_HEADERS/corecrypto/cc_debug.h new file mode 100644 index 000000000..a04402247 --- /dev/null +++ b/EXTERNAL_HEADERS/corecrypto/cc_debug.h @@ -0,0 +1,66 @@ +/* + * cc_debug.h + * corecrypto + * + * Created on 01/25/2012 + * + * Copyright (c) 2012,2014,2015 Apple Inc. All rights reserved. + * + */ + +//debug configuration header file +#ifndef _CORECRYPTO_CCN_DEBUG_H_ +#define _CORECRYPTO_CCN_DEBUG_H_ + +#include + +// DO NOT INCLUDE this HEADER file in CoreCrypto files added for XNU project or headers +// included by external clients. + +// ======================== +// Printf for corecrypto +// ======================== +#if CC_KERNEL +#include +#define cc_printf(x...) printf(x) +extern int printf(const char *format, ...) __printflike(1,2); +#elif CC_USE_S3 +#define cc_printf(x...) 
printf(x) +#else +#include +#define cc_printf(x...) fprintf(stderr, x) +#endif + +// ======================== +// Integer types +// ======================== + +#if CC_KERNEL +/* Those are not defined in libkern */ +#define PRIx64 "llx" +#define PRIx32 "x" +#define PRIx16 "hx" +#define PRIx8 "hhx" +#else +#include +#endif + +#if CCN_UNIT_SIZE == 8 +#define CCPRIx_UNIT ".016" PRIx64 +#elif CCN_UNIT_SIZE == 4 +#define CCPRIx_UNIT ".08" PRIx32 +#elif CCN_UNIT_SIZE == 2 +#define CCPRIx_UNIT ".04" PRIx16 +#elif CCN_UNIT_SIZE == 1 +#define CCPRIx_UNIT ".02" PRIx8 +#else +#error invalid CCN_UNIT_SIZE +#endif + +// ======================== +// Print utilities for corecrypto +// ======================== +/* Print a byte array of arbitrary size */ +void cc_print(const char *label, unsigned long count, const uint8_t *s); + +#endif /* _CORECRYPTO_CCN_DEBUG_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/cc_macros.h b/EXTERNAL_HEADERS/corecrypto/cc_macros.h new file mode 100644 index 000000000..4d0b0be38 --- /dev/null +++ b/EXTERNAL_HEADERS/corecrypto/cc_macros.h @@ -0,0 +1,80 @@ +/* + * cc_macros.h + * corecrypto + * + * Created on 01/11/2012 + * + * Copyright (c) 2012,2015 Apple Inc. All rights reserved. + * + */ + +#ifndef _CORECRYPTO_CC_MACROS_H_ +#define _CORECRYPTO_CC_MACROS_H_ + +#include + +#ifndef __CC_DEBUG_ASSERT_COMPONENT_NAME_STRING +#define __CC_DEBUG_ASSERT_COMPONENT_NAME_STRING "" +#endif + +#ifndef __CC_DEBUG_ASSERT_PRODUCTION_CODE +#define __CC_DEBUG_ASSERT_PRODUCTION_CODE !CORECRYPTO_DEBUG +#endif + +#ifndef __CC_DEBUG_ASSERT_MESSAGE +#define __CC_DEBUG_ASSERT_MESSAGE(name, assertion, label, message, file, line, value) \ +cc_printf( "CCAssertMacros: %s, %s file: %s, line: %d\n", assertion, (message!=0) ? 
message : "", file, line); +#endif + +#ifndef cc_require +#if __CC_DEBUG_ASSERT_PRODUCTION_CODE + #define cc_require(assertion, exceptionLabel) \ + do { \ + if ( __builtin_expect(!(assertion), 0) ) { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#else + #define cc_require(assertion, exceptionLabel) \ + do { \ + if ( __builtin_expect(!(assertion), 0) ) { \ + __CC_DEBUG_ASSERT_MESSAGE(__CC_DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, #exceptionLabel, 0, __FILE__, __LINE__, 0); \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif +#endif + +#ifndef cc_require_action +#if __CC_DEBUG_ASSERT_PRODUCTION_CODE + #define cc_require_action(assertion, exceptionLabel, action) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#else + #define cc_require_action(assertion, exceptionLabel, action) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + __CC_DEBUG_ASSERT_MESSAGE( \ + __CC_DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, #exceptionLabel, 0, __FILE__, __LINE__, 0); \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif +#endif + +#endif /* _CORECRYPTO_CC_MACROS_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/cc_priv.h b/EXTERNAL_HEADERS/corecrypto/cc_priv.h index fbfadddcc..2d0a47a5b 100644 --- a/EXTERNAL_HEADERS/corecrypto/cc_priv.h +++ b/EXTERNAL_HEADERS/corecrypto/cc_priv.h @@ -2,8 +2,9 @@ * cc_priv.h * corecrypto * - * Created by Michael Brouwer on 12/1/10. - * Copyright 2010,2011 Apple Inc. All rights reserved. + * Created on 12/01/2010 + * + * Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved. * */ @@ -18,7 +19,7 @@ CC_MEMCPY : optimized memcpy. CC_MEMMOVE : optimized memmove. CC_MEMSET : optimized memset. - CC_BZERO : optimized bzero. + CC_BZERO : optimized bzero, CC_STORE32_BE : store 32 bit value in big endian in unaligned buffer. 
CC_STORE32_LE : store 32 bit value in little endian in unaligned buffer. @@ -71,7 +72,7 @@ The following are not defined yet... define them if needed. #define CC_MEMCPY(D,S,L) memcpy((D),(S),(L)) #define CC_MEMMOVE(D,S,L) memmove((D),(S),(L)) #define CC_MEMSET(D,V,L) memset((D),(V),(L)) -#define CC_BZERO(D,L) memset((D),0,(L)) +#define CC_BZERO(D,L) memset((D),0,(L)) // Deprecated, DO NOT USE // MARK: - Loads and Store @@ -88,10 +89,10 @@ The following are not defined yet... define them if needed. } while(0) #define CC_LOAD32_LE(x, y) do { \ -x = ((uint32_t)(((unsigned char *)(y))[3] & 255)<<24) | \ - ((uint32_t)(((unsigned char *)(y))[2] & 255)<<16) | \ - ((uint32_t)(((unsigned char *)(y))[1] & 255)<<8) | \ - ((uint32_t)(((unsigned char *)(y))[0] & 255)); \ +x = ((uint32_t)(((const unsigned char *)(y))[3] & 255)<<24) | \ + ((uint32_t)(((const unsigned char *)(y))[2] & 255)<<16) | \ + ((uint32_t)(((const unsigned char *)(y))[1] & 255)<<8) | \ + ((uint32_t)(((const unsigned char *)(y))[0] & 255)); \ } while(0) // MARK: -- 64 bits - little endian @@ -108,14 +109,14 @@ x = ((uint32_t)(((unsigned char *)(y))[3] & 255)<<24) | \ } while(0) #define CC_LOAD64_LE(x, y) do { \ -x = (((uint64_t)(((unsigned char *)(y))[7] & 255))<<56) | \ - (((uint64_t)(((unsigned char *)(y))[6] & 255))<<48) | \ - (((uint64_t)(((unsigned char *)(y))[5] & 255))<<40) | \ - (((uint64_t)(((unsigned char *)(y))[4] & 255))<<32) | \ - (((uint64_t)(((unsigned char *)(y))[3] & 255))<<24) | \ - (((uint64_t)(((unsigned char *)(y))[2] & 255))<<16) | \ - (((uint64_t)(((unsigned char *)(y))[1] & 255))<<8) | \ - (((uint64_t)(((unsigned char *)(y))[0] & 255))); \ +x = (((uint64_t)(((const unsigned char *)(y))[7] & 255))<<56) | \ + (((uint64_t)(((const unsigned char *)(y))[6] & 255))<<48) | \ + (((uint64_t)(((const unsigned char *)(y))[5] & 255))<<40) | \ + (((uint64_t)(((const unsigned char *)(y))[4] & 255))<<32) | \ + (((uint64_t)(((const unsigned char *)(y))[3] & 255))<<24) | \ + (((uint64_t)(((const 
unsigned char *)(y))[2] & 255))<<16) | \ + (((uint64_t)(((const unsigned char *)(y))[1] & 255))<<8) | \ + (((uint64_t)(((const unsigned char *)(y))[0] & 255))); \ } while(0) // MARK: -- 32 bits - big endian @@ -146,10 +147,10 @@ x = (((uint64_t)(((unsigned char *)(y))[7] & 255))<<56) | \ } while(0) #define CC_LOAD32_BE(x, y) do { \ -x = ((uint32_t)(((unsigned char *)(y))[0] & 255)<<24) | \ - ((uint32_t)(((unsigned char *)(y))[1] & 255)<<16) | \ - ((uint32_t)(((unsigned char *)(y))[2] & 255)<<8) | \ - ((uint32_t)(((unsigned char *)(y))[3] & 255)); \ +x = ((uint32_t)(((const unsigned char *)(y))[0] & 255)<<24) | \ + ((uint32_t)(((const unsigned char *)(y))[1] & 255)<<16) | \ + ((uint32_t)(((const unsigned char *)(y))[2] & 255)<<8) | \ + ((uint32_t)(((const unsigned char *)(y))[3] & 255)); \ } while(0) #endif @@ -189,14 +190,14 @@ __asm__ __volatile__ ( \ } while(0) #define CC_LOAD64_BE(x, y) do { \ -x = (((uint64_t)(((unsigned char *)(y))[0] & 255))<<56) | \ - (((uint64_t)(((unsigned char *)(y))[1] & 255))<<48) | \ - (((uint64_t)(((unsigned char *)(y))[2] & 255))<<40) | \ - (((uint64_t)(((unsigned char *)(y))[3] & 255))<<32) | \ - (((uint64_t)(((unsigned char *)(y))[4] & 255))<<24) | \ - (((uint64_t)(((unsigned char *)(y))[5] & 255))<<16) | \ - (((uint64_t)(((unsigned char *)(y))[6] & 255))<<8) | \ - (((uint64_t)(((unsigned char *)(y))[7] & 255))); \ +x = (((uint64_t)(((const unsigned char *)(y))[0] & 255))<<56) | \ + (((uint64_t)(((const unsigned char *)(y))[1] & 255))<<48) | \ + (((uint64_t)(((const unsigned char *)(y))[2] & 255))<<40) | \ + (((uint64_t)(((const unsigned char *)(y))[3] & 255))<<32) | \ + (((uint64_t)(((const unsigned char *)(y))[4] & 255))<<24) | \ + (((uint64_t)(((const unsigned char *)(y))[5] & 255))<<16) | \ + (((uint64_t)(((const unsigned char *)(y))[6] & 255))<<8) | \ + (((uint64_t)(((const unsigned char *)(y))[7] & 255))); \ } while(0) #endif @@ -378,35 +379,26 @@ static inline uint32_t CC_BSWAP(uint32_t x) Run in constant time (log2()) 
Useful to run constant time checks */ -#define HEAVISIDE_STEP_UINT64(x) {unsigned long t; \ - t=(((uint64_t)x>>32) | (unsigned long)x); \ - t=((t>>16) | t); \ - t=((t>>8) | t); \ - t=((t>>4) | t); \ - t=((t>>2) | t); \ - t=((t>>1) | t); \ - x=t & 0x1;} - -#define HEAVISIDE_STEP_UINT32(x) {uint16_t t; \ - t=(((unsigned long)x>>16) | (uint16_t)x); \ - t=((t>>8) | t); \ - t=((t>>4) | t); \ - t=((t>>2) | t); \ - t=((t>>1) | t); \ - x=t & 0x1;} - -#define HEAVISIDE_STEP_UINT16(x) {uint8_t t; \ - t=(((uint16_t)x>>8) | (uint8_t)x); \ - t=((t>>4) | t); \ - t=((t>>2) | t); \ - t=((t>>1) | t); \ - x=t & 0x1;} - -#define HEAVISIDE_STEP_UINT8(x) {uint8_t t; \ - t=(((uint8_t)x>>4) | (uint8_t)x); \ - t=((t>>2) | t); \ - t=((t>>1) | t); \ - x=t & 0x1;} +#define HEAVISIDE_STEP_UINT64(x) {uint64_t _t; \ + _t=(((uint64_t)x>>32) | x); \ + _t=(0xFFFFFFFF + (_t & 0xFFFFFFFF)); \ + x=_t >> 32;} + +#define HEAVISIDE_STEP_UINT32(x) {uint32_t _t; \ + _t=(((uint32_t)x>>16) | x); \ + _t=(0xFFFF + (_t & 0xFFFF)); \ + x=_t >> 16;} + +#define HEAVISIDE_STEP_UINT16(x) {uint16_t _t; \ + _t=(((uint16_t)x>>8) | x); \ + _t=(0xFF + (_t & 0xFF)); \ + x=_t >> 8;} + +#define HEAVISIDE_STEP_UINT8(x) {uint8_t _t; \ + _t=(((uint8_t)x>>4) | (uint8_t)x); \ + _t=((_t>>2) | _t); \ + _t=((_t>>1) | _t); \ + x=_t & 0x1;} #define CC_HEAVISIDE_STEP(x) { \ if (sizeof(x) == 1) {HEAVISIDE_STEP_UINT8(x);} \ @@ -416,9 +408,14 @@ static inline uint32_t CC_BSWAP(uint32_t x) else {x=((x==0)?0:1);} \ } +/* Return 1 if x mod 4 =1,2,3, 0 otherwise */ +#define CC_CARRY_2BITS(x) (((x>>1) | x) & 0x1) +#define CC_CARRY_3BITS(x) (((x>>2) | (x>>1) | x) & 0x1) /* Set a variable to the biggest power of 2 which can be represented */ #define MAX_POWER_OF_2(x) ((__typeof__(x))1<<(8*sizeof(x)-1)) - + +#define cc_ceiling(a,b) (((a)+((b)-1))/(b)) +#define CC_BITLEN_TO_BYTELEN(x) cc_ceiling((x), 8) #endif /* _CORECRYPTO_CC_PRIV_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccaes.h b/EXTERNAL_HEADERS/corecrypto/ccaes.h index 
67c4404ca..85adca2fe 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccaes.h +++ b/EXTERNAL_HEADERS/corecrypto/ccaes.h @@ -2,8 +2,9 @@ * ccaes.h * corecrypto * - * Created by Michael Brouwer on 12/10/10. - * Copyright 2010,2011 Apple Inc. All rights reserved. + * Created on 12/10/2010 + * + * Copyright (c) 2010,2011,2012,2013,2015 Apple Inc. All rights reserved. * */ @@ -24,12 +25,21 @@ extern const struct ccmode_ecb ccaes_ltc_ecb_encrypt_mode; extern const struct ccmode_cbc ccaes_gladman_cbc_encrypt_mode; extern const struct ccmode_cbc ccaes_gladman_cbc_decrypt_mode; -#if CCAES_ARM +#if !defined(__NO_ASM__) && CCAES_ARM extern const struct ccmode_ecb ccaes_arm_ecb_encrypt_mode; extern const struct ccmode_ecb ccaes_arm_ecb_decrypt_mode; extern const struct ccmode_cbc ccaes_arm_cbc_encrypt_mode; extern const struct ccmode_cbc ccaes_arm_cbc_decrypt_mode; + +extern const struct ccmode_xts ccaes_arm_xts_encrypt_mode; +extern const struct ccmode_xts ccaes_arm_xts_decrypt_mode; + +extern const struct ccmode_cfb ccaes_arm_cfb_encrypt_mode; +extern const struct ccmode_cfb ccaes_arm_cfb_decrypt_mode; + +extern const struct ccmode_ofb ccaes_arm_ofb_crypt_mode; + #endif #if CCAES_MUX @@ -40,7 +50,7 @@ extern const struct ccmode_cbc *ccaes_ios_mux_cbc_encrypt_mode(void); extern const struct ccmode_cbc *ccaes_ios_mux_cbc_decrypt_mode(void); #endif -#if CCAES_INTEL +#if !defined(__NO_ASM__) && CCAES_INTEL //extern const struct ccmode_ecb ccaes_intel_ecb_encrypt_mode; //extern const struct ccmode_ecb ccaes_intel_ecb_decrypt_mode; diff --git a/EXTERNAL_HEADERS/corecrypto/ccasn1.h b/EXTERNAL_HEADERS/corecrypto/ccasn1.h index 3f67e2e6f..7fe1cc66c 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccasn1.h +++ b/EXTERNAL_HEADERS/corecrypto/ccasn1.h @@ -2,8 +2,9 @@ * ccasn1.h * corecrypto * - * Created by Michael Brouwer on 8/6/10. - * Copyright 2010-2012 Apple Inc. All rights reserved. + * Created on 11/16/2010 + * + * Copyright (c) 2010,2011,2012,2015 Apple Inc. All rights reserved. 
* */ @@ -66,10 +67,6 @@ enum { CCASN1_CONSTRUCTED_SET = CCASN1_SET | CCASN1_CONSTRUCTED, CCASN1_CONSTRUCTED_SEQUENCE = CCASN1_SEQUENCE | CCASN1_CONSTRUCTED, - - // TODO: Remove these 2: */ - // ASN1_INTEGER = 0x02, - ASN1_CONSTRUCTED_SEQUENCE = 0x30 }; typedef union { diff --git a/EXTERNAL_HEADERS/corecrypto/ccder.h b/EXTERNAL_HEADERS/corecrypto/ccder.h index 7c7f08be6..12e940cc0 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccder.h +++ b/EXTERNAL_HEADERS/corecrypto/ccder.h @@ -2,8 +2,9 @@ * ccder.h * corecrypto * - * Created by Michael Brouwer on 2/28/12. - * Copyright 2012 Apple Inc. All rights reserved. + * Created on 03/14/2012 + * + * Copyright (c) 2012,2013,2014,2015 Apple Inc. All rights reserved. * */ @@ -246,7 +247,7 @@ CC_NO_INLINE CC_NONNULL((2, 4)) const uint8_t *ccder_decode_uint(cc_size n, cc_unit *r, const uint8_t *der, const uint8_t *der_end); -CC_NO_INLINE CC_NONNULL((1, 3)) +CC_NO_INLINE CC_NONNULL((3)) const uint8_t *ccder_decode_uint64(uint64_t* r, const uint8_t *der, const uint8_t *der_end); @@ -260,12 +261,12 @@ CC_NO_INLINE CC_NONNULL_TU((1)) CC_NONNULL((3)) const uint8_t *ccder_decode_oid(ccoid_t *oidp, const uint8_t *der, const uint8_t *der_end); -CC_NO_INLINE CC_NONNULL_ALL +CC_NO_INLINE CC_NONNULL((1,2,4)) const uint8_t *ccder_decode_bitstring(const uint8_t **bit_string, size_t *bit_length, const uint8_t *der, const uint8_t *der_end); -CC_NO_INLINE CC_NONNULL_ALL +CC_NO_INLINE CC_NONNULL_TU((4)) CC_NONNULL((1,2,3,5,6,8)) const uint8_t *ccder_decode_eckey(uint64_t *version, size_t *priv_size, const uint8_t **priv_key, ccoid_t *oid, diff --git a/EXTERNAL_HEADERS/corecrypto/ccdes.h b/EXTERNAL_HEADERS/corecrypto/ccdes.h index aff622bfb..6ca3c2a1e 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccdes.h +++ b/EXTERNAL_HEADERS/corecrypto/ccdes.h @@ -2,8 +2,9 @@ * ccdes.h * corecrypto * - * Created by Fabrice Gautier on 12/20/10. - * Copyright 2010 Apple, Inc. All rights reserved. + * Created on 12/20/2010 + * + * Copyright (c) 2010,2012,2015 Apple Inc. 
All rights reserved. * */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccdigest.h b/EXTERNAL_HEADERS/corecrypto/ccdigest.h index 9079c4a18..0857678ff 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccdigest.h +++ b/EXTERNAL_HEADERS/corecrypto/ccdigest.h @@ -2,8 +2,9 @@ * ccdigest.h * corecrypto * - * Created by Michael Brouwer on 11/30/10. - * Copyright 2010,2011 Apple Inc. All rights reserved. + * Created on 11/30/2010 + * + * Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved. * */ @@ -73,11 +74,11 @@ struct ccdigest_info { size_t _block_size_, named _name_. Can be used in structs or on the stack. */ #define ccdigest_ctx_decl(_state_size_, _block_size_, _name_) cc_ctx_decl(struct ccdigest_ctx, ccdigest_ctx_size(_state_size_, _block_size_), _name_) -#define ccdigest_ctx_clear(_state_size_, _block_size_, _name_) cc_zero(ccdigest_ctx_size(_state_size_, _block_size_), _name_) +#define ccdigest_ctx_clear(_state_size_, _block_size_, _name_) cc_clear(ccdigest_ctx_size(_state_size_, _block_size_), _name_) /* Declare a ccdigest_ctx for a given size_t _state_size_ and size_t _block_size_, named _name_. Can be used on the stack. */ #define ccdigest_di_decl(_di_, _name_) cc_ctx_decl(struct ccdigest_ctx, ccdigest_di_size(_di_), _name_) -#define ccdigest_di_clear(_di_, _name_) cc_zero(ccdigest_di_size(_di_), _name_) +#define ccdigest_di_clear(_di_, _name_) cc_clear(ccdigest_di_size(_di_), _name_) /* Digest context field accessors. Consider the implementation private. 
*/ @@ -136,34 +137,25 @@ int ccdigest_test_vector(const struct ccdigest_info *di, const struct ccdigest_v int ccdigest_test_chunk_vector(const struct ccdigest_info *di, const struct ccdigest_vector *v, unsigned long chunk); #ifdef USE_SUPER_COOL_NEW_CCOID_T -#define OID_DEF(_NAME_, _VALUE_) _NAME_ {((unsigned char *) _VALUE_)} -#define CC_DIGEST_OID_MD2 {((unsigned char *)"\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x02")} -#define CC_DIGEST_OID_MD4 {((unsigned char *)"\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x04")} -#define CC_DIGEST_OID_MD5 {((unsigned char *)"\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x05")} -#define CC_DIGEST_OID_SHA1 {((unsigned char *)"\x06\x05\x2b\x0e\x03\x02\x1a")} -#define CC_DIGEST_OID_SHA224 {((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04")} -#define CC_DIGEST_OID_SHA256 {((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01")} -#define CC_DIGEST_OID_SHA384 {((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02")} -#define CC_DIGEST_OID_SHA512 {((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03")} -#define CC_DIGEST_OID_RMD128 {((unsigned char *)"\x06\x06\x28\xCF\x06\x03\x00\x32")} -#define CC_DIGEST_OID_RMD160 {((unsigned char *)"\x06\x05\x2B\x24\x03\x02\x01")} -#define CC_DIGEST_OID_RMD256 {((unsigned char *)"\x06\x05\x2B\x24\x03\x02\x03")} -#define CC_DIGEST_OID_RMD320 {((unsigned char *)NULL)} +#define OID_DEF(_VALUE_) {((const unsigned char *) _VALUE_)} #else -#define CC_DIGEST_OID_MD2 "\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x02" -#define CC_DIGEST_OID_MD4 "\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x04" -#define CC_DIGEST_OID_MD5 "\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x05" -#define CC_DIGEST_OID_SHA1 "\x06\x05\x2b\x0e\x03\x02\x1a" -#define CC_DIGEST_OID_SHA224 "\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04" -#define CC_DIGEST_OID_SHA256 "\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01" -#define CC_DIGEST_OID_SHA384 "\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02" -#define CC_DIGEST_OID_SHA512 
"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03" -#define CC_DIGEST_OID_RMD128 "\x06\x06\x28\xCF\x06\x03\x00\x32" -#define CC_DIGEST_OID_RMD160 "\x06\x05\x2B\x24\x03\x02\x01" -#define CC_DIGEST_OID_RMD256 "\x06\x05\x2B\x24\x03\x02\x03" -#define CC_DIGEST_OID_RMD320 NULL +#define OID_DEF(_VALUE_) _VALUE_ #endif +#define CC_DIGEST_OID_MD2 OID_DEF("\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x02") +#define CC_DIGEST_OID_MD4 OID_DEF("\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x04") +#define CC_DIGEST_OID_MD5 OID_DEF("\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x05") +#define CC_DIGEST_OID_SHA1 OID_DEF("\x06\x05\x2b\x0e\x03\x02\x1a") +#define CC_DIGEST_OID_SHA224 OID_DEF("\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04") +#define CC_DIGEST_OID_SHA256 OID_DEF("\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01") +#define CC_DIGEST_OID_SHA384 OID_DEF("\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02") +#define CC_DIGEST_OID_SHA512 OID_DEF("\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03") +#define CC_DIGEST_OID_RMD128 OID_DEF("\x06\x06\x28\xCF\x06\x03\x00\x32") +#define CC_DIGEST_OID_RMD160 OID_DEF("\x06\x05\x2B\x24\x03\x02\x01") +#define CC_DIGEST_OID_RMD256 OID_DEF("\x06\x05\x2B\x24\x03\x02\x03") +#define CC_DIGEST_OID_RMD320 OID_DEF(NULL) + + #ifdef USE_SUPER_COOL_NEW_CCOID_T CC_INLINE CC_NONNULL_TU((1)) CC_NONNULL_TU((2)) bool ccdigest_oid_equal(const struct ccdigest_info *di, ccoid_t oid) { diff --git a/EXTERNAL_HEADERS/corecrypto/ccdigest_priv.h b/EXTERNAL_HEADERS/corecrypto/ccdigest_priv.h index 407a9b19b..fa8d85de6 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccdigest_priv.h +++ b/EXTERNAL_HEADERS/corecrypto/ccdigest_priv.h @@ -2,8 +2,9 @@ * ccdigest_priv.h * corecrypto * - * Created by Fabrice Gautier on 12/7/10. - * Copyright 2010,2011 Apple, Inc. All rights reserved. + * Created on 12/07/2010 + * + * Copyright (c) 2010,2011,2012,2015 Apple Inc. All rights reserved. 
* */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccdrbg.h b/EXTERNAL_HEADERS/corecrypto/ccdrbg.h index 152e0801f..fdf450e13 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccdrbg.h +++ b/EXTERNAL_HEADERS/corecrypto/ccdrbg.h @@ -1,24 +1,11 @@ /* - * Copyright (c) 2007-2010 Apple Inc. All Rights Reserved. + * ccdrbg.h + * corecrypto * - * @APPLE_LICENSE_HEADER_START@ + * Created on 08/17/2010 * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. + * Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved. * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_LICENSE_HEADER_END@ */ /*! @@ -34,16 +21,32 @@ #include #include -/* TODO: Error codes ? 
*/ +/* error codes */ #define CCDRBG_STATUS_OK 0 #define CCDRBG_STATUS_ERROR (-1) #define CCDRBG_STATUS_NEED_RESEED (-2) #define CCDRBG_STATUS_PARAM_ERROR (-3) -CC_INLINE size_t ccdrbg_context_size(const struct ccdrbg_info *drbg) -{ - return drbg->size; -} +/* + * The maximum length of the entropy_input, additional_input (max_additional_input_length), personalization string + * (max_personalization_string_length) and max_number_of_bits_per_request are implementation dependent + * but shall fit in a 32 bit register and be less than or equal to the specified maximum length for the + * selected DRBG mechanism (NIST 800-90A Section 10). + */ + +#define CCDRBG_MAX_ENTROPY_SIZE ((uint32_t)1<<16) +#define CCDRBG_MAX_ADDITIONALINPUT_SIZE ((uint32_t)1<<16) +#define CCDRBG_MAX_PSINPUT_SIZE ((uint32_t)1<<16) +#define CCDRBG_MAX_REQUEST_SIZE ((uint32_t)1<<16) //this is the absolute maximum in NIST 800-90A +#define CCDRBG_RESEED_INTERVAL ((uint64_t)1<<30) // must be able to fit the NIST maximum of 2^48 + + +/* + * The entropyLength is forced to be greater than or equal to the security strength. + * Nonce is not forced. It needs either 0.5*security strength bits of entropy, or a value that is repeated + * less often than a 0.5*security strength bit random string. + * see below or NIST 800-90A for the definition of security strength + */ CC_INLINE int ccdrbg_init(const struct ccdrbg_info *info, struct ccdrbg_state *drbg, @@ -54,33 +57,42 @@ CC_INLINE int ccdrbg_init(const struct ccdrbg_info *info, return info->init(info, drbg, entropyLength, entropy, nonceLength, nonce, psLength, ps); } +/* + * The entropyLength is forced to be greater than or equal to the security strength.
+ */ CC_INLINE int ccdrbg_reseed(const struct ccdrbg_info *info, - struct ccdrbg_state *prng, - unsigned long entropylen, const void *entropy, - unsigned long inlen, const void *in) + struct ccdrbg_state *drbg, + unsigned long entropyLength, const void *entropy, + unsigned long additionalLength, const void *additional) { - return info->reseed(prng, entropylen, entropy, inlen, in); + return info->reseed(drbg, entropyLength, entropy, additionalLength, additional); } CC_INLINE int ccdrbg_generate(const struct ccdrbg_info *info, - struct ccdrbg_state *prng, - unsigned long outlen, void *out, - unsigned long inlen, const void *in) + struct ccdrbg_state *drbg, + unsigned long dataOutLength, void *dataOut, + unsigned long additionalLength, const void *additional) { - return info->generate(prng, outlen, out, inlen, in); + return info->generate(drbg, dataOutLength, dataOut, additionalLength, additional); } CC_INLINE void ccdrbg_done(const struct ccdrbg_info *info, - struct ccdrbg_state *prng) + struct ccdrbg_state *drbg) { - info->done(prng); + info->done(drbg); } +CC_INLINE size_t ccdrbg_context_size(const struct ccdrbg_info *drbg) +{ + return drbg->size; +} -extern struct ccdrbg_info ccdrbg_dummy_info; -extern struct ccdrbg_info ccdrbg_fipssha1_info; +/* + * NIST SP 800-90 CTR_DRBG + * the maximum security strength of the DRBG equals the block size of the corresponding ECB.
+ */ struct ccdrbg_nistctr_custom { const struct ccmode_ecb *ecb; unsigned long keylen; @@ -90,6 +102,10 @@ struct ccdrbg_nistctr_custom { void ccdrbg_factory_nistctr(struct ccdrbg_info *info, const struct ccdrbg_nistctr_custom *custom); +/* + * NIST SP 800-90 HMAC_DRBG + * the maximum security strength of the DRBG is half the output size of the input hash function, and it is internally limited to 256 bits + */ extern struct ccdrbg_info ccdrbg_nistdigest_info; struct ccdrbg_nisthmac_custom { @@ -97,9 +113,12 @@ struct ccdrbg_nisthmac_custom { int strictFIPS; }; -// "class" method on nisthmac dbrg's to ask about their security_strength for a given di -int ccdbrg_nisthmac_security_strength(const struct ccdrbg_nisthmac_custom *custom); - void ccdrbg_factory_nisthmac(struct ccdrbg_info *info, const struct ccdrbg_nisthmac_custom *custom); + +/* + * Dummy DRBG + */ +extern struct ccdrbg_info ccdrbg_dummy_info; + #endif /* _CORECRYPTO_CCDRBG_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccdrbg_impl.h b/EXTERNAL_HEADERS/corecrypto/ccdrbg_impl.h index efa1ef9ba..129f92e7c 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccdrbg_impl.h +++ b/EXTERNAL_HEADERS/corecrypto/ccdrbg_impl.h @@ -2,8 +2,9 @@ * ccdrbg_impl.h * corecrypto * - * Created by James Murphy on 12/9/11. - * Copyright (c) 2011 Apple Inc. All rights reserved. + * Created on 01/03/2012 + * + * Copyright (c) 2012,2015 Apple Inc. All rights reserved. * */ @@ -14,7 +15,7 @@ struct ccdrbg_state; struct ccdrbg_info { - /** Size of the DRBG state in bytes **/ + /*! Size of the DRBG state in bytes **/ size_t size; /** Instantiate the PRNG @@ -30,7 +31,7 @@ struct ccdrbg_info { unsigned long nonceLength, const void* nonce, unsigned long psLength, const void* ps); - /** Add entropy to the PRNG + /*!
Add entropy to the PRNG @param prng The PRNG state @param entropylen Length of entropy @param entropy Entropy bytes @@ -42,7 +43,7 @@ struct ccdrbg_info { unsigned long entropylen, const void *entropy, unsigned long inlen, const void *in); - /** Read from the PRNG in a FIPS Testing compliant manor + /*! Read from the PRNG in a FIPS Testing compliant manner @param prng The PRNG state to read from @param out [out] Where to store the data @param outlen Length of data desired (octets) @@ -54,7 +55,7 @@ struct ccdrbg_info { unsigned long outlen, void *out, unsigned long inlen, const void *in); - /** Terminate a PRNG state + /*! Terminate a PRNG state @param prng The PRNG state to terminate */ void (*done)(struct ccdrbg_state *prng); diff --git a/EXTERNAL_HEADERS/corecrypto/cchmac.h b/EXTERNAL_HEADERS/corecrypto/cchmac.h index 17e295fb6..6e8d5134c 100644 --- a/EXTERNAL_HEADERS/corecrypto/cchmac.h +++ b/EXTERNAL_HEADERS/corecrypto/cchmac.h @@ -2,8 +2,9 @@ * cchmac.h * corecrypto * - * Created by Michael Brouwer on 12/7/10. - * Copyright 2010,2011 Apple Inc. All rights reserved. + * Created on 12/07/2010 + * + * Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved.
* */ @@ -29,7 +30,7 @@ typedef union { #define cchmac_ctx_n(STATE_SIZE, BLOCK_SIZE) ccn_nof_size(cchmac_ctx_size((STATE_SIZE), (BLOCK_SIZE))) #define cchmac_ctx_decl(STATE_SIZE, BLOCK_SIZE, _name_) cc_ctx_decl(struct cchmac_ctx, cchmac_ctx_size(STATE_SIZE, BLOCK_SIZE), _name_) -#define cchmac_ctx_clear(STATE_SIZE, BLOCK_SIZE, _name_) cc_zero(cchmac_ctx_size(STATE_SIZE, BLOCK_SIZE), _name_) +#define cchmac_ctx_clear(STATE_SIZE, BLOCK_SIZE, _name_) cc_clear(cchmac_ctx_size(STATE_SIZE, BLOCK_SIZE), _name_) #define cchmac_di_decl(_di_, _name_) cchmac_ctx_decl((_di_)->state_size, (_di_)->block_size, _name_) #define cchmac_di_clear(_di_, _name_) cchmac_ctx_clear((_di_)->state_size, (_di_)->block_size, _name_) diff --git a/EXTERNAL_HEADERS/corecrypto/ccmd5.h b/EXTERNAL_HEADERS/corecrypto/ccmd5.h index 128522500..602fb0868 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccmd5.h +++ b/EXTERNAL_HEADERS/corecrypto/ccmd5.h @@ -2,8 +2,9 @@ * ccmd5.h * corecrypto * - * Created by Fabrice Gautier on 12/3/10. - * Copyright 2010,2011 Apple Inc. All rights reserved. + * Created on 12/06/2010 + * + * Copyright (c) 2010,2011,2012,2015 Apple Inc. All rights reserved. * */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccmode.h b/EXTERNAL_HEADERS/corecrypto/ccmode.h index 0c7a19479..4a8c78958 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccmode.h +++ b/EXTERNAL_HEADERS/corecrypto/ccmode.h @@ -2,8 +2,9 @@ * ccmode.h * corecrypto * - * Created by Michael Brouwer on 12/6/10. - * Copyright 2010,2011 Apple Inc. All rights reserved. + * Created on 12/07/2010 + * + * Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved. * */ @@ -18,7 +19,7 @@ /* Declare a ecb key named _name_. Pass the size field of a struct ccmode_ecb for _size_. 
*/ #define ccecb_ctx_decl(_size_, _name_) cc_ctx_decl(ccecb_ctx, _size_, _name_) -#define ccecb_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) +#define ccecb_ctx_clear(_size_, _name_) cc_clear(_size_, _name_) CC_INLINE size_t ccecb_context_size(const struct ccmode_ecb *mode) { @@ -63,12 +64,12 @@ CC_INLINE void ccecb_one_shot(const struct ccmode_ecb *mode, /* Declare a cbc key named _name_. Pass the size field of a struct ccmode_cbc for _size_. */ #define cccbc_ctx_decl(_size_, _name_) cc_ctx_decl(cccbc_ctx, _size_, _name_) -#define cccbc_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) +#define cccbc_ctx_clear(_size_, _name_) cc_clear(_size_, _name_) /* Declare a cbc iv tweak named _name_. Pass the blocksize field of a struct ccmode_cbc for _size_. */ #define cccbc_iv_decl(_size_, _name_) cc_ctx_decl(cccbc_iv, _size_, _name_) -#define cccbc_iv_clear(_size_, _name_) cc_ctx_clear(cccbc_iv, _size_, _name_) +#define cccbc_iv_clear(_size_, _name_) cc_clear(_size_, _name_) /* Actual symmetric algorithm implementation can provide you one of these. @@ -131,7 +132,7 @@ CC_INLINE void cccbc_one_shot(const struct ccmode_cbc *mode, /* Declare a cfb key named _name_. Pass the size field of a struct ccmode_cfb for _size_. */ #define cccfb_ctx_decl(_size_, _name_) cc_ctx_decl(cccfb_ctx, _size_, _name_) -#define cccfb_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) +#define cccfb_ctx_clear(_size_, _name_) cc_clear(_size_, _name_) CC_INLINE size_t cccfb_context_size(const struct ccmode_cfb *mode) { @@ -171,7 +172,7 @@ CC_INLINE void cccfb_one_shot(const struct ccmode_cfb *mode, /* Declare a cfb8 key named _name_. Pass the size field of a struct ccmode_cfb8 for _size_. 
*/ #define cccfb8_ctx_decl(_size_, _name_) cc_ctx_decl(cccfb8_ctx, _size_, _name_) -#define cccfb8_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) +#define cccfb8_ctx_clear(_size_, _name_) cc_clear(_size_, _name_) CC_INLINE size_t cccfb8_context_size(const struct ccmode_cfb8 *mode) { @@ -210,7 +211,7 @@ CC_INLINE void cccfb8_one_shot(const struct ccmode_cfb8 *mode, /* Declare a ctr key named _name_. Pass the size field of a struct ccmode_ctr for _size_. */ #define ccctr_ctx_decl(_size_, _name_) cc_ctx_decl(ccctr_ctx, _size_, _name_) -#define ccctr_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) +#define ccctr_ctx_clear(_size_, _name_) cc_clear(_size_, _name_) /* This is Integer Counter Mode: The IV is the initial value of the counter that is incremented by 1 for each new block. Use the mode flags to select @@ -254,7 +255,7 @@ CC_INLINE void ccctr_one_shot(const struct ccmode_ctr *mode, /* Declare a ofb key named _name_. Pass the size field of a struct ccmode_ofb for _size_. */ #define ccofb_ctx_decl(_size_, _name_) cc_ctx_decl(ccofb_ctx, _size_, _name_) -#define ccofb_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) +#define ccofb_ctx_clear(_size_, _name_) cc_clear(_size_, _name_) CC_INLINE size_t ccofb_context_size(const struct ccmode_ofb *mode) { @@ -295,12 +296,12 @@ CC_INLINE void ccofb_one_shot(const struct ccmode_ofb *mode, /* Declare a xts key named _name_. Pass the size field of a struct ccmode_xts for _size_. */ #define ccxts_ctx_decl(_size_, _name_) cc_ctx_decl(ccxts_ctx, _size_, _name_) -#define ccxts_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) +#define ccxts_ctx_clear(_size_, _name_) cc_clear(_size_, _name_) /* Declare a xts tweak named _name_. Pass the tweak_size field of a struct ccmode_xts for _size_. 
*/ #define ccxts_tweak_decl(_size_, _name_) cc_ctx_decl(ccxts_tweak, _size_, _name_) -#define ccxts_tweak_clear(_size_, _name_) cc_zero(_size_, _name_) +#define ccxts_tweak_clear(_size_, _name_) cc_clear(_size_, _name_) /* Actual symmetric algorithm implementation can provide you one of these. @@ -363,7 +364,7 @@ CC_INLINE void ccxts_one_shot(const struct ccmode_xts *mode, /* Declare a gcm key named _name_. Pass the size field of a struct ccmode_gcm for _size_. */ #define ccgcm_ctx_decl(_size_, _name_) cc_ctx_decl(ccgcm_ctx, _size_, _name_) -#define ccgcm_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) +#define ccgcm_ctx_clear(_size_, _name_) cc_clear(_size_, _name_) CC_INLINE size_t ccgcm_context_size(const struct ccmode_gcm *mode) { @@ -430,11 +431,11 @@ CC_INLINE void ccgcm_one_shot(const struct ccmode_gcm *mode, /* CCM */ #define ccccm_ctx_decl(_size_, _name_) cc_ctx_decl(ccccm_ctx, _size_, _name_) -#define ccccm_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) +#define ccccm_ctx_clear(_size_, _name_) cc_clear(_size_, _name_) /* Declare a ccm nonce named _name_. Pass the mode->nonce_ctx_size for _size_. */ #define ccccm_nonce_decl(_size_, _name_) cc_ctx_decl(ccccm_nonce, _size_, _name_) -#define ccccm_nonce_clear(_size_, _name_) cc_zero(_size_, _name_) +#define ccccm_nonce_clear(_size_, _name_) cc_clear(_size_, _name_) CC_INLINE size_t ccccm_context_size(const struct ccmode_ccm *mode) @@ -509,7 +510,7 @@ CC_INLINE void ccccm_one_shot(const struct ccmode_ccm *mode, /* Declare a omac key named _name_. Pass the size field of a struct ccmode_omac for _size_. 
*/ #define ccomac_ctx_decl(_size_, _name_) cc_ctx_decl(ccomac_ctx, _size_, _name_) -#define ccomac_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) +#define ccomac_ctx_clear(_size_, _name_) cc_clear(_size_, _name_) CC_INLINE size_t ccomac_context_size(const struct ccmode_omac *mode) { diff --git a/EXTERNAL_HEADERS/corecrypto/ccmode_factory.h b/EXTERNAL_HEADERS/corecrypto/ccmode_factory.h index 8ffe1fbd6..3a29111ae 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccmode_factory.h +++ b/EXTERNAL_HEADERS/corecrypto/ccmode_factory.h @@ -2,8 +2,9 @@ * ccmode_factory.h * corecrypto * - * Created by Fabrice Gautier on 1/21/11. - * Copyright 2011 Apple, Inc. All rights reserved. + * Created on 01/21/2011 + * + * Copyright (c) 2011,2012,2013,2014,2015 Apple Inc. All rights reserved. * */ @@ -13,6 +14,25 @@ #include /* TODO: Remove dependency on this header. */ #include +#if !defined(__NO_ASM__) +#if (defined(__x86_64__) && CCAES_INTEL) || (CCAES_ARM && defined(__ARM_NEON__)) +#define CCMODE_GCM_VNG_SPEEDUP 1 +#define CCMODE_CCM_VNG_SPEEDUP 1 +#else +#define CCMODE_GCM_VNG_SPEEDUP 0 +#define CCMODE_CCM_VNG_SPEEDUP 0 +#endif + +#if ( (defined(__x86_64__) && CCAES_INTEL) \ + || (defined(__arm64__) && CCAES_ARM) \ + || defined(__ARM_NEON__)) // Supported even when not using the ARM AES + +#define CCMODE_CTR_VNG_SPEEDUP 1 +#else +#define CCMODE_CTR_VNG_SPEEDUP 0 +#endif +#endif /* !defined(__NO_ASM__) */ + /* For CBC, direction of underlying ecb is the same as the cbc direction */ #define CCMODE_CBC_FACTORY(_cipher_, _dir_) \ static struct ccmode_cbc cbc_##_cipher_##_##_dir_; \ @@ -170,7 +190,6 @@ void ccmode_cfb_decrypt(cccfb_ctx *ctx, size_t nbytes, const void *in, void *out); void ccmode_cfb_encrypt(cccfb_ctx *ctx, size_t nbytes, const void *in, void *out); - struct _ccmode_cfb_key { const struct ccmode_ecb *ecb; size_t pad_len; @@ -217,7 +236,6 @@ void ccmode_factory_cfb_encrypt(struct ccmode_cfb *cfb, *cfb = cfb_encrypt; } - void ccmode_cfb8_init(const struct ccmode_cfb8 *cfb8, 
cccfb8_ctx *ctx, size_t rawkey_len, const void *rawkey, const void *iv); void ccmode_cfb8_decrypt(cccfb8_ctx *ctx, size_t nbytes, @@ -290,6 +308,22 @@ struct _ccmode_ctr_key { .custom = (ECB_ENCRYPT) \ } +#if !defined(__NO_ASM__) +#if CCMODE_CTR_VNG_SPEEDUP +void ccmode_aes_ctr_crypt_vng(ccctr_ctx *ctx, size_t nbytes, + const void *in, void *out); + +/* Use this to statically initialize a ccmode_ctr object for decryption. */ +#define CCMODE_VNG_AES_CTR_CRYPT(ECB_ENCRYPT) { \ +.size = ccn_sizeof_size(sizeof(struct _ccmode_ctr_key)) + 2 * ccn_sizeof_size((ECB_ENCRYPT)->block_size) + ccn_sizeof_size((ECB_ENCRYPT)->size), \ +.block_size = 1, \ +.init = ccmode_ctr_init, \ +.ctr = ccmode_aes_ctr_crypt_vng, \ +.custom = (ECB_ENCRYPT) \ +} +#endif /* CCMODE_CTR_VNG_SPEEDUP */ +#endif /* !defined(__NO_ASM__) */ + /* Use these function to runtime initialize a ccmode_ctr decrypt object (for example if it's part of a larger structure). Normally you would pass a ecb encrypt mode implementation of some underlying algorithm as the ecb @@ -315,9 +349,6 @@ void ccmode_factory_ctr_crypt(struct ccmode_ctr *ctr, extern const unsigned char gcm_shift_table[256*2]; #endif -#if defined(__x86_64__) || defined(__arm64__) -#define VNG_SPEEDUP 1 -#endif /* Create a gcm key from a gcm mode object.
key must point to at least sizeof(CCMODE_GCM_KEY(ecb)) bytes of free @@ -358,10 +389,15 @@ struct _ccmode_gcm_key { ; #endif /* CCMODE_GCM_TABLES */ -#ifdef VNG_SPEEDUP +#if !defined(__NO_ASM__) +#if CCMODE_GCM_VNG_SPEEDUP +#if !defined(__arm64__) && defined(__ARM_NEON__) + unsigned char Htable[8*2] __attribute__((aligned(16))); +#else unsigned char Htable[16*8*2] __attribute__((aligned(16))); #endif - +#endif /* CCMODE_GCM_VNG_SPEEDUP */ +#endif /* !defined(__NO_ASM__) */ cc_unit u[]; }; @@ -430,6 +466,14 @@ void ccmode_ccm_decrypt(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, c void *out); void ccmode_ccm_encrypt(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, const void *in, void *out); +#if !defined(__NO_ASM__) +#if CCMODE_CCM_VNG_SPEEDUP +void ccmode_ccm_decrypt_vector(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, const void *in, + void *out); +void ccmode_ccm_encrypt_vector(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, const void *in, + void *out); +#endif /* CCMODE_CCM_VNG_SPEEDUP */ +#endif /* !defined(__NO_ASM__) */ void ccmode_ccm_finalize(ccccm_ctx *key, ccccm_nonce *nonce_ctx, void *mac); void ccmode_ccm_reset(ccccm_ctx *key, ccccm_nonce *nonce_ctx); @@ -480,6 +524,39 @@ struct _ccmode_ccm_nonce { .custom = (ECB_ENCRYPT) \ } +#if !defined(__NO_ASM__) +/* for x86_64/arm64 speedup */ +#if CCMODE_CCM_VNG_SPEEDUP +/* Use this to statically initialize a ccmode_ccm object for decryption. 
*/ +#define CCMODE_VNG_CCM_DECRYPT(ECB_ENCRYPT) { \ +.size = ccn_sizeof_size(sizeof(struct _ccmode_ccm_key)) + ccn_sizeof_size((ECB_ENCRYPT)->block_size) + ccn_sizeof_size((ECB_ENCRYPT)->size), \ +.nonce_size = ccn_sizeof_size(sizeof(struct _ccmode_ccm_nonce)), \ +.block_size = 1, \ +.init = ccmode_ccm_init, \ +.set_iv = ccmode_ccm_set_iv, \ +.cbcmac = ccmode_ccm_cbcmac, \ +.ccm = ccmode_ccm_decrypt_vector, \ +.finalize = ccmode_ccm_finalize, \ +.reset = ccmode_ccm_reset, \ +.custom = (ECB_ENCRYPT) \ +} + +/* Use this to statically initialize a ccmode_ccm object for encryption. */ +#define CCMODE_VNG_CCM_ENCRYPT(ECB_ENCRYPT) { \ +.size = ccn_sizeof_size(sizeof(struct _ccmode_ccm_key)) + ccn_sizeof_size((ECB_ENCRYPT)->block_size) + ccn_sizeof_size((ECB_ENCRYPT)->size), \ +.nonce_size = ccn_sizeof_size(sizeof(struct _ccmode_ccm_nonce)), \ +.block_size = 1, \ +.init = ccmode_ccm_init, \ +.set_iv = ccmode_ccm_set_iv, \ +.cbcmac = ccmode_ccm_cbcmac, \ +.ccm = ccmode_ccm_encrypt_vector, \ +.finalize = ccmode_ccm_finalize, \ +.reset = ccmode_ccm_reset, \ +.custom = (ECB_ENCRYPT) \ +} +#endif /* CCMODE_CCM_VNG_SPEEDUP */ +#endif /* !defined(__NO_ASM__) */ + /* Use these function to runtime initialize a ccmode_ccm decrypt object (for example if it's part of a larger structure). 
For CCM you always pass a ecb encrypt mode implementation of some underlying algorithm as the ecb @@ -487,7 +564,11 @@ struct _ccmode_ccm_nonce { CC_INLINE void ccmode_factory_ccm_decrypt(struct ccmode_ccm *ccm, const struct ccmode_ecb *ecb_encrypt) { +#if !defined(__NO_ASM__) && CCMODE_CCM_VNG_SPEEDUP + struct ccmode_ccm ccm_decrypt = CCMODE_VNG_CCM_DECRYPT(ecb_encrypt); +#else struct ccmode_ccm ccm_decrypt = CCMODE_FACTORY_CCM_DECRYPT(ecb_encrypt); +#endif /* CCMODE_CCM_VNG_SPEEDUP */ *ccm = ccm_decrypt; } @@ -498,7 +579,11 @@ void ccmode_factory_ccm_decrypt(struct ccmode_ccm *ccm, CC_INLINE void ccmode_factory_ccm_encrypt(struct ccmode_ccm *ccm, const struct ccmode_ecb *ecb_encrypt) { +#if !defined(__NO_ASM__) && CCMODE_CCM_VNG_SPEEDUP + struct ccmode_ccm ccm_encrypt = CCMODE_VNG_CCM_ENCRYPT(ecb_encrypt); +#else struct ccmode_ccm ccm_encrypt = CCMODE_FACTORY_CCM_ENCRYPT(ecb_encrypt); +#endif /* CCMODE_CCM_VNG_SPEEDUP */ *ccm = ccm_encrypt; } diff --git a/EXTERNAL_HEADERS/corecrypto/ccmode_impl.h b/EXTERNAL_HEADERS/corecrypto/ccmode_impl.h index ce1d1e114..94279d7e4 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccmode_impl.h +++ b/EXTERNAL_HEADERS/corecrypto/ccmode_impl.h @@ -2,8 +2,9 @@ * ccmode_impl.h * corecrypto * - * Created by James Murphy on 12/9/11. - * Copyright (c) 2011 Apple Inc. All rights reserved. + * Created on 12/07/2010 + * + * Copyright (c) 2012,2015 Apple Inc. All rights reserved. * */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccn.h b/EXTERNAL_HEADERS/corecrypto/ccn.h index 3aa1bd8c5..a66d0d618 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccn.h +++ b/EXTERNAL_HEADERS/corecrypto/ccn.h @@ -2,26 +2,25 @@ * ccn.h * corecrypto * - * Created by Michael Brouwer on 7/25/10. - * Copyright 2010,2011 Apple Inc. All rights reserved. + * Created on 11/16/2010 + * + * Copyright (c) 2010,2011,2012,2013,2014,2015 Apple Inc. All rights reserved. * */ #ifndef _CORECRYPTO_CCN_H_ #define _CORECRYPTO_CCN_H_ -#include -#include /* TODO: Get rid of this include in this header. 
*/ +#include #include #include - typedef uint8_t cc_byte; typedef size_t cc_size; #if CCN_UNIT_SIZE == 8 typedef uint64_t cc_unit; // 64 bit unit -//typedef uint128_t cc_dunit; // 128 bit double width unit +typedef unsigned cc_dunit __attribute__((mode(TI))); // 128 bit double width unit #define CCN_LOG2_BITS_PER_UNIT 6 // 2^6 = 64 bits #define CC_UNIT_C(x) UINT64_C(x) #elif CCN_UNIT_SIZE == 4 @@ -56,6 +55,10 @@ typedef const cc_unit *cc2np2_in_t; // 2 * n + 2 unit long mp #define CCN_UNIT_BITS (sizeof(cc_unit) * 8) #define CCN_UNIT_MASK ((cc_unit)~0) +typedef struct { + cc_unit *start; // First cc_unit of the workspace + cc_unit *end; // address and beyond NOT TO BE TOUCHED +} cc_ws,*cc_ws_t; /* Conversions between n sizeof and bits */ @@ -84,6 +87,7 @@ typedef const cc_unit *cc2np2_in_t; // 2 * n + 2 unit long mp #define ccn_bit(_ccn_, _k_) ({__typeof__ (_k_) __k = (_k_); \ 1 & ((_ccn_)[__k / CCN_UNIT_BITS] >> (__k & (CCN_UNIT_BITS - 1)));}) +/* Set the value of bit _k_ of _ccn_ to the value _v_ */ #define ccn_set_bit(_ccn_, _k_, _v_) ({__typeof__ (_k_) __k = (_k_); \ if (_v_) \ (_ccn_)[__k/CCN_UNIT_BITS] |= CC_UNIT_C(1) << (__k & (CCN_UNIT_BITS - 1)); \ @@ -156,12 +160,12 @@ typedef const cc_unit *cc2np2_in_t; // 2 * n + 2 unit long mp 64 bit units respectively. 
*/ #if CCN_UNIT_SIZE == 8 -#define ccn64_32(a1,a0) (((cc_unit)a1) << 32 | ((cc_unit)a0)) +#define ccn64_32(a1,a0) (((const cc_unit)a1) << 32 | ((const cc_unit)a0)) #define ccn32_32(a0) a0 #if __LITTLE_ENDIAN__ -#define ccn32_32_parse(p,i) (((uint32_t *)p)[i]) +#define ccn32_32_parse(p,i) (((const uint32_t *)p)[i]) #else -#define ccn32_32_parse(p,i) (((uint32_t *)p)[i^1]) +#define ccn32_32_parse(p,i) (((const uint32_t *)p)[i^1]) #endif #define ccn32_32_null 0 @@ -255,72 +259,12 @@ typedef const cc_unit *cc2np2_in_t; // 2 * n + 2 unit long mp #define CCN224_N ccn_nof(224) #define CCN256_N ccn_nof(256) #define CCN384_N ccn_nof(384) +#define CCN512_N ccn_nof(512) #define CCN521_N ccn_nof(521) -#if defined(_ARM_ARCH_6) || defined(_ARM_ARCH_7) -#if CCN_USE_BUILTIN_CLZ -CC_INLINE CC_CONST -cc_unit cc_clz(cc_unit data) -{ - return __builtin_clzl(data); -} -#else -CC_INLINE CC_CONST -cc_unit cc_clz(cc_unit data) -{ - __asm__ ("clz %0, %1\n" : "=l" (data) : "l" (data)); - return data; -} -#endif /* CCN_USE_BUILTIN_CLZ */ -#endif /* !defined(_ARM_ARCH_6) && !defined(_ARM_ARCH_7) */ - - -#if CCN_N_INLINE -/* Return the number of used units after stripping leading 0 units. */ -CC_INLINE CC_PURE CC_NONNULL2 -cc_size ccn_n(cc_size n, const cc_unit *s) { -#if 1 - while (n-- && s[n] == 0) {} - return n + 1; -#elif 0 - while (n && s[n - 1] == 0) { - n -= 1; - } - return n; -#else - if (n & 1) { - if (s[n - 1]) - return n; - n &= ~1; - } - if (n & 2) { - cc_unit a[2] = { s[n - 1], s[n - 2] }; - if (a[0]) - return n - 1; - if (a[1]) - return n - 2; - n &= ~2; - } - while (n) { - cc_unit a[4] = { s[n - 1], s[n - 2], s[n - 3], s[n - 4] }; - if (a[0]) - return n - 1; - if (a[1]) - return n - 2; - if (a[2]) - return n - 3; - if (a[3]) - return n - 4; - n -= 4; - } - return n; -#endif -} -#else /* Return the number of used units after stripping leading 0 units. 
*/ CC_PURE CC_NONNULL2 cc_size ccn_n(cc_size n, const cc_unit *s); -#endif /* s >> k -> r return bits shifted out of least significant word in bits [0, n> { N bit, scalar -> N bit } N = n * sizeof(cc_unit) * 8 @@ -361,24 +305,10 @@ size_t ccn_trailing_zeros(cc_size n, const cc_unit *s); #define ccn_is_zero_or_one(_n_, _s_) (((_n_)==0) || ((ccn_n(_n_, _s_) <= 1) && (_s_[0] <= 1))) -#if CCN_CMP_INLINE -CC_INLINE CC_PURE CC_NONNULL((2, 3)) -int ccn_cmp(cc_size n, const cc_unit *s, const cc_unit *t) { - while (n) { - n--; - cc_unit si = s[n]; - cc_unit ti = t[n]; - if (si != ti) - return si > ti ? 1 : -1; - } - return n; -} -#else /* s < t -> return - 1 | s == t -> return 0 | s > t -> return 1 { N bit, N bit -> int } N = n * sizeof(cc_unit) * 8 */ CC_PURE CC_NONNULL((2, 3)) int ccn_cmp(cc_size n, const cc_unit *s, const cc_unit *t); -#endif /* s < t -> return - 1 | s == t -> return 0 | s > t -> return 1 { N bit, M bit -> int } N = ns * sizeof(cc_unit) * 8 M = nt * sizeof(cc_unit) * 8 */ @@ -448,6 +378,13 @@ void ccn_lcm(cc_size n, cc_unit *r2n, const cc_unit *s, const cc_unit *t); CC_NONNULL((2, 3, 4)) void ccn_mul(cc_size n, cc_unit *r_2n, const cc_unit *s, const cc_unit *t); +/* s * t -> r_2n r_2n must not overlap with s nor t + { n bit, n bit -> 2 * n bit } n = count * sizeof(cc_unit) * 8 + { N bit, N bit -> 2N bit } N = ccn_bitsof(n) + Provide a workspace for potential speedup */ +CC_NONNULL((2, 3, 4, 5)) +void ccn_mul_ws(cc_size count, cc_unit *r, const cc_unit *s, const cc_unit *t, cc_ws_t ws); + /* s[0..n) * v -> r[0..n)+return value { N bit, sizeof(cc_unit) * 8 bit -> N + sizeof(cc_unit) * 8 bit } N = n * sizeof(cc_unit) * 8 */ CC_NONNULL((2, 3)) @@ -534,6 +471,19 @@ size_t ccn_write_int_size(cc_size n, const cc_unit *s); CC_NONNULL((2, 4)) void ccn_write_int(cc_size n, const cc_unit *s, size_t out_size, void *out); +#if CCN_DEDICATED_SQR + +/* s^2 -> r + { n bit -> 2 * n bit } */ +CC_NONNULL((2, 3)) +void ccn_sqr(cc_size n, cc_unit *r, const cc_unit *s); + +/* 
s^2 -> r + { n bit -> 2 * n bit } */ +CC_NONNULL((2, 3, 4)) +void ccn_sqr_ws(cc_size n, cc_unit *r, const cc_unit *s, cc_ws_t ws); + +#else /* s^2 -> r { n bit -> 2 * n bit } */ @@ -542,6 +492,15 @@ void ccn_sqr(cc_size n, cc_unit *r, const cc_unit *s) { ccn_mul(n, r, s, s); } +/* s^2 -> r + { n bit -> 2 * n bit } */ +CC_INLINE CC_NONNULL((2, 3, 4)) +void ccn_sqr_ws(cc_size n, cc_unit *r, const cc_unit *s, cc_ws_t ws) { + ccn_mul_ws(n, r, s, s, ws); +} + +#endif + /* s -> r { n bit -> n bit } */ CC_NONNULL((2, 3)) @@ -549,15 +508,17 @@ void ccn_set(cc_size n, cc_unit *r, const cc_unit *s); CC_INLINE CC_NONNULL2 void ccn_zero(cc_size n, cc_unit *r) { - CC_BZERO(r, ccn_sizeof_n(n)); + cc_zero(ccn_sizeof_n(n),r); +} + +CC_INLINE CC_NONNULL2 +void ccn_clear(cc_size n, cc_unit *r) { + cc_clear(ccn_sizeof_n(n),r); } CC_NONNULL2 void ccn_zero_multi(cc_size n, cc_unit *r, ...); -/* Burn (zero fill or otherwise overwrite) n cc_units of stack space. */ -void ccn_burn_stack(cc_size n); - CC_INLINE CC_NONNULL2 void ccn_seti(cc_size n, cc_unit *r, cc_unit v) { /* assert(n > 0); */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccpad.h b/EXTERNAL_HEADERS/corecrypto/ccpad.h index 86001c2e6..451436615 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccpad.h +++ b/EXTERNAL_HEADERS/corecrypto/ccpad.h @@ -2,8 +2,9 @@ * ccpad.h * corecrypto * - * Created by Michael Brouwer on 12/6/10. - * Copyright 2010,2011 Apple Inc. All rights reserved. + * Created on 12/07/2010 + * + * Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved. * */ @@ -62,6 +63,9 @@ size_t ccpad_pkcs7_ecb_decrypt(const struct ccmode_ecb *ecb, ccecb_ctx *ecb_key, void ccpad_pkcs7_ecb_encrypt(const struct ccmode_ecb *ecb, ccecb_ctx *ctx, size_t nbytes, const void *in, void *out); +/* Function common to ccpad_pkcs7_ecb_decrypt and ccpad_pkcs7_decrypt */ +size_t ccpad_pkcs7_decode(const size_t block_size, const uint8_t* last_block); + /* Contract is nbytes is at least 1 block + 1 byte. 
Also in is nbytes long out is nbytes long. */ void ccpad_xts_decrypt(const struct ccmode_xts *xts, ccxts_ctx *ctx, ccxts_tweak *tweak, size_t nbytes, const void *in, void *out); diff --git a/EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h b/EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h index ee980159f..9e296ff19 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h +++ b/EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h @@ -1,10 +1,10 @@ /* - * ccpbkdf.h + * ccpbkdf2.h * corecrypto * - * Copyright 1999-2001, 2010 Apple Inc. All rights reserved. + * Created on 12/15/2010 * - * Derived from pbkdf2.h by Mitch Adler on 09-12-2010. + * Copyright (c) 2010,2011,2012,2015 Apple Inc. All rights reserved. * */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccrc4.h b/EXTERNAL_HEADERS/corecrypto/ccrc4.h index 84204bb35..6e1ec736a 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccrc4.h +++ b/EXTERNAL_HEADERS/corecrypto/ccrc4.h @@ -2,8 +2,9 @@ * ccrc4.h * corecrypto * - * Created by Fabrice Gautier on 12/22/10. - * Copyright 2010,2011 Apple, Inc. All rights reserved. + * Created on 12/22/2010 + * + * Copyright (c) 2010,2011,2012,2013,2014,2015 Apple Inc. All rights reserved. * */ @@ -17,7 +18,7 @@ cc_aligned_struct(16) ccrc4_ctx; /* Declare a rc4 key named _name_. Pass the size field of a struct ccmode_ecb for _size_. */ #define ccrc4_ctx_decl(_size_, _name_) cc_ctx_decl(ccrc4_ctx, _size_, _name_) -#define ccrc4_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) +#define ccrc4_ctx_clear(_size_, _name_) cc_clear(_size_, _name_) struct ccrc4_info { size_t size; /* first argument to ccrc4_ctx_decl(). */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccrng.h b/EXTERNAL_HEADERS/corecrypto/ccrng.h index c748bc6e6..a3291c830 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccrng.h +++ b/EXTERNAL_HEADERS/corecrypto/ccrng.h @@ -2,8 +2,9 @@ * ccrng.h * corecrypto * - * Created by Fabrice Gautier on 12/13/10. - * Copyright 2010 Apple, Inc. All rights reserved. + * Created on 12/13/2010 + * + * Copyright (c) 2010,2011,2013,2014,2015 Apple Inc. 
All rights reserved. * */ @@ -12,12 +13,14 @@ #include -#define CC_ERR_DEVICE -100 -#define CC_ERR_INTERUPTS -101 -#define CC_ERR_CRYPTO_CONFIG -102 -#define CC_ERR_PERMS -103 -#define CC_ERR_PARAMETER -104 -#define CC_ERR_MEMORY -105 +#define CC_ERR_DEVICE -100 +#define CC_ERR_INTERUPTS -101 +#define CC_ERR_CRYPTO_CONFIG -102 +#define CC_ERR_PERMS -103 +#define CC_ERR_PARAMETER -104 +#define CC_ERR_MEMORY -105 +#define CC_ERR_FILEDESC -106 +#define CC_ERR_OUT_OF_ENTROPY -107 #define CCRNG_STATE_COMMON \ int (*generate)(struct ccrng_state *rng, unsigned long outlen, void *out); diff --git a/EXTERNAL_HEADERS/corecrypto/ccrng_system.h b/EXTERNAL_HEADERS/corecrypto/ccrng_system.h index 3ecc428f7..b6c8c06fd 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccrng_system.h +++ b/EXTERNAL_HEADERS/corecrypto/ccrng_system.h @@ -2,8 +2,9 @@ * ccrng_system.h * corecrypto * - * Created by Fabrice Gautier on 12/13/10. - * Copyright 2010 Apple, Inc. All rights reserved. + * Created on 12/13/2010 + * + * Copyright (c) 2010,2013,2014,2015 Apple Inc. All rights reserved. * */ @@ -17,8 +18,11 @@ struct ccrng_system_state { int fd; }; +// Setup the system RNG (open descriptor on file /dev/random) int ccrng_system_init(struct ccrng_system_state *rng); +// Close the system RNG +// Mandatory step to avoid leaking file descriptor void ccrng_system_done(struct ccrng_system_state *rng); #endif /* _CORECRYPTO_CCRNG_SYSTEM_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccsha1.h b/EXTERNAL_HEADERS/corecrypto/ccsha1.h index 8e4480168..1990c197e 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccsha1.h +++ b/EXTERNAL_HEADERS/corecrypto/ccsha1.h @@ -2,8 +2,9 @@ * ccsha1.h * corecrypto * - * Created by Michael Brouwer on 12/1/10. - * Copyright 2010,2011 Apple Inc. All rights reserved. + * Created on 12/01/2010 + * + * Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved. 
* */ @@ -31,13 +32,13 @@ void ccsha1_final(const struct ccdigest_info *di, ccdigest_ctx_t, extern const struct ccdigest_info ccsha1_ltc_di; extern const struct ccdigest_info ccsha1_eay_di; -#if CCSHA1_VNG_INTEL +#if !defined(__NO_ASM__) && CCSHA1_VNG_INTEL //extern const struct ccdigest_info ccsha1_vng_intel_di; extern const struct ccdigest_info ccsha1_vng_intel_SupplementalSSE3_di; extern const struct ccdigest_info ccsha1_vng_intel_NOSupplementalSSE3_di; #endif -#if CCSHA1_VNG_ARMV7NEON +#if !defined(__NO_ASM__) && CCSHA1_VNG_ARMV7NEON extern const struct ccdigest_info ccsha1_vng_armv7neon_di; #endif diff --git a/EXTERNAL_HEADERS/corecrypto/ccsha2.h b/EXTERNAL_HEADERS/corecrypto/ccsha2.h index 5f55b9f40..2029e327b 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccsha2.h +++ b/EXTERNAL_HEADERS/corecrypto/ccsha2.h @@ -2,8 +2,9 @@ * ccsha2.h * corecrypto * - * Created by Fabrice Gautier on 12/3/10. - * Copyright 2010,2011 Apple Inc. All rights reserved. + * Created on 12/03/2010 + * + * Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved. 
* */ @@ -37,7 +38,7 @@ const struct ccdigest_info *ccsha512_di(void); #define CCSHA256_OUTPUT_SIZE 32 #define CCSHA256_STATE_SIZE 32 extern const struct ccdigest_info ccsha256_ltc_di; -#if CCSHA2_VNG_INTEL +#if !defined(__NO_ASM__) && CCSHA2_VNG_INTEL #if defined __x86_64__ extern const struct ccdigest_info ccsha256_vng_intel_AVX2_di; extern const struct ccdigest_info ccsha256_vng_intel_AVX1_di; @@ -45,7 +46,7 @@ extern const struct ccdigest_info ccsha256_vng_intel_AVX1_di; extern const struct ccdigest_info ccsha256_vng_intel_SupplementalSSE3_di; extern const struct ccdigest_info ccsha256_vng_intel_NOSupplementalSSE3_di; #endif -#if CCSHA2_VNG_ARMV7NEON +#if !defined(__NO_ASM__) && CCSHA2_VNG_ARMV7NEON extern const struct ccdigest_info ccsha256_vng_armv7neon_di; #endif extern const uint32_t ccsha256_K[64]; @@ -53,9 +54,13 @@ extern const uint32_t ccsha256_K[64]; /* SHA224 */ #define CCSHA224_OUTPUT_SIZE 28 extern const struct ccdigest_info ccsha224_ltc_di; +#if !defined(__NO_ASM__) && CCSHA2_VNG_INTEL extern const struct ccdigest_info ccsha224_vng_intel_SupplementalSSE3_di; extern const struct ccdigest_info ccsha224_vng_intel_NOSupplementalSSE3_di; +#endif +#if !defined(__NO_ASM__) && CCSHA2_VNG_ARMV7NEON extern const struct ccdigest_info ccsha224_vng_armv7neon_di; +#endif /* SHA512 */ #define CCSHA512_BLOCK_SIZE 128 diff --git a/EXTERNAL_HEADERS/mach-o/loader.h b/EXTERNAL_HEADERS/mach-o/loader.h index d26ad14e3..aba7cb7f6 100644 --- a/EXTERNAL_HEADERS/mach-o/loader.h +++ b/EXTERNAL_HEADERS/mach-o/loader.h @@ -207,6 +207,9 @@ struct mach_header_64 { require it. Only used in MH_EXECUTE filetypes. */ +#define MH_APP_EXTENSION_SAFE 0x02000000 /* The code was linked for use in an + application extension. */ + /* * The load commands directly follow the mach_header. The total size of all * of the commands is given by the sizeofcmds field in the mach_header. 
All @@ -295,7 +298,9 @@ struct load_command { #define LC_SOURCE_VERSION 0x2A /* source version used to build binary */ #define LC_DYLIB_CODE_SIGN_DRS 0x2B /* Code signing DRs copied from linked dylibs */ #define LC_ENCRYPTION_INFO_64 0x2C /* 64-bit encrypted segment information */ - +#define LC_LINKER_OPTION 0x2D /* linker options in MH_OBJECT files */ +#define LC_LINKER_OPTIMIZATION_HINT 0x2E /* optimization hints in MH_OBJECT files */ +#define LC_VERSION_MIN_WATCHOS 0x30 /* build for Watch min OS version */ /* * A variable length string in a load command is represented by an lc_str @@ -1156,7 +1161,8 @@ struct rpath_command { struct linkedit_data_command { uint32_t cmd; /* LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, LC_FUNCTION_STARTS, LC_DATA_IN_CODE, - or LC_DYLIB_CODE_SIGN_DRS */ + LC_DYLIB_CODE_SIGN_DRS or + LC_LINKER_OPTIMIZATION_HINT. */ uint32_t cmdsize; /* sizeof(struct linkedit_data_command) */ uint32_t dataoff; /* file offset of data in __LINKEDIT segment */ uint32_t datasize; /* file size of data in __LINKEDIT segment */ @@ -1177,7 +1183,7 @@ struct encryption_info_command { /* * The encryption_info_command_64 contains the file offset and size of an - * of an encrypted segment (for use in 64-bit targets). + * encrypted segment (for use in x86_64 targets). */ struct encryption_info_command_64 { uint32_t cmd; /* LC_ENCRYPTION_INFO_64 */ @@ -1196,7 +1202,8 @@ struct encryption_info_command_64 { */ struct version_min_command { uint32_t cmd; /* LC_VERSION_MIN_MACOSX or - LC_VERSION_MIN_IPHONEOS */ + LC_VERSION_MIN_IPHONEOS or + LC_VERSION_MIN_WATCHOS */ uint32_t cmdsize; /* sizeof(struct min_version_command) */ uint32_t version; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */ uint32_t sdk; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */ @@ -1375,6 +1382,17 @@ struct dyld_info_command { #define EXPORT_SYMBOL_FLAGS_REEXPORT 0x08 #define EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER 0x10 +/* + * The linker_option_command contains linker options embedded in object files.
+ */ +struct linker_option_command { + uint32_t cmd; /* LC_LINKER_OPTION only used in MH_OBJECT filetypes */ + uint32_t cmdsize; + uint32_t count; /* number of strings */ + /* concatenation of zero terminated UTF8 strings. + Zero filled at end to align */ +}; + /* * The symseg_command contains the offset and size of the GNU style * symbol table information as described in the header file . diff --git a/EXTERNAL_HEADERS/mach-o/nlist.h b/EXTERNAL_HEADERS/mach-o/nlist.h index 1c1941012..133e36b49 100644 --- a/EXTERNAL_HEADERS/mach-o/nlist.h +++ b/EXTERNAL_HEADERS/mach-o/nlist.h @@ -78,7 +78,7 @@ struct nlist { #ifndef __LP64__ char *n_name; /* for use when in-core */ #endif - int32_t n_strx; /* index into the string table */ + uint32_t n_strx; /* index into the string table */ } n_un; uint8_t n_type; /* type flag, see below */ uint8_t n_sect; /* section number or NO_SECT */ @@ -296,15 +296,21 @@ struct nlist_64 { */ #define N_SYMBOL_RESOLVER 0x0100 +/* + * The N_ALT_ENTRY bit of the n_desc field indicates that the + * symbol is pinned to the previous content. + */ +#define N_ALT_ENTRY 0x0200 + #ifndef __STRICT_BSD__ -#if __cplusplus +#ifdef __cplusplus extern "C" { #endif /* __cplusplus */ /* * The function nlist(3) from the C library. 
*/ extern int nlist (const char *filename, struct nlist *list); -#if __cplusplus +#ifdef __cplusplus } #endif /* __cplusplus */ #endif /* __STRICT_BSD__ */ diff --git a/EXTERNAL_HEADERS/mach-o/stab.h b/EXTERNAL_HEADERS/mach-o/stab.h index e9e15b27a..0c89939ef 100644 --- a/EXTERNAL_HEADERS/mach-o/stab.h +++ b/EXTERNAL_HEADERS/mach-o/stab.h @@ -90,6 +90,7 @@ #define N_STSYM 0x26 /* static symbol: name,,n_sect,type,address */ #define N_LCSYM 0x28 /* .lcomm symbol: name,,n_sect,type,address */ #define N_BNSYM 0x2e /* begin nsect sym: 0,,n_sect,0,address */ +#define N_AST 0x32 /* AST file path: name,,NO_SECT,0,0 */ #define N_OPT 0x3c /* emitted with gcc2_compiled and in gcc source */ #define N_RSYM 0x40 /* register sym: name,,NO_SECT,type,register */ #define N_SLINE 0x44 /* src line: 0,,n_sect,linenumber,address */ diff --git a/Makefile b/Makefile index c4036c896..7fbedaceb 100644 --- a/Makefile +++ b/Makefile @@ -138,20 +138,22 @@ else MAKEJOBS := --jobs=$(SYSCTL_HW_LOGICALCPU) endif -TOP_TARGETS = \ - clean \ - installsrc \ - exporthdrs \ - all all_desktop all_embedded \ - all_release_embedded all_development_embedded \ - installhdrs installhdrs_desktop installhdrs_embedded \ - installhdrs_release_embedded installhdrs_development_embedded \ - install install_desktop install_embedded \ - install_release_embedded install_development_embedded \ - installopensource \ - cscope tags \ +TOP_TARGETS = \ + clean \ + installsrc \ + exporthdrs \ + all all_desktop all_embedded \ + all_release_embedded all_development_embedded \ + installhdrs installhdrs_desktop installhdrs_embedded \ + installhdrs_release_embedded installhdrs_development_embedded \ + install install_desktop install_embedded \ + install_release_embedded install_development_embedded \ + installopensource \ + cscope tags TAGS reindent \ help +DEFAULT_TARGET = all + # Targets for internal build system debugging TOP_TARGETS += \ print_exports print_exports_first_build_config \ @@ -161,16 +163,21 @@ TOP_TARGETS += \ 
install_textfiles \ install_config +ifeq ($(BUILD_JSON_COMPILATION_DATABASE),1) +MAKEARGS += -B +DEFAULT_TARGET := build +endif + .PHONY: $(TOP_TARGETS) -default: all +default: $(DEFAULT_TARGET) ifneq ($(REMOTEBUILD),) $(TOP_TARGETS): $(_v)$(VERSDIR)/tools/remote_build.sh _REMOTEBUILD_TARGET=$@ _REMOTEBUILD_MAKE=$(MAKE) $(if $(filter --,$(MAKEFLAGS)),-,)$(MAKEFLAGS) else $(TOP_TARGETS): - $(_v)$(MAKE) -r $(if $(filter -j,$(MAKEFLAGS)),,$(MAKEJOBS)) -f $(MakeInc_top) $@ + $(_v)$(MAKE) $(MAKEARGS) -r $(if $(filter -j,$(MAKEFLAGS)),,$(MAKEJOBS)) -f $(MakeInc_top) $@ endif else # CURRENT_BUILD_CONFIG @@ -194,17 +201,20 @@ INSTINC_SUBDIRS = $(ALL_SUBDIRS) EXTERNAL_HEADERS INSTINC_SUBDIRS_X86_64 = $(INSTINC_SUBDIRS) INSTINC_SUBDIRS_X86_64H = $(INSTINC_SUBDIRS) INSTINC_SUBDIRS_ARM = $(INSTINC_SUBDIRS) +INSTINC_SUBDIRS_ARM64 = $(INSTINC_SUBDIRS) EXPINC_SUBDIRS = $(ALL_SUBDIRS) EXPINC_SUBDIRS_X86_64 = $(EXPINC_SUBDIRS) EXPINC_SUBDIRS_X86_64H = $(EXPINC_SUBDIRS) EXPINC_SUBDIRS_ARM = $(EXPINC_SUBDIRS) +EXPINC_SUBDIRS_ARM64 = $(EXPINC_SUBDIRS) SETUP_SUBDIRS = SETUP COMP_SUBDIRS_X86_64 = $(ALL_SUBDIRS) COMP_SUBDIRS_X86_64H = $(ALL_SUBDIRS) COMP_SUBDIRS_ARM = $(ALL_SUBDIRS) +COMP_SUBDIRS_ARM64 = $(ALL_SUBDIRS) INSTTEXTFILES_SUBDIRS = \ bsd @@ -217,6 +227,16 @@ endif # CURRENT_BUILD_CONFIG endif # all other RC_ProjectName +installhdrs_libkdd install_libkdd: + cd libkdd; \ + xcodebuild $(subst _libkdd,,$@) \ + "SRCROOT=$(SRCROOT)/libkdd" \ + "OBJROOT=$(OBJROOT)" \ + "SYMROOT=$(SYMROOT)" \ + "DSTROOT=$(DSTROOT)" \ + "SDKROOT=$(SDKROOT)" + + # "xnu_quick_test" and "testbots" are targets that can be invoked via a standalone # "make xnu_quick_test" or via buildit/XBS with the RC_ProjectName=xnu_quick_test. # Define the target here in the outermost scope of the initial Makefile diff --git a/README b/README index f6b632335..1294b6726 100644 --- a/README +++ b/README @@ -118,7 +118,18 @@ A. 
How to build XNU: $ make cscope # this will build cscope database -8) Other makefile options +8) Reindenting files + + Source files can be reindented using the clang-format setup in .clang-format. XNU follows a variant of WebKit style for source code formatting. Please refer to format styles at http://www.webkit.org/coding/coding-style.html. Further information about style options is available at http://clang.llvm.org/docs/ClangFormatStyleOptions.html + + Note: the clang-format binary may not be part of the base installation. It can be compiled from the LLVM/clang sources and must be reachable in $PATH. + + From the top directory, run: + + $ make reindent # reindent all source files using clang-format. + + +9) Other makefile options $ make MAKEJOBS=-j8 # this will use 8 processes during the build. The default is 2x the number of active CPUS. $ make -j8 # the standard command-line option is also accepted @@ -129,6 +140,8 @@ A. How to build XNU: $ make REMOTEBUILD=user@remotehost # perform build on remote host + $ make BUILD_JSON_COMPILATION_DATABASE=1 # Build Clang JSON Compilation Database + ============================================= B. How to install a new header file from XNU @@ -137,9 +150,8 @@ B. How to install a new header file from XNU 1) XNU installs header files at the following locations - a. $(DSTROOT)/System/Library/Frameworks/Kernel.framework/Headers b. $(DSTROOT)/System/Library/Frameworks/Kernel.framework/PrivateHeaders - c. $(DSTROOT)/System/Library/Frameworks/System.framework/Headers + c. $(DSTROOT)/usr/include/ d. $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders - e. $(DSTROOT)/usr/include/ Kernel.framework is used by kernel extensions. System.framework and /usr/include are used by user level applications. The header @@ -156,9 +168,7 @@ B. How to install a new header file from XNU from each file list are - a.
DATAFILES : To make header file available in user level - - $(DSTROOT)/System/Library/Frameworks/System.framework/Headers - $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders - $(DSTROOT)/usr/include/ + $(DSTROOT)/usr/include b. PRIVATE_DATAFILES : To make header file available to Apple internal in user level - @@ -179,20 +189,19 @@ B. How to install a new header file from XNU by adding the appropriate file lists. The default install lists, its member file lists and their default location are described below - - a. INSTALL_MI_LIST : Installs header file to location that is available to + a. INSTALL_MI_LIST : Installs header file to a location that is available to everyone in user level. Locations - - $(DSTROOT)/System/Library/Frameworks/System.framework/Headers - $(DSTROOT)/usr/include/ + $(DSTROOT)/usr/include Definition - INSTALL_MI_LIST = ${DATAFILES} - b. INSTALL_MI_LCL_LIST : Installs header file to location that is available + b. INSTALL_MI_LCL_LIST : Installs header file to a location that is available for Apple internal in user level. Locations - $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders Definition - - INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} + INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} c. INSTALL_KF_MI_LIST : Installs header file to location that is available to everyone for kernel extensions. 
diff --git a/SETUP/Makefile b/SETUP/Makefile index 4ef2047d4..6236960ac 100644 --- a/SETUP/Makefile +++ b/SETUP/Makefile @@ -14,5 +14,9 @@ SETUP_SUBDIRS = \ installfile \ replacecontents +ifeq ($(BUILD_JSON_COMPILATION_DATABASE),1) +SETUP_SUBDIRS += json_compilation_db +endif + include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/SETUP/json_compilation_db/Makefile b/SETUP/json_compilation_db/Makefile new file mode 100644 index 000000000..c3634fe9e --- /dev/null +++ b/SETUP/json_compilation_db/Makefile @@ -0,0 +1,30 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + +include $(MakeInc_cmd) +include $(MakeInc_def) + +OBJS = json_compilation_db.o + +CFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -g -O0 -I$(SOURCE) -I. + +WARNFLAGS = -Wall + +LDFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) + +json_compilation_db: $(OBJS) + @echo HOST_LD $@ + $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^ + @echo HOST_CODESIGN $@ + $(_v)env CODESIGN_ALLOCATE=$(HOST_CODESIGN_ALLOCATE) $(HOST_CODESIGN) -s - $@ + +%.o: %.c + @echo HOST_CC $@ + $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $< + +do_build_setup:: json_compilation_db + +include $(MakeInc_rule) +include $(MakeInc_dir) diff --git a/SETUP/json_compilation_db/json_compilation_db.c b/SETUP/json_compilation_db/json_compilation_db.c new file mode 100644 index 000000000..7a148aebc --- /dev/null +++ b/SETUP/json_compilation_db/json_compilation_db.c @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * json_compilation_db is a helper tool that takes a compiler invocation, and + * appends it in JSON format to the specified database. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +void usage(void); +char *escape_string(const char *); + +/* + * We support appending to two databases. + * + * 0-byte: "" + * + * or + * + * "[" + * "{" + * " ..." 
+ * "}" + * "]" + */ + +int main(int argc, char * argv[]) +{ + struct stat sb; + int ret; + int dstfd; + FILE *dst = NULL; + const char *json_output = NULL; + const char *cwd = NULL; + const char *input_file = NULL; + char start[2]; + size_t read_bytes; + int i; + size_t input_file_len; + + if (argc < 5) { + usage(); + } + + json_output = argv[1]; + cwd = argv[2]; + input_file = argv[3]; + + argv += 4; + argc -= 4; + + input_file_len = strlen(input_file); + if (!(input_file_len > 2 && 0 == strcmp(".c", input_file + input_file_len - 2)) && + !(input_file_len > 3 && 0 == strcmp(".cp", input_file + input_file_len - 3)) && + !(input_file_len > 4 && 0 == strcmp(".cpp", input_file + input_file_len - 4))) { + /* Not a C/C++ file, just skip it */ + return 0; + } + + dstfd = open(json_output, O_RDWR | O_CREAT | O_EXLOCK, DEFFILEMODE); + if (dstfd < 0) + err(EX_NOINPUT, "open(%s)", json_output); + + ret = fstat(dstfd, &sb); + if (ret < 0) + err(EX_NOINPUT, "fstat(%s)", json_output); + + if (!S_ISREG(sb.st_mode)) + err(EX_USAGE, "%s is not a regular file", json_output); + + dst = fdopen(dstfd, "w+"); + if (dst == NULL) + err(EX_UNAVAILABLE, "fdopen"); + + read_bytes = fread(start, sizeof(start[0]), sizeof(start)/sizeof(start[0]), dst); + if ((read_bytes != sizeof(start)) || (0 != memcmp(start, "[\n", sizeof(start)/sizeof(start[0])))) { + /* no JSON start, we don't really care why */ + ret = fseeko(dst, 0, SEEK_SET); + if (ret < 0) + err(EX_UNAVAILABLE, "fseeko"); + + ret = fputs("[", dst); + if (ret < 0) + err(EX_UNAVAILABLE, "fputs"); + } else { + /* has at least two bytes at the start. 
Seek to 3 bytes before the end */ + ret = fseeko(dst, -3, SEEK_END); + if (ret < 0) + err(EX_UNAVAILABLE, "fseeko"); + + ret = fputs(",", dst); + if (ret < 0) + err(EX_UNAVAILABLE, "fputs"); + } + + fprintf(dst, "\n"); + fprintf(dst, "{\n"); + fprintf(dst, " \"directory\": \"%s\",\n", cwd); + fprintf(dst, " \"file\": \"%s\",\n", input_file); + fprintf(dst, " \"command\": \""); + for (i=0; i < argc; i++) { + bool needs_escape = strchr(argv[i], '\\') || strchr(argv[i], '"') || strchr(argv[i], ' '); + + if (needs_escape) { + char *escaped_string = escape_string(argv[i]); + fprintf(dst, "%s\\\"%s\\\"", i == 0 ? "" : " ", escaped_string); + free(escaped_string); + } else { + fprintf(dst, "%s%s", i == 0 ? "" : " ", argv[i]); + } + } + fprintf(dst, "\"\n"); + fprintf(dst, "}\n"); + fprintf(dst, "]\n"); + + ret = fclose(dst); + if (ret < 0) + err(EX_UNAVAILABLE, "fclose"); + + return 0; +} + +void usage(void) +{ + fprintf(stderr, "Usage: %s [ ...]\n", getprogname()); + exit(EX_USAGE); +} + +/* + * A valid JSON string can't contain an unescaped \ or ", so we look for these in our argv[] array (which + * our parent shell would have done shell metacharacter evaluation on), and escape just these. + * The entire string is put in \" escaped quotes to handle spaces that are valid JSON + * but should be used for grouping when running the compiler for real. 
+ */ +char * +escape_string(const char *input) +{ + size_t len = strlen(input); + size_t i, j; + char *output = malloc(len * 4 + 1); + + for (i=0, j=0; i < len; i++) { + char ch = input[i]; + + if (ch == '\\' || ch == '"') { + output[j++] = '\\'; + output[j++] = '\\'; /* output \\ in JSON, which the final shell will see as \ */ + output[j++] = '\\'; /* escape \ or ", which the final shell will see and pass to the compiler */ + } + output[j++] = ch; + } + + output[j] = '\0'; + + return output; +} diff --git a/SETUP/kextsymboltool/Makefile b/SETUP/kextsymboltool/Makefile index 3ae439d36..4c765d828 100644 --- a/SETUP/kextsymboltool/Makefile +++ b/SETUP/kextsymboltool/Makefile @@ -13,17 +13,11 @@ CFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -g -O WARNFLAGS = -Wall -LDFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -L$(HOST_SPARSE_SDKROOT)/usr/local/lib/system -lstdc++ -ifneq ($(HOST_SPARSE_SDKROOT),/) -LDFLAGS += -lmacho -endif +LDFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -lstdc++ -.SparseSDK: ALWAYS - $(_v)echo '$(HOST_SPARSE_SDKROOT)' | cmp -s - $@ || echo '$(HOST_SPARSE_SDKROOT)' > $@ - -kextsymboltool: $(OBJS) .SparseSDK +kextsymboltool: $(OBJS) @echo HOST_LD $@ - $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $(OBJS) + $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^ @echo HOST_CODESIGN $@ $(_v)env CODESIGN_ALLOCATE=$(HOST_CODESIGN_ALLOCATE) $(HOST_CODESIGN) -s - $@ diff --git a/SETUP/kextsymboltool/kextsymboltool.c b/SETUP/kextsymboltool/kextsymboltool.c index 9b9125bae..8bd2c293c 100644 --- a/SETUP/kextsymboltool/kextsymboltool.c +++ b/SETUP/kextsymboltool/kextsymboltool.c @@ -464,6 +464,28 @@ store_symbols(char * file, vm_size_t file_size, struct symbol * symbols, uint32_ return strtabsize; } +static const NXArchInfo * +lookup_arch(const char *archstring) +{ + /* + * As new architectures are supported by xnu, add a mapping function + * without relying on host libraries. 
+ */ + static const NXArchInfo archlist[] = { + { "x86_64", 0x01000007 /* CPU_TYPE_X86_64 */, 3 /* CPU_SUBTYPE_X86_64_ALL */, NX_LittleEndian, NULL }, + { "x86_64h", 0x01000007 /* CPU_TYPE_X86_64 */, 8 /* CPU_SUBTYPE_X86_64_H */, NX_LittleEndian, NULL }, + }; + unsigned long i; + + for (i=0; i < sizeof(archlist)/sizeof(archlist[0]); i++) { + if (0 == strcmp(archstring, archlist[i].name)) { + return &archlist[i]; + } + } + + return NULL; +} + /********************************************************************* *********************************************************************/ int main(int argc, char * argv[]) @@ -523,7 +545,7 @@ int main(int argc, char * argv[]) if (!strcmp("-arch", argv[i])) { - target_arch = NXGetArchInfoFromName(argv[i + 1]); + target_arch = lookup_arch(argv[i + 1]); if (!target_arch) { fprintf(stderr, "unknown architecture name: %s\n", argv[i+1]); @@ -780,7 +802,7 @@ int main(int argc, char * argv[]) hdr.magic = MH_MAGIC; hdr.cputype = target_arch->cputype; hdr.cpusubtype = target_arch->cpusubtype; - hdr.filetype = (target_arch->cputype == CPU_TYPE_I386) ? 
MH_OBJECT : MH_KEXT_BUNDLE; + hdr.filetype = MH_KEXT_BUNDLE; hdr.ncmds = 3; hdr.sizeofcmds = sizeof(segcmd) + sizeof(symcmd) + sizeof(uuidcmd); hdr.flags = MH_INCRLINK; @@ -969,7 +991,7 @@ int main(int argc, char * argv[]) if (kErrorNone != err) { - if (output_name) + if (output_name && strncmp(output_name, "/dev/", 5)) unlink(output_name); exit(1); } diff --git a/bsd/Makefile b/bsd/Makefile index e03ce2014..99cd72176 100644 --- a/bsd/Makefile +++ b/bsd/Makefile @@ -36,6 +36,8 @@ INSTINC_SUBDIRS_X86_64H = \ INSTINC_SUBDIRS_ARM = \ arm +INSTINC_SUBDIRS_ARM64 = \ + arm EXPINC_SUBDIRS = \ bsm \ @@ -63,6 +65,8 @@ EXPINC_SUBDIRS_X86_64H = \ EXPINC_SUBDIRS_ARM = \ arm +EXPINC_SUBDIRS_ARM64 = \ + arm COMP_SUBDIRS = \ conf diff --git a/bsd/bsm/audit_fcntl.h b/bsd/bsm/audit_fcntl.h index 1398b2f0d..5a5995ee2 100644 --- a/bsd/bsm/audit_fcntl.h +++ b/bsd/bsm/audit_fcntl.h @@ -113,6 +113,7 @@ #define BSM_F_CHECK_OPENEVT 358 /* Darwin-specific. */ #define BSM_F_ADDSIGS 359 /* Darwin-specific. */ #define BSM_F_MARKDEPENDENCY 360 /* Darwin-specific. */ +#define BSM_F_BARRIERFSYNC 361 /* Darwin-specific. */ /* * Darwin file system specific (400-499). 
diff --git a/bsd/conf/Makefile.template b/bsd/conf/Makefile.template index ece1262df..9636b05f7 100644 --- a/bsd/conf/Makefile.template +++ b/bsd/conf/Makefile.template @@ -99,6 +99,7 @@ OBJS_NO_SIGN_COMPARE = \ ip_input.o \ ip_output.o \ raw_ip.o \ + tcp_cache.o \ tcp_input.o \ tcp_output.o \ tcp_subr.o \ @@ -236,7 +237,9 @@ OBJS_NO_CAST_ALIGN = \ munge.o \ aes.o \ aeskey.o \ - sdt_arm.o + sdt_arm.o \ + uipc_mbuf.o \ + kern_guarded.o $(foreach file,$(OBJS_NO_CAST_ALIGN),$(eval $(call add_perfile_cflags,$(file),-Wno-cast-align))) @@ -263,9 +266,9 @@ $(SOBJS): .SFLAGS $(COMPONENT).filelist: $(OBJS) @echo LDFILELIST $(COMPONENT) - $(_v)( for obj in ${OBJS}; do \ + $(_v)for obj in ${OBJS}; do \ echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ - done; ) > $(COMPONENT).filelist + done > $(COMPONENT).filelist MAKESYSCALLS = $(SRCROOT)/bsd/kern/makesyscalls.sh diff --git a/bsd/conf/files b/bsd/conf/files index d4ce218f8..2e3baa92b 100644 --- a/bsd/conf/files +++ b/bsd/conf/files @@ -47,7 +47,6 @@ OPTIONS/sysv_msg optional sysv_msg OPTIONS/sysv_shm optional sysv_shm OPTIONS/importance_inheritance optional importance_inheritance OPTIONS/importance_debug optional importance_debug -OPTIONS/in_kernel_tests optional config_in_kernel_tests OPTIONS/config_ecc_logging optional config_ecc_logging # @@ -215,18 +214,20 @@ bsd/net/route.c optional networking bsd/net/rtsock.c optional networking bsd/net/netsrc.c optional networking bsd/net/ntstat.c optional networking +bsd/net/net_perf.c optional networking bsd/net/if_gif.c optional gif bsd/net/if_stf.c optional stf bsd/net/kpi_interface.c optional networking bsd/net/kpi_protocol.c optional networking bsd/net/kpi_interfacefilter.c optional networking bsd/net/net_str_id.c optional networking -bsd/net/if_utun.c optional networking +bsd/net/if_utun.c optional networking bsd/net/if_utun_crypto.c optional networking bsd/net/if_utun_crypto_dtls.c optional networking bsd/net/if_utun_crypto_ipsec.c optional networking -bsd/net/if_ipsec.c 
optional ipsec -bsd/net/necp.c optional necp +bsd/net/if_ipsec.c optional ipsec +bsd/net/necp.c optional necp +bsd/net/network_agent.c optional networking bsd/net/if_pflog.c optional pflog bsd/net/pf.c optional pf bsd/net/pf_if.c optional pf @@ -269,7 +270,6 @@ bsd/net/altq/altq_subr.c optional pf_altq bsd/netinet/igmp.c optional inet bsd/netinet/in.c optional inet -bsd/netinet/in_dhcp.c optional inet bsd/netinet/dhcp_options.c optional inet bsd/netinet/in_arp.c optional inet bsd/netinet/in_mcast.c optional inet @@ -287,6 +287,7 @@ bsd/netinet/ip_id.c optional inet bsd/netinet/ip_input.c optional inet bsd/netinet/ip_output.c optional inet bsd/netinet/raw_ip.c optional inet +bsd/netinet/tcp_cache.c optional inet bsd/netinet/tcp_debug.c optional tcpdebug bsd/netinet/tcp_input.c optional inet bsd/netinet/tcp_output.c optional inet @@ -395,6 +396,7 @@ bsd/hfs/hfs_vfsutils.c optional hfs bsd/hfs/hfs_vnops.c optional hfs bsd/hfs/hfs_xattr.c optional hfs bsd/hfs/MacOSStubs.c optional hfs +bsd/hfs/hfs_extents.c optional hfs bsd/hfs/hfs_cprotect.c standard bsd/hfs/rangelist.c optional hfs bsd/hfs/hfscommon/BTree/BTree.c optional hfs @@ -481,6 +483,7 @@ bsd/kern/sys_pipe.c standard bsd/kern/sys_socket.c optional sockets bsd/kern/sys_domain.c optional sockets bsd/kern/sys_coalition.c optional config_coalitions +bsd/kern/sys_work_interval.c standard ./syscalls.c standard bsd/kern/tty.c standard bsd/kern/tty_compat.c standard @@ -538,6 +541,7 @@ bsd/kern/imageboot.c optional config_imageboot osfmk/kperf/kperfbsd.c optional kperf bsd/kern/kern_kpc.c optional kpc -bsd/kern/kern_tests.c optional config_in_kernel_tests - bsd/kern/proc_uuid_policy.c optional config_proc_uuid_policy + +bsd/pgo/profile_runtime.c standard + diff --git a/bsd/conf/param.c b/bsd/conf/param.c index 49d0f006b..f9feaa2c0 100644 --- a/bsd/conf/param.c +++ b/bsd/conf/param.c @@ -84,10 +84,13 @@ struct timezone tz = { 0, 0 }; #define NPROC (20 + 16 * 32) #define NPROC_PER_UID (NPROC/2) + #define HNPROC 2500 /* 
based on thread_max */ int maxproc = NPROC; int maxprocperuid = NPROC_PER_UID; -/*__private_extern__*/ int hard_maxproc = HNPROC; /* hardcoded limit */ + +int hard_maxproc = HNPROC; /* hardcoded limit */ + int nprocs = 0; /* XXX */ //#define NTEXT (80 + NPROC / 8) /* actually the object cache */ diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c index dd02ad502..ae38f260a 100644 --- a/bsd/dev/dtrace/dtrace.c +++ b/bsd/dev/dtrace/dtrace.c @@ -20,8 +20,8 @@ */ /* - * Portions Copyright (c) 2011, Joyent, Inc. All rights reserved. - * Portions Copyright (c) 2012 by Delphix. All rights reserved. + * Portions Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Portions Copyright (c) 2013 by Delphix. All rights reserved. */ /* @@ -1457,7 +1457,7 @@ dtrace_priv_proc(dtrace_state_t *state) if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) goto bad; - if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc())) + if (dtrace_is_restricted() && !dtrace_is_running_apple_internal() && !dtrace_can_attach_to_proc(current_proc())) goto bad; if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) @@ -1489,7 +1489,7 @@ dtrace_priv_proc_relaxed(dtrace_state_t *state) static int dtrace_priv_kernel(dtrace_state_t *state) { - if (dtrace_is_restricted()) + if (dtrace_is_restricted() && !dtrace_is_running_apple_internal()) goto bad; if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL) @@ -3127,6 +3127,9 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } return (mstate->dtms_machtimestamp); + case DIF_VAR_CPU: + return ((uint64_t) dtrace_get_thread_last_cpu_id(current_thread())); + case DIF_VAR_IPL: if (!dtrace_priv_kernel(state)) return (0); @@ -3324,7 +3327,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (0); mstate->dtms_scratch_ptr += scratch_size; - proc_selfname( xname, MAXCOMLEN ); + proc_selfname( xname, scratch_size ); return ((uint64_t)(uintptr_t)xname); } @@ -4707,6 +4710,15 @@ 
inetout: regs[rd] = (uintptr_t)end + 1; break; } + case DIF_SUBR_VM_KERNEL_ADDRPERM: { + if (!dtrace_priv_kernel(state)) { + regs[rd] = 0; + } else { + regs[rd] = VM_KERNEL_ADDRPERM((vm_offset_t) tupregs[0].dttk_value); + } + + break; + } /* * APPLE NOTE: * CoreProfile callback ('core_profile (uint64_t, [uint64_t], [uint64_t] ...)') @@ -5870,6 +5882,63 @@ dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, mstate->dtms_scratch_ptr = old; } +static void +dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size, + size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind) +{ + volatile uint16_t *flags; + uint64_t val = *valp; + size_t valoffs = *valoffsp; + + flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; + ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF); + + /* + * If this is a string, we're going to only load until we find the zero + * byte -- after which we'll store zero bytes. + */ + if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { + char c = '\0' + 1; + size_t s; + + for (s = 0; s < size; s++) { + if (c != '\0' && dtkind == DIF_TF_BYREF) { + c = dtrace_load8(val++); + } else if (c != '\0' && dtkind == DIF_TF_BYUREF) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + c = dtrace_fuword8((user_addr_t)(uintptr_t)val++); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + if (*flags & CPU_DTRACE_FAULT) + break; + } + + DTRACE_STORE(uint8_t, tomax, valoffs++, c); + + if (c == '\0' && intuple) + break; + } + } else { + uint8_t c; + while (valoffs < end) { + if (dtkind == DIF_TF_BYREF) { + c = dtrace_load8(val++); + } else if (dtkind == DIF_TF_BYUREF) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + c = dtrace_fuword8((user_addr_t)(uintptr_t)val++); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + if (*flags & CPU_DTRACE_FAULT) + break; + } + + DTRACE_STORE(uint8_t, tomax, + valoffs++, c); + } + } + + *valp = val; + *valoffsp = valoffs; +} + /* * If you're looking for the epicenter of DTrace, you just found it. 
This * is the function called by the provider to fire a probe -- from which all @@ -6463,7 +6532,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, ASSERT(0); } - if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) { + if (dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF)) { uintptr_t end = valoffs + size; if (tracememsize != 0 && @@ -6473,39 +6542,17 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, tracememsize = 0; } - if (!dtrace_vcanload((void *)(uintptr_t)val, - &dp->dtdo_rtype, &mstate, vstate)) - continue; - - /* - * If this is a string, we're going to only - * load until we find the zero byte -- after - * which we'll store zero bytes. - */ - if (dp->dtdo_rtype.dtdt_kind == - DIF_TYPE_STRING) { - char c = '\0' + 1; - int intuple = act->dta_intuple; - size_t s; - - for (s = 0; s < size; s++) { - if (c != '\0') - c = dtrace_load8(val++); - - DTRACE_STORE(uint8_t, tomax, - valoffs++, c); - - if (c == '\0' && intuple) - break; - } - + if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF && + !dtrace_vcanload((void *)(uintptr_t)val, + &dp->dtdo_rtype, &mstate, vstate)) + { continue; } - while (valoffs < end) { - DTRACE_STORE(uint8_t, tomax, valoffs++, - dtrace_load8(val++)); - } + dtrace_store_by_ref(dp, tomax, size, &valoffs, + &val, end, act->dta_intuple, + dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ? + DIF_TF_BYREF: DIF_TF_BYUREF); continue; } @@ -8663,7 +8710,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, "expected 'ret' as last DIF instruction\n"); } - if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) { + if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) { /* * If we're not returning by reference, the size must be either * 0 or the size of one of the base types. 
@@ -10055,12 +10102,14 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: + case DTRACEACT_DIFEXPR: /* * We know that our arg is a string -- turn it into a * format. */ if (arg == 0) { - ASSERT(desc->dtad_kind == DTRACEACT_PRINTA); + ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || + desc->dtad_kind == DTRACEACT_DIFEXPR); format = 0; } else { ASSERT(arg != 0); @@ -10071,7 +10120,6 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) /*FALLTHROUGH*/ case DTRACEACT_LIBACT: - case DTRACEACT_DIFEXPR: case DTRACEACT_TRACEMEM: case DTRACEACT_TRACEMEM_DYNSIZE: case DTRACEACT_APPLEBINARY: /* __APPLE__ */ @@ -12074,15 +12122,19 @@ dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, (uintptr_t)sec->dofs_offset + offs); kind = (dtrace_actkind_t)desc->dofa_kind; - if (DTRACEACT_ISPRINTFLIKE(kind) && - (kind != DTRACEACT_PRINTA || - desc->dofa_strtab != DOF_SECIDX_NONE)) { + if ((DTRACEACT_ISPRINTFLIKE(kind) && + (kind != DTRACEACT_PRINTA || desc->dofa_strtab != DOF_SECIDX_NONE)) || + (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE)) + { dof_sec_t *strtab; char *str, *fmt; uint64_t i; /* - * printf()-like actions must have a format string. + * The argument to these actions is an index into the + * DOF string table. For printf()-like actions, this + * is the format string. For print(), this is the + * CTF type of the expression result. */ if ((strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) @@ -16852,7 +16904,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv * Security restrictions make this operation illegal, if this is enabled DTrace * must refuse to provide any fbt probes. 
*/ - if (dtrace_is_restricted()) { + if (dtrace_fbt_probes_restricted()) { cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST"); return (EPERM); } @@ -17007,7 +17059,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv * Security restrictions make this operation illegal, if this is enabled DTrace * must refuse to provide any fbt probes. */ - if (dtrace_is_restricted()) { + if (dtrace_fbt_probes_restricted()) { cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST"); return (EPERM); } @@ -17121,7 +17173,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv case DTRACEIOC_PROCWAITFOR: { dtrace_procdesc_t pdesc = { - .p_comm = {0}, + .p_name = {0}, .p_pid = -1 }; @@ -17604,7 +17656,6 @@ dtrace_init( void ) (void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */ dtrace_isa_init(); - /* * See dtrace_impl.h for a description of dof modes. * The default is lazy dof. 
@@ -17645,7 +17696,9 @@ dtrace_init( void ) if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) { dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE; } - + + dtrace_restriction_policy_load(); + gDTraceInited = 1; } else diff --git a/bsd/dev/dtrace/dtrace_glue.c b/bsd/dev/dtrace/dtrace_glue.c index d7588c0ca..4e7ede1d2 100644 --- a/bsd/dev/dtrace/dtrace_glue.c +++ b/bsd/dev/dtrace/dtrace_glue.c @@ -66,10 +66,6 @@ #include #include /* All the bits we care about are guarded by MACH_KERNEL_PRIVATE :-( */ -/* missing prototypes, not exported by Mach */ -extern kern_return_t task_suspend_internal(task_t); -extern kern_return_t task_resume_internal(task_t); - /* * pid/proc */ diff --git a/bsd/dev/dtrace/dtrace_subr.c b/bsd/dev/dtrace/dtrace_subr.c index aa8fb6c1a..ef857094d 100644 --- a/bsd/dev/dtrace/dtrace_subr.c +++ b/bsd/dev/dtrace/dtrace_subr.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -119,9 +120,40 @@ LIST_HEAD(listhead, dtrace_proc_awaited_entry) dtrace_proc_awaited_head void (*dtrace_proc_waitfor_exec_ptr)(proc_t*) = NULL; +static int +dtrace_proc_get_execpath(proc_t *p, char *buffer, int *maxlen) +{ + int err = 0, vid = 0; + vnode_t tvp = NULLVP, nvp = NULLVP; + + ASSERT(p); + ASSERT(buffer); + ASSERT(maxlen); + + if ((tvp = p->p_textvp) == NULLVP) + return ESRCH; + + vid = vnode_vid(tvp); + if ((err = vnode_getwithvid(tvp, vid)) != 0) + return err; + + if ((err = vn_getpath_fsenter(tvp, buffer, maxlen)) != 0) + return err; + vnode_put(tvp); + + if ((err = vnode_lookup(buffer, 0, &nvp, vfs_context_current())) != 0) + return err; + if (nvp != NULLVP) + vnode_put(nvp); + + return 0; +} + + static void dtrace_proc_exec_notification(proc_t *p) { dtrace_proc_awaited_entry_t *entry, *tmp; + static char execpath[MAXPATHLEN]; ASSERT(p); ASSERT(p->p_pid != -1); @@ -129,16 +161,31 @@ dtrace_proc_exec_notification(proc_t *p) { 
lck_mtx_lock(&dtrace_procwaitfor_lock); - /* - * For each entry, if it has not been matched with a process yet we - * try to match it with the newly created process. If they match, the - * entry is initialized with the process id and the process task is - * suspended. Finally, we wake up the client's waiting thread. - */ LIST_FOREACH_SAFE(entry, &dtrace_proc_awaited_head, entries, tmp) { - if ((entry->pdesc->p_pid == -1) - && !strncmp(entry->pdesc->p_comm, &p->p_comm[0], sizeof(p->p_comm))) - { + /* By default consider we're using p_comm. */ + char *pname = p->p_comm; + + /* Already matched with another process. */ + if ((entry->pdesc->p_pid != -1)) + continue; + + /* p_comm is too short, use the execpath. */ + if (entry->pdesc->p_name_length >= MAXCOMLEN) { + /* + * Retrieve the executable path. After the call, length contains + * the length of the string + 1. + */ + int length = sizeof(execpath); + if (dtrace_proc_get_execpath(p, execpath, &length) != 0) + continue; + /* Move the cursor to the position after the last / */ + pname = &execpath[length - 1]; + while (pname != execpath && *pname != '/') + pname--; + pname = (*pname == '/') ? pname + 1 : pname; + } + + if (!strcmp(entry->pdesc->p_name, pname)) { entry->pdesc->p_pid = p->p_pid; task_pidsuspend(p->task); wakeup(entry); @@ -154,7 +201,15 @@ dtrace_proc_waitfor(dtrace_procdesc_t* pdesc) { int res; ASSERT(pdesc); - ASSERT(pdesc->p_comm); + ASSERT(pdesc->p_name); + + /* + * Never trust user input, compute the length of the process name and ensure the + * string is null terminated. 
+ */ + pdesc->p_name_length = strnlen(pdesc->p_name, sizeof(pdesc->p_name)); + if (pdesc->p_name_length >= (int) sizeof(pdesc->p_name)) + return -1; lck_mtx_lock(&dtrace_procwaitfor_lock); @@ -240,6 +295,14 @@ dtrace_invop_remove(int (*func)(uintptr_t, uintptr_t *, uintptr_t)) kmem_free(hdlr, sizeof (dtrace_invop_hdlr_t)); } + + + +void +dtrace_restriction_policy_load(void) +{ +} + /* * Check if DTrace has been restricted by the current security policy. */ @@ -254,6 +317,32 @@ dtrace_is_restricted(void) return FALSE; } +/* + * Check if DTrace is running on a machine currently configured for Apple Internal development + */ +boolean_t +dtrace_is_running_apple_internal(void) +{ +#if CONFIG_CSR + if (csr_check(CSR_ALLOW_APPLE_INTERNAL) == 0) + return TRUE; +#endif + + return FALSE; +} + +boolean_t +dtrace_fbt_probes_restricted(void) +{ + +#if CONFIG_CSR + if (dtrace_is_restricted() && !dtrace_is_running_apple_internal()) + return TRUE; +#endif + + return FALSE; +} + /* * Check if the process can be attached. 
*/ @@ -264,7 +353,7 @@ dtrace_can_attach_to_proc(proc_t *proc) ASSERT(proc != NULL); #if CONFIG_CSR - if ((cs_entitlement_flags(proc) & CS_GET_TASK_ALLOW) == 0) + if (cs_restricted(proc)) return FALSE; #endif diff --git a/bsd/dev/dtrace/lockstat.c b/bsd/dev/dtrace/lockstat.c index 4417a812a..ef1d9f1e7 100644 --- a/bsd/dev/dtrace/lockstat.c +++ b/bsd/dev/dtrace/lockstat.c @@ -165,6 +165,7 @@ typedef struct lockstat_assembly_probe { { LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, (vm_offset_t *) lck_rw_try_lock_exclusive_lockstat_patch_point }, { LS_LCK_MTX_LOCK_SPIN_ACQUIRE, (vm_offset_t *) lck_mtx_lock_spin_lockstat_patch_point }, #endif + /* No assembly patch points for ARM */ #endif /* CONFIG_DTRACE */ { LS_LCK_INVALID, NULL } }; diff --git a/bsd/dev/dtrace/scripts/Makefile b/bsd/dev/dtrace/scripts/Makefile index 79b907e00..3e55851a9 100644 --- a/bsd/dev/dtrace/scripts/Makefile +++ b/bsd/dev/dtrace/scripts/Makefile @@ -18,7 +18,7 @@ INSTALL_DTRACE_SCRIPTS_LIST = \ tcp.d \ unistd.d -ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) +ifneq ($(filter $(SUPPORTED_EMBEDDED_PLATFORMS),$(PLATFORM)),) INSTALL_DTRACE_SCRIPTS_LIST += mptcp.d endif diff --git a/bsd/dev/dtrace/scripts/mptcp.d b/bsd/dev/dtrace/scripts/mptcp.d index 5e5c60db5..331f82928 100644 --- a/bsd/dev/dtrace/scripts/mptcp.d +++ b/bsd/dev/dtrace/scripts/mptcp.d @@ -153,7 +153,7 @@ translator mppsinfo_t < struct mppcb *T> { typedef struct mptsesinfo { uint16_t numflows; uint16_t nummpcapflows; - connid_t connid_last; + sae_connid_t connid_last; uint8_t flags; struct mptses *mptses; } mptsesinfo_t; @@ -217,7 +217,7 @@ typedef struct mptsubinfo { uint32_t flags; uint32_t evctl; uint32_t family; - connid_t connid; + sae_connid_t connid; uint32_t rank; int32_t error; uint64_t sndnxt; diff --git a/bsd/dev/dtrace/scripts/sched.d b/bsd/dev/dtrace/scripts/sched.d index d86772554..e7cddd544 100644 --- a/bsd/dev/dtrace/scripts/sched.d +++ b/bsd/dev/dtrace/scripts/sched.d @@ -59,9 +59,9 @@ inline cpuinfo_t *curcpu = xlate 
(curthread->last_processor); #pragma D attributes Stable/Stable/Common curcpu #pragma D binding "1.0" curcpu -inline processorid_t cpu = curcpu->cpu_id; -#pragma D attributes Stable/Stable/Common cpu -#pragma D binding "1.0" cpu +/* + * XXX: 'cpu' is now a built-in variable in dtrace. + */ inline psetid_t pset = curcpu->cpu_pset; #pragma D attributes Stable/Stable/Common pset diff --git a/bsd/dev/dtrace/sdt.c b/bsd/dev/dtrace/sdt.c index 610de106b..f31f21be1 100644 --- a/bsd/dev/dtrace/sdt.c +++ b/bsd/dev/dtrace/sdt.c @@ -436,6 +436,10 @@ void sdt_init( void ) return; } + if (dtrace_fbt_probes_restricted()) { + return; + } + if (MH_MAGIC_KERNEL != _mh_execute_header.magic) { g_sdt_kernctl.mod_address = (vm_address_t)NULL; g_sdt_kernctl.mod_size = 0; diff --git a/bsd/dev/dtrace/sdt_subr.c b/bsd/dev/dtrace/sdt_subr.c index cde9701e4..18c16e074 100644 --- a/bsd/dev/dtrace/sdt_subr.c +++ b/bsd/dev/dtrace/sdt_subr.c @@ -948,15 +948,15 @@ sdt_argdesc_t sdt_args[] = { { "mptcp", "timer", 1, 1, "struct mptcb *", "mptsinfo_t *" }, { "mptcp", "error", 0, 0, "struct mptcb *", "mptsinfo_t *" }, { "mptcp", "connectx", 0, 0, "struct mptses *", "mptsesinfo_t *" }, - { "mptcp", "connectx", 1, 1, "associd_t", "associd_t" }, + { "mptcp", "connectx", 1, 1, "sae_associd_t", "sae_associd_t" }, { "mptcp", "connectx", 2, 2, "struct socket *", "socketinfo_t *" }, { "mptcp", "disconnectx", 0, 0, "struct mptses *", "mptsesinfo_t *" }, - { "mptcp", "disconnectx", 1, 1, "associd_t", "associd_t" }, - { "mptcp", "disconnectx", 2, 2, "connid_t", "connid_t" }, + { "mptcp", "disconnectx", 1, 1, "sae_associd_t", "sae_associd_t" }, + { "mptcp", "disconnectx", 2, 2, "sae_connid_t", "sae_connid_t" }, { "mptcp", "disconnectx", 3, 3, "struct socket *", "sockinfo_t *" }, { "mptcp", "disconnectx", 4, 4, "struct mptcb *", "mptsinfo_t *" }, { "mptcp", "peeloff", 0, 0, "struct mptses *", "mptsesinfo_t *" }, - { "mptcp", "peeloff", 1, 1, "associd_t", "associd_t" }, + { "mptcp", "peeloff", 1, 1, 
"sae_associd_t", "sae_associd_t" }, { "mptcp", "peeloff", 2, 2, "struct socket *", "sockinfo_t *" }, { NULL, NULL, 0, 0, NULL, NULL } }; diff --git a/bsd/dev/i386/dis_tables.c b/bsd/dev/i386/dis_tables.c index e0031d618..b57481d27 100644 --- a/bsd/dev/i386/dis_tables.c +++ b/bsd/dev/i386/dis_tables.c @@ -19,11 +19,19 @@ * * CDDL HEADER END */ + /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015, Joyent, Inc. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ + /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ @@ -155,6 +163,7 @@ enum { CRC32, /* for crc32, with different size operands */ XADDB, /* for xaddb */ MOVSXZ, /* AMD64 mov sign extend 32 to 64 bit instruction */ + MOVBE, /* movbe instruction */ /* * MMX/SIMD addressing modes. @@ -207,9 +216,33 @@ enum { XMMX2I, /* SIMD xmm -> xmm, imm, imm */ XMM2I, /* SIMD xmm, imm, imm */ XMMFENCE, /* SIMD lfence or mfence */ - XMMSFNC /* SIMD sfence (none or mem) */ + XMMSFNC, /* SIMD sfence (none or mem) */ + XGETBV_XSETBV, + VEX_NONE, /* VEX no operand */ + VEX_MO, /* VEX mod_rm -> implicit reg */ + VEX_RMrX, /* VEX VEX.vvvv, mod_rm -> mod_reg */ + VEX_RRX, /* VEX VEX.vvvv, mod_reg -> mod_rm */ + VEX_RMRX, /* VEX VEX.vvvv, mod_rm, imm8[7:4] -> mod_reg */ + VEX_MX, /* VEX mod_rm -> mod_reg */ + VEX_MXI, /* VEX mod_rm, imm8 -> mod_reg */ + VEX_XXI, /* VEX mod_rm, imm8 -> VEX.vvvv */ + VEX_MR, /* VEX mod_rm -> mod_reg */ + VEX_RRI, /* VEX mod_reg, mod_rm -> implicit(eflags/r32) */ + VEX_RX, /* VEX mod_reg -> mod_rm */ + VEX_RR, /* VEX mod_rm -> mod_reg */ + VEX_RRi, /* VEX mod_rm, imm8 -> mod_reg */ + VEX_RM, /* VEX mod_reg -> mod_rm */ + VEX_RIM, /* VEX mod_reg, imm8 -> mod_rm */ + VEX_RRM, /* VEX VEX.vvvv, mod_reg -> mod_rm */ + VEX_RMX, /* VEX VEX.vvvv, mod_rm -> mod_reg */ }; +/* + * VEX prefixes + */ +#define VEX_2bytes 0xC5 /* the first 
byte of two-byte form */ +#define VEX_3bytes 0xC4 /* the first byte of three-byte form */ + #define FILL 0x90 /* Fill byte used for alignment (nop) */ /* @@ -418,6 +451,11 @@ const char *const dis_XMMREG[16] = { "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15" }; +const char *const dis_YMMREG[16] = { + "%ymm0", "%ymm1", "%ymm2", "%ymm3", "%ymm4", "%ymm5", "%ymm6", "%ymm7", + "%ymm8", "%ymm9", "%ymm10", "%ymm11", "%ymm12", "%ymm13", "%ymm14", "%ymm15" +}; + const char *const dis_SEGREG[16] = { "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "", "", "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "", "" @@ -430,7 +468,12 @@ const char *const dis_PREDSUFFIX[8] = { "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord" }; - +const char *const dis_AVXvgrp7[3][8] = { + /*0 1 2 3 4 5 6 7*/ +/*71*/ {"", "", "vpsrlw", "", "vpsraw", "", "vpsllw", ""}, +/*72*/ {"", "", "vpsrld", "", "vpsrad", "", "vpslld", ""}, +/*73*/ {"", "", "vpsrlq", "vpsrldq", "", "", "vpsllq", "vpslldq"} +}; #endif /* DIS_TEXT */ @@ -462,7 +505,7 @@ const instable_t dis_op0F00[8] = { */ const instable_t dis_op0F01[8] = { -/* [0] */ TNSZ("sgdt",MO,6), TNSZ("sidt",MONITOR_MWAIT,6), TNSZ("lgdt",MO,6), TNSZ("lidt",MO,6), +/* [0] */ TNSZ("sgdt",MO,6), TNSZ("sidt",MONITOR_MWAIT,6), TNSZ("lgdt",XGETBV_XSETBV,6), TNSZ("lidt",MO,6), /* [4] */ TNSZ("smsw",M,2), INVALID, TNSZ("lmsw",M,2), TNS("invlpg",SWAPGS), }; @@ -472,7 +515,7 @@ const instable_t dis_op0F01[8] = { const instable_t dis_op0F18[8] = { /* [0] */ TNS("prefetchnta",PREF),TNS("prefetcht0",PREF), TNS("prefetcht1",PREF), TNS("prefetcht2",PREF), -/* [4] */ INVALID, INVALID, INVALID, INVALID, +/* [4] */ TNSZ("xsave",M,512), TNS("lfence",XMMFENCE), TNS("mfence",XMMFENCE), TNS("sfence",XMMSFNC), }; /* @@ -643,6 +686,88 @@ const instable_t dis_opSIMDdata16[256] = { /* [FC] */ TNSZ("paddb",XMM,16), TNSZ("paddw",XMM,16), TNSZ("paddd",XMM,16), INVALID, }; +const instable_t dis_opAVX660F[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* 
[04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("vmovupd",VEX_MX,16), TNSZ("vmovupd",VEX_RX,16), TNSZ("vmovlpd",VEX_RMrX,8), TNSZ("vmovlpd",VEX_RM,8), +/* [14] */ TNSZ("vunpcklpd",VEX_RMrX,16),TNSZ("vunpckhpd",VEX_RMrX,16),TNSZ("vmovhpd",VEX_RMrX,8), TNSZ("vmovhpd",VEX_RM,8), +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ TNSZ("vmovapd",VEX_MX,16), TNSZ("vmovapd",VEX_RX,16), INVALID, TNSZ("vmovntpd",VEX_RM,16), +/* [2C] */ INVALID, INVALID, TNSZ("vucomisd",VEX_MX,8),TNSZ("vcomisd",VEX_MX,8), + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ TNS("vmovmskpd",VEX_MR), TNSZ("vsqrtpd",VEX_MX,16), INVALID, INVALID, +/* [54] */ TNSZ("vandpd",VEX_RMrX,16), TNSZ("vandnpd",VEX_RMrX,16), TNSZ("vorpd",VEX_RMrX,16), TNSZ("vxorpd",VEX_RMrX,16), +/* [58] */ TNSZ("vaddpd",VEX_RMrX,16), TNSZ("vmulpd",VEX_RMrX,16), TNSZ("vcvtpd2ps",VEX_MX,16),TNSZ("vcvtps2dq",VEX_MX,16), +/* [5C] */ TNSZ("vsubpd",VEX_RMrX,16), TNSZ("vminpd",VEX_RMrX,16), TNSZ("vdivpd",VEX_RMrX,16), TNSZ("vmaxpd",VEX_RMrX,16), + +/* [60] */ TNSZ("vpunpcklbw",VEX_RMrX,16),TNSZ("vpunpcklwd",VEX_RMrX,16),TNSZ("vpunpckldq",VEX_RMrX,16),TNSZ("vpacksswb",VEX_RMrX,16), +/* [64] */ TNSZ("vpcmpgtb",VEX_RMrX,16), TNSZ("vpcmpgtw",VEX_RMrX,16), TNSZ("vpcmpgtd",VEX_RMrX,16), TNSZ("vpackuswb",VEX_RMrX,16), +/* [68] */ 
TNSZ("vpunpckhbw",VEX_RMrX,16),TNSZ("vpunpckhwd",VEX_RMrX,16),TNSZ("vpunpckhdq",VEX_RMrX,16),TNSZ("vpackssdw",VEX_RMrX,16), +/* [6C] */ TNSZ("vpunpcklqdq",VEX_RMrX,16),TNSZ("vpunpckhqdq",VEX_RMrX,16),TNSZ("vmovd",VEX_MX,4),TNSZ("vmovdqa",VEX_MX,16), + +/* [70] */ TNSZ("vpshufd",VEX_MXI,16), TNSZ("vgrp71",VEX_XXI,16), TNSZ("vgrp72",VEX_XXI,16), TNSZ("vgrp73",VEX_XXI,16), +/* [74] */ TNSZ("vpcmpeqb",VEX_RMrX,16), TNSZ("vpcmpeqw",VEX_RMrX,16), TNSZ("vpcmpeqd",VEX_RMrX,16), INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ TNSZ("vhaddpd",VEX_RMrX,16), TNSZ("vhsubpd",VEX_RMrX,16), TNSZ("vmovd",VEX_RR,4), TNSZ("vmovdqa",VEX_RX,16), + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("vcmppd",VEX_RMRX,16), INVALID, +/* [C4] */ TNSZ("vpinsrw",VEX_RMRX,2),TNS("vpextrw",VEX_MR), TNSZ("vshufpd",VEX_RMRX,16), INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ TNSZ("vaddsubpd",VEX_RMrX,16),TNSZ("vpsrlw",VEX_RMrX,16), TNSZ("vpsrld",VEX_RMrX,16), TNSZ("vpsrlq",VEX_RMrX,16), +/* [D4] */ TNSZ("vpaddq",VEX_RMrX,16), TNSZ("vpmullw",VEX_RMrX,16), TNSZ("vmovq",VEX_RX,8), TNS("vpmovmskb",VEX_MR), +/* [D8] */ TNSZ("vpsubusb",VEX_RMrX,16), 
TNSZ("vpsubusw",VEX_RMrX,16), TNSZ("vpminub",VEX_RMrX,16), TNSZ("vpand",VEX_RMrX,16), +/* [DC] */ TNSZ("vpaddusb",VEX_RMrX,16), TNSZ("vpaddusw",VEX_RMrX,16), TNSZ("vpmaxub",VEX_RMrX,16), TNSZ("vpandn",VEX_RMrX,16), + +/* [E0] */ TNSZ("vpavgb",VEX_RMrX,16), TNSZ("vpsraw",VEX_RMrX,16), TNSZ("vpsrad",VEX_RMrX,16), TNSZ("vpavgw",VEX_RMrX,16), +/* [E4] */ TNSZ("vpmulhuw",VEX_RMrX,16), TNSZ("vpmulhw",VEX_RMrX,16), TNSZ("vcvttpd2dq",VEX_MX,16),TNSZ("vmovntdq",VEX_RM,16), +/* [E8] */ TNSZ("vpsubsb",VEX_RMrX,16), TNSZ("vpsubsw",VEX_RMrX,16), TNSZ("vpminsw",VEX_RMrX,16), TNSZ("vpor",VEX_RMrX,16), +/* [EC] */ TNSZ("vpaddsb",VEX_RMrX,16), TNSZ("vpaddsw",VEX_RMrX,16), TNSZ("vpmaxsw",VEX_RMrX,16), TNSZ("vpxor",VEX_RMrX,16), + +/* [F0] */ INVALID, TNSZ("vpsllw",VEX_RMrX,16), TNSZ("vpslld",VEX_RMrX,16), TNSZ("vpsllq",VEX_RMrX,16), +/* [F4] */ TNSZ("vpmuludq",VEX_RMrX,16), TNSZ("vpmaddwd",VEX_RMrX,16), TNSZ("vpsadbw",VEX_RMrX,16), TNS("vmaskmovdqu",VEX_MX), +/* [F8] */ TNSZ("vpsubb",VEX_RMrX,16), TNSZ("vpsubw",VEX_RMrX,16), TNSZ("vpsubd",VEX_RMrX,16), TNSZ("vpsubq",VEX_RMrX,16), +/* [FC] */ TNSZ("vpaddb",VEX_RMrX,16), TNSZ("vpaddw",VEX_RMrX,16), TNSZ("vpaddd",VEX_RMrX,16), INVALID, +}; + /* * Decode table for SIMD instructions with the repnz (0xf2) prefix. 
*/ @@ -728,6 +853,88 @@ const instable_t dis_opSIMDrepnz[256] = { /* [FC] */ INVALID, INVALID, INVALID, INVALID, }; +const instable_t dis_opAVXF20F[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("vmovsd",VEX_RMrX,8), TNSZ("vmovsd",VEX_RRX,8), TNSZ("vmovddup",VEX_MX,8), INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, TNSZ("vcvtsi2sd",VEX_RMrX,4),INVALID, +/* [2C] */ TNSZ("vcvttsd2si",VEX_MR,8),TNSZ("vcvtsd2si",VEX_MR,8),INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, TNSZ("vsqrtsd",VEX_RMrX,8), INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("vaddsd",VEX_RMrX,8), TNSZ("vmulsd",VEX_RMrX,8), TNSZ("vcvtsd2ss",VEX_RMrX,8), INVALID, +/* [5C] */ TNSZ("vsubsd",VEX_RMrX,8), TNSZ("vminsd",VEX_RMrX,8), TNSZ("vdivsd",VEX_RMrX,8), TNSZ("vmaxsd",VEX_RMrX,8), + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ TNSZ("vpshuflw",VEX_MXI,16),INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ TNSZ("vhaddps",VEX_RMrX,8), 
TNSZ("vhsubps",VEX_RMrX,8), INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("vcmpsd",VEX_RMRX,8), INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ TNSZ("vaddsubps",VEX_RMrX,8), INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, TNSZ("vcvtpd2dq",VEX_MX,16),INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ TNSZ("vlddqu",VEX_MX,16), INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + /* * Decode table for SIMD instructions with the repz (0xf3) prefix.
*/ @@ -813,6 +1020,103 @@ const instable_t dis_opSIMDrepz[256] = { /* [FC] */ INVALID, INVALID, INVALID, INVALID, }; +const instable_t dis_opAVXF30F[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("vmovss",VEX_RMrX,4), TNSZ("vmovss",VEX_RRX,4), TNSZ("vmovsldup",VEX_MX,4), INVALID, +/* [14] */ INVALID, INVALID, TNSZ("vmovshdup",VEX_MX,4), INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, TNSZ("vcvtsi2ss",VEX_RMrX,4),INVALID, +/* [2C] */ TNSZ("vcvttss2si",VEX_MR,4),TNSZ("vcvtss2si",VEX_MR,4),INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, TNSZ("vsqrtss",VEX_RMrX,4), TNSZ("vrsqrtss",VEX_RMrX,4), TNSZ("vrcpss",VEX_RMrX,4), +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("vaddss",VEX_RMrX,4), TNSZ("vmulss",VEX_RMrX,4), TNSZ("vcvtss2sd",VEX_RMrX,4), TNSZ("vcvttps2dq",VEX_MX,16), +/* [5C] */ TNSZ("vsubss",VEX_RMrX,4), TNSZ("vminss",VEX_RMrX,4), TNSZ("vdivss",VEX_RMrX,4), TNSZ("vmaxss",VEX_RMrX,4), + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, TNSZ("vmovdqu",VEX_MX,16), + +/* [70] */ TNSZ("vpshufhw",VEX_MXI,16),INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ 
INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, TNSZ("vmovq",VEX_MX,8), TNSZ("vmovdqu",VEX_RX,16), + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("vcmpss",VEX_RMRX,4), INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, TNSZ("vcvtdq2pd",VEX_MX,8), INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * The following two tables are used to encode crc32 and movbe + * since they share the same opcodes.
+ */ +const instable_t dis_op0F38F0[2] = { +/* [00] */ TNS("crc32b",CRC32), + TS("movbe",MOVBE), +}; + +const instable_t dis_op0F38F1[2] = { +/* [00] */ TS("crc32",CRC32), + TS("movbe",MOVBE), +}; + + const instable_t dis_op0F38[256] = { /* [00] */ TNSZ("pshufb",XMM_66o,16),TNSZ("phaddw",XMM_66o,16),TNSZ("phaddd",XMM_66o,16),TNSZ("phaddsw",XMM_66o,16), /* [04] */ TNSZ("pmaddubsw",XMM_66o,16),TNSZ("phsubw",XMM_66o,16), TNSZ("phsubd",XMM_66o,16),TNSZ("phsubsw",XMM_66o,16), @@ -895,6 +1199,87 @@ const instable_t dis_op0F38[256] = { /* [FC] */ INVALID, INVALID, INVALID, INVALID, }; +const instable_t dis_opAVX660F38[256] = { +/* [00] */ TNSZ("vpshufb",VEX_RMrX,16),TNSZ("vphaddw",VEX_RMrX,16),TNSZ("vphaddd",VEX_RMrX,16),TNSZ("vphaddsw",VEX_RMrX,16), +/* [04] */ TNSZ("vpmaddubsw",VEX_RMrX,16),TNSZ("vphsubw",VEX_RMrX,16), TNSZ("vphsubd",VEX_RMrX,16),TNSZ("vphsubsw",VEX_RMrX,16), +/* [08] */ TNSZ("vpsignb",VEX_RMrX,16),TNSZ("vpsignw",VEX_RMrX,16),TNSZ("vpsignd",VEX_RMrX,16),TNSZ("vpmulhrsw",VEX_RMrX,16), +/* [0C] */ TNSZ("vpermilps",VEX_RMrX,8),TNSZ("vpermilpd",VEX_RMrX,16),TNSZ("vtestps",VEX_RRI,8), TNSZ("vtestpd",VEX_RRI,16), + +/* [10] */ INVALID, INVALID, INVALID, INVALID, +/* [14] */ INVALID, INVALID, INVALID, TNSZ("vptest",VEX_RRI,16), +/* [18] */ TNSZ("vbroadcastss",VEX_MX,4),TNSZ("vbroadcastsd",VEX_MX,8),TNSZ("vbroadcastf128",VEX_MX,16),INVALID, +/* [1C] */ TNSZ("vpabsb",VEX_MX,16),TNSZ("vpabsw",VEX_MX,16),TNSZ("vpabsd",VEX_MX,16),INVALID, + +/* [20] */ TNSZ("vpmovsxbw",VEX_MX,16),TNSZ("vpmovsxbd",VEX_MX,16),TNSZ("vpmovsxbq",VEX_MX,16),TNSZ("vpmovsxwd",VEX_MX,16), +/* [24] */ TNSZ("vpmovsxwq",VEX_MX,16),TNSZ("vpmovsxdq",VEX_MX,16),INVALID, INVALID, +/* [28] */ TNSZ("vpmuldq",VEX_RMrX,16),TNSZ("vpcmpeqq",VEX_RMrX,16),TNSZ("vmovntdqa",VEX_MX,16),TNSZ("vpackusdw",VEX_RMrX,16), +/* [2C] */ TNSZ("vmaskmovps",VEX_RMrX,8),TNSZ("vmaskmovpd",VEX_RMrX,16),TNSZ("vmaskmovps",VEX_RRM,8),TNSZ("vmaskmovpd",VEX_RRM,16), + +/* [30] */ 
TNSZ("vpmovzxbw",VEX_MX,16),TNSZ("vpmovzxbd",VEX_MX,16),TNSZ("vpmovzxbq",VEX_MX,16),TNSZ("vpmovzxwd",VEX_MX,16), +/* [34] */ TNSZ("vpmovzxwq",VEX_MX,16),TNSZ("vpmovzxdq",VEX_MX,16),TNSZ("vpermd",VEX_RMrX,16),TNSZ("vpcmpgtq",VEX_RMrX,16), +/* [38] */ TNSZ("vpminsb",VEX_RMrX,16),TNSZ("vpminsd",VEX_RMrX,16),TNSZ("vpminuw",VEX_RMrX,16),TNSZ("vpminud",VEX_RMrX,16), +/* [3C] */ TNSZ("vpmaxsb",VEX_RMrX,16),TNSZ("vpmaxsd",VEX_RMrX,16),TNSZ("vpmaxuw",VEX_RMrX,16),TNSZ("vpmaxud",VEX_RMrX,16), + +/* [40] */ TNSZ("vpmulld",VEX_RMrX,16),TNSZ("vphminposuw",VEX_MX,16),INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("vpbroadcastd",VEX_MX,16),TNSZ("vpbroadcastq",VEX_MX,16),TNSZ("vbroadcasti128",VEX_MX,16),INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ TNSZ("vpbroadcastb",VEX_MX,16),TNSZ("vpbroadcastw",VEX_MX,16),INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + 
+/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, TNSZ("vaesimc",VEX_MX,16), +/* [DC] */ TNSZ("vaesenc",VEX_RMrX,16),TNSZ("vaesenclast",VEX_RMrX,16),TNSZ("vaesdec",VEX_RMrX,16),TNSZ("vaesdeclast",VEX_RMrX,16), + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, +/* [F0] */ IND(dis_op0F38F0), IND(dis_op0F38F1), INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + const instable_t dis_op0F3A[256] = { /* [00] */ INVALID, INVALID, INVALID, INVALID, /* [04] */ INVALID, INVALID, INVALID, INVALID, @@ -977,6 +1362,88 @@ const instable_t dis_op0F3A[256] = { /* [FC] */ INVALID, INVALID, INVALID, INVALID, }; +const instable_t dis_opAVX660F3A[256] = { +/* [00] */ TNSZ("vpermq",VEX_MXI,16),TNSZ("vpermpd",VEX_MXI,16),TNSZ("vpblendd",VEX_RMRX,16),INVALID, +/* [04] */ TNSZ("vpermilps",VEX_MXI,8),TNSZ("vpermilpd",VEX_MXI,16),TNSZ("vperm2f128",VEX_RMRX,16),INVALID, +/* [08] */ TNSZ("vroundps",VEX_MXI,16),TNSZ("vroundpd",VEX_MXI,16),TNSZ("vroundss",VEX_RMRX,16),TNSZ("vroundsd",VEX_RMRX,16), +/* [0C] */ TNSZ("vblendps",VEX_RMRX,16),TNSZ("vblendpd",VEX_RMRX,16),TNSZ("vpblendw",VEX_RMRX,16),TNSZ("vpalignr",VEX_RMRX,16), + +/* [10] */ INVALID, INVALID, INVALID, INVALID, +/* [14] */ 
TNSZ("vpextrb",VEX_RRi,8),TNSZ("vpextrw",VEX_RRi,16),TNSZ("vpextrd",VEX_RRi,16),TNSZ("vextractps",VEX_RM,16), +/* [18] */ TNSZ("vinsertf128",VEX_RMRX,16),TNSZ("vextractf128",VEX_RX,16),INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ TNSZ("vpinsrb",VEX_RMRX,8),TNSZ("vinsertps",VEX_RMRX,16),TNSZ("vpinsrd",VEX_RMRX,16),INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, INVALID, INVALID, +/* [2C] */ INVALID, INVALID, INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ TNSZ("vinserti128",VEX_RMRX,16),TNSZ("vextracti128",VEX_RIM,16),INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ TNSZ("vdpps",VEX_RMRX,16),TNSZ("vdppd",VEX_RMRX,16),TNSZ("vmpsadbw",VEX_RMRX,16),INVALID, +/* [44] */ TNSZ("vpclmulqdq",VEX_RMRX,16),INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, TNSZ("vblendvps",VEX_RMRX,8), TNSZ("vblendvpd",VEX_RMRX,16), +/* [4C] */ TNSZ("vpblendvb",VEX_RMRX,16),INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ TNSZ("vpcmpestrm",VEX_MXI,16),TNSZ("vpcmpestri",VEX_MXI,16),TNSZ("vpcmpistrm",VEX_MXI,16),TNSZ("vpcmpistri",VEX_MXI,16), +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ 
INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, TNSZ("vaeskeygenassist",VEX_MXI,16), + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + /* * Decode table for 0x0F opcodes */ @@ -1065,6 +1532,88 @@ const instable_t dis_op0F[16][16] = { /* [FC] */ TNSZ("paddb",MMO,8), TNSZ("paddw",MMO,8), TNSZ("paddd",MMO,8), INVALID, } }; +const instable_t dis_opAVX0F[16][16] = { +{ +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [10] */ TNSZ("vmovups",VEX_MX,16), TNSZ("vmovups",VEX_RM,16),TNSZ("vmovlps",VEX_RMrX,8), TNSZ("vmovlps",VEX_RM,8), +/* [14] */ 
TNSZ("vunpcklps",VEX_RMrX,16),TNSZ("vunpckhps",VEX_RMrX,16),TNSZ("vmovhps",VEX_RMrX,8),TNSZ("vmovhps",VEX_RM,8), +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ TNSZ("vmovaps",VEX_MX,16), TNSZ("vmovaps",VEX_RX,16),INVALID, TNSZ("vmovntps",VEX_RM,16), +/* [2C] */ INVALID, INVALID, TNSZ("vucomiss",VEX_MX,4),TNSZ("vcomiss",VEX_MX,4), +}, { +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [50] */ TNS("vmovmskps",VEX_MR), TNSZ("vsqrtps",VEX_MX,16), TNSZ("vrsqrtps",VEX_MX,16),TNSZ("vrcpps",VEX_MX,16), +/* [54] */ TNSZ("vandps",VEX_RMrX,16), TNSZ("vandnps",VEX_RMrX,16), TNSZ("vorps",VEX_RMrX,16), TNSZ("vxorps",VEX_RMrX,16), +/* [58] */ TNSZ("vaddps",VEX_RMrX,16), TNSZ("vmulps",VEX_RMrX,16), TNSZ("vcvtps2pd",VEX_MX,8),TNSZ("vcvtdq2ps",VEX_MX,16), +/* [5C] */ TNSZ("vsubps",VEX_RMrX,16), TNSZ("vminps",VEX_RMrX,16), TNSZ("vdivps",VEX_RMrX,16), TNSZ("vmaxps",VEX_RMrX,16), +}, { +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, TNS("vzeroupper", VEX_NONE), +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, 
+}, { +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, TNSZ("vldmxcsr",VEX_MO,2), INVALID, +}, { +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [C0] */ INVALID, INVALID, TNSZ("vcmpps",VEX_RMRX,16),INVALID, +/* [C4] */ INVALID, INVALID, TNSZ("vshufps",VEX_RMRX,16),INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +} }; /* * Decode table for 0x80 opcodes @@ -1416,6 +1965,29 @@ const instable_t dis_distable[16][16] = { #define REX_X 0x02 /* high order bit extension of SIB index field */ #define REX_B 0x01 /* extends ModRM r_m, SIB base, or opcode reg */ +/* + * These are the individual fields of a VEX prefix. 
+ */ +#define VEX_R 0x08 /* REX.R in 1's complement form */ +#define VEX_X 0x04 /* REX.X in 1's complement form */ +#define VEX_B 0x02 /* REX.B in 1's complement form */ +/* Vector Length, 0: scalar or 128-bit vector, 1: 256-bit vector */ +#define VEX_L 0x04 +#define VEX_W 0x08 /* opcode specific, use like REX.W */ +#define VEX_m 0x1F /* VEX m-mmmm field */ +#define VEX_v 0x78 /* VEX register specifier */ +#define VEX_p 0x03 /* VEX pp field, opcode extension */ + +/* VEX m-mmmm field, only used by three bytes prefix */ +#define VEX_m_0F 0x01 /* implied 0F leading opcode byte */ +#define VEX_m_0F38 0x02 /* implied 0F 38 leading opcode byte */ +#define VEX_m_0F3A 0x03 /* implied 0F 3A leading opcode byte */ + +/* VEX pp field, providing equivalent functionality of a SIMD prefix */ +#define VEX_p_66 0x01 +#define VEX_p_F3 0x02 +#define VEX_p_F2 0x03 + /* * Even in 64 bit mode, usually only 4 byte immediate operands are supported. */ @@ -1442,6 +2014,7 @@ static int isize64[] = {1, 2, 4, 8}; #define DEBUG_OPND 6 /* "value" used to indicate a debug reg */ #define TEST_OPND 7 /* "value" used to indicate a test reg */ #define WORD_OPND 8 /* w-bit value indicating word size reg */ +#define YMM_OPND 9 /* "value" used to indicate a ymm reg */ /* * Get the next byte and separate the op code into the high and low nibbles. @@ -1521,11 +2094,30 @@ dtrace_rex_adjust(uint_t rex_prefix, uint_t mode, uint_t *reg, uint_t *r_m) } else { if (reg != NULL && (REX_R & rex_prefix) != 0) *reg += 8; - if (r_m != NULL && (REX_B & rex_prefix) != 0) + if (r_m != NULL && (REX_B & rex_prefix) != 0) + *r_m += 8; + } +} + +/* + * Adjust register selection based on any VEX prefix bits present. 
+ * Notes: VEX.R, VEX.X and VEX.B use the inverted form compared with REX prefix + */ +/*ARGSUSED*/ +static void +dtrace_vex_adjust(uint_t vex_byte1, uint_t mode, uint_t *reg, uint_t *r_m) +{ +#pragma unused (mode) + if (reg != NULL && r_m == NULL) { + if (!(vex_byte1 & VEX_B)) + *reg += 8; + } else { + if (reg != NULL && ((VEX_R & vex_byte1) == 0)) + *reg += 8; + if (r_m != NULL && ((VEX_B & vex_byte1) == 0)) *r_m += 8; } } - /* * Get an immediate operand of the given size, with sign extension. */ @@ -1553,6 +2145,7 @@ dtrace_imm_opnd(dis86_t *x, int wbit, int size, int opindex) break; case MM_OPND: case XMM_OPND: + case YMM_OPND: case SEG_OPND: case CONTROL_OPND: case DEBUG_OPND: @@ -1667,6 +2260,9 @@ dtrace_get_operand(dis86_t *x, uint_t mode, uint_t r_m, int wbit, int opindex) case XMM_OPND: (void) strlcat(opnd, dis_XMMREG[r_m], OPLEN); break; + case YMM_OPND: + (void) strlcat(opnd, dis_YMMREG[r_m], OPLEN); + break; case SEG_OPND: (void) strlcat(opnd, dis_SEGREG[r_m], OPLEN); break; @@ -1940,13 +2536,43 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) uint_t lock_prefix = 0; uint_t rep_prefix = 0; uint_t rex_prefix = 0; /* amd64 register extension prefix */ + + /* + * Intel VEX instruction encoding prefix and fields + */ + + /* 0xC4 means 3 bytes prefix, 0xC5 means 2 bytes prefix */ + uint_t vex_prefix = 0; + + /* + * VEX prefix byte 1, includes vex.r, vex.x and vex.b + * (for 3 bytes prefix) + */ + uint_t vex_byte1 = 0; + + /* + * For 32-bit mode, it should prefetch the next byte to + * distinguish between AVX and les/lds + */ + uint_t vex_prefetch = 0; + + uint_t vex_m = 0; + uint_t vex_v = 0; + uint_t vex_p = 0; + uint_t vex_R = 1; + uint_t vex_X = 1; + uint_t vex_B = 1; + uint_t vex_W = 0; + uint_t vex_L; + size_t off; instable_t dp_mmx; x->d86_len = 0; x->d86_rmindex = -1; - x->d86_error = 0; + x->d86_rex_prefix = 0; + x->d86_got_modrm = 0; #ifdef DIS_TEXT x->d86_numopnds = 0; x->d86_seg_prefix = NULL; @@ -2040,7 +2666,150 @@ dtrace_disx86(dis86_t *x, uint_t 
cpu_mode) if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) goto error; dp = (instable_t *)&dis_distable[opcode1][opcode2]; + } else if (opcode1 == 0xC && + (opcode2 == 0x4 || opcode2 == 0x5)) { + /* AVX instructions */ + vex_prefix = (opcode1 << 4) | opcode2; + x->d86_rex_prefix = 0x40; + } + } else if (opcode1 == 0xC && (opcode2 == 0x4 || opcode2 == 0x5)) { + /* LDS, LES or AVX */ + dtrace_get_modrm(x, &mode, &reg, &r_m); + vex_prefetch = 1; + + if (mode == REG_ONLY) { + /* AVX */ + vex_prefix = (opcode1 << 4) | opcode2; + x->d86_rex_prefix = 0x40; + opcode3 = (((mode << 3) | reg)>>1) & 0x0F; + opcode4 = ((reg << 3) | r_m) & 0x0F; + } + } + + if (vex_prefix == VEX_2bytes) { + if (!vex_prefetch) { + if (dtrace_get_opcode(x, &opcode3, &opcode4) != 0) + goto error; + } + vex_R = ((opcode3 & VEX_R) & 0x0F) >> 3; + vex_L = ((opcode4 & VEX_L) & 0x0F) >> 2; + vex_v = (((opcode3 << 4) | opcode4) & VEX_v) >> 3; + vex_p = opcode4 & VEX_p; + /* + * The vex.x and vex.b bits are not defined in two bytes + * mode vex prefix, their default values are 1 + */ + vex_byte1 = (opcode3 & VEX_R) | VEX_X | VEX_B; + + if (vex_R == 0) + x->d86_rex_prefix |= REX_R; + + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + + switch (vex_p) { + case VEX_p_66: + dp = (instable_t *) + &dis_opAVX660F[(opcode1 << 4) | opcode2]; + break; + case VEX_p_F3: + dp = (instable_t *) + &dis_opAVXF30F[(opcode1 << 4) | opcode2]; + break; + case VEX_p_F2: + dp = (instable_t *) + &dis_opAVXF20F [(opcode1 << 4) | opcode2]; + break; + default: + dp = (instable_t *) + &dis_opAVX0F[opcode1][opcode2]; + + } + + } else if (vex_prefix == VEX_3bytes) { + if (!vex_prefetch) { + if (dtrace_get_opcode(x, &opcode3, &opcode4) != 0) + goto error; } + vex_R = (opcode3 & VEX_R) >> 3; + vex_X = (opcode3 & VEX_X) >> 2; + vex_B = (opcode3 & VEX_B) >> 1; + vex_m = (((opcode3 << 4) | opcode4) & VEX_m); + vex_byte1 = opcode3 & (VEX_R | VEX_X | VEX_B); + + if (vex_R == 0) + x->d86_rex_prefix |= REX_R; + if (vex_X == 0)
+ x->d86_rex_prefix |= REX_X; + if (vex_B == 0) + x->d86_rex_prefix |= REX_B; + + if (dtrace_get_opcode(x, &opcode5, &opcode6) != 0) + goto error; + vex_W = (opcode5 & VEX_W) >> 3; + vex_L = (opcode6 & VEX_L) >> 2; + vex_v = (((opcode5 << 4) | opcode6) & VEX_v) >> 3; + vex_p = opcode6 & VEX_p; + + if (vex_W) + x->d86_rex_prefix |= REX_W; + + /* Only these three vex_m values valid; others are reserved */ + if ((vex_m != VEX_m_0F) && (vex_m != VEX_m_0F38) && + (vex_m != VEX_m_0F3A)) + goto error; + + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + + switch (vex_p) { + case VEX_p_66: + if (vex_m == VEX_m_0F) { + dp = (instable_t *) + &dis_opAVX660F + [(opcode1 << 4) | opcode2]; + } else if (vex_m == VEX_m_0F38) { + dp = (instable_t *) + &dis_opAVX660F38 + [(opcode1 << 4) | opcode2]; + } else if (vex_m == VEX_m_0F3A) { + dp = (instable_t *) + &dis_opAVX660F3A + [(opcode1 << 4) | opcode2]; + } else { + goto error; + } + break; + case VEX_p_F3: + if (vex_m == VEX_m_0F) { + dp = (instable_t *) + &dis_opAVXF30F + [(opcode1 << 4) | opcode2]; + } else { + goto error; + } + break; + case VEX_p_F2: + if (vex_m == VEX_m_0F) { + dp = (instable_t *) + &dis_opAVXF20F + [(opcode1 << 4) | opcode2]; + } else { + goto error; + } + break; + default: + dp = (instable_t *) + &dis_opAVX0F[opcode1][opcode2]; + + } + } + + if (vex_prefix) { + if (vex_L) + wbit = YMM_OPND; + else + wbit = XMM_OPND; } /* @@ -2049,7 +2818,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) * ignored. 
*/ if (cpu_mode == SIZE64) { - if (rex_prefix & REX_W) + if ((rex_prefix & REX_W) || vex_W) opnd_size = SIZE64; else if (opnd_size_prefix) opnd_size = SIZE16; @@ -2210,8 +2979,11 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) * In amd64 bit mode, ARPL opcode is changed to MOVSXD * (sign extend 32bit to 64 bit) */ - if (cpu_mode == SIZE64 && opcode1 == 0x6 && opcode2 == 0x3) + if ((vex_prefix == 0) && cpu_mode == SIZE64 && + opcode1 == 0x6 && opcode2 == 0x3) + { dp = (instable_t *)&dis_opMOVSLD; + } /* * at this point we should have a correct (or invalid) opcode @@ -2389,7 +3161,11 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) * Process operands based on the addressing modes. */ x->d86_mode = cpu_mode; - x->d86_rex_prefix = rex_prefix; + /* + * In vex mode the rex_prefix has no meaning + */ + if (!vex_prefix) + x->d86_rex_prefix = rex_prefix; x->d86_opnd_size = opnd_size; x->d86_addr_size = addr_size; vbit = 0; /* initialize for mem/reg -> reg */ @@ -2688,7 +3464,26 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) } } /*FALLTHROUGH*/ + case XGETBV_XSETBV: + if (mode == 3) { + if (r_m == 0) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "xgetbv", OPLEN); +#endif + NOMEM; + break; + } else if (r_m == 1) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "xsetbv", OPLEN); +#endif + NOMEM; + break; + } else { + goto error; + } + } + /*FALLTHROUGH*/ case MO: /* Similar to M, but only memory (no direct registers) */ wbit = LONG_OPND; @@ -2781,6 +3576,9 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) /* memory or register operand to register */ case MR: + if (vex_prefetch) { + x->d86_got_modrm = 1; + } wbit = LONG_OPND; STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); break; @@ -3284,7 +4082,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) #else if (mode != REG_ONLY) { dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); - dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0); + dtrace_get_operand(x, mode, r_m, LONG_OPND, 0); NOMEM; } #endif @@ -3303,15 +4101,27 @@ dtrace_disx86(dis86_t
*x, uint_t cpu_mode) case XMMFENCE: /* - * Only the following exact byte sequences are allowed: - * - * 0f ae e8 lfence - * 0f ae f0 mfence + * XRSTOR and LFENCE share the same opcode but differ in mode */ - if ((uint8_t)x->d86_bytes[x->d86_len - 1] != 0xe8 && - (uint8_t)x->d86_bytes[x->d86_len - 1] != 0xf0) - goto error; + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode == REG_ONLY) { + /* + * Only the following exact byte sequences are allowed: + * + * 0f ae e8 lfence + * 0f ae f0 mfence + */ + if ((uint8_t)x->d86_bytes[x->d86_len - 1] != 0xe8 && + (uint8_t)x->d86_bytes[x->d86_len - 1] != 0xf0) + goto error; + } else { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "xrstor", OPLEN); +#endif + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0); + } break; @@ -3339,6 +4149,371 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) NOMEM; break; + /* AVX instructions */ + case VEX_MO: + /* op(ModR/M.r/m) */ + x->d86_numopnds = 1; + dtrace_get_modrm(x, &mode, ®, &r_m); +#ifdef DIS_TEXT + if ((dp == &dis_opAVX0F[0xA][0xE]) && (reg == 3)) + (void) strncpy(x->d86_mnem, "vstmxcsr", OPLEN); +#endif + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + case VEX_RMrX: + /* ModR/M.reg := op(VEX.vvvv, ModR/M.r/m) */ + x->d86_numopnds = 3; + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + if (mode != REG_ONLY) { + if ((dp == &dis_opAVXF20F[0x10]) || + (dp == &dis_opAVXF30F[0x10])) { + /* vmovsd , */ + /* or vmovss , */ + x->d86_numopnds = 2; + goto L_VEX_MX; + } + } + + dtrace_get_operand(x, REG_ONLY, reg, wbit, 2); + /* + * VEX prefix uses the 1's complement form to encode the + * XMM/YMM regs + */ + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1); + + if ((dp == &dis_opAVXF20F[0x2A]) || + (dp == &dis_opAVXF30F[0x2A])) { + /* + * vcvtsi2si , , or vcvtsi2ss , + * , + */ + wbit = LONG_OPND; + } +#ifdef DIS_TEXT + else if ((mode == REG_ONLY) && + 
(dp == &dis_opAVX0F[0x1][0x6])) { /* vmovlhps */ + (void) strncpy(x->d86_mnem, "vmovlhps", OPLEN); + } else if ((mode == REG_ONLY) && + (dp == &dis_opAVX0F[0x1][0x2])) { /* vmovhlps */ + (void) strncpy(x->d86_mnem, "vmovhlps", OPLEN); + } +#endif + dtrace_get_operand(x, mode, r_m, wbit, 0); + + break; + + case VEX_RRX: + /* ModR/M.rm := op(VEX.vvvv, ModR/M.reg) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + if (mode != REG_ONLY) { + if ((dp == &dis_opAVXF20F[0x11]) || + (dp == &dis_opAVXF30F[0x11])) { + /* vmovsd , */ + /* or vmovss , */ + x->d86_numopnds = 2; + goto L_VEX_RM; + } + } + + dtrace_get_operand(x, mode, r_m, wbit, 2); + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 0); + break; + + case VEX_RMRX: + /* ModR/M.reg := op(VEX.vvvv, ModR/M.r_m, imm8[7:4]) */ + x->d86_numopnds = 4; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 3); + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 2); + if (dp == &dis_opAVX660F3A[0x18]) { + /* vinsertf128 , , , */ + dtrace_get_operand(x, mode, r_m, XMM_OPND, 1); + } else if ((dp == &dis_opAVX660F3A[0x20]) || + (dp == & dis_opAVX660F[0xC4])) { + /* vpinsrb , , , */ + /* or vpinsrw , , , */ + dtrace_get_operand(x, mode, r_m, LONG_OPND, 1); + } else if (dp == &dis_opAVX660F3A[0x22]) { + /* vpinsrd/q , , , */ +#ifdef DIS_TEXT + if (vex_W) + x->d86_mnem[6] = 'q'; +#endif + dtrace_get_operand(x, mode, r_m, LONG_OPND, 1); + } else { + dtrace_get_operand(x, mode, r_m, wbit, 1); + } + + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + + /* vblendvpd, vblendvps, vblendvb use the imm encode the regs */ + if ((dp == &dis_opAVX660F3A[0x4A]) || + (dp == &dis_opAVX660F3A[0x4B]) || + (dp == &dis_opAVX660F3A[0x4C])) { +#ifdef DIS_TEXT + int regnum = (x->d86_opnd[0].d86_value & 0xF0) >> 4; +#endif + 
x->d86_opnd[0].d86_mode = MODE_NONE; +#ifdef DIS_TEXT + if (vex_L) + (void) strncpy(x->d86_opnd[0].d86_opnd, + dis_YMMREG[regnum], OPLEN); + else + (void) strncpy(x->d86_opnd[0].d86_opnd, + dis_XMMREG[regnum], OPLEN); +#endif + } + break; + + case VEX_MX: + /* ModR/M.reg := op(ModR/M.rm) */ + x->d86_numopnds = 2; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); +L_VEX_MX: + + if ((dp == &dis_opAVXF20F[0xE6]) || + (dp == &dis_opAVX660F[0x5A]) || + (dp == &dis_opAVX660F[0xE6])) { + /* vcvtpd2dq , */ + /* or vcvtpd2ps , */ + /* or vcvttpd2dq , */ + dtrace_get_operand(x, REG_ONLY, reg, XMM_OPND, 1); + dtrace_get_operand(x, mode, r_m, wbit, 0); + } else if ((dp == &dis_opAVXF30F[0xE6]) || + (dp == &dis_opAVX0F[0x5][0xA]) || + (dp == &dis_opAVX660F38[0x58]) || + (dp == &dis_opAVX660F38[0x59]) || + (dp == &dis_opAVX660F38[0x78]) || + (dp == &dis_opAVX660F38[0x79])) { + /* vcvtdq2pd , */ + /* or vcvtps2pd , */ + /* or vbroadcasts* , */ + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + dtrace_get_operand(x, mode, r_m, XMM_OPND, 0); + } else if (dp == &dis_opAVX660F[0x6E]) { + /* vmovd/q , */ +#ifdef DIS_TEXT + if (vex_W) + x->d86_mnem[4] = 'q'; +#endif + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + dtrace_get_operand(x, mode, r_m, LONG_OPND, 0); + } else { + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + dtrace_get_operand(x, mode, r_m, wbit, 0); + } + + break; + + case VEX_MXI: + /* ModR/M.reg := op(ModR/M.rm, imm8) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, REG_ONLY, reg, wbit, 2); + dtrace_get_operand(x, mode, r_m, wbit, 1); + + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + case VEX_XXI: + /* VEX.vvvv := op(ModR/M.rm, imm8) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, dis_AVXvgrp7[opcode2 - 1][reg], + OPLEN); +#endif + 
dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 2); + dtrace_get_operand(x, REG_ONLY, r_m, wbit, 1); + + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + case VEX_MR: + /* ModR/M.reg (reg32/64) := op(ModR/M.rm) */ + if (dp == &dis_opAVX660F[0xC5]) { + /* vpextrw , , */ + x->d86_numopnds = 2; + vbit = 2; + } else { + x->d86_numopnds = 2; + vbit = 1; + } + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, vbit); + dtrace_get_operand(x, mode, r_m, wbit, vbit - 1); + + if (vbit == 2) + dtrace_imm_opnd(x, wbit, 1, 0); + + break; + + case VEX_RRI: + /* implicit(eflags/r32) := op(ModR/M.reg, ModR/M.rm) */ + x->d86_numopnds = 2; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + case VEX_RX: + /* ModR/M.rm := op(ModR/M.reg) */ + if (dp == &dis_opAVX660F3A[0x19]) { /* vextractf128 */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, mode, r_m, XMM_OPND, 2); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + break; + } + + x->d86_numopnds = 2; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 0); + break; + + case VEX_RR: + /* ModR/M.rm := op(ModR/M.reg) */ + x->d86_numopnds = 2; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + if (dp == &dis_opAVX660F[0x7E]) { + /* vmovd/q , */ +#ifdef DIS_TEXT + if (vex_W) + x->d86_mnem[4] = 'q'; +#endif + dtrace_get_operand(x, mode, r_m, LONG_OPND, 1); + } else + dtrace_get_operand(x, mode, r_m, wbit, 1); + + 
dtrace_get_operand(x, REG_ONLY, reg, wbit, 0); + break; + + case VEX_RRi: + /* ModR/M.rm := op(ModR/M.reg, imm) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + +#ifdef DIS_TEXT + if (dp == &dis_opAVX660F3A[0x16]) { + /* vpextrd/q , , */ + if (vex_W) + x->d86_mnem[6] = 'q'; + } +#endif + dtrace_get_operand(x, mode, r_m, LONG_OPND, 2); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + case VEX_RIM: + /* ModR/M.rm := op(ModR/M.reg, imm) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, mode, r_m, XMM_OPND, 2); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + case VEX_RM: + /* ModR/M.rm := op(ModR/M.reg) */ + if (dp == &dis_opAVX660F3A[0x17]) { /* vextractps */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, mode, r_m, LONG_OPND, 2); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + break; + } + x->d86_numopnds = 2; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); +L_VEX_RM: + vbit = 1; + dtrace_get_operand(x, mode, r_m, wbit, vbit); + dtrace_get_operand(x, REG_ONLY, reg, wbit, vbit - 1); + + break; + + case VEX_RRM: + /* ModR/M.rm := op(VEX.vvvv, ModR/M.reg) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 2); + /* VEX use the 1's complement form encode the XMM/YMM regs */ + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 0); + break; + + case VEX_RMX: + /* ModR/M.reg := op(VEX.vvvv, ModR/M.rm) */ + 
x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 2); + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1); + dtrace_get_operand(x, REG_ONLY, r_m, wbit, 0); + break; + + case VEX_NONE: +#ifdef DIS_TEXT + if (vex_L) + (void) strncpy(x->d86_mnem, "vzeroall", OPLEN); +#endif + break; + /* an invalid op code */ case AM: case DM: diff --git a/bsd/dev/i386/kern_machdep.c b/bsd/dev/i386/kern_machdep.c index 8627c26ba..4d7891ba4 100644 --- a/bsd/dev/i386/kern_machdep.c +++ b/bsd/dev/i386/kern_machdep.c @@ -78,16 +78,6 @@ grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype) return 0; } -extern void md_prepare_for_shutdown(int, int, char *); - -void -md_prepare_for_shutdown( - __unused int paniced, - __unused int howto, - __unused char * command) -{ -} - boolean_t pie_required(cpu_type_t exectype __unused, cpu_subtype_t execsubtype __unused) { diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c index a314e46b5..f7c09455d 100644 --- a/bsd/dev/i386/sysctl.c +++ b/bsd/dev/i386/sysctl.c @@ -498,9 +498,15 @@ SYSCTL_NODE(_machdep_cpu, OID_AUTO, xsave, CTLFLAG_RW|CTLFLAG_LOCKED, 0, SYSCTL_PROC(_machdep_cpu_xsave, OID_AUTO, extended_state, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, - (void *)offsetof(cpuid_xsave_leaf_t, extended_state), + (void *) 0, sizeof(cpuid_xsave_leaf_t), - cpu_xsave, "IU", "XSAVE Extended State"); + cpu_xsave, "IU", "XSAVE Extended State Main Leaf"); + +SYSCTL_PROC(_machdep_cpu_xsave, OID_AUTO, extended_state1, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) sizeof(cpuid_xsave_leaf_t), + sizeof(cpuid_xsave_leaf_t), + cpu_xsave, "IU", "XSAVE Extended State Sub-leaf 1"); SYSCTL_NODE(_machdep_cpu, OID_AUTO, arch_perf, CTLFLAG_RW|CTLFLAG_LOCKED, 0, @@ -681,7 +687,7 @@ SYSCTL_PROC(_machdep_cpu_flex_ratio, OID_AUTO, max, cpu_flex_ratio_max, "I", "Flex ratio max (non-turbo)"); SYSCTL_PROC(_machdep_cpu, OID_AUTO, ucupdate, - CTLTYPE_INT 
| CTLFLAG_WR | CTLFLAG_LOCKED, 0, 0, + CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED, 0, 0, cpu_ucode_update, "S", "Microcode update interface"); static const uint32_t apic_timer_vector = (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_TIMER_INTERRUPT); @@ -690,8 +696,8 @@ static const uint32_t apic_IPI_vector = (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_IN SYSCTL_NODE(_machdep, OID_AUTO, vectors, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "Interrupt vector assignments"); -SYSCTL_UINT (_machdep_vectors, OID_AUTO, timer, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (uint32_t *)&apic_timer_vector, 0, ""); -SYSCTL_UINT (_machdep_vectors, OID_AUTO, IPI, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (uint32_t *)&apic_IPI_vector, 0, ""); +SYSCTL_UINT (_machdep_vectors, OID_AUTO, timer, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, __DECONST(uint32_t *,&apic_timer_vector), 0, ""); +SYSCTL_UINT (_machdep_vectors, OID_AUTO, IPI, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, __DECONST(uint32_t *,&apic_IPI_vector), 0, ""); uint64_t pmap_pv_hashlist_walks; uint64_t pmap_pv_hashlist_cnts; @@ -743,19 +749,19 @@ SYSCTL_NODE(_machdep_tsc, OID_AUTO, nanotime, CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "TSC to ns conversion"); SYSCTL_QUAD(_machdep_tsc_nanotime, OID_AUTO, tsc_base, CTLFLAG_RD | CTLFLAG_LOCKED, - (uint64_t *) &pal_rtc_nanotime_info.tsc_base, ""); + __DECONST(uint64_t *, &pal_rtc_nanotime_info.tsc_base), ""); SYSCTL_QUAD(_machdep_tsc_nanotime, OID_AUTO, ns_base, CTLFLAG_RD | CTLFLAG_LOCKED, - (uint64_t *)&pal_rtc_nanotime_info.ns_base, ""); + __DECONST(uint64_t *, &pal_rtc_nanotime_info.ns_base), ""); SYSCTL_UINT(_machdep_tsc_nanotime, OID_AUTO, scale, CTLFLAG_RD | CTLFLAG_LOCKED, - (uint32_t *)&pal_rtc_nanotime_info.scale, 0, ""); + __DECONST(uint32_t *, &pal_rtc_nanotime_info.scale), 0, ""); SYSCTL_UINT(_machdep_tsc_nanotime, OID_AUTO, shift, CTLFLAG_RD | CTLFLAG_LOCKED, - (uint32_t *)&pal_rtc_nanotime_info.shift, 0, ""); + __DECONST(uint32_t *, &pal_rtc_nanotime_info.shift), 0, ""); 
SYSCTL_UINT(_machdep_tsc_nanotime, OID_AUTO, generation, CTLFLAG_RD | CTLFLAG_LOCKED, - (uint32_t *)&pal_rtc_nanotime_info.generation, 0, ""); + __DECONST(uint32_t *, &pal_rtc_nanotime_info.generation), 0, ""); SYSCTL_NODE(_machdep, OID_AUTO, misc, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Miscellaneous x86 kernel parameters"); @@ -775,7 +781,14 @@ SYSCTL_PROC(_machdep_misc, OID_AUTO, machine_check_panic, 0, 0, misc_machine_check_panic, "A", "Machine-check exception test"); - +#if DEVELOPMENT || DEBUG +SYSCTL_QUAD(_machdep, OID_AUTO, reportphyreadabs, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &reportphyreaddelayabs, ""); +SYSCTL_INT(_machdep, OID_AUTO, reportphyreadosbt, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &reportphyreadosbt, 0, ""); +#endif extern void timer_queue_trace_cpu(int); static int @@ -848,3 +861,7 @@ extern uint64_t ml_timer_eager_evaluation_max; SYSCTL_QUAD(_machdep, OID_AUTO, eager_timer_evaluation_max, CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, &ml_timer_eager_evaluation_max, ""); +extern uint64_t x86_isr_fp_simd_use; +SYSCTL_QUAD(_machdep, OID_AUTO, x86_fp_simd_isr_uses, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &x86_isr_fp_simd_use, ""); diff --git a/bsd/dev/i386/systemcalls.c b/bsd/dev/i386/systemcalls.c index 9f57943df..2c7e93ea2 100644 --- a/bsd/dev/i386/systemcalls.c +++ b/bsd/dev/i386/systemcalls.c @@ -69,7 +69,9 @@ extern void *find_user_regs(thread_t); /* dynamically generated at build time based on syscalls.master */ extern const char *syscallnames[]; -#define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || ((code) == SYS_kdebug_trace64)) +#define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || \ + ((code) == SYS_kdebug_trace64) || \ + ((code) == SYS_kdebug_trace_string)) /* * Function: unix_syscall @@ -102,6 +104,10 @@ unix_syscall(x86_saved_state_t *state) thread = current_thread(); uthread = get_bsdthread_info(thread); +#if PROC_REF_DEBUG + uthread_reset_proc_refcount(uthread); +#endif + /* Get the 
approriate proc; may be different from task's for vfork() */ is_vfork = uthread->uu_flag & UT_VFORK; if (__improbable(is_vfork != 0)) @@ -250,6 +256,12 @@ unix_syscall(x86_saved_state_t *state) pal_execve_return(thread); } +#if PROC_REF_DEBUG + if (__improbable(uthread_get_proc_refcount(uthread) != 0)) { + panic("system call returned with uu_proc_refcount != 0"); + } +#endif + thread_exception_return(); /* NOTREACHED */ } @@ -278,6 +290,10 @@ unix_syscall64(x86_saved_state_t *state) thread = current_thread(); uthread = get_bsdthread_info(thread); +#if PROC_REF_DEBUG + uthread_reset_proc_refcount(uthread); +#endif + /* Get the approriate proc; may be different from task's for vfork() */ if (__probable(!(uthread->uu_flag & UT_VFORK))) p = (struct proc *)get_bsdtask_info(current_task()); @@ -439,6 +455,12 @@ unix_syscall64(x86_saved_state_t *state) BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); +#if PROC_REF_DEBUG + if (__improbable(uthread_get_proc_refcount(uthread))) { + panic("system call returned with uu_proc_refcount != 0"); + } +#endif + thread_exception_return(); /* NOTREACHED */ } diff --git a/bsd/dev/memdev.c b/bsd/dev/memdev.c index 5a659bebd..ac6dd485e 100644 --- a/bsd/dev/memdev.c +++ b/bsd/dev/memdev.c @@ -191,7 +191,7 @@ static int mdevopen(dev_t dev, int flags, __unused int devtype, __unused struct devid = minor(dev); /* Get minor device number */ - if (devid > 16) return (ENXIO); /* Not valid */ + if (devid >= 16) return (ENXIO); /* Not valid */ if ((flags & FWRITE) && (mdev[devid].mdFlags & mdRO)) return (EACCES); /* Currently mounted RO */ @@ -206,7 +206,7 @@ static int mdevrw(dev_t dev, struct uio *uio, __unused int ioflag) { devid = minor(dev); /* Get minor device number */ - if (devid > 16) return (ENXIO); /* Not valid */ + if (devid >= 16) return (ENXIO); /* Not valid */ if (!(mdev[devid].mdFlags & mdInited)) return (ENXIO); /* Have we actually been defined yet? 
*/ mdata = ((addr64_t)mdev[devid].mdBase << 12) + uio->uio_offset; /* Point to the area in "file" */ @@ -358,7 +358,7 @@ static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, __unused int flag, devid = minor(dev); /* Get minor device number */ - if (devid > 16) return (ENXIO); /* Not valid */ + if (devid >= 16) return (ENXIO); /* Not valid */ error = proc_suser(p); /* Are we superman? */ if (error) return (error); /* Nope... */ @@ -401,11 +401,6 @@ static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, __unused int flag, *f = 1; break; - case DKIOCGETBLOCKCOUNT32: - if(!(mdev[devid].mdFlags & mdInited)) return (ENXIO); - *f = ((mdev[devid].mdSize << 12) + mdev[devid].mdSecsize - 1) / mdev[devid].mdSecsize; - break; - case DKIOCGETBLOCKCOUNT: if(!(mdev[devid].mdFlags & mdInited)) return (ENXIO); *o = ((mdev[devid].mdSize << 12) + mdev[devid].mdSecsize - 1) / mdev[devid].mdSecsize; @@ -439,7 +434,7 @@ static int mdevsize(dev_t dev) { int devid; devid = minor(dev); /* Get minor device number */ - if (devid > 16) return (ENXIO); /* Not valid */ + if (devid >= 16) return (ENXIO); /* Not valid */ if ((mdev[devid].mdFlags & mdInited) == 0) return(-1); /* Not inited yet */ diff --git a/bsd/dev/munge.c b/bsd/dev/munge.c index adaba8e1a..edd1b7273 100644 --- a/bsd/dev/munge.c +++ b/bsd/dev/munge.c @@ -92,7 +92,7 @@ munge_wl(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[1] = *(uint64_t*)&in_args[1]; + out_args[1] = *(volatile uint64_t*)&in_args[1]; out_args[0] = in_args[0]; } @@ -102,7 +102,7 @@ munge_wwl(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[2] = *(uint64_t*)&in_args[2]; + out_args[2] = *(volatile uint64_t*)&in_args[2]; out_args[1] = in_args[1]; out_args[0] = in_args[0]; } @@ -114,7 +114,7 @@ munge_wwlw(void *args) volatile uint32_t *in_args = (volatile uint32_t*)args; out_args[3] = 
in_args[4]; - out_args[2] = *(uint64_t*)&in_args[2]; + out_args[2] = *(volatile uint64_t*)&in_args[2]; out_args[1] = in_args[1]; out_args[0] = in_args[0]; } @@ -124,9 +124,9 @@ munge_wwlll(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[4] = *(uint64_t*)&in_args[6]; - out_args[3] = *(uint64_t*)&in_args[4]; - out_args[2] = *(uint64_t*)&in_args[2]; + out_args[4] = *(volatile uint64_t*)&in_args[6]; + out_args[3] = *(volatile uint64_t*)&in_args[4]; + out_args[2] = *(volatile uint64_t*)&in_args[2]; out_args[1] = in_args[1]; out_args[0] = in_args[0]; } @@ -139,8 +139,8 @@ munge_wwllww(void *args) out_args[5] = in_args[7]; out_args[4] = in_args[6]; - out_args[3] = *(uint64_t*)&in_args[4]; - out_args[2] = *(uint64_t*)&in_args[2]; + out_args[3] = *(volatile uint64_t*)&in_args[4]; + out_args[2] = *(volatile uint64_t*)&in_args[2]; out_args[1] = in_args[1]; out_args[0] = in_args[0]; } @@ -152,7 +152,19 @@ munge_wlw(void *args) volatile uint32_t *in_args = (volatile uint32_t*)args; out_args[2] = in_args[3]; - out_args[1] = *(uint64_t*)&in_args[1]; + out_args[1] = *(volatile uint64_t*)&in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wlww(void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[3] = in_args[4]; + out_args[2] = in_args[3]; + out_args[1] = *(volatile uint64_t*)&in_args[1]; out_args[0] = in_args[0]; } @@ -162,12 +174,12 @@ munge_wlwwwll(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[6] = *(uint64_t*)&in_args[8]; - out_args[5] = *(uint64_t*)&in_args[6]; + out_args[6] = *(volatile uint64_t*)&in_args[8]; + out_args[5] = *(volatile uint64_t*)&in_args[6]; out_args[4] = in_args[5]; out_args[3] = in_args[4]; out_args[2] = in_args[3]; - out_args[1] = *(uint64_t*)&in_args[1]; + out_args[1] = *(volatile 
uint64_t*)&in_args[1]; out_args[0] = in_args[0]; } @@ -188,12 +200,12 @@ munge_wlwwlwlw(void *args) volatile uint32_t *in_args = (volatile uint32_t*)args; out_args[7] = in_args[10]; - out_args[6] = *(uint64_t*)&in_args[8]; + out_args[6] = *(volatile uint64_t*)&in_args[8]; out_args[5] = in_args[7]; - out_args[4] = *(uint64_t*)&in_args[5]; + out_args[4] = *(volatile uint64_t*)&in_args[5]; out_args[3] = in_args[4]; out_args[2] = in_args[3]; - out_args[1] = *(uint64_t*)&in_args[1]; + out_args[1] = *(volatile uint64_t*)&in_args[1]; out_args[0] = in_args[0]; } @@ -203,8 +215,8 @@ munge_wll(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[2] = *(uint64_t*)&in_args[3]; - out_args[1] = *(uint64_t*)&in_args[1]; + out_args[2] = *(volatile uint64_t*)&in_args[3]; + out_args[1] = *(volatile uint64_t*)&in_args[1]; out_args[0] = in_args[0]; } @@ -214,9 +226,9 @@ munge_wlll(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[3] = *(uint64_t*)&in_args[5]; - out_args[2] = *(uint64_t*)&in_args[3]; - out_args[1] = *(uint64_t*)&in_args[1]; + out_args[3] = *(volatile uint64_t*)&in_args[5]; + out_args[2] = *(volatile uint64_t*)&in_args[3]; + out_args[1] = *(volatile uint64_t*)&in_args[1]; out_args[0] = in_args[0]; } @@ -226,10 +238,10 @@ munge_wllll(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[4] = *(uint64_t*)&in_args[7]; - out_args[3] = *(uint64_t*)&in_args[5]; - out_args[2] = *(uint64_t*)&in_args[3]; - out_args[1] = *(uint64_t*)&in_args[1]; + out_args[4] = *(volatile uint64_t*)&in_args[7]; + out_args[3] = *(volatile uint64_t*)&in_args[5]; + out_args[2] = *(volatile uint64_t*)&in_args[3]; + out_args[1] = *(volatile uint64_t*)&in_args[1]; out_args[0] = in_args[0]; } @@ -241,8 +253,8 @@ munge_wllww(void *args) out_args[4] = in_args[6]; 
out_args[3] = in_args[5]; - out_args[2] = *(uint64_t*)&in_args[3]; - out_args[1] = *(uint64_t*)&in_args[1]; + out_args[2] = *(volatile uint64_t*)&in_args[3]; + out_args[1] = *(volatile uint64_t*)&in_args[1]; out_args[0] = in_args[0]; } @@ -252,12 +264,12 @@ munge_wllwwll(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[6] = *(uint64_t*)&in_args[9]; - out_args[5] = *(uint64_t*)&in_args[7]; + out_args[6] = *(volatile uint64_t*)&in_args[9]; + out_args[5] = *(volatile uint64_t*)&in_args[7]; out_args[4] = in_args[6]; out_args[3] = in_args[5]; - out_args[2] = *(uint64_t*)&in_args[3]; - out_args[1] = *(uint64_t*)&in_args[1]; + out_args[2] = *(volatile uint64_t*)&in_args[3]; + out_args[1] = *(volatile uint64_t*)&in_args[1]; out_args[0] = in_args[0]; } @@ -268,7 +280,7 @@ munge_wwwlw(void *args) volatile uint32_t *in_args = (volatile uint32_t*)args; out_args[4] = in_args[5]; - out_args[3] = *(uint64_t*)&in_args[3]; + out_args[3] = *(volatile uint64_t*)&in_args[3]; out_args[2] = in_args[2]; out_args[1] = in_args[1]; out_args[0] = in_args[0]; @@ -282,7 +294,7 @@ munge_wwwlww(void *args) out_args[5] = in_args[6]; out_args[4] = in_args[5]; - out_args[3] = *(uint64_t*)&in_args[3]; + out_args[3] = *(volatile uint64_t*)&in_args[3]; out_args[2] = in_args[2]; out_args[1] = in_args[1]; out_args[0] = in_args[0]; @@ -294,7 +306,7 @@ munge_wwwl(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[3] = *(uint64_t*)&in_args[3]; + out_args[3] = *(volatile uint64_t*)&in_args[3]; out_args[2] = in_args[2]; out_args[1] = in_args[1]; out_args[0] = in_args[0]; @@ -307,7 +319,7 @@ munge_wwwwlw(void *args) volatile uint32_t *in_args = (volatile uint32_t*)args; out_args[5] = in_args[6]; - out_args[4] = *(uint64_t*)&in_args[4]; + out_args[4] = *(volatile uint64_t*)&in_args[4]; out_args[3] = in_args[3]; out_args[2] = in_args[2]; 
out_args[1] = in_args[1]; @@ -320,7 +332,7 @@ munge_wwwwl(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[4] = *(uint64_t*)&in_args[4]; + out_args[4] = *(volatile uint64_t*)&in_args[4]; out_args[3] = in_args[3]; out_args[2] = in_args[2]; out_args[1] = in_args[1]; @@ -333,7 +345,7 @@ munge_wwwwwl(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[5] = *(uint64_t*)&in_args[5]; + out_args[5] = *(volatile uint64_t*)&in_args[5]; out_args[4] = in_args[4]; out_args[3] = in_args[3]; out_args[2] = in_args[2]; @@ -349,7 +361,7 @@ munge_wwwwwlww(void *args) out_args[7] = in_args[8]; out_args[6] = in_args[7]; - out_args[5] = *(uint64_t*)&in_args[5]; + out_args[5] = *(volatile uint64_t*)&in_args[5]; out_args[4] = in_args[4]; out_args[3] = in_args[3]; out_args[2] = in_args[2]; @@ -364,8 +376,8 @@ munge_wwwwwllw(void *args) volatile uint32_t *in_args = (volatile uint32_t*)args; out_args[7] = in_args[9]; - out_args[6] = *(uint64_t*)&in_args[7]; - out_args[5] = *(uint64_t*)&in_args[5]; + out_args[6] = *(volatile uint64_t*)&in_args[7]; + out_args[5] = *(volatile uint64_t*)&in_args[5]; out_args[4] = in_args[4]; out_args[3] = in_args[3]; out_args[2] = in_args[2]; @@ -379,9 +391,9 @@ munge_wwwwwlll(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[7] = *(uint64_t*)&in_args[9]; - out_args[6] = *(uint64_t*)&in_args[7]; - out_args[5] = *(uint64_t*)&in_args[5]; + out_args[7] = *(volatile uint64_t*)&in_args[9]; + out_args[6] = *(volatile uint64_t*)&in_args[7]; + out_args[5] = *(volatile uint64_t*)&in_args[5]; out_args[4] = in_args[4]; out_args[3] = in_args[3]; out_args[2] = in_args[2]; @@ -395,7 +407,7 @@ munge_wwwwwwl(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - 
out_args[6] = *(uint64_t*)&in_args[6]; + out_args[6] = *(volatile uint64_t*)&in_args[6]; out_args[5] = in_args[5]; out_args[4] = in_args[4]; out_args[3] = in_args[3]; @@ -411,7 +423,7 @@ munge_wwwwwwlw(void *args) volatile uint32_t *in_args = (volatile uint32_t*)args; out_args[7] = in_args[8]; - out_args[6] = *(uint64_t*)&in_args[6]; + out_args[6] = *(volatile uint64_t*)&in_args[6]; out_args[5] = in_args[5]; out_args[4] = in_args[4]; out_args[3] = in_args[3]; @@ -426,8 +438,8 @@ munge_wwwwwwll(void *args) volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; - out_args[7] = *(uint64_t*)&in_args[8]; - out_args[6] = *(uint64_t*)&in_args[6]; + out_args[7] = *(volatile uint64_t*)&in_args[8]; + out_args[6] = *(volatile uint64_t*)&in_args[6]; out_args[5] = in_args[5]; out_args[4] = in_args[4]; out_args[3] = in_args[3]; @@ -459,6 +471,19 @@ munge_wws(void *args) } void +munge_wwws(void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[3] = (int64_t)(int)in_args[3]; /* Sign-extend */ + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + + +void munge_wwwsw(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; @@ -496,7 +521,7 @@ munge_lw(void *args) volatile uint32_t *in_args = (volatile uint32_t*)args; out_args[1] = in_args[2]; - out_args[0] = *(uint64_t*)&in_args[0]; + out_args[0] = *(volatile uint64_t*)&in_args[0]; } void @@ -508,7 +533,7 @@ munge_lwww(void *args) out_args[3] = in_args[4]; out_args[2] = in_args[3]; out_args[1] = in_args[2]; - out_args[0] = *(uint64_t*)&in_args[0]; + out_args[0] = *(volatile uint64_t*)&in_args[0]; } void @@ -520,7 +545,7 @@ munge_wwlwww(void *args) out_args[5] = in_args[6]; out_args[4] = in_args[5]; out_args[3] = in_args[4]; - out_args[2] = *(uint64_t*)&in_args[2]; + out_args[2] = *(volatile uint64_t*)&in_args[2]; out_args[1] = in_args[1]; 
out_args[0] = in_args[0]; } diff --git a/bsd/dev/unix_startup.c b/bsd/dev/unix_startup.c index ddb2baa68..25c3610d0 100644 --- a/bsd/dev/unix_startup.c +++ b/bsd/dev/unix_startup.c @@ -52,6 +52,7 @@ #include #include #include +#include extern uint32_t kern_maxvnodes; extern vm_map_t mb_map; @@ -62,7 +63,6 @@ extern uint32_t tcp_recvspace; #endif void bsd_bufferinit(void); -extern void md_prepare_for_shutdown(int, int, char *); unsigned int bsd_mbuf_cluster_reserve(boolean_t *); void bsd_scale_setup(int); @@ -140,7 +140,7 @@ bsd_startupearly(void) &firstaddr, size, FALSE, - VM_FLAGS_ANYWHERE, + VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_FILE), &bufferhdr_map); if (ret != KERN_SUCCESS) @@ -150,7 +150,8 @@ bsd_startupearly(void) &firstaddr, size, 0, - KMA_HERE | KMA_KOBJECT); + KMA_HERE | KMA_KOBJECT, + VM_KERN_MEMORY_FILE); if (ret != KERN_SUCCESS) panic("Failed to allocate bufferhdr_map"); @@ -215,10 +216,10 @@ bsd_bufferinit(void) #if SOCKETS ret = kmem_suballoc(kernel_map, - (vm_offset_t *) & mbutl, + (vm_offset_t *) &mbutl, (vm_size_t) (nmbclusters * MCLBYTES), FALSE, - VM_FLAGS_ANYWHERE, + VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_MBUF), &mb_map); if (ret != KERN_SUCCESS) @@ -291,8 +292,8 @@ bsd_mbuf_cluster_reserve(boolean_t *overridden) nmbclusters = MAX_NCL; } - /* Round it down to nearest multiple of 4KB clusters */ - nmbclusters = P2ROUNDDOWN(nmbclusters, NCLPBG); + /* Round it down to nearest multiple of PAGE_SIZE */ + nmbclusters = P2ROUNDDOWN(nmbclusters, NCLPG); } mbuf_poolsz = nmbclusters << MCLSHIFT; done: @@ -327,15 +328,16 @@ bsd_scale_setup(int scale) maxfilesperproc = maxfiles/2; desiredvnodes = maxfiles; vnodes_sized = 1; + tcp_tfo_backlog = 100 * scale; if (scale > 4) { /* clip somaxconn at 32G level */ somaxconn = 2048; - /* - * For scale > 4 (> 32G), clip + /* + * For scale > 4 (> 32G), clip * tcp_tcbhashsize to 32K */ tcp_tcbhashsize = 32 *1024; - + if (scale > 7) { /* clip at 64G level */ max_cached_sock_count = 165000; diff --git 
a/bsd/dev/vn/vn.c b/bsd/dev/vn/vn.c index 457e58370..703a8ad7a 100644 --- a/bsd/dev/vn/vn.c +++ b/bsd/dev/vn/vn.c @@ -908,7 +908,6 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, case DKIOCGETMAXSEGMENTBYTECOUNTREAD: case DKIOCGETMAXSEGMENTBYTECOUNTWRITE: case DKIOCGETBLOCKCOUNT: - case DKIOCGETBLOCKCOUNT32: if ((vn->sc_flags & VNF_INITED) == 0) { error = ENXIO; goto done; @@ -979,9 +978,6 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, case DKIOCISWRITABLE: *f = 1; break; - case DKIOCGETBLOCKCOUNT32: - *f = vn->sc_size; - break; case DKIOCGETBLOCKCOUNT: *o = vn->sc_size; break; diff --git a/bsd/hfs/Makefile b/bsd/hfs/Makefile index 0f11a9737..ccf82f04f 100644 --- a/bsd/hfs/Makefile +++ b/bsd/hfs/Makefile @@ -14,15 +14,17 @@ PRIVATE_DATAFILES = \ hfs.h hfs_attrlist.h hfs_catalog.h hfs_cnode.h hfs_endian.h \ hfs_fsctl.h hfs_macos_defs.h hfs_quota.h rangelist.h +KERNELFILES = ${DATAFILES} + INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = hfs -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = ${KERNELFILES} EXPORT_MI_DIR = hfs -INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/hfs/hfs.h b/bsd/hfs/hfs.h index 52b8faca1..b19e23208 100644 --- a/bsd/hfs/hfs.h +++ b/bsd/hfs/hfs.h @@ -43,6 +43,8 @@ #define HFS_CHECK_LOCK_ORDER 1 #endif +#define HFS_TMPDBG 0 + #include #ifdef KERNEL @@ -72,6 +74,8 @@ #if CONFIG_PROTECT /* Forward declare the cprotect struct */ struct cprotect; + + #endif /* @@ -194,7 +198,9 @@ typedef struct hfsmount { time_t hfs_mtime; /* file system last modification time */ u_int32_t hfs_filecount; /* number of files in file system */ u_int32_t hfs_dircount; /* number of directories in file system */ - u_int32_t freeBlocks; /* free allocation blocks */ + u_int32_t freeBlocks; /* free allocation blocks */ + u_int32_t reclaimBlocks; /* number of blocks we are reclaiming during resize */ + u_int32_t tentativeBlocks; /* tentative allocation blocks 
-- see note below */ u_int32_t nextAllocation; /* start of next allocation search */ u_int32_t sparseAllocation; /* start of allocations for sparse devices */ u_int32_t vcbNxtCNID; /* next unused catalog node ID - protected by catalog lock */ @@ -205,7 +211,13 @@ typedef struct hfsmount { /* Persistent fields (on disk, static) */ u_int16_t vcbSigWord; - int16_t vcbFlags; /* Runtime flag to indicate if volume is dirty/clean */ + + // Volume will be inconsistent if header is not flushed + bool hfs_header_dirty; + + // Volume header is dirty, but won't be inconsistent if not flushed + bool hfs_header_minor_change; + u_int32_t vcbAtrb; u_int32_t vcbJinfoBlock; u_int32_t localCreateDate;/* volume create time from volume header (For HFS+, value is in local time) */ @@ -247,7 +259,7 @@ typedef struct hfsmount { u_int32_t reserveBlocks; /* free block reserve */ u_int32_t loanedBlocks; /* blocks on loan for delayed allocations */ - + u_int32_t lockedBlocks; /* blocks reserved and locked */ /* * HFS+ Private system directories (two). 
Any access @@ -272,8 +284,8 @@ typedef struct hfsmount { u_int32_t hfs_jnlfileid; u_int32_t hfs_jnlinfoblkid; lck_rw_t hfs_global_lock; - u_int32_t hfs_global_lock_nesting; thread_t hfs_global_lockowner; + u_int32_t hfs_transaction_nesting; /* Notification variables: */ u_int32_t hfs_notification_conditions; @@ -292,7 +304,9 @@ typedef struct hfsmount { u_int32_t hfs_hotfile_end; u_int32_t hfs_min_alloc_start; u_int32_t hfs_freed_block_count; + u_int64_t hfs_cs_hotfile_size; // in bytes int hfs_hotfile_freeblks; + int hfs_hotfile_blk_adjust; int hfs_hotfile_maxblks; int hfs_overflow_maxblks; int hfs_catalog_maxblks; @@ -303,7 +317,7 @@ typedef struct hfsmount { time_t hfc_timebase; /* recording period start time */ time_t hfc_timeout; /* recording period stop time */ void * hfc_recdata; /* recording data (opaque) */ - int hfc_maxfiles; /* maximum files to track */ + uint32_t hfc_maxfiles; /* maximum files to track */ struct vnode * hfc_filevp; #if HFS_SPARSE_DEV @@ -348,14 +362,19 @@ typedef struct hfsmount { u_int32_t hfs_resize_progress; #if CONFIG_PROTECT /* Data Protection fields */ - struct cprotect *hfs_resize_cpentry; + cpx_t hfs_resize_cpx; u_int16_t hfs_running_cp_major_vers; uint32_t default_cp_class; /* default effective class value */ uint64_t cproot_flags; uint8_t cp_crypto_generation; uint8_t hfs_cp_lock_state; /* per-mount device lock state info */ +#if HFS_TMPDBG +#if !SECURE_KERNEL + boolean_t hfs_cp_verbose; +#endif #endif +#endif /* Per mount cnode hash variables: */ lck_mtx_t hfs_chash_mutex; /* protects access to cnode hash table */ @@ -380,6 +399,19 @@ typedef struct hfsmount { // Not currently used except for debugging purposes uint32_t hfs_active_threads; + + enum { + // These are indices into the array below + + // Tentative ranges can be claimed back at any time + HFS_TENTATIVE_BLOCKS = 0, + + // Locked ranges cannot be claimed back, but the allocation + // won't have been written to disk yet + HFS_LOCKED_BLOCKS = 1, + }; + // These lists 
are not sorted like a range list usually is + struct rl_head hfs_reserved_ranges[2]; } hfsmount_t; /* @@ -405,28 +437,40 @@ typedef hfsmount_t ExtendedVCB; #define vcbFilCnt hfs_filecount #define vcbDirCnt hfs_dircount -/* Inline functions to set/reset vcbFlags. Upper 8 bits indicate if the volume - * header/VCB is clean/dirty --- if set, volume header is dirty, and - * if clear, volume header is clean. This value is checked to determine - * if the in-memory copy of volume header should be flushed to the disk - * or not. - */ -/* Set runtime flag to indicate that volume is dirty */ -static __inline__ void MarkVCBDirty(ExtendedVCB *vcb) +static inline void MarkVCBDirty(hfsmount_t *hfsmp) { - vcb->vcbFlags |= 0xFF00; + hfsmp->hfs_header_dirty = true; +} + +static inline void MarkVCBClean(hfsmount_t *hfsmp) +{ + hfsmp->hfs_header_dirty = false; + hfsmp->hfs_header_minor_change = false; +} + +static inline bool IsVCBDirty(ExtendedVCB *vcb) +{ + return vcb->hfs_header_minor_change || vcb->hfs_header_dirty; } -/* Clear runtime flag to indicate that volume is dirty */ -static __inline__ void MarkVCBClean(ExtendedVCB *vcb) +// Header is changed but won't be inconsistent if we don't write it +static inline void hfs_note_header_minor_change(hfsmount_t *hfsmp) { - vcb->vcbFlags &= 0x00FF; + hfsmp->hfs_header_minor_change = true; } -/* Check runtime flag to determine if the volume is dirty or not */ -static __inline__ Boolean IsVCBDirty(ExtendedVCB *vcb) +// Must header be flushed for volume to be consistent? +static inline bool hfs_header_needs_flushing(hfsmount_t *hfsmp) { - return (vcb->vcbFlags & 0xFF00 ? 
true : false); + return (hfsmp->hfs_header_dirty + || ISSET(hfsmp->hfs_catalog_cp->c_flag, C_MODIFIED) + || ISSET(hfsmp->hfs_extents_cp->c_flag, C_MODIFIED) + || (hfsmp->hfs_attribute_cp + && ISSET(hfsmp->hfs_attribute_cp->c_flag, C_MODIFIED)) + || (hfsmp->hfs_allocation_cp + && ISSET(hfsmp->hfs_allocation_cp->c_flag, C_MODIFIED)) + || (hfsmp->hfs_startup_cp + && ISSET(hfsmp->hfs_startup_cp->c_flag, C_MODIFIED))); } /* @@ -473,7 +517,10 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS}; #define HFS_SSD 0x400000 #define HFS_SUMMARY_TABLE 0x800000 #define HFS_CS 0x1000000 - +#define HFS_CS_METADATA_PIN 0x2000000 +#define HFS_CS_HOTFILE_PIN 0x4000000 /* cooperative fusion (enables a hotfile variant) */ +#define HFS_FEATURE_BARRIER 0x8000000 /* device supports barrier-only flush */ +#define HFS_CS_SWAPFILE_PIN 0x10000000 /* Macro to update next allocation block in the HFS mount structure. If * the HFS_SKIP_UPDATE_NEXT_ALLOCATION is set, do not update @@ -597,11 +644,29 @@ enum { kHFSPlusMaxFileNameBytes = kHFSPlusMaxFileNameChars * 3 }; #define MAC_GMT_FACTOR 2082844800UL static inline __attribute__((const)) -uint64_t hfs_blk_to_bytes(uint32_t blk, uint32_t blk_size) +off_t hfs_blk_to_bytes(uint32_t blk, uint32_t blk_size) { - return (uint64_t)blk * blk_size; // Avoid the overflow + return (off_t)blk * blk_size; // Avoid the overflow } +/* + * For now, we use EIO to indicate consistency issues. It is safe to + * return or assign an error value to HFS_EINCONSISTENT but it is + * *not* safe to compare against it because EIO can be generated for + * other reasons. We take advantage of the fact that == has + * left-to-right associativity and so any uses of: + * + * if (error == HFS_EINCONSISTENT) + * + * will produce a compiler warning: "comparison between pointer and + * integer". + * + * Note that not everywhere is consistent with the use of + * HFS_EINCONSISTENT. Some places return EINVAL, EIO directly or + * other error codes. 
+ */ +#define HFS_EINCONSISTENT (void *)0 == (void *)0 ? EIO : EIO + /***************************************************************************** FUNCTION PROTOTYPES ******************************************************************************/ @@ -636,6 +701,7 @@ int hfs_vnop_bwrite(struct vnop_bwrite_args *); /* in hfs_readwrite.c */ int hfs_vnop_blktooff(struct vnop_blktooff_args *); /* in hfs_readwrite.c */ int hfs_vnop_offtoblk(struct vnop_offtoblk_args *); /* in hfs_readwrite.c */ int hfs_vnop_blockmap(struct vnop_blockmap_args *); /* in hfs_readwrite.c */ +errno_t hfs_flush_invalid_ranges(vnode_t vp); /* in hfs_readwrite.c */ int hfs_vnop_getxattr(struct vnop_getxattr_args *); /* in hfs_xattr.c */ int hfs_vnop_setxattr(struct vnop_setxattr_args *); /* in hfs_xattr.c */ @@ -704,8 +770,29 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp); ******************************************************************************/ extern int hfs_relocate(struct vnode *, u_int32_t, kauth_cred_t, struct proc *); +/* flags for hfs_pin_block_range() and hfs_pin_vnode() */ +#define HFS_PIN_IT 0x0001 +#define HFS_UNPIN_IT 0x0002 +#define HFS_TEMP_PIN 0x0004 +#define HFS_EVICT_PIN 0x0008 +#define HFS_DATALESS_PIN 0x0010 + +// +// pin/un-pin an explicit range of blocks to the "fast" (usually ssd) device +// +int hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks, vfs_context_t ctx); + +// +// pin/un-pin all the extents belonging to a vnode. 
+// also, if it is non-null, "num_blocks_pinned" returns the number of blocks pin/unpinned by the function +// +int hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned, vfs_context_t ctx); + + +int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid, uint8_t forktype, uint32_t *pinned); + + /* Flags for HFS truncate */ -#define HFS_TRUNCATE_SKIPUPDATE 0x00000001 #define HFS_TRUNCATE_SKIPTIMES 0x00000002 /* implied by skipupdate; it is a subset */ @@ -743,8 +830,13 @@ extern void hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding); enum volop {VOL_UPDATE, VOL_MKDIR, VOL_RMDIR, VOL_MKFILE, VOL_RMFILE}; extern int hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot); -int hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush); -#define HFS_ALTFLUSH 1 +enum { + HFS_FVH_WAIT = 0x0001, + HFS_FVH_WRITE_ALT = 0x0002, + HFS_FVH_FLUSH_IF_DIRTY = 0x0004, +}; +typedef uint32_t hfs_flush_volume_header_options_t; +int hfs_flushvolumeheader(struct hfsmount *hfsmp, hfs_flush_volume_header_options_t); extern int hfs_extendfs(struct hfsmount *, u_int64_t, vfs_context_t); extern int hfs_truncatefs(struct hfsmount *, u_int64_t, vfs_context_t); @@ -798,6 +890,7 @@ extern int hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_ extern int check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg); extern int check_for_dataless_file(struct vnode *vp, uint64_t op_type); extern int hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid); +extern void hfs_pin_fs_metadata(struct hfsmount *hfsmp); /* Return information about number of metadata blocks for volume */ extern int hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo); @@ -822,9 +915,6 @@ void hfs_unlock_mount (struct hfsmount *hfsmp); #define SFL_VM_PRIV 0x0020 #define SFL_VALIDMASK (SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE | 
SFL_STARTUP | SFL_VM_PRIV) -extern int hfs_systemfile_lock(struct hfsmount *, int, enum hfs_locktype); -extern void hfs_systemfile_unlock(struct hfsmount *, int); - extern u_int32_t GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, const char *name, struct cat_attr *fattr, struct cat_fork *forkinfo); @@ -856,7 +946,6 @@ extern int hfs_start_transaction(struct hfsmount *hfsmp); extern int hfs_end_transaction(struct hfsmount *hfsmp); extern void hfs_journal_lock(struct hfsmount *hfsmp); extern void hfs_journal_unlock(struct hfsmount *hfsmp); -extern int hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO); extern void hfs_syncer_lock(struct hfsmount *hfsmp); extern void hfs_syncer_unlock(struct hfsmount *hfsmp); extern void hfs_syncer_wait(struct hfsmount *hfsmp); @@ -864,6 +953,17 @@ extern void hfs_syncer_wakeup(struct hfsmount *hfsmp); extern void hfs_syncer_queue(thread_call_t syncer); extern void hfs_sync_ejectable(struct hfsmount *hfsmp); +typedef enum hfs_flush_mode { + HFS_FLUSH_JOURNAL, // Flush journal + HFS_FLUSH_JOURNAL_META, // Flush journal and metadata blocks + HFS_FLUSH_FULL, // Flush journal and does a cache flush + HFS_FLUSH_CACHE, // Flush track cache to media + HFS_FLUSH_BARRIER, // Barrier-only flush to ensure write order + HFS_FLUSH_JOURNAL_BARRIER // Flush journal with barrier +} hfs_flush_mode_t; + +extern errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode); + extern void hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents); /* Erase unused Catalog nodes due to . 
*/ @@ -893,9 +993,26 @@ extern void replace_desc(struct cnode *cp, struct cat_desc *cdp); extern int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp); -extern int hfs_update(struct vnode *, int); +typedef enum { + // Push all modifications to disk (including minor ones) + HFS_UPDATE_FORCE = 0x01, +} hfs_update_options_t; + +extern int hfs_update(struct vnode *, int options); + +typedef enum hfs_sync_mode { + HFS_FSYNC, + HFS_FSYNC_FULL, + HFS_FSYNC_BARRIER +} hfs_fsync_mode_t; + +extern int hfs_fsync(struct vnode *, int, hfs_fsync_mode_t, struct proc *); -extern int hfs_fsync(struct vnode *, int, int, struct proc *); +const struct cat_fork * +hfs_prepare_fork_for_update(filefork_t *ff, + const struct cat_fork *cf, + struct cat_fork *cf_buf, + uint32_t block_size); /***************************************************************************** Functions from hfs_xattr.c @@ -921,7 +1038,8 @@ int hfs_getxattr_internal(cnode_t *, struct vnop_getxattr_args *, int hfs_xattr_write(vnode_t vp, const char *name, const void *data, size_t size); int hfs_setxattr_internal(struct cnode *, const void *, size_t, struct vnop_setxattr_args *, struct hfsmount *, u_int32_t); -extern int hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid); +extern int hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid, + bool *open_transaction); extern int hfs_set_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype, int state); @@ -942,18 +1060,18 @@ extern void hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid); extern void hfs_relorigins(struct cnode *cp); extern void hfs_relorigin(struct cnode *cp, cnid_t parentcnid); extern int hfs_haslinkorigin(cnode_t *cp); -extern cnid_t hfs_currentparent(cnode_t *cp); +extern cnid_t hfs_currentparent(cnode_t *cp, bool have_lock); extern cnid_t hfs_currentcnid(cnode_t *cp); +errno_t hfs_first_link(hfsmount_t *hfsmp, cnode_t *cp, cnid_t *link_id); 
/***************************************************************************** Functions from VolumeAllocation.c ******************************************************************************/ -extern int hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, - u_int32_t numBlocks); +extern int hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks); -extern int hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock, - u_int32_t numBlocks, u_int32_t *alloc_count); +extern int hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock, + u_int32_t numBlocks, u_int32_t *alloc_count); extern int hfs_isrbtree_active (struct hfsmount *hfsmp); diff --git a/bsd/hfs/hfs_attrlist.c b/bsd/hfs/hfs_attrlist.c index 8483b45ae..3ee064859 100644 --- a/bsd/hfs/hfs_attrlist.c +++ b/bsd/hfs/hfs_attrlist.c @@ -338,7 +338,7 @@ hfs_readdirattr_internal(struct vnode *dvp, struct attrlist *alist, */ if ((dcp->c_entries == 0) && (ce_list->realentries > 0)) { dcp->c_entries++; - dcp->c_flag |= (C_MODIFIED | C_FORCEUPDATE); + dcp->c_flag |= C_MODIFIED; printf("hfs_vnop_readdirattr: repairing valence to non-zero! \n"); /* force an update on dcp while we're still holding the lock. 
*/ hfs_update(dvp, 0); diff --git a/bsd/hfs/hfs_btreeio.c b/bsd/hfs/hfs_btreeio.c index fefc36ad3..f6084e31f 100644 --- a/bsd/hfs/hfs_btreeio.c +++ b/bsd/hfs/hfs_btreeio.c @@ -545,17 +545,17 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) /* * Update the Alternate MDB or Alternate VolumeHeader */ + VTOC(vp)->c_flag |= C_MODIFIED; if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) || (VTOC(vp)->c_fileid == kHFSCatalogFileID) || (VTOC(vp)->c_fileid == kHFSAttributesFileID) ) { - VTOC(vp)->c_flag |= C_MODIFIED; MarkVCBDirty( vcb ); - ret = hfs_flushvolumeheader(VCBTOHFS(vcb), MNT_WAIT, HFS_ALTFLUSH); + ret = hfs_flushvolumeheader(VCBTOHFS(vcb), HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); } else { VTOC(vp)->c_touch_chgtime = TRUE; VTOC(vp)->c_touch_modtime = TRUE; - (void) hfs_update(vp, TRUE); + (void) hfs_update(vp, 0); } ret = ClearBTNodes(vp, btInfo.nodeSize, origSize, (filePtr->fcbEOF - origSize)); @@ -889,7 +889,7 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t node hfsmp->hfs_attribute_vp = vp; hfs_unlock_mount (hfsmp); - (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); if (intrans) { hfs_end_transaction(hfsmp); diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c index ef0b4a61e..8e0a65c4f 100644 --- a/bsd/hfs/hfs_catalog.c +++ b/bsd/hfs/hfs_catalog.c @@ -267,7 +267,7 @@ cat_acquire_cnid (struct hfsmount *hfsmp, cnid_t *new_cnid) } else { hfsmp->vcbNxtCNID++; } - MarkVCBDirty(hfsmp); + hfs_note_header_minor_change(hfsmp); /* First check that there are not any entries pending in the hash table with this ID */ if (cat_check_idhash (hfsmp, nextCNID)) { @@ -4359,8 +4359,9 @@ getbsdattr(struct hfsmount *hfsmp, const struct HFSPlusCatalogFile *crp, struct case S_IFBLK: attrp->ca_rdev = bsd->special.rawDevice; break; - - case S_IFDIR: /* fall through */ + case S_IFIFO: + case S_IFSOCK: + case S_IFDIR: case S_IFREG: /* Pick up the hard 
link count */ if (bsd->special.linkCount > 0) @@ -4812,3 +4813,11 @@ cat_update_dirlink(struct hfsmount *hfsmp, u_int8_t forktype, } } +void hfs_fork_copy(struct cat_fork *dst, const struct cat_fork *src, + HFSPlusExtentDescriptor *extents) +{ + /* Copy everything but the extents into the dest fork */ + memcpy(dst, src, offsetof(struct cat_fork, cf_extents)); + /* Then copy the supplied extents into the fork */ + memcpy(dst->cf_extents, extents, sizeof(HFSPlusExtentRecord)); +} diff --git a/bsd/hfs/hfs_catalog.h b/bsd/hfs/hfs_catalog.h index a48ca2fb6..a4719ea41 100644 --- a/bsd/hfs/hfs_catalog.h +++ b/bsd/hfs/hfs_catalog.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2013 Apple Inc. All rights reserved. + * Copyright (c) 2002-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -148,11 +148,18 @@ struct cat_fork { u_int32_t cf_vblocks; /* virtual (unalloated) blocks */ u_int32_t cf_blocks; /* total blocks used by this fork */ struct HFSPlusExtentDescriptor cf_extents[8]; /* initial set of extents */ + + /* + * NOTE: If you change this structure, make sure you also change + * hfs_fork_copy. + */ }; #define cf_clump cf_union.cfu_clump #define cf_bytesread cf_union.cfu_bytesread +void hfs_fork_copy(struct cat_fork *dst, const struct cat_fork *src, + HFSPlusExtentDescriptor *extents); /* * Directory Hint diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c index 89589de28..668cc7870 100644 --- a/bsd/hfs/hfs_cnode.c +++ b/bsd/hfs/hfs_cnode.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2014 Apple Inc. All rights reserved. + * Copyright (c) 2002-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -50,6 +51,7 @@ #include #include #include +#include extern int prtactive; @@ -57,7 +59,7 @@ extern lck_attr_t * hfs_lock_attr; extern lck_grp_t * hfs_mutex_group; extern lck_grp_t * hfs_rwlock_group; -static void hfs_reclaim_cnode(struct cnode *); +static void hfs_reclaim_cnode(hfsmount_t *hfsmp, struct cnode *); static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim); static int hfs_isordered(struct cnode *, struct cnode *); @@ -182,7 +184,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) enum vtype v_type; struct cnode *cp; int error = 0; - int started_tr = 0; + bool started_tr = false; struct hfsmount *hfsmp = VTOHFS(vp); struct proc *p = vfs_context_proc(ctx); int truncated = 0; @@ -200,36 +202,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) if (cp->c_rsrcfork) { ++forkcount; } - - - /* - * Skip the call to ubc_setsize if we're being invoked on behalf of reclaim. - * The dirty regions would have already been synced to disk, so informing UBC - * that they can toss the pages doesn't help anyone at this point. - * - * Note that this is a performance problem if the vnode goes straight to reclaim - * (and skips inactive), since there would be no way for anyone to notify the UBC - * that all pages in this file are basically useless. - */ - if (reclaim == 0) { - /* - * Check whether we are tearing down a cnode with only one remaining fork. - * If there are blocks in its filefork, then we need to unlock the cnode - * before calling ubc_setsize. The cluster layer may re-enter the filesystem - * (i.e. VNOP_BLOCKMAP), and if we retain the cnode lock, we could double-lock - * panic. 
- */ - - if ((v_type == VREG || v_type == VLNK) && - (cp->c_flag & C_DELETED) && - (VTOF(vp)->ff_blocks != 0) && (forkcount == 1)) { - hfs_unlock(cp); - /* ubc_setsize just fails if we were to call this from VNOP_RECLAIM */ - ubc_setsize(vp, 0); - (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - } - } - + /* * Push file data out for normal files that haven't been evicted from * the namespace. We only do this if this function was not called from reclaim, @@ -245,10 +218,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) (VTOF(vp)->ff_blocks) && (reclaim == 0)) { /* - * Note that if content protection is enabled, then this is where we will - * attempt to issue IOs for all dirty regions of this file. - * - * If we're called from hfs_vnop_inactive, all this means is at the time + * If we're called from hfs_vnop_inactive, all this means is at the time * the logic for deciding to call this function, there were not any lingering * mmap/fd references for this file. However, there is nothing preventing the system * from creating a new reference in between the time that logic was checked @@ -258,21 +228,6 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) hfs_filedone(vp, ctx, 0); } - /* - * We're holding the cnode lock now. Stall behind any shadow BPs that may - * be involved with this vnode if it is a symlink. We don't want to allow - * the blocks that we're about to release to be put back into the pool if there - * is pending I/O to them. - */ - if (v_type == VLNK) { - /* - * This will block if the asynchronous journal flush is in progress. - * If this symlink is not being renamed over and doesn't have any open FDs, - * then we'll remove it from the journal's bufs below in kill_block. 
- */ - buf_wait_for_shadow_io (vp, 0); - } - /* * Remove any directory hints or cached origins */ @@ -282,328 +237,326 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) if (cp->c_flag & C_HARDLINK) { hfs_relorigins(cp); } - + /* - * This check is slightly complicated. We should only truncate data - * in very specific cases for open-unlinked files. This is because - * we want to ensure that the resource fork continues to be available - * if the caller has the data fork open. However, this is not symmetric; - * someone who has the resource fork open need not be able to access the data - * fork once the data fork has gone inactive. - * - * If we're the last fork, then we have cleaning up to do. - * - * A) last fork, and vp == c_vp - * Truncate away own fork data. If rsrc fork is not in core, truncate it too. - * - * B) last fork, and vp == c_rsrc_vp - * Truncate ourselves, assume data fork has been cleaned due to C). - * - * If we're not the last fork, then things are a little different: + * -- Handle open unlinked files -- * - * C) not the last fork, vp == c_vp - * Truncate ourselves. Once the file has gone out of the namespace, - * it cannot be further opened. Further access to the rsrc fork may - * continue, however. - * - * D) not the last fork, vp == c_rsrc_vp - * Don't enter the block below, just clean up vnode and push it out of core. + * If the vnode is in use, it means a force unmount is in progress + * in which case we defer cleaning up until either we come back + * through here via hfs_vnop_reclaim, at which point the UBC + * information will have been torn down and the vnode might no + * longer be in use, or if it's still in use, it will get cleaned + * up when next remounted. */ + if (ISSET(cp->c_flag, C_DELETED) && !vnode_isinuse(vp, 0)) { + /* + * This check is slightly complicated. We should only truncate data + * in very specific cases for open-unlinked files. 
This is because + * we want to ensure that the resource fork continues to be available + * if the caller has the data fork open. However, this is not symmetric; + * someone who has the resource fork open need not be able to access the data + * fork once the data fork has gone inactive. + * + * If we're the last fork, then we have cleaning up to do. + * + * A) last fork, and vp == c_vp + * Truncate away own fork data. If rsrc fork is not in core, truncate it too. + * + * B) last fork, and vp == c_rsrc_vp + * Truncate ourselves, assume data fork has been cleaned due to C). + * + * If we're not the last fork, then things are a little different: + * + * C) not the last fork, vp == c_vp + * Truncate ourselves. Once the file has gone out of the namespace, + * it cannot be further opened. Further access to the rsrc fork may + * continue, however. + * + * D) not the last fork, vp == c_rsrc_vp + * Don't enter the block below, just clean up vnode and push it out of core. + */ - if ((v_type == VREG || v_type == VLNK) && - (cp->c_flag & C_DELETED) && - ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) { - - /* Truncate away our own fork data. (Case A, B, C above) */ - if (VTOF(vp)->ff_blocks != 0) { + if ((v_type == VREG || v_type == VLNK) && + ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) { + + /* Truncate away our own fork data. (Case A, B, C above) */ + if (VTOF(vp)->ff_blocks != 0) { + /* + * SYMLINKS only: + * + * Encapsulate the entire change (including truncating the link) in + * nested transactions if we are modifying a symlink, because we know that its + * file length will be at most 4k, and we can fit both the truncation and + * any relevant bitmap changes into a single journal transaction. We also want + * the kill_block code to execute in the same transaction so that any dirty symlink + * blocks will not be written. Otherwise, rely on + * hfs_truncate doing its own transactions to ensure that we don't blow up + * the journal. 
+ */ + if (!started_tr && (v_type == VLNK)) { + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + else { + started_tr = true; + } + } - /* - * SYMLINKS only: - * - * Encapsulate the entire change (including truncating the link) in - * nested transactions if we are modifying a symlink, because we know that its - * file length will be at most 4k, and we can fit both the truncation and - * any relevant bitmap changes into a single journal transaction. We also want - * the kill_block code to execute in the same transaction so that any dirty symlink - * blocks will not be written. Otherwise, rely on - * hfs_truncate doing its own transactions to ensure that we don't blow up - * the journal. - */ - if ((started_tr == 0) && (v_type == VLNK)) { - if (hfs_start_transaction(hfsmp) != 0) { - error = EINVAL; + /* + * At this point, we have decided that this cnode is + * suitable for full removal. We are about to deallocate + * its blocks and remove its entry from the catalog. + * If it was a symlink, then it's possible that the operation + * which created it is still in the current transaction group + * due to coalescing. Take action here to kill the data blocks + * of the symlink out of the journal before moving to + * deallocate the blocks. We need to be in the middle of + * a transaction before calling buf_iterate like this. + * + * Note: we have to kill any potential symlink buffers out of + * the journal prior to deallocating their blocks. This is so + * that we don't race with another thread that may be doing + * an allocation concurrently and pick up these blocks. It could + * generate I/O against them which could go out ahead of our journal + * transaction. + */ + + if (hfsmp->jnl && vnode_islnk(vp)) { + buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp); + } + + + /* + * This truncate call (and the one below) is fine from VNOP_RECLAIM's + * context because we're only removing blocks, not zero-filling new + * ones. 
The C_DELETED check above makes things much simpler. + */ + error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, ctx); + if (error) { goto out; } - else { - started_tr = 1; + truncated = 1; + + /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */ + if (started_tr) { + hfs_end_transaction(hfsmp); + started_tr = false; } - } - /* - * At this point, we have decided that this cnode is - * suitable for full removal. We are about to deallocate - * its blocks and remove its entry from the catalog. - * If it was a symlink, then it's possible that the operation - * which created it is still in the current transaction group - * due to coalescing. Take action here to kill the data blocks - * of the symlink out of the journal before moving to - * deallocate the blocks. We need to be in the middle of - * a transaction before calling buf_iterate like this. + } + + /* + * Truncate away the resource fork, if we represent the data fork and + * it is the last fork. That means, by definition, the rsrc fork is not in + * core. To avoid bringing a vnode into core for the sole purpose of deleting the + * data in the resource fork, we call cat_lookup directly, then hfs_release_storage + * to get rid of the resource fork's data. Note that because we are holding the + * cnode lock, it is impossible for a competing thread to create the resource fork + * vnode from underneath us while we do this. * - * Note: we have to kill any potential symlink buffers out of - * the journal prior to deallocating their blocks. This is so - * that we don't race with another thread that may be doing an - * an allocation concurrently and pick up these blocks. It could - * generate I/O against them which could go out ahead of our journal - * transaction. + * This is invoked via case A above only. 
*/ + if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) { + struct cat_lookup_buffer *lookup_rsrc = NULL; + struct cat_desc *desc_ptr = NULL; + lockflags = 0; + + MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK); + if (lookup_rsrc == NULL) { + printf("hfs_cnode_teardown: ENOMEM from MALLOC\n"); + error = ENOMEM; + goto out; + } + else { + bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer)); + } - if (hfsmp->jnl && vnode_islnk(vp)) { - buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp); - } + if (cp->c_desc.cd_namelen == 0) { + /* Initialize the rsrc descriptor for lookup if necessary*/ + MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid); + + lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name; + lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name); + lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid; + + desc_ptr = &lookup_rsrc->lookup_desc; + } + else { + desc_ptr = &cp->c_desc; + } + lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - /* - * This truncate call (and the one below) is fine from VNOP_RECLAIM's - * context because we're only removing blocks, not zero-filling new - * ones. The C_DELETED check above makes things much simpler. 
- */ - error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, ctx); - if (error) { - goto out; - } - truncated = 1; + error = cat_lookup (hfsmp, desc_ptr, 1, 0, (struct cat_desc *) NULL, + (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL); - /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */ - if (started_tr) { - hfs_end_transaction(hfsmp); - started_tr = 0; - } + hfs_systemfile_unlock (hfsmp, lockflags); + + if (error) { + FREE (lookup_rsrc, M_TEMP); + goto out; + } - } - - /* - * Truncate away the resource fork, if we represent the data fork and - * it is the last fork. That means, by definition, the rsrc fork is not in - * core. To avoid bringing a vnode into core for the sole purpose of deleting the - * data in the resource fork, we call cat_lookup directly, then hfs_release_storage - * to get rid of the resource fork's data. Note that because we are holding the - * cnode lock, it is impossible for a competing thread to create the resource fork - * vnode from underneath us while we do this. - * - * This is invoked via case A above only. - */ - if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) { - struct cat_lookup_buffer *lookup_rsrc = NULL; - struct cat_desc *desc_ptr = NULL; - lockflags = 0; - - MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK); - if (lookup_rsrc == NULL) { - printf("hfs_cnode_teardown: ENOMEM from MALLOC\n"); - error = ENOMEM; - goto out; - } - else { - bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer)); - } + /* + * Make the filefork in our temporary struct look like a real + * filefork. Fill in the cp, sysfileinfo and rangelist fields.. 
+ */ + rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges); + lookup_rsrc->lookup_fork.ff_cp = cp; - if (cp->c_desc.cd_namelen == 0) { - /* Initialize the rsrc descriptor for lookup if necessary*/ - MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid); - - lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name; - lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name); - lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; - lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid; - - desc_ptr = &lookup_rsrc->lookup_desc; - } - else { - desc_ptr = &cp->c_desc; - } + /* + * If there were no errors, then we have the catalog's fork information + * for the resource fork in question. Go ahead and delete the data in it now. + */ - lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid); + FREE(lookup_rsrc, M_TEMP); - error = cat_lookup (hfsmp, desc_ptr, 1, 0, (struct cat_desc *) NULL, - (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL); + if (error) { + goto out; + } - hfs_systemfile_unlock (hfsmp, lockflags); - - if (error) { - FREE (lookup_rsrc, M_TEMP); - goto out; + /* + * This fileid's resource fork extents have now been fully deleted on-disk + * and this CNID is no longer valid. At this point, we should be able to + * zero out cp->c_blocks to indicate there is no data left in this file. + */ + cp->c_blocks = 0; } + } + /* + * If we represent the last fork (or none in the case of a dir), + * and the cnode has become open-unlinked... + * + * We check c_blocks here because it is possible in the force + * unmount case for the data fork to be in use but the resource + * fork to not be in use in which case we will truncate the + * resource fork, but not the data fork. It will get cleaned + * up upon next mount. 
+ */ + if (forkcount <= 1 && !cp->c_blocks) { /* - * Make the filefork in our temporary struct look like a real - * filefork. Fill in the cp, sysfileinfo and rangelist fields.. + * If it has EA's, then we need to get rid of them. + * + * Note that this must happen outside of any other transactions + * because it starts/ends its own transactions and grabs its + * own locks. This is to prevent a file with a lot of attributes + * from creating a transaction that is too large (which panics). */ - rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges); - lookup_rsrc->lookup_fork.ff_cp = cp; + if (ISSET(cp->c_attr.ca_recflags, kHFSHasAttributesMask)) + ea_error = hfs_removeallattr(hfsmp, cp->c_fileid, &started_tr); - /* - * If there were no errors, then we have the catalog's fork information - * for the resource fork in question. Go ahead and delete the data in it now. + /* + * Remove the cnode's catalog entry and release all blocks it + * may have been using. */ - error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid); - FREE(lookup_rsrc, M_TEMP); - - if (error) { - goto out; + /* + * Mark cnode in transit so that no one can get this + * cnode from cnode hash. + */ + // hfs_chash_mark_in_transit(hfsmp, cp); + // XXXdbg - remove the cnode from the hash table since it's deleted + // otherwise someone could go to sleep on the cnode and not + // be woken up until this vnode gets recycled which could be + // a very long time... + hfs_chashremove(hfsmp, cp); + + cp->c_flag |= C_NOEXISTS; // XXXdbg + cp->c_rdev = 0; + + if (!started_tr) { + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + started_tr = true; } - + /* - * This fileid's resource fork extents have now been fully deleted on-disk - * and this CNID is no longer valid. At this point, we should be able to - * zero out cp->c_blocks to indicate there is no data left in this file. + * Reserve some space in the Catalog file. 
*/ - cp->c_blocks = 0; - } - } - - /* - * If we represent the last fork (or none in the case of a dir), - * and the cnode has become open-unlinked, - * AND it has EA's, then we need to get rid of them. - * - * Note that this must happen outside of any other transactions - * because it starts/ends its own transactions and grabs its - * own locks. This is to prevent a file with a lot of attributes - * from creating a transaction that is too large (which panics). - */ - if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 && - (cp->c_flag & C_DELETED) && - (forkcount <= 1)) { - - ea_error = hfs_removeallattr(hfsmp, cp->c_fileid); - } - - - /* - * If the cnode represented an open-unlinked file, then now - * actually remove the cnode's catalog entry and release all blocks - * it may have been using. - */ - if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) { - /* - * Mark cnode in transit so that no one can get this - * cnode from cnode hash. - */ - // hfs_chash_mark_in_transit(hfsmp, cp); - // XXXdbg - remove the cnode from the hash table since it's deleted - // otherwise someone could go to sleep on the cnode and not - // be woken up until this vnode gets recycled which could be - // a very long time... - hfs_chashremove(hfsmp, cp); - - cp->c_flag |= C_NOEXISTS; // XXXdbg - cp->c_rdev = 0; - - if (started_tr == 0) { - if (hfs_start_transaction(hfsmp) != 0) { - error = EINVAL; + if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) { goto out; - } - started_tr = 1; - } - - /* - * Reserve some space in the Catalog file. - */ - if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) { - goto out; - } - cat_reserve = 1; - - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); - - if (cp->c_blocks > 0) { - printf("hfs_inactive: deleting non-empty%sfile %d, " - "blks %d\n", VNODE_IS_RSRC(vp) ? 
" rsrc " : " ", - (int)cp->c_fileid, (int)cp->c_blocks); - } - - // - // release the name pointer in the descriptor so that - // cat_delete() will use the file-id to do the deletion. - // in the case of hard links this is imperative (in the - // case of regular files the fileid and cnid are the - // same so it doesn't matter). - // - cat_releasedesc(&cp->c_desc); - - /* - * The descriptor name may be zero, - * in which case the fileid is used. - */ - error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); - - if (error && truncated && (error != ENXIO)) { - printf("hfs_inactive: couldn't delete a truncated file!"); - } - - /* Update HFS Private Data dir */ - if (error == 0) { - hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; - if (vnode_isdir(vp)) { - DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); - } - (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], - &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); - } - - hfs_systemfile_unlock(hfsmp, lockflags); - - if (error) { - goto out; - } - -#if QUOTA - if (hfsmp->hfs_flags & HFS_QUOTAS) - (void)hfs_chkiq(cp, -1, NOCRED, 0); -#endif /* QUOTA */ - - /* Already set C_NOEXISTS at the beginning of this block */ - cp->c_flag &= ~C_DELETED; - cp->c_touch_chgtime = TRUE; - cp->c_touch_modtime = TRUE; - - if (error == 0) - hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0); - } - - /* - * A file may have had delayed allocations, in which case hfs_update - * would not have updated the catalog record (cat_update). We need - * to do that now, before we lose our fork data. We also need to - * force the update, or hfs_update will again skip the cat_update. - * - * If the file has C_NOEXISTS set, then we can skip the hfs_update call - * because the catalog entry has already been removed. 
There would be no point - * to looking up the entry in the catalog to modify it when we already know it's gone - */ - if ((!ISSET(cp->c_flag, C_NOEXISTS)) && - ((cp->c_flag & C_MODIFIED) || cp->c_touch_acctime || - cp->c_touch_chgtime || cp->c_touch_modtime)) { + } + cat_reserve = 1; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + if (cp->c_blocks > 0) { + printf("hfs_inactive: deleting non-empty%sfile %d, " + "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ", + (int)cp->c_fileid, (int)cp->c_blocks); + } + + // + // release the name pointer in the descriptor so that + // cat_delete() will use the file-id to do the deletion. + // in the case of hard links this is imperative (in the + // case of regular files the fileid and cnid are the + // same so it doesn't matter). + // + cat_releasedesc(&cp->c_desc); - if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){ - cp->c_flag |= C_FORCEUPDATE; + /* + * The descriptor name may be zero, + * in which case the fileid is used. + */ + error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); + + if (error && truncated && (error != ENXIO)) { + printf("hfs_inactive: couldn't delete a truncated file!"); } - hfs_update(vp, 0); + + /* Update HFS Private Data dir */ + if (error == 0) { + hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; + if (vnode_isdir(vp)) { + DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + } + (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], + &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); + } + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error) { + goto out; + } + + #if QUOTA + if (hfsmp->hfs_flags & HFS_QUOTAS) + (void)hfs_chkiq(cp, -1, NOCRED, 0); + #endif /* QUOTA */ + + /* Already set C_NOEXISTS at the beginning of this block */ + cp->c_flag &= ~C_DELETED; + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + + if (error == 0) + hfs_volupdate(hfsmp, (v_type == VDIR) ? 
VOL_RMDIR : VOL_RMFILE, 0); } + } // if + + hfs_update(vp, reclaim ? HFS_UPDATE_FORCE : 0); /* * Since we are about to finish what might be an inactive call, propagate * any remaining modified or touch bits from the cnode to the vnode. This * serves as a hint to vnode recycling that we shouldn't recycle this vnode * synchronously. + * + * For now, if the node *only* has a dirty atime, we don't mark + * the vnode as dirty. VFS's asynchronous recycling can actually + * lead to worse performance than having it synchronous. When VFS + * is fixed to be more performant, we can be more honest about + * marking vnodes as dirty when it's only the atime that's dirty. */ - if (ISSET(cp->c_flag, C_MODIFIED) || ISSET(cp->c_flag, C_FORCEUPDATE) || - cp->c_touch_acctime || cp->c_touch_chgtime || - cp->c_touch_modtime || ISSET(cp->c_flag, C_NEEDS_DATEADDED) || - ISSET(cp->c_flag, C_DELETED)) { + if (hfs_is_dirty(cp) == HFS_DIRTY || ISSET(cp->c_flag, C_DELETED)) { vnode_setdirty(vp); } else { vnode_cleardirty(vp); @@ -613,46 +566,12 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) if (cat_reserve) cat_postflight(hfsmp, &cookie, p); - // XXXdbg - have to do this because a goto could have come here if (started_tr) { hfs_end_transaction(hfsmp); - started_tr = 0; + started_tr = false; } -#if 0 -#if CONFIG_PROTECT - /* - * cnode truncate lock and cnode lock are both held exclusive here. - * - * Go ahead and flush the keys out if this cnode is the last fork - * and it is not class F. Class F keys should not be purged because they only - * exist in memory and have no persistent keys. Only do this - * if we haven't already done it yet (maybe a vnode skipped inactive - * and went straight to reclaim). This function gets called from both reclaim and - * inactive, so it will happen first in inactive if possible. - * - * We need to be mindful that all pending IO for this file has already been - * issued and completed before we bzero out the key. 
This is because - * if it isn't, tossing the key here could result in garbage IO being - * written (by using the bzero'd key) if the writes are happening asynchronously. - * - * In addition, class A files may have already been purged due to the - * lock event occurring. - */ - if (forkcount == 1) { - struct cprotect *entry = cp->c_cpentry; - if ((entry) && ( CP_CLASS(entry->cp_pclass) != PROTECTION_CLASS_F)) { - if ((cp->c_cpentry->cp_flags & CP_KEY_FLUSHED) == 0) { - cp->c_cpentry->cp_flags |= CP_KEY_FLUSHED; - bzero (cp->c_cpentry->cp_cache_key, cp->c_cpentry->cp_cache_key_len); - bzero (cp->c_cpentry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx)); - } - } - } -#endif -#endif - - return error; + return error; } @@ -762,12 +681,8 @@ hfs_filedone(struct vnode *vp, vfs_context_t context, struct cnode *cp; struct filefork *fp; struct hfsmount *hfsmp; - struct rl_entry *invalid_range; off_t leof; u_int32_t blks, blocksize; - /* flags for zero-filling sparse ranges */ - int cluster_flags = IO_CLOSE; - int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE; cp = VTOC(vp); fp = VTOF(vp); @@ -777,53 +692,8 @@ hfs_filedone(struct vnode *vp, vfs_context_t context, if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0)) return (0); - if (!ISSET(opts, HFS_FILE_DONE_NO_SYNC)) { -#if CONFIG_PROTECT - /* - * Figure out if we need to do synchronous IO. - * - * If the file represents a content-protected file, we may need - * to issue synchronous IO when we dispatch to the cluster layer. - * If we didn't, then the IO would go out to the disk asynchronously. - * If the vnode hits the end of inactive before getting reclaimed, the - * content protection keys would be wiped/bzeroed out, and we'd end up - * trying to issue the IO with an invalid key. This will lead to file - * corruption. IO_SYNC will force the cluster_push to wait until all IOs - * have completed (though they may be in the track cache). 
- */ - if (cp_fs_protected(VTOVFS(vp))) { - cluster_flags |= IO_SYNC; - cluster_zero_flags |= IO_SYNC; - } -#endif - - hfs_unlock(cp); - (void) cluster_push(vp, cluster_flags); - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - } + hfs_flush_invalid_ranges(vp); - /* - * Explicitly zero out the areas of file - * that are currently marked invalid. - */ - while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) { - off_t start = invalid_range->rl_start; - off_t end = invalid_range->rl_end; - - /* The range about to be written must be validated - * first, so that VNOP_BLOCKMAP() will return the - * appropriate mapping for the cluster code: - */ - rl_remove(start, end, &fp->ff_invalidranges); - - hfs_unlock(cp); - (void) cluster_write(vp, (struct uio *) 0, - leof, end + 1, start, (off_t)0, cluster_zero_flags); - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cp->c_flag |= C_MODIFIED; - } - cp->c_flag &= ~C_ZFWANTSYNC; - cp->c_zftimeout = 0; blocksize = VTOVCB(vp)->blockSize; blks = leof / blocksize; if (((off_t)blks * (off_t)blocksize) != leof) @@ -837,17 +707,15 @@ hfs_filedone(struct vnode *vp, vfs_context_t context, if (!ISSET(opts, HFS_FILE_DONE_NO_SYNC)) { hfs_unlock(cp); - (void) cluster_push(vp, cluster_flags); + cluster_push(vp, IO_CLOSE); hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - + /* * If the hfs_truncate didn't happen to flush the vnode's * information out to disk, force it to be updated now that * all invalid ranges have been zero-filled and validated: */ - if (cp->c_flag & C_MODIFIED) { - hfs_update(vp, 0); - } + hfs_update(vp, 0); } return (0); @@ -892,11 +760,13 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) } /* - * Keep track of an inactive hot file. + * Keep track of an inactive hot file. 
Don't bother on ssd's since + * the tracking is done differently (it's done at read() time) */ if (!vnode_isdir(vp) && !vnode_issystem(vp) && - !(cp->c_flag & (C_DELETED | C_NOEXISTS)) ) { + !(cp->c_flag & (C_DELETED | C_NOEXISTS)) && + !(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) { (void) hfs_addhotfile(vp); } vnode_removefsref(vp); @@ -943,7 +813,8 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) /* Dump cached symlink data */ if (vnode_islnk(vp) && (fp->ff_symlinkptr != NULL)) { FREE(fp->ff_symlinkptr, M_TEMP); - } + } + rl_remove_all(&fp->ff_invalidranges); FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK); } @@ -953,7 +824,7 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) if (reclaim_cnode) { hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT); hfs_unlock(cp); - hfs_reclaim_cnode(cp); + hfs_reclaim_cnode(hfsmp, cp); } else { /* @@ -981,7 +852,39 @@ extern int (**hfs_std_vnodeop_p) (void *); /* * hfs_getnewvnode - get new default vnode * - * The vnode is returned with an iocount and the cnode locked + * The vnode is returned with an iocount and the cnode locked. + * The cnode of the parent vnode 'dvp' may or may not be locked, depending on + * the circumstances. The cnode in question (if acquiring the resource fork), + * may also already be locked at the time we enter this function. + * + * Note that there are both input and output flag arguments to this function. + * If one of the input flags (specifically, GNV_USE_VP), is set, then + * hfs_getnewvnode will use the parameter *vpp, which is traditionally only + * an output parameter, as both an input and output parameter. It will use + * the vnode provided in the output, and pass it to vnode_create with the + * proper flavor so that a new vnode is _NOT_ created on our behalf when + * we dispatch to VFS. This may be important in various HFS vnode creation + * routines, such a create or get-resource-fork, because we risk deadlock if + * jetsam is involved. 
+ * + * Deadlock potential exists if jetsam is synchronously invoked while we are waiting + * for a vnode to be recycled in order to give it the identity we want. If jetsam + * happens to target a process for termination that is blocked in-kernel, waiting to + * acquire the cnode lock on our parent 'dvp', while our current thread has it locked, + * neither side will make forward progress and the watchdog timer will eventually fire. + * To prevent this, a caller of hfs_getnewvnode may choose to proactively force + * any necessary vnode reclamation/recycling while it is not holding any locks and + * thus not prone to deadlock. If this is the case, GNV_USE_VP will be set and + * the parameter will be used as described above. + * + * !!! !!!! + * In circumstances when GNV_USE_VP is set, this function _MUST_ clean up and either consume + * or dispose of the provided vnode. We funnel all errors to a single return value so that + * if provided_vp is still non-NULL, then we will dispose of the vnode. This will occur in + * all error cases of this function -- anywhere we zero/NULL out the *vpp parameter. It may + * also occur if the current thread raced with another to create the same vnode, and we + * find the entry already present in the cnode hash. + * !!! !!! 
*/ int hfs_getnewvnode( @@ -1002,27 +905,43 @@ hfs_getnewvnode( struct cnode *cp = NULL; struct filefork *fp = NULL; int hfs_standard = 0; - int retval; + int retval = 0; int issystemfile; int wantrsrc; int hflags = 0; + int need_update_identity = 0; struct vnode_fsparam vfsp; enum vtype vtype; + + struct vnode *provided_vp = NULL; + + #if QUOTA int i; #endif /* QUOTA */ hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD); + if (flags & GNV_USE_VP) { + /* Store the provided VP for later use */ + provided_vp = *vpp; + } + + /* Zero out the vpp regardless of provided input */ + *vpp = NULL; + + /* Zero out the out_flags */ + *out_flags = 0; + if (attrp->ca_fileid == 0) { - *vpp = NULL; - return (ENOENT); + retval = ENOENT; + goto gnv_exit; } #if !FIFO if (IFTOVT(attrp->ca_mode) == VFIFO) { - *vpp = NULL; - return (ENOTSUP); + retval = ENOTSUP; + goto gnv_exit; } #endif /* !FIFO */ vtype = IFTOVT(attrp->ca_mode); @@ -1033,15 +952,13 @@ hfs_getnewvnode( if (vtype == VBAD) { /* Mark the FS as corrupt and bail out */ hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED); - return EINVAL; + retval = EINVAL; + goto gnv_exit; } - - /* Zero out the out_flags */ - *out_flags = 0; - + #ifdef HFS_CHECK_LOCK_ORDER /* - * The only case were its permissible to hold the parent cnode + * The only case where it's permissible to hold the parent cnode * lock is during a create operation (hfs_makenode) or when * we don't need the cnode lock (GNV_SKIPLOCK). */ @@ -1062,8 +979,18 @@ hfs_getnewvnode( * If the id is no longer valid for lookups we'll get back a NULL cp. */ if (cp == NULL) { - return (ENOENT); + retval = ENOENT; + goto gnv_exit; } + /* + * We may have been provided a vnode via + * GNV_USE_VP. In this case, we have raced with + * a 2nd thread to create the target vnode. The provided + * vnode that was passed in will be dealt with at the + * end of the function, as we don't zero out the field + * until we're ready to pass responsibility to VFS. 
+ */ + /* * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the @@ -1083,10 +1010,28 @@ hfs_getnewvnode( */ if (!(hfs_checkdeleted(cp))) { + // + // If the bytes of the filename in the descp do not match the bytes in the + // cnp (and we're not looking up the resource fork), then we want to update + // the vnode identity to contain the bytes that HFS stores so that when an + // fsevent gets generated, it has the correct filename. otherwise daemons + // that match filenames produced by fsevents with filenames they have stored + // elsewhere (e.g. bladerunner, backupd, mds), the filenames will not match. + // See: FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // for more details. + // +#ifdef CN_WANTSRSRCFORK + if (*vpp && cnp && cnp->cn_nameptr && !(cnp->cn_flags & CN_WANTSRSRCFORK) && descp && descp->cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)descp->cd_nameptr, descp->cd_namelen) != 0) { +#else + if (*vpp && cnp && cnp->cn_nameptr && descp && descp->cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)descp->cd_nameptr, descp->cd_namelen) != 0) { +#endif + vnode_update_identity (*vpp, dvp, (const char *)descp->cd_nameptr, descp->cd_namelen, 0, VNODE_UPDATE_NAME); + } if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) { /* If cnode is uninitialized, its c_attr will be zeroed out; cnids wont match. */ if ((descp->cd_cnid == cp->c_attr.ca_fileid) && (attrp->ca_linkcount != cp->c_attr.ca_linkcount)){ + if ((flags & GNV_SKIPLOCK) == 0) { /* * Then we took the lock. Drop it before calling @@ -1100,7 +1045,7 @@ hfs_getnewvnode( * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to * force a re-drive in the lookup routine. * Drop the iocount on the vnode obtained from - * chash_getcnode if needed. + * chash_getcnode if needed. 
*/ if (*vpp != NULL) { vnode_put (*vpp); @@ -1120,7 +1065,8 @@ hfs_getnewvnode( } *out_flags = GNV_CAT_ATTRCHANGED; - return ERECYCLE; + retval = ERECYCLE; + goto gnv_exit; } else { /* @@ -1140,18 +1086,37 @@ hfs_getnewvnode( * that the new link lived in the same directory as the alternative name for * this item. */ - if ((*vpp != NULL) && (cnp)) { + if ((*vpp != NULL) && (cnp || cp->c_desc.cd_nameptr)) { /* we could be requesting the rsrc of a hardlink file... */ - vnode_update_identity (*vpp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, - (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); +#ifdef CN_WANTSRSRCFORK + if (cp->c_desc.cd_nameptr && (cnp == NULL || !(cnp->cn_flags & CN_WANTSRSRCFORK))) { +#else + if (cp->c_desc.cd_nameptr) { +#endif + // + // Update the identity with what we have stored on disk as + // the name of this file. This is related to: + // FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // + vnode_update_identity (*vpp, dvp, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen, 0, + (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); + } else if (cnp) { + vnode_update_identity (*vpp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, + (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); + } } } } } - /* Check if we found a matching vnode */ + /* + * At this point, we have performed hardlink and open-unlinked checks + * above. We have now validated the state of the vnode that was given back + * to us from the cnode hash code and find it safe to return. 
+ */ if (*vpp != NULL) { - return (0); + retval = 0; + goto gnv_exit; } /* @@ -1171,7 +1136,7 @@ hfs_getnewvnode( if ((flags & GNV_SKIPLOCK) == 0) { hfs_unlock(cp); } - hfs_reclaim_cnode(cp); + hfs_reclaim_cnode(hfsmp, cp); *vpp = NULL; /* * If we hit this case, that means that the entry was there in the catalog when @@ -1185,7 +1150,8 @@ hfs_getnewvnode( */ if (error == ENOENT) { *out_flags = GNV_CAT_DELETED; - return ENOENT; + retval = ENOENT; + goto gnv_exit; } /* @@ -1196,7 +1162,8 @@ hfs_getnewvnode( */ if (error == ERECYCLE) { *out_flags = GNV_CAT_ATTRCHANGED; - return (ERECYCLE); + retval = ERECYCLE; + goto gnv_exit; } } } @@ -1209,9 +1176,10 @@ hfs_getnewvnode( descp->cd_flags &= ~CD_HASBUF; /* Tag hardlinks */ - if ((vtype == VREG || vtype == VDIR) && - ((descp->cd_cnid != attrp->ca_fileid) || - (attrp->ca_recflags & kHFSHasLinkChainMask))) { + if ((vtype == VREG || vtype == VDIR + || vtype == VSOCK || vtype == VFIFO) + && (descp->cd_cnid != attrp->ca_fileid + || ISSET(attrp->ca_recflags, kHFSHasLinkChainMask))) { cp->c_flag |= C_HARDLINK; } /* @@ -1327,6 +1295,7 @@ hfs_getnewvnode( vfsp.vnfs_dvp = dvp; vfsp.vnfs_cnp = cnp; } + vfsp.vnfs_fsnode = cp; /* @@ -1359,8 +1328,23 @@ hfs_getnewvnode( vfsp.vnfs_filesize = 0; vfsp.vnfs_flags = VNFS_ADDFSREF; - if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE)) +#ifdef CN_WANTSRSRCFORK + if (cnp && cnp->cn_nameptr && !(cnp->cn_flags & CN_WANTSRSRCFORK) && cp->c_desc.cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0) { +#else + if (cnp && cnp->cn_nameptr && cp->c_desc.cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0) { +#endif + // + // We don't want VFS to add an entry for this vnode because the name in the + // cnp does not match the bytes stored on disk for this file. 
Instead we'll + // update the identity later after the vnode is created and we'll do so with + // the correct bytes for this filename. For more details, see: + // FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // + vfsp.vnfs_flags |= VNFS_NOCACHE; + need_update_identity = 1; + } else if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE)) { vfsp.vnfs_flags |= VNFS_NOCACHE; + } /* Tag system files */ vfsp.vnfs_marksystem = issystemfile; @@ -1370,15 +1354,45 @@ hfs_getnewvnode( vfsp.vnfs_markroot = 1; else vfsp.vnfs_markroot = 0; + + /* + * If provided_vp was non-NULL, then it is an already-allocated (but not + * initialized) vnode. We simply need to initialize it to this identity. + * If it was NULL, then assume that we need to call vnode_create with the + * normal arguments/types. + */ + if (provided_vp) { + vp = provided_vp; + /* + * After we assign the value of provided_vp into 'vp' (so that it can be + * mutated safely by vnode_initialize), we can NULL it out. At this point, the disposal + * and handling of the provided vnode will be the responsibility of VFS, which will + * clean it up and vnode_put it properly if vnode_initialize fails. 
+ */ + provided_vp = NULL; - if ((retval = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, cvpp))) { - if (fp) { + retval = vnode_initialize (VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp); + /* See error handling below for resolving provided_vp */ + } + else { + /* Do a standard vnode_create */ + retval = vnode_create (VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp); + } + + /* + * We used a local variable to hold the result of vnode_create/vnode_initialize so that + * on error cases in vnode_create we won't accidentally harm the cnode's fields + */ + + if (retval) { + /* Clean up if we encountered an error */ + if (fp) { if (fp == cp->c_datafork) - cp->c_datafork = NULL; + cp->c_datafork = NULL; else - cp->c_rsrcfork = NULL; + cp->c_rsrcfork = NULL; - FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK); + FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK); } /* * If this is a newly created cnode or a vnode reclaim @@ -1386,7 +1400,7 @@ hfs_getnewvnode( */ if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) { hfs_chash_abort(hfsmp, cp); - hfs_reclaim_cnode(cp); + hfs_reclaim_cnode(hfsmp, cp); } else { hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH); @@ -1395,13 +1409,38 @@ hfs_getnewvnode( } } *vpp = NULL; - return (retval); + goto gnv_exit; } - vp = *cvpp; + + /* If no error, then assign the value into the cnode's fields */ + *cvpp = vp; + vnode_settag(vp, VT_HFS); if (cp->c_flag & C_HARDLINK) { vnode_setmultipath(vp); } + + if (cp->c_attr.ca_recflags & kHFSFastDevCandidateMask) { + vnode_setfastdevicecandidate(vp); + } + + if (cp->c_attr.ca_recflags & kHFSAutoCandidateMask) { + vnode_setautocandidate(vp); + } + + + + + if (vp && need_update_identity) { + // + // As above, update the name of the vnode if the bytes stored in hfs do not match + // the bytes in the cnp. See this radar: + // FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // for more details. 
+ // + vnode_update_identity (vp, dvp, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen, 0, VNODE_UPDATE_NAME); + } + /* * Tag resource fork vnodes as needing an VNOP_INACTIVE * so that any deferred removes (open unlinked files) @@ -1423,7 +1462,7 @@ hfs_getnewvnode( /* * Stop tracking an active hot file. */ - if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile) { + if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile && !(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) { (void) hfs_removehotfile(vp); } @@ -1435,12 +1474,19 @@ hfs_getnewvnode( #endif *vpp = vp; - return (0); + retval = 0; + +gnv_exit: + if (provided_vp) { + /* Release our empty vnode if it was not used */ + vnode_put (provided_vp); + } + return retval; } static void -hfs_reclaim_cnode(struct cnode *cp) +hfs_reclaim_cnode(hfsmount_t *hfsmp, struct cnode *cp) { #if QUOTA int i; @@ -1483,11 +1529,12 @@ hfs_reclaim_cnode(struct cnode *cp) } #endif #if CONFIG_PROTECT - cp_entry_destroy(cp->c_cpentry); + cp_entry_destroy(hfsmp, cp->c_cpentry); cp->c_cpentry = NULL; +#else + (void)hfsmp; // Prevent compiler warning #endif - - + bzero(cp, sizeof(struct cnode)); FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE); } @@ -1657,24 +1704,24 @@ void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) { static u_int32_t hfs_get_dateadded_internal(const uint8_t *finderinfo, mode_t mode) { - u_int8_t *finfo = NULL; + const uint8_t *finfo = NULL; u_int32_t dateadded = 0; /* overlay the FinderInfo to the correct pointer, and advance */ - finfo = (u_int8_t*)finderinfo + 16; + finfo = finderinfo + 16; /* * FinderInfo is written out in big endian... make sure to convert it to host * native before we use it. 
*/ if (S_ISREG(mode)) { - struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo; dateadded = OSSwapBigToHostInt32 (extinfo->date_added); } else if (S_ISDIR(mode)) { - struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; + const struct FndrExtendedDirInfo *extinfo = (const struct FndrExtendedDirInfo *)finfo; dateadded = OSSwapBigToHostInt32 (extinfo->date_added); } @@ -1812,7 +1859,7 @@ uint32_t hfs_incr_gencount (struct cnode *cp) { } extinfo->write_gen_counter = OSSwapHostToBigInt32 (gcount); - SET(cp->c_flag, C_MODIFIED); + SET(cp->c_flag, C_MINOR_MOD); } else { gcount = 0; @@ -1829,11 +1876,11 @@ uint32_t hfs_incr_gencount (struct cnode *cp) { static u_int32_t hfs_get_gencount_internal(const uint8_t *finderinfo, mode_t mode) { - u_int8_t *finfo = NULL; + const uint8_t *finfo = NULL; u_int32_t gcount = 0; /* overlay the FinderInfo to the correct pointer, and advance */ - finfo = (u_int8_t*)finderinfo; + finfo = finderinfo; finfo = finfo + 16; /* @@ -1845,7 +1892,7 @@ hfs_get_gencount_internal(const uint8_t *finderinfo, mode_t mode) * last 32-bit word) so it is safe to have one code path here. 
*/ if (S_ISDIR(mode) || S_ISREG(mode)) { - struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo; gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter); /* @@ -1893,11 +1940,12 @@ void hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) { vfs_context_t ctx; - /* don't modify times if volume is read-only */ - if (hfsmp->hfs_flags & HFS_READ_ONLY) { + + if (ISSET(hfsmp->hfs_flags, HFS_READ_ONLY) || ISSET(cp->c_flag, C_NOEXISTS)) { cp->c_touch_acctime = FALSE; cp->c_touch_chgtime = FALSE; cp->c_touch_modtime = FALSE; + CLR(cp->c_flag, C_NEEDS_DATEADDED); return; } #if CONFIG_HFS_STD @@ -1935,42 +1983,45 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) microtime(&tv); if (cp->c_touch_acctime) { - cp->c_atime = tv.tv_sec; /* - * When the access time is the only thing changing - * then make sure its sufficiently newer before - * committing it to disk. + * When the access time is the only thing changing, we + * won't necessarily write it to disk immediately. We + * only do the atime update at vnode recycle time, when + * fsync is called or when there's another reason to write + * to the metadata. 
*/ - if ((((u_int32_t)cp->c_atime - (u_int32_t)(cp)->c_attr.ca_atimeondisk) > - ATIME_ONDISK_ACCURACY)) { - cp->c_flag |= C_MODIFIED; - } + cp->c_atime = tv.tv_sec; cp->c_touch_acctime = FALSE; } if (cp->c_touch_modtime) { - cp->c_mtime = tv.tv_sec; cp->c_touch_modtime = FALSE; - cp->c_flag |= C_MODIFIED; - touchvol = 1; + time_t new_time = tv.tv_sec; #if CONFIG_HFS_STD /* * HFS dates that WE set must be adjusted for DST */ if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) { - cp->c_mtime += 3600; + new_time += 3600; } #endif + if (cp->c_mtime != new_time) { + cp->c_mtime = new_time; + cp->c_flag |= C_MINOR_MOD; + touchvol = 1; + } } if (cp->c_touch_chgtime) { - cp->c_ctime = tv.tv_sec; cp->c_touch_chgtime = FALSE; - cp->c_flag |= C_MODIFIED; - touchvol = 1; + if (cp->c_ctime != tv.tv_sec) { + cp->c_ctime = tv.tv_sec; + cp->c_flag |= C_MINOR_MOD; + touchvol = 1; + } } if (cp->c_flag & C_NEEDS_DATEADDED) { hfs_write_dateadded (&(cp->c_attr), tv.tv_sec); - cp->c_flag |= C_MODIFIED; + cp->c_flag |= C_MINOR_MOD; /* untwiddle the bit */ cp->c_flag &= ~C_NEEDS_DATEADDED; touchvol = 1; @@ -1978,7 +2029,7 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) /* Touch the volume modtime if needed */ if (touchvol) { - MarkVCBDirty(hfsmp); + hfs_note_header_minor_change(hfsmp); HFSTOVCB(hfsmp)->vcbLsMod = tv.tv_sec; } } @@ -2000,7 +2051,12 @@ hfs_lock(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags) thread_t thread = current_thread(); if (cp->c_lockowner == thread) { - /* Only the extents and bitmap files support lock recursion. */ + /* + * Only the extents and bitmap files support lock recursion + * here. The other system files support lock recursion in + * hfs_systemfile_lock. Eventually, we should change to + * handle recursion solely in hfs_systemfile_lock. 
+ */ if ((cp->c_fileid == kHFSExtentsFileID) || (cp->c_fileid == kHFSAllocationFileID)) { cp->c_syslockcount++; @@ -2070,6 +2126,15 @@ hfs_lock(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags) return (0); } +bool hfs_lock_upgrade(cnode_t *cp) +{ + if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock)) { + cp->c_lockowner = current_thread(); + return true; + } else + return false; +} + /* * Lock a pair of cnodes. */ @@ -2368,6 +2433,21 @@ hfs_lock_truncate(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockfla } } +bool hfs_truncate_lock_upgrade(struct cnode *cp) +{ + assert(cp->c_truncatelockowner == HFS_SHARED_OWNER); + if (!lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock)) + return false; + cp->c_truncatelockowner = current_thread(); + return true; +} + +void hfs_truncate_lock_downgrade(struct cnode *cp) +{ + assert(cp->c_truncatelockowner == current_thread()); + lck_rw_lock_exclusive_to_shared(&cp->c_truncatelock); + cp->c_truncatelockowner = HFS_SHARED_OWNER; +} /* * Attempt to get the truncate lock. If it cannot be acquired, error out. diff --git a/bsd/hfs/hfs_cnode.h b/bsd/hfs/hfs_cnode.h index 1fa4f2d8b..d45b9d236 100644 --- a/bsd/hfs/hfs_cnode.h +++ b/bsd/hfs/hfs_cnode.h @@ -49,7 +49,7 @@ #if CONFIG_PROTECT #include #endif - +#include /* * The filefork is used to represent an HFS file fork (data or resource). @@ -95,9 +95,19 @@ struct cat_lookup_buffer { #define ff_new_size ff_data.cf_new_size #define ff_clumpsize ff_data.cf_clump #define ff_bytesread ff_data.cf_bytesread -#define ff_blocks ff_data.cf_blocks #define ff_extents ff_data.cf_extents + +/* + * Note that the blocks fields are protected by the cnode lock, *not* + * the truncate lock. 
+ */ +#define ff_blocks ff_data.cf_blocks #define ff_unallocblocks ff_data.cf_vblocks +static inline uint32_t ff_allocblocks(filefork_t *ff) +{ + assert(ff->ff_blocks >= ff->ff_unallocblocks); + return ff->ff_blocks - ff->ff_unallocblocks; +} #define ff_symlinkptr ff_union.ffu_symlinkptr #define ff_sysfileinfo ff_union.ffu_sysfileinfo @@ -172,6 +182,14 @@ struct cnode { uint8_t c_tflags; }; + /* + * Where we're using a journal, we keep track of the last + * transaction that we did an update in. If a minor modification + * is made, we'll still push it if we're still on the same + * transaction. + */ + uint32_t c_update_txn; + #if HFS_COMPRESSION decmpfs_cnode *c_decmp; #endif /* HFS_COMPRESSION */ @@ -229,7 +247,12 @@ typedef struct cnode cnode_t; #define C_DELETED 0x0000040 /* CNode has been marked to be deleted */ #define C_HARDLINK 0x0000080 /* CNode is a hard link (file or dir) */ -#define C_FORCEUPDATE 0x0000100 /* force the catalog entry update */ +/* + * A minor modification is one where the volume would not be inconsistent if + * the change was not pushed to disk. For example, changes to times. + */ +#define C_MINOR_MOD 0x0000100 /* CNode has a minor modification */ + #define C_HASXATTRS 0x0000200 /* cnode has extended attributes */ #define C_NEG_ENTRIES 0x0000400 /* directory has negative name entries */ /* @@ -336,6 +359,37 @@ int hfs_hides_xattr(vfs_context_t ctx, struct cnode *cp, const char *name, int s #define ATIME_ONDISK_ACCURACY 300 +static inline bool hfs_should_save_atime(cnode_t *cp) +{ + /* + * We only write atime updates to disk if the delta is greater + * than ATIME_ONDISK_ACCURACY. 
+ */ + return (cp->c_atime < cp->c_attr.ca_atimeondisk + || cp->c_atime - cp->c_attr.ca_atimeondisk > ATIME_ONDISK_ACCURACY); +} + +typedef enum { + HFS_NOT_DIRTY = 0, + HFS_DIRTY = 1, + HFS_DIRTY_ATIME = 2 +} hfs_dirty_t; + +static inline hfs_dirty_t hfs_is_dirty(cnode_t *cp) +{ + if (ISSET(cp->c_flag, C_NOEXISTS)) + return HFS_NOT_DIRTY; + + if (ISSET(cp->c_flag, C_MODIFIED | C_MINOR_MOD | C_NEEDS_DATEADDED) + || cp->c_touch_chgtime || cp->c_touch_modtime) { + return HFS_DIRTY; + } + + if (cp->c_touch_acctime || hfs_should_save_atime(cp)) + return HFS_DIRTY_ATIME; + + return HFS_NOT_DIRTY; +} /* This overlays the FileID portion of NFS file handles. */ struct hfsfid { @@ -355,6 +409,7 @@ extern int hfs_getnewvnode(struct hfsmount *hfsmp, struct vnode *dvp, struct com #define GNV_SKIPLOCK 0x02 /* Skip taking the cnode lock (when getting resource fork). */ #define GNV_CREATE 0x04 /* The vnode is for a newly created item. */ #define GNV_NOCACHE 0x08 /* Delay entering this item in the name cache */ +#define GNV_USE_VP 0x10 /* Use the vnode provided in *vpp instead of creating a new one */ /* Output flags for hfs_getnewvnode */ #define GNV_CHASH_RENAMED 0x01 /* The cnode was renamed in-flight */ @@ -362,6 +417,7 @@ extern int hfs_getnewvnode(struct hfsmount *hfsmp, struct vnode *dvp, struct com #define GNV_NEW_CNODE 0x04 /* We are vending out a newly initialized cnode */ #define GNV_CAT_ATTRCHANGED 0x08 /* Something in struct cat_attr changed in between cat_lookups */ + /* Touch cnode times based on c_touch_xxx flags */ extern void hfs_touchtimes(struct hfsmount *, struct cnode *); extern void hfs_write_dateadded (struct cat_attr *cattrp, u_int32_t dateadded); @@ -421,13 +477,17 @@ extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid); * are issues with this (see #16620278). * * + If locking multiple cnodes then the truncate lock must be taken on - * both (in address order), before taking the cnode locks. 
+ * all (in address order), before taking the cnode locks. + * + * 2. Hot Files stage mutex (grabbed before manipulating individual vnodes/cnodes) * - * 2. cnode lock (in parent-child order if related, otherwise by address order) + * 3. cnode locks in address order (if needed) * - * 3. journal (if needed) + * 4. journal (if needed) * - * 4. system files (as needed) + * 5. Hot Files B-Tree lock (not treated as a system file) + * + * 6. system files (as needed) * * A. Catalog B-tree file * B. Attributes B-tree file @@ -435,7 +495,7 @@ extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid); * D. Allocation Bitmap file (always exclusive, supports recursion) * E. Overflow Extents B-tree file (always exclusive, supports recursion) * - * 5. hfs mount point (always last) + * 7. hfs mount point (always last) * * * I. HFS cnode hash lock (must not acquire any new locks while holding this lock, always taken last) @@ -494,11 +554,15 @@ extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid); * pages, we will deadlock. (See #16620278.) * * + If you do anything that requires blocks to not be deleted or - * encrpytion keys to remain valid, you must take the truncate lock + * encryption keys to remain valid, you must take the truncate lock * shared. * * + And it follows therefore, that if you want to delete blocks or - * delete keys, you must take the truncate lock exclusively. + * delete keys, you must take the truncate lock exclusively. Note + * that for asynchronous writes, the truncate lock will be dropped + * after issuing I/O but before the I/O has completed which means + * that before manipulating keys, you *must* issue + * vnode_wait_for_writes in addition to holding the truncate lock. * * N.B. 
ff_size is actually protected by the cnode lock and so you * must hold the cnode lock exclusively to change it and shared to @@ -524,18 +588,23 @@ enum hfs_lockflags { void hfs_lock_always(cnode_t *cnode, enum hfs_locktype); int hfs_lock(struct cnode *, enum hfs_locktype, enum hfs_lockflags); +bool hfs_lock_upgrade(cnode_t *cp); int hfs_lockpair(struct cnode *, struct cnode *, enum hfs_locktype); int hfs_lockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *, enum hfs_locktype, struct cnode **); - void hfs_unlock(struct cnode *); void hfs_unlockpair(struct cnode *, struct cnode *); void hfs_unlockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *); void hfs_lock_truncate(struct cnode *, enum hfs_locktype, enum hfs_lockflags); +bool hfs_truncate_lock_upgrade(struct cnode *cp); +void hfs_truncate_lock_downgrade(struct cnode *cp); void hfs_unlock_truncate(struct cnode *, enum hfs_lockflags); int hfs_try_trunclock(struct cnode *, enum hfs_locktype, enum hfs_lockflags); +extern int hfs_systemfile_lock(struct hfsmount *, int, enum hfs_locktype); +extern void hfs_systemfile_unlock(struct hfsmount *, int); + void hfs_clear_might_be_dirty_flag(cnode_t *cp); // cnode must be locked diff --git a/bsd/hfs/hfs_cprotect.c b/bsd/hfs/hfs_cprotect.c index ebb58b7ff..963305e02 100644 --- a/bsd/hfs/hfs_cprotect.c +++ b/bsd/hfs/hfs_cprotect.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,8 +25,8 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include -#include +#if CONFIG_PROTECT + #include #include #include @@ -36,16 +36,20 @@ #include #include #include +#include #include #include #include "hfs.h" #include "hfs_cnode.h" #include "hfs_fsctl.h" +#include "hfs_cprotect.h" -#if CONFIG_PROTECT -/* - * The wrap function pointers and the variable to indicate if they + +#define PTR_ADD(type, base, offset) (type)((uintptr_t)(base) + (offset)) + +/* + * The wrap function pointers and the variable to indicate if they * are initialized are system-wide, and hence are defined globally. */ static struct cp_wrap_func g_cp_wrap_func = {}; @@ -58,27 +62,15 @@ extern int (**hfs_vnodeop_p) (void *); */ static int cp_root_major_vers(mount_t mp); static int cp_getxattr(cnode_t *, struct hfsmount *hfsmp, struct cprotect **); -static struct cprotect *cp_entry_alloc(size_t); -static void cp_entry_dealloc(struct cprotect *entry); +static void cp_entry_dealloc(hfsmount_t *hfsmp, struct cprotect *entry); static int cp_restore_keys(struct cprotect *, struct hfsmount *hfsmp, struct cnode *); static int cp_lock_vfs_callback(mount_t, void *); static int cp_lock_vnode_callback(vnode_t, void *); static int cp_vnode_is_eligible (vnode_t); static int cp_check_access (cnode_t *cp, struct hfsmount *hfsmp, int vnop); -static int cp_new(int newclass, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, - uint32_t flags, struct cprotect **output_entry); -static int cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass); static int cp_unwrap(struct hfsmount *, struct cprotect *, struct cnode *); -static int cp_setup_aes_ctx(struct cprotect *entry); static void cp_init_access(cp_cred_t access, struct cnode *cp); -static inline int cp_get_crypto_generation (uint32_t protclass) { - if (protclass & CP_CRYPTO_G1) { - return 1; - } - else return 0; -} - #if DEVELOPMENT || DEBUG #define CP_ASSERT(x) \ @@ -89,6 +81,294 @@ 
static inline int cp_get_crypto_generation (uint32_t protclass) { #define CP_ASSERT(x) #endif +// -- cpx_t accessors -- + +size_t cpx_size(size_t key_size) +{ + size_t size = sizeof(struct cpx) + key_size; + +#if DEBUG + size += 4; // Extra for magic +#endif + + return size; +} + +static size_t cpx_sizex(const struct cpx *cpx) +{ + return cpx_size(cpx->cpx_max_key_len); +} + +cpx_t cpx_alloc(size_t key_len) +{ + cpx_t cpx; + + MALLOC(cpx, cpx_t, cpx_size(key_len), M_TEMP, M_WAITOK); + + cpx_init(cpx, key_len); + + return cpx; +} + +#if DEBUG +static const uint32_t cpx_magic1 = 0x7b787063; // cpx{ +static const uint32_t cpx_magic2 = 0x7870637d; // }cpx +#endif + +void cpx_free(cpx_t cpx) +{ +#if DEBUG + assert(cpx->cpx_magic1 == cpx_magic1); + assert(*PTR_ADD(uint32_t *, cpx, cpx_sizex(cpx) - 4) == cpx_magic2); +#endif + bzero(cpx->cpx_cached_key, cpx->cpx_max_key_len); + FREE(cpx, M_TEMP); +} + +void cpx_init(cpx_t cpx, size_t key_len) +{ +#if DEBUG + cpx->cpx_magic1 = cpx_magic1; + *PTR_ADD(uint32_t *, cpx, cpx_size(key_len) - 4) = cpx_magic2; +#endif + cpx->cpx_flags = 0; + cpx->cpx_key_len = 0; + cpx->cpx_max_key_len = key_len; +} + +bool cpx_is_sep_wrapped_key(const struct cpx *cpx) +{ + return ISSET(cpx->cpx_flags, CPX_SEP_WRAPPEDKEY); +} + +void cpx_set_is_sep_wrapped_key(struct cpx *cpx, bool v) +{ + if (v) + SET(cpx->cpx_flags, CPX_SEP_WRAPPEDKEY); + else + CLR(cpx->cpx_flags, CPX_SEP_WRAPPEDKEY); +} + +bool cpx_use_offset_for_iv(const struct cpx *cpx) +{ + return ISSET(cpx->cpx_flags, CPX_USE_OFFSET_FOR_IV); +} + +void cpx_set_use_offset_for_iv(struct cpx *cpx, bool v) +{ + if (v) + SET(cpx->cpx_flags, CPX_USE_OFFSET_FOR_IV); + else + CLR(cpx->cpx_flags, CPX_USE_OFFSET_FOR_IV); +} + +uint16_t cpx_max_key_len(const struct cpx *cpx) +{ + return cpx->cpx_max_key_len; +} + +uint16_t cpx_key_len(const struct cpx *cpx) +{ + return cpx->cpx_key_len; +} + +void cpx_set_key_len(struct cpx *cpx, uint16_t key_len) +{ + cpx->cpx_key_len = key_len; + + if 
(ISSET(cpx->cpx_flags, CPX_IV_AES_CTX_HFS)) { + /* + * We assume that if the key length is being modified, the key + * has changed. As a result, un-set any bits related to the + * AES context, if needed. They should be re-generated + * on-demand. + */ + CLR(cpx->cpx_flags, CPX_IV_AES_CTX_INITIALIZED | CPX_IV_AES_CTX_HFS); + } +} + +bool cpx_has_key(const struct cpx *cpx) +{ + return cpx->cpx_key_len > 0; +} + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-qual" +void *cpx_key(const struct cpx *cpx) +{ + return (void *)cpx->cpx_cached_key; +} +#pragma clang diagnostic pop + +static void cpx_set_aes_iv_key(struct cpx *cpx, void *iv_key) +{ + aes_encrypt_key128(iv_key, &cpx->cpx_iv_aes_ctx); + SET(cpx->cpx_flags, CPX_IV_AES_CTX_INITIALIZED | CPX_USE_OFFSET_FOR_IV); + CLR(cpx->cpx_flags, CPX_IV_AES_CTX_HFS); +} + +aes_encrypt_ctx *cpx_iv_aes_ctx(struct cpx *cpx) +{ + if (ISSET(cpx->cpx_flags, CPX_IV_AES_CTX_INITIALIZED)) + return &cpx->cpx_iv_aes_ctx; + + SHA1_CTX sha1ctxt; + uint8_t digest[SHA_DIGEST_LENGTH]; /* Kiv */ + + /* First init the cp_cache_iv_key[] */ + SHA1Init(&sha1ctxt); + + /* + * We can only use this when the keys are generated in the AP; As a result + * we only use the first 32 bytes of key length in the cache key + */ + SHA1Update(&sha1ctxt, cpx->cpx_cached_key, cpx->cpx_key_len); + SHA1Final(digest, &sha1ctxt); + + cpx_set_aes_iv_key(cpx, digest); + SET(cpx->cpx_flags, CPX_IV_AES_CTX_HFS); + + return &cpx->cpx_iv_aes_ctx; +} + +static void cpx_flush(cpx_t cpx) +{ + bzero(cpx->cpx_cached_key, cpx->cpx_max_key_len); + bzero(&cpx->cpx_iv_aes_ctx, sizeof(cpx->cpx_iv_aes_ctx)); + cpx->cpx_flags = 0; + cpx->cpx_key_len = 0; +} + +static bool cpx_can_copy(const struct cpx *src, const struct cpx *dst) +{ + return src->cpx_key_len <= dst->cpx_max_key_len; +} + +void cpx_copy(const struct cpx *src, cpx_t dst) +{ + uint16_t key_len = cpx_key_len(src); + cpx_set_key_len(dst, key_len); + memcpy(cpx_key(dst), cpx_key(src), key_len); + 
dst->cpx_flags = src->cpx_flags; + if (ISSET(dst->cpx_flags, CPX_IV_AES_CTX_INITIALIZED)) + dst->cpx_iv_aes_ctx = src->cpx_iv_aes_ctx; +} + +// -- cp_key_pair accessors -- + +void cpkp_init(cp_key_pair_t *cpkp, uint16_t max_pers_key_len, + uint16_t max_cached_key_len) +{ + cpkp->cpkp_max_pers_key_len = max_pers_key_len; + cpkp->cpkp_pers_key_len = 0; + cpx_init(&cpkp->cpkp_cpx, max_cached_key_len); + + // Default to using offsets + cpx_set_use_offset_for_iv(&cpkp->cpkp_cpx, true); +} + +uint16_t cpkp_max_pers_key_len(const cp_key_pair_t *cpkp) +{ + return cpkp->cpkp_max_pers_key_len; +} + +uint16_t cpkp_pers_key_len(const cp_key_pair_t *cpkp) +{ + return cpkp->cpkp_pers_key_len; +} + +static bool cpkp_has_pers_key(const cp_key_pair_t *cpkp) +{ + return cpkp->cpkp_pers_key_len > 0; +} + +static void *cpkp_pers_key(const cp_key_pair_t *cpkp) +{ + return PTR_ADD(void *, &cpkp->cpkp_cpx, cpx_sizex(&cpkp->cpkp_cpx)); +} + +static void cpkp_set_pers_key_len(cp_key_pair_t *cpkp, uint16_t key_len) +{ + if (key_len > cpkp->cpkp_max_pers_key_len) + panic("hfs_cprotect: key too big!"); + cpkp->cpkp_pers_key_len = key_len; +} + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-qual" +cpx_t cpkp_cpx(const cp_key_pair_t *cpkp) +{ + // Cast to remove const qualifier + return (cpx_t)&cpkp->cpkp_cpx; +} +#pragma clang diagnostic pop + +size_t cpkp_size(uint16_t pers_key_len, uint16_t cached_key_len) +{ + return (sizeof(cp_key_pair_t) - sizeof(struct cpx) + + pers_key_len + cpx_size(cached_key_len)); +} + +size_t cpkp_sizex(const cp_key_pair_t *cpkp) +{ + return cpkp_size(cpkp->cpkp_max_pers_key_len, cpkp->cpkp_cpx.cpx_max_key_len); +} + +void cpkp_flush(cp_key_pair_t *cpkp) +{ + cpx_flush(&cpkp->cpkp_cpx); + cpkp->cpkp_pers_key_len = 0; + bzero(cpkp_pers_key(cpkp), cpkp->cpkp_max_pers_key_len); +} + +bool cpkp_can_copy(const cp_key_pair_t *src, const cp_key_pair_t *dst) +{ + return (cpkp_pers_key_len(src) <= dst->cpkp_max_pers_key_len + && 
cpx_can_copy(&src->cpkp_cpx, &dst->cpkp_cpx)); +} + +void cpkp_copy(const cp_key_pair_t *src, cp_key_pair_t *dst) +{ + const uint16_t key_len = cpkp_pers_key_len(src); + cpkp_set_pers_key_len(dst, key_len); + memcpy(cpkp_pers_key(dst), cpkp_pers_key(src), key_len); + cpx_copy(&src->cpkp_cpx, &dst->cpkp_cpx); +} + +// -- + +bool cp_is_supported_version(uint16_t vers) +{ + return vers == CP_VERS_4 || vers == CP_VERS_5; +} + +/* + * Return the appropriate key and, if requested, the physical offset and + * maximum length for a particular I/O operation. + */ +void cp_io_params(__unused hfsmount_t *hfsmp, cprotect_t cpr, + __unused off_rsrc_t off_rsrc, + __unused int direction, cp_io_params_t *io_params) +{ + + io_params->max_len = INT64_MAX; + io_params->phys_offset = -1; + io_params->cpx = cpkp_cpx(&cpr->cp_keys); +} + +static void cp_flush_cached_keys(cprotect_t cpr) +{ + cpx_flush(cpkp_cpx(&cpr->cp_keys)); +} + +static bool cp_needs_pers_key(cprotect_t cpr) +{ + if (CP_CLASS(cpr->cp_pclass) == PROTECTION_CLASS_F) + return !cpx_has_key(cpkp_cpx(&cpr->cp_keys)); + else + return !cpkp_has_pers_key(&cpr->cp_keys); +} + int cp_key_store_action(int action) { @@ -107,8 +387,7 @@ cp_key_store_action(int action) * Upcast the value in 'action' to be a pointer-width unsigned integer. * This avoids issues relating to pointer-width. */ - unsigned long action_arg = (unsigned long) action; - return vfs_iterate(0, cp_lock_vfs_callback, (void*)action_arg); + return vfs_iterate(0, cp_lock_vfs_callback, (void*)(uintptr_t)action); } @@ -128,6 +407,19 @@ cp_register_wraps(cp_wrap_func_t key_store_func) return 0; } +static cp_key_revision_t cp_initial_key_revision(__unused hfsmount_t *hfsmp) +{ + return 1; +} + +cp_key_revision_t cp_next_key_revision(cp_key_revision_t rev) +{ + rev = (rev + 0x0100) ^ (mach_absolute_time() & 0xff); + if (!rev) + rev = 1; + return rev; +} + /* * Allocate and initialize a cprotect blob for a new cnode. 
* Called from hfs_getnewvnode: cnode is locked exclusive. @@ -171,22 +463,7 @@ cp_entry_init(struct cnode *cp, struct mount *mp) CP_ASSERT (cp->c_cpentry == NULL); error = cp_getxattr(cp, hfsmp, &entry); - if (error == 0) { - /* - * Success; attribute was found, though it may not have keys. - * If the entry is not returned without keys, we will delay generating - * keys until the first I/O. - */ - if (S_ISREG(cp->c_mode)) { - if (entry->cp_flags & CP_NEEDS_KEYS) { - entry->cp_flags &= ~CP_KEY_FLUSHED; - } - else { - entry->cp_flags |= CP_KEY_FLUSHED; - } - } - } - else if (error == ENOATTR) { + if (error == ENOATTR) { /* * Normally, we should always have a CP EA for a file or directory that * we are initializing here. However, there are some extenuating circumstances, @@ -195,15 +472,21 @@ cp_entry_init(struct cnode *cp, struct mount *mp) * As a result, we leave code here to deal with an ENOATTR which will always * default to a 'D/NONE' key, though we don't expect to use it much. */ - int target_class = PROTECTION_CLASS_D; - + cp_key_class_t target_class = PROTECTION_CLASS_D; + if (S_ISDIR(cp->c_mode)) { target_class = PROTECTION_CLASS_DIR_NONE; - } + } + + cp_key_revision_t key_revision = cp_initial_key_revision(hfsmp); + /* allow keybag to override our class preferences */ - uint32_t keyflags = CP_KEYWRAP_DIFFCLASS; - error = cp_new (target_class, hfsmp, cp, cp->c_mode, keyflags, &entry); + error = cp_new (&target_class, hfsmp, cp, cp->c_mode, CP_KEYWRAP_DIFFCLASS, + key_revision, (cp_new_alloc_fn)cp_entry_alloc, (void **)&entry); if (error == 0) { + entry->cp_pclass = target_class; + entry->cp_key_os_version = cp_os_version(); + entry->cp_key_revision = key_revision; error = cp_setxattr (cp, entry, hfsmp, cp->c_fileid, XATTR_CREATE); } } @@ -226,7 +509,7 @@ cp_entry_init(struct cnode *cp, struct mount *mp) } else { if (entry) { - cp_entry_destroy(entry); + cp_entry_destroy(hfsmp, entry); } cp->c_cpentry = NULL; } @@ -246,8 +529,9 @@ cp_entry_init(struct cnode 
*cp, struct mount *mp) * keys for us, we could have to fail the open(2) call and back out the entry. */ -int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppliedclass, - mode_t cmode, struct cprotect **tmpentry) +int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, + cp_key_class_t suppliedclass, mode_t cmode, + struct cprotect **tmpentry) { int isdir = 0; struct cprotect *entry = NULL; @@ -309,7 +593,7 @@ int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppli } /* Generate the cprotect to vend out */ - entry = cp_entry_alloc (0); + entry = cp_entry_alloc(NULL, 0, 0, NULL); if (entry == NULL) { *tmpentry = NULL; return ENOMEM; @@ -320,7 +604,7 @@ int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppli * this blob has no keys and it has no backing xattr. We just know the * target class. */ - entry->cp_flags = (CP_NEEDS_KEYS | CP_NO_XATTR); + entry->cp_flags = CP_NO_XATTR; /* Note this is only the effective class */ entry->cp_pclass = target_class; *tmpentry = entry; @@ -328,34 +612,6 @@ int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppli return 0; } - -/* - * cp_needs_tempkeys - * - * Relay to caller whether or not the filesystem should generate temporary keys - * during resize operations. - */ - -int cp_needs_tempkeys (struct hfsmount *hfsmp, int *needs) -{ - - if (hfsmp->hfs_running_cp_major_vers < CP_PREV_MAJOR_VERS || - hfsmp->hfs_running_cp_major_vers > CP_NEW_MAJOR_VERS) { - return -1; - } - - /* CP_NEW_MAJOR_VERS implies CP_OFF_IV_ENABLED */ - if (hfsmp->hfs_running_cp_major_vers < CP_NEW_MAJOR_VERS) { - *needs = 0; - } - else { - *needs = 1; - } - - return 0; -} - - /* * Set up an initial key/class pair for a disassociated cprotect entry. 
* This function is used to generate transient keys that will never be @@ -367,41 +623,17 @@ int cp_needs_tempkeys (struct hfsmount *hfsmp, int *needs) * where we may rely on AES symmetry to relocate encrypted data from * one spot in the disk to another. */ -int cp_entry_gentempkeys(struct cprotect **entry_ptr, struct hfsmount *hfsmp) +int cpx_gentempkeys(cpx_t *pcpx, __unused struct hfsmount *hfsmp) { + cpx_t cpx = cpx_alloc(CP_MAX_KEYSIZE); - struct cprotect *entry = NULL; - - if (hfsmp->hfs_running_cp_major_vers < CP_NEW_MAJOR_VERS) { - return EPERM; - } - - /* - * This should only be used for files and won't be written out. - * We don't need a persistent key. - */ - entry = cp_entry_alloc (0); - if (entry == NULL) { - *entry_ptr = NULL; - return ENOMEM; - } - /* This is generated in-kernel so we leave it at the max key*/ - entry->cp_cache_key_len = CP_MAX_KEYSIZE; - - /* This pclass is only the effective class */ - entry->cp_pclass = PROTECTION_CLASS_F; - entry->cp_persistent_key_len = 0; + cpx_set_key_len(cpx, CP_MAX_KEYSIZE); + read_random(cpx_key(cpx), CP_MAX_KEYSIZE); + cpx_set_use_offset_for_iv(cpx, true); - /* Generate the class F key */ - read_random (&entry->cp_cache_key[0], entry->cp_cache_key_len); + *pcpx = cpx; - /* Generate the IV key */ - cp_setup_aes_ctx(entry); - entry->cp_flags |= CP_OFF_IV_ENABLED; - - *entry_ptr = entry; return 0; - } /* @@ -409,13 +641,13 @@ int cp_entry_gentempkeys(struct cprotect **entry_ptr, struct hfsmount *hfsmp) * Called at hfs_reclaim_cnode: cnode is locked exclusive. */ void -cp_entry_destroy(struct cprotect *entry_ptr) +cp_entry_destroy(hfsmount_t *hfsmp, struct cprotect *entry_ptr) { if (entry_ptr == NULL) { /* nothing to clean up */ return; } - cp_entry_dealloc(entry_ptr); + cp_entry_dealloc(hfsmp, entry_ptr); } @@ -511,7 +743,6 @@ cp_vnode_getclass(struct vnode *vp, int *class) return error; } - /* * Sets persistent class for this file or directory. 
* If vnode cannot be protected (system file, non-regular file, non-hfs), EBADF. @@ -592,61 +823,80 @@ cp_vnode_setclass(struct vnode *vp, uint32_t newclass) * target class (since B allows I/O but an unwrap prior to the next unlock * will not be allowed). */ - if (entry->cp_flags & CP_KEY_FLUSHED) { + if (!cpx_has_key(&entry->cp_keys.cpkp_cpx)) { error = cp_restore_keys (entry, hfsmp, cp); if (error) { goto out; } } + if (newclass == PROTECTION_CLASS_F) { /* Verify that file is blockless if switching to class F */ if (cp->c_datafork->ff_size > 0) { error = EINVAL; - goto out; + goto out; } - /* newclass is only the effective class */ - entry->cp_pclass = newclass; + cp_key_pair_t *cpkp; + cprotect_t new_entry = cp_entry_alloc(NULL, 0, CP_MAX_KEYSIZE, &cpkp); - /* Class F files are not wrapped, so they continue to use MAX_KEYSIZE */ - entry->cp_cache_key_len = CP_MAX_KEYSIZE; - read_random (&entry->cp_cache_key[0], entry->cp_cache_key_len); - if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) { - cp_setup_aes_ctx (entry); - entry->cp_flags |= CP_OFF_IV_ENABLED; - } - bzero(entry->cp_persistent_key, entry->cp_persistent_key_len); - entry->cp_persistent_key_len = 0; - } else { - /* Deny the setclass if file is to be moved from F to something else */ - if (entry->cp_pclass == PROTECTION_CLASS_F) { - error = EPERM; - goto out; - } - /* We cannot call cp_rewrap unless the keys were already in existence. */ - if (entry->cp_flags & CP_NEEDS_KEYS) { - struct cprotect *newentry = NULL; - /* - * We want to fail if we can't wrap to the target class. By not setting - * CP_KEYWRAP_DIFFCLASS, we tell keygeneration that if it can't wrap - * to 'newclass' then error out. 
- */ - uint32_t flags = 0; - error = cp_generate_keys (hfsmp, cp, newclass, flags, &newentry); - if (error == 0) { - cp_replace_entry (cp, newentry); - } - /* Bypass the setxattr code below since generate_keys does it for us */ + if (!new_entry) { + error = ENOMEM; goto out; } - else { - error = cp_rewrap(cp, hfsmp, newclass); + + /* newclass is only the effective class */ + new_entry->cp_pclass = newclass; + new_entry->cp_key_os_version = cp_os_version(); + new_entry->cp_key_revision = cp_next_key_revision(entry->cp_key_revision); + + cpx_t cpx = cpkp_cpx(cpkp); + + /* Class F files are not wrapped, so they continue to use MAX_KEYSIZE */ + cpx_set_key_len(cpx, CP_MAX_KEYSIZE); + read_random (cpx_key(cpx), CP_MAX_KEYSIZE); + + cp_replace_entry(hfsmp, cp, new_entry); + + error = 0; + goto out; + } + + /* Deny the setclass if file is to be moved from F to something else */ + if (entry->cp_pclass == PROTECTION_CLASS_F) { + error = EPERM; + goto out; + } + + if (!cpkp_has_pers_key(&entry->cp_keys)) { + struct cprotect *new_entry = NULL; + /* + * We want to fail if we can't wrap to the target class. By not setting + * CP_KEYWRAP_DIFFCLASS, we tell keygeneration that if it can't wrap + * to 'newclass' then error out. + */ + uint32_t flags = 0; + error = cp_generate_keys (hfsmp, cp, newclass, flags, &new_entry); + if (error == 0) { + cp_replace_entry (hfsmp, cp, new_entry); } + /* Bypass the setxattr code below since generate_keys does it for us */ + goto out; } + + cprotect_t new_entry; + error = cp_rewrap(cp, hfsmp, &newclass, &entry->cp_keys, entry, + (cp_new_alloc_fn)cp_entry_alloc, (void **)&new_entry); if (error) { /* we didn't have perms to set this class. leave file as-is and error out */ goto out; } + + + new_entry->cp_pclass = newclass; + + cp_replace_entry(hfsmp, cp, new_entry); + entry = new_entry; } else if (vnode_isdir(vp)) { /* For directories, just update the pclass. 
newclass is only effective class */ @@ -722,16 +972,6 @@ int cp_vnode_transcode(vnode_t vp, void *key, unsigned *len) goto out; } - if ((entry->cp_flags & CP_NEEDS_KEYS)) { - /* - * If we are transcoding keys for AKB, then we should have already established - * a set of keys for this vnode. IF we don't have keys yet, then something bad - * happened. - */ - error = EINVAL; - goto out; - } - /* Send the per-file key in wrapped form for re-wrap with the current class information * Send NULLs in the output parameters of the wrapper() and AKS will do the rest. * Don't need to process any outputs, so just clear the locks and pass along the error. */ @@ -749,8 +989,18 @@ int cp_vnode_transcode(vnode_t vp, void *key, unsigned *len) bzero(&wrapped_key_in, sizeof(wrapped_key_in)); bzero(&wrapped_key_out, sizeof(wrapped_key_out)); - wrapped_key_in.key = entry->cp_persistent_key; - wrapped_key_in.key_len = entry->cp_persistent_key_len; + + cp_key_pair_t *cpkp = &entry->cp_keys; + + + wrapped_key_in.key = cpkp_pers_key(cpkp); + wrapped_key_in.key_len = cpkp_pers_key_len(cpkp); + + if (!wrapped_key_in.key_len) { + error = EINVAL; + goto out; + } + /* Use the actual persistent class when talking to AKS */ wrapped_key_in.dp_class = entry->cp_pclass; wrapped_key_out.key = key; @@ -863,7 +1113,8 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) if ((error = cp_check_access(cp, hfsmp, vnop))) { /* check for raw encrypted access before bailing out */ - if ((vnop == CP_READ_ACCESS) && (ioflag & IO_ENCRYPTED)) { + if ((ioflag & IO_ENCRYPTED) + && (vnop == CP_READ_ACCESS)) { /* * read access only + asking for the raw encrypted bytes * is legitimate, so reset the error value to 0 @@ -875,9 +1126,15 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) } } - if (entry->cp_flags == 0) { - /* no more work to do */ - goto out; + if (!ISSET(entry->cp_flags, CP_NO_XATTR)) { + if (!S_ISREG(cp->c_mode)) + goto out; + + // If we have a persistent key and the cached key, we're done + if 
(!cp_needs_pers_key(entry) + && cpx_has_key(cpkp_cpx(&entry->cp_keys))) { + goto out; + } } /* upgrade to exclusive lock */ @@ -890,7 +1147,7 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) } /* generate new keys if none have ever been saved */ - if ((entry->cp_flags & CP_NEEDS_KEYS)) { + if (cp_needs_pers_key(entry)) { struct cprotect *newentry = NULL; /* * It's ok if this ends up being wrapped in a different class than 'pclass'. @@ -900,7 +1157,7 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) error = cp_generate_keys (hfsmp, cp, CP_CLASS(cp->c_cpentry->cp_pclass), flags, &newentry); if (error == 0) { - cp_replace_entry (cp, newentry); + cp_replace_entry (hfsmp, cp, newentry); entry = newentry; } else { @@ -909,7 +1166,7 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) } /* unwrap keys if needed */ - if (entry->cp_flags & CP_KEY_FLUSHED) { + if (!cpx_has_key(cpkp_cpx(&entry->cp_keys))) { if ((vnop == CP_READ_ACCESS) && (ioflag & IO_ENCRYPTED)) { /* no need to try to restore keys; they are not going to be used */ error = 0; @@ -932,6 +1189,30 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) return error; } +#if HFS_TMPDBG +#if !SECURE_KERNEL +static void cp_log_eperm (struct vnode* vp, int pclass, boolean_t create) { + char procname[256] = {}; + const char *fname = "unknown"; + const char *dbgop = "open"; + + int ppid = proc_selfpid(); + /* selfname does a strlcpy so we're OK */ + proc_selfname(procname, sizeof(procname)); + if (vp && vp->v_name) { + /* steal from the namecache */ + fname = vp->v_name; + } + + if (create) { + dbgop = "create"; + } + + printf("proc %s (pid %d) class %d, op: %s failure @ file %s\n", procname, ppid, pclass, dbgop, fname); +} +#endif +#endif + int cp_handle_open(struct vnode *vp, int mode) @@ -951,8 +1232,17 @@ cp_handle_open(struct vnode *vp, int mode) return 0; } - /* We know the vnode is in a valid state. 
Acquire cnode and validate */ cp = VTOC(vp); + + // Allow if raw encrypted mode requested + if (ISSET(mode, FENCRYPTED)) { + return 0; + } + if (ISSET(mode, FUNENCRYPTED)) { + return 0; + } + + /* We know the vnode is in a valid state. Acquire cnode and validate */ hfsmp = VTOHFS(vp); if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { @@ -977,13 +1267,13 @@ cp_handle_open(struct vnode *vp, int mode) /* * Does the cnode have keys yet? If not, then generate them. */ - if (entry->cp_flags & CP_NEEDS_KEYS) { + if (cp_needs_pers_key(entry)) { struct cprotect *newentry = NULL; /* Allow the keybag to override our class preferences */ uint32_t flags = CP_KEYWRAP_DIFFCLASS; error = cp_generate_keys (hfsmp, cp, CP_CLASS(cp->c_cpentry->cp_pclass), flags, &newentry); if (error == 0) { - cp_replace_entry (cp, newentry); + cp_replace_entry (hfsmp, cp, newentry); entry = newentry; } else { @@ -1011,7 +1301,7 @@ cp_handle_open(struct vnode *vp, int mode) break; } - if ((entry->cp_flags & CP_KEY_FLUSHED) == 0) { + if (cpx_has_key(cpkp_cpx(&entry->cp_keys)) && !ISSET(mode, FENCRYPTED)) { /* * For a class B file, attempt the unwrap if we have the key in * core already. @@ -1024,8 +1314,8 @@ cp_handle_open(struct vnode *vp, int mode) cp_init_access(&access_in, cp); bzero(&wrapped_key_in, sizeof(wrapped_key_in)); - wrapped_key_in.key = entry->cp_persistent_key; - wrapped_key_in.key_len = entry->cp_persistent_key_len; + wrapped_key_in.key = cpkp_pers_key(&entry->cp_keys); + wrapped_key_in.key_len = cpkp_pers_key_len(&entry->cp_keys); /* Use the persistent class when talking to AKS */ wrapped_key_in.dp_class = entry->cp_pclass; error = g_cp_wrap_func.unwrapper(&access_in, &wrapped_key_in, NULL); @@ -1052,7 +1342,7 @@ cp_handle_open(struct vnode *vp, int mode) * Since this function is bypassed entirely if we're opening a raw encrypted file, * we can always attempt the restore. 
*/ - if (entry->cp_flags & CP_KEY_FLUSHED) { + if (!cpx_has_key(cpkp_cpx(&entry->cp_keys))) { error = cp_restore_keys(entry, hfsmp, cp); } @@ -1068,61 +1358,20 @@ cp_handle_open(struct vnode *vp, int mode) } out: - hfs_unlock(cp); - return error; -} - -/* - * During hfs resize operations, we have slightly different constraints than during - * normal VNOPS that read/write data to files. Specifically, we already have the cnode - * locked (so nobody else can modify it), and we are doing the IO with root privileges, since - * we are moving the data behind the user's back. So, we skip access checks here (for unlock - * vs. lock), and don't worry about non-existing keys. If the file exists on-disk with valid - * payload, then it must have keys set up already by definition. - */ -int -cp_handle_relocate (struct cnode *cp, struct hfsmount *hfsmp) -{ - struct cprotect *entry; - int error = -1; - - /* cp is already locked */ - entry = cp->c_cpentry; - if (!entry) - goto out; - - /* - * Still need to validate whether to permit access to the file or not - * based on lock status - */ - if ((error = cp_check_access(cp, hfsmp, CP_READ_ACCESS | CP_WRITE_ACCESS))) { - goto out; +#if HFS_TMPDBG +#if !SECURE_KERNEL + if ((hfsmp->hfs_cp_verbose) && (error == EPERM)) { + cp_log_eperm (vp, CP_CLASS(entry->cp_pclass), false); } +#endif +#endif - if (entry->cp_flags == 0) { - /* no more work to do */ - error = 0; - goto out; - } - - /* it must have keys since it is an existing file with actual payload */ - - /* unwrap keys if needed */ - if (entry->cp_flags & CP_KEY_FLUSHED) { - error = cp_restore_keys(entry, hfsmp, cp); - } - - /* - * Don't need to write out the EA since if the file has actual extents, - * it must have an EA - */ -out: - - /* return the cp still locked */ + hfs_unlock(cp); return error; } + /* * cp_getrootxattr: * Gets the EA we set on the root folder (fileid 1) to get information about the @@ -1135,7 +1384,14 @@ cp_getrootxattr(struct hfsmount* hfsmp, struct 
cp_root_xattr *outxattr) { uio_t auio; char uio_buf[UIO_SIZEOF(1)]; - size_t attrsize = sizeof(struct cp_root_xattr); + void *buf; + + /* + * We allow for an extra 64 bytes to cater for upgrades. This wouldn't + * be necessary if the xattr routines just returned what we asked for. + */ + size_t attrsize = roundup(sizeof(struct cp_root_xattr) + 64, 64); + int error = 0; struct vnop_getxattr_args args; @@ -1143,8 +1399,10 @@ cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr) panic("Content Protection: cp_xattr called with xattr == NULL"); } + MALLOC(buf, void *, attrsize, M_TEMP, M_WAITOK); + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf)); - uio_addiov(auio, CAST_USER_ADDR_T(outxattr), attrsize); + uio_addiov(auio, CAST_USER_ADDR_T(buf), attrsize); args.a_desc = NULL; // unused args.a_vp = NULL; //unused since we're writing EA to root folder. @@ -1156,17 +1414,34 @@ cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr) error = hfs_getxattr_internal(NULL, &args, hfsmp, 1); - /* Now convert the multi-byte fields to native endianness */ - outxattr->major_version = OSSwapLittleToHostInt16(outxattr->major_version); - outxattr->minor_version = OSSwapLittleToHostInt16(outxattr->minor_version); - outxattr->flags = OSSwapLittleToHostInt64(outxattr->flags); - if (error != 0) { goto out; } + if (attrsize < CP_ROOT_XATTR_MIN_LEN) { + error = HFS_EINCONSISTENT; + goto out; + } + + const struct cp_root_xattr *xattr = buf; + + bzero(outxattr, sizeof(*outxattr)); + + /* Now convert the multi-byte fields to native endianness */ + outxattr->major_version = OSSwapLittleToHostInt16(xattr->major_version); + outxattr->minor_version = OSSwapLittleToHostInt16(xattr->minor_version); + outxattr->flags = OSSwapLittleToHostInt64(xattr->flags); + + if (outxattr->major_version >= CP_VERS_5) { + if (attrsize < sizeof(struct cp_root_xattr)) { + error = HFS_EINCONSISTENT; + goto out; + } + } + out: uio_free(auio); + 
FREE(buf, M_TEMP); return error; } @@ -1193,13 +1468,24 @@ cp_setrootxattr(struct hfsmount *hfsmp, struct cp_root_xattr *newxattr) args.a_options = 0; args.a_context = NULL; //no context needed, only done from mount. + const uint32_t flags = newxattr->flags; + /* Now convert the multi-byte fields to little endian before writing to disk. */ + newxattr->flags = OSSwapHostToLittleInt64(newxattr->flags); + + int xattr_size = sizeof(struct cp_root_xattr); + + newxattr->major_version = OSSwapHostToLittleInt16(newxattr->major_version); newxattr->minor_version = OSSwapHostToLittleInt16(newxattr->minor_version); - newxattr->flags = OSSwapHostToLittleInt64(newxattr->flags); error = hfs_setxattr_internal(NULL, (caddr_t)newxattr, - sizeof(struct cp_root_xattr), &args, hfsmp, 1); + xattr_size, &args, hfsmp, 1); + + if (!error) { + hfsmp->cproot_flags = flags; + } + return error; } @@ -1210,110 +1496,69 @@ cp_setrootxattr(struct hfsmount *hfsmp, struct cp_root_xattr *newxattr) * * This function is also invoked during file creation. */ -int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, uint32_t fileid, int options) +int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, + uint32_t fileid, int options) { int error = 0; - size_t attrsize; - struct vnop_setxattr_args args; - uint32_t target_fileid; - struct cnode *arg_cp = NULL; - uint32_t tempflags = 0; + cp_key_pair_t *cpkp = &entry->cp_keys; - args.a_desc = NULL; - if (hfsmp->hfs_flags & HFS_READ_ONLY) { return EROFS; } - - if (cp) { - args.a_vp = cp->c_vp; - target_fileid = 0; - arg_cp = cp; - } - else { - /* - * When we set the EA in the same txn as the file creation, - * we do not have a vnode/cnode yet. Use the specified fileid. 
- */ - args.a_vp = NULL; - target_fileid = fileid; - } - args.a_name = CONTENT_PROTECTION_XATTR_NAME; - args.a_uio = NULL; //pass data ptr instead - args.a_options = options; - args.a_context = vfs_context_current(); - /* Note that it's OK to write out an XATTR without keys. */ - /* Disable flags that will be invalid as we're writing the EA out at this point. */ - tempflags = entry->cp_flags; + if (hfsmp->hfs_running_cp_major_vers < CP_CURRENT_VERS) { + // Upgrade + printf("hfs: upgrading to cp version %u\n", CP_CURRENT_VERS); - /* we're writing the EA; CP_NO_XATTR is invalid */ - tempflags &= ~CP_NO_XATTR; - - /* CP_SEP_WRAPPEDKEY is informational/runtime only. */ - tempflags &= ~CP_SEP_WRAPPEDKEY; - - switch(hfsmp->hfs_running_cp_major_vers) { - case CP_NEW_MAJOR_VERS: { - struct cp_xattr_v4 *newxattr = NULL; // 70+ bytes; don't alloc on stack. - MALLOC (newxattr, struct cp_xattr_v4*, sizeof(struct cp_xattr_v4), M_TEMP, M_WAITOK); - if (newxattr == NULL) { - error = ENOMEM; - break; - } - bzero (newxattr, sizeof(struct cp_xattr_v4)); + struct cp_root_xattr root_xattr; - attrsize = sizeof(*newxattr) - CP_MAX_WRAPPEDKEYSIZE + entry->cp_persistent_key_len; + error = cp_getrootxattr(hfsmp, &root_xattr); + if (error) + return error; - /* Endian swap the multi-byte fields into L.E from host. 
*/ - newxattr->xattr_major_version = OSSwapHostToLittleInt16 (hfsmp->hfs_running_cp_major_vers); - newxattr->xattr_minor_version = OSSwapHostToLittleInt16(CP_MINOR_VERS); - newxattr->key_size = OSSwapHostToLittleInt32(entry->cp_persistent_key_len); - newxattr->flags = OSSwapHostToLittleInt32(tempflags); - newxattr->persistent_class = OSSwapHostToLittleInt32(entry->cp_pclass); - bcopy(entry->cp_persistent_key, newxattr->persistent_key, entry->cp_persistent_key_len); + root_xattr.major_version = CP_CURRENT_VERS; + root_xattr.minor_version = CP_MINOR_VERS; - error = hfs_setxattr_internal(arg_cp, (caddr_t)newxattr, attrsize, &args, hfsmp, target_fileid); + error = cp_setrootxattr(hfsmp, &root_xattr); + if (error) + return error; - FREE(newxattr, M_TEMP); - break; - } - case CP_PREV_MAJOR_VERS: { - struct cp_xattr_v2 *newxattr = NULL; - MALLOC (newxattr, struct cp_xattr_v2*, sizeof(struct cp_xattr_v2), M_TEMP, M_WAITOK); - if (newxattr == NULL) { - error = ENOMEM; - break; - } - bzero (newxattr, sizeof(struct cp_xattr_v2)); + hfsmp->hfs_running_cp_major_vers = CP_CURRENT_VERS; + } - attrsize = sizeof(*newxattr); + struct cp_xattr_v5 *xattr; + MALLOC(xattr, struct cp_xattr_v5 *, sizeof(*xattr), M_TEMP, M_WAITOK); - /* Endian swap the multi-byte fields into L.E from host. 
*/ - newxattr->xattr_major_version = OSSwapHostToLittleInt16(hfsmp->hfs_running_cp_major_vers); - newxattr->xattr_minor_version = OSSwapHostToLittleInt16(CP_MINOR_VERS); - newxattr->key_size = OSSwapHostToLittleInt32(entry->cp_persistent_key_len); - newxattr->flags = OSSwapHostToLittleInt32(tempflags); - newxattr->persistent_class = OSSwapHostToLittleInt32(entry->cp_pclass); - bcopy(entry->cp_persistent_key, newxattr->persistent_key, entry->cp_persistent_key_len); + xattr->xattr_major_version = OSSwapHostToLittleConstInt16(CP_VERS_5); + xattr->xattr_minor_version = OSSwapHostToLittleConstInt16(CP_MINOR_VERS); + xattr->flags = 0; + xattr->persistent_class = OSSwapHostToLittleInt32(entry->cp_pclass); + xattr->key_os_version = OSSwapHostToLittleInt32(entry->cp_key_os_version); + xattr->key_revision = OSSwapHostToLittleInt16(entry->cp_key_revision); - error = hfs_setxattr_internal(arg_cp, (caddr_t)newxattr, attrsize, &args, hfsmp, target_fileid); + uint16_t key_len = cpkp_pers_key_len(cpkp); + xattr->key_len = OSSwapHostToLittleInt16(key_len); + memcpy(xattr->persistent_key, cpkp_pers_key(cpkp), key_len); - FREE (newxattr, M_TEMP); - break; - } - default: - printf("hfs: cp_setxattr: Unknown CP version running \n"); - break; - } + size_t xattr_len = offsetof(struct cp_xattr_v5, persistent_key) + key_len; + + + struct vnop_setxattr_args args = { + .a_vp = cp ? 
cp->c_vp : NULL, + .a_name = CONTENT_PROTECTION_XATTR_NAME, + .a_options = options, + .a_context = vfs_context_current(), + }; + + error = hfs_setxattr_internal(cp, xattr, xattr_len, &args, hfsmp, fileid); + + FREE(xattr, M_TEMP); if (error == 0 ) { entry->cp_flags &= ~CP_NO_XATTR; } return error; - - } /* @@ -1420,216 +1665,244 @@ cp_is_valid_class(int isdir, int32_t protectionclass) } } +#if DEBUG +static const uint32_t cp_magic1 = 0x7b727063; // cpr{ +static const uint32_t cp_magic2 = 0x7270637d; // }cpr +#endif -static struct cprotect * -cp_entry_alloc(size_t keylen) +struct cprotect * +cp_entry_alloc(cprotect_t old, uint16_t pers_key_len, + uint16_t cached_key_len, cp_key_pair_t **pcpkp) { struct cprotect *cp_entry; - if (keylen > CP_MAX_WRAPPEDKEYSIZE) + if (pers_key_len > CP_MAX_WRAPPEDKEYSIZE) return (NULL); - MALLOC(cp_entry, struct cprotect *, sizeof(struct cprotect) + keylen, - M_TEMP, M_WAITOK); - if (cp_entry == NULL) - return (NULL); + size_t size = (sizeof(struct cprotect) - sizeof(cp_key_pair_t) + + cpkp_size(pers_key_len, cached_key_len)); + +#if DEBUG + size += 4; // Extra for magic2 +#endif - bzero(cp_entry, sizeof(*cp_entry) + keylen); - cp_entry->cp_persistent_key_len = keylen; - return (cp_entry); + MALLOC(cp_entry, struct cprotect *, size, M_TEMP, M_WAITOK); + + if (old) { + memcpy(cp_entry, old, offsetof(struct cprotect, cp_keys)); + + } else { + bzero(cp_entry, offsetof(struct cprotect, cp_keys)); + } + +#if DEBUG + cp_entry->cp_magic1 = cp_magic1; + *PTR_ADD(uint32_t *, cp_entry, size - 4) = cp_magic2; +#endif + + cpkp_init(&cp_entry->cp_keys, pers_key_len, cached_key_len); + + /* + * If we've been passed the old entry, then we are in the process of + * rewrapping in which case we need to copy the cached key. This is + * important for class B files when the device is locked because we + * won't be able to unwrap whilst in this state, yet we still need the + * unwrapped key. 
+ */ + if (old) + cpx_copy(cpkp_cpx(&old->cp_keys), cpkp_cpx(&cp_entry->cp_keys)); + + if (pcpkp) + *pcpkp = &cp_entry->cp_keys; + + return cp_entry; } static void -cp_entry_dealloc(struct cprotect *entry) +cp_entry_dealloc(__unused hfsmount_t *hfsmp, struct cprotect *entry) { - uint32_t keylen = entry->cp_persistent_key_len; - bzero(entry, (sizeof(*entry) + keylen)); + + cpkp_flush(&entry->cp_keys); + +#if DEBUG + assert(entry->cp_magic1 == cp_magic1); + assert(*PTR_ADD(uint32_t *, entry, (sizeof(struct cprotect) - sizeof(cp_key_pair_t) + + cpkp_sizex(&entry->cp_keys) == cp_magic2))); +#endif + FREE(entry, M_TEMP); } - -/* - * Initializes a new cprotect entry with xattr data from the cnode. - * cnode lock held shared - */ -static int -cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry) +static int cp_read_xattr_v4(__unused hfsmount_t *hfsmp, struct cp_xattr_v4 *xattr, + size_t xattr_len, cprotect_t *pcpr, cp_getxattr_options_t options) { - int error = 0; - uio_t auio; - size_t attrsize; - char uio_buf[UIO_SIZEOF(1)]; - struct vnop_getxattr_args args; - struct cprotect *entry = NULL; + /* Endian swap the multi-byte fields into host endianness from L.E. 
*/ + xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version); + xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version); + xattr->key_size = OSSwapLittleToHostInt32(xattr->key_size); + xattr->flags = OSSwapLittleToHostInt32(xattr->flags); + xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class); + xattr->key_os_version = OSSwapLittleToHostInt32(xattr->key_os_version); - auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf)); - args.a_desc = NULL; // unused - args.a_vp = cp->c_vp; - args.a_name = CONTENT_PROTECTION_XATTR_NAME; - args.a_uio = auio; - args.a_options = XATTR_REPLACE; - args.a_context = vfs_context_current(); // unused + /* + * Prevent a buffer overflow, and validate the key length obtained from the + * EA. If it's too big, then bail out, because the EA can't be trusted at this + * point. + */ + if (xattr->key_size > CP_MAX_WRAPPEDKEYSIZE) + return HFS_EINCONSISTENT; - switch (hfsmp->hfs_running_cp_major_vers) { - case CP_NEW_MAJOR_VERS: { - struct cp_xattr_v4 *xattr = NULL; - MALLOC (xattr, struct cp_xattr_v4*, sizeof(struct cp_xattr_v4), M_TEMP, M_WAITOK); - if (xattr == NULL) { - error = ENOMEM; - break; - } - bzero(xattr, sizeof (struct cp_xattr_v4)); - attrsize = sizeof(*xattr); + size_t min_len = offsetof(struct cp_xattr_v4, persistent_key) + xattr->key_size; + if (xattr_len < min_len) + return HFS_EINCONSISTENT; - uio_addiov(auio, CAST_USER_ADDR_T(xattr), attrsize); - args.a_size = &attrsize; + /* + * Class F files have no backing key; their keylength should be 0, + * though they should have the proper flags set. + * + * A request to instantiate a CP for a class F file should result + * in a bzero'd cp that just says class F, with key_flushed set. 
+ */ + if (CP_CLASS(xattr->persistent_class) == PROTECTION_CLASS_F + || ISSET(xattr->flags, CP_XAF_NEEDS_KEYS)) { + xattr->key_size = 0; + } - error = hfs_getxattr_internal(cp, &args, VTOHFS(cp->c_vp), 0); - if (error != 0) { - FREE (xattr, M_TEMP); - goto out; - } + /* set up entry with information from xattr */ + cp_key_pair_t *cpkp; + cprotect_t entry; + + if (ISSET(options, CP_GET_XATTR_BASIC_INFO)) { + /* caller passed in a pre-allocated structure to get the basic info */ + entry = *pcpr; + bzero(entry, offsetof(struct cprotect, cp_keys)); + } + else { + entry = cp_entry_alloc(NULL, xattr->key_size, CP_MAX_CACHEBUFLEN, &cpkp); + } - /* Endian swap the multi-byte fields into host endianness from L.E. */ - xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version); - xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version); - xattr->key_size = OSSwapLittleToHostInt32(xattr->key_size); - xattr->flags = OSSwapLittleToHostInt32(xattr->flags); - xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class); + entry->cp_pclass = xattr->persistent_class; + entry->cp_key_os_version = xattr->key_os_version; - if (xattr->xattr_major_version != hfsmp->hfs_running_cp_major_vers ) { - printf("hfs: cp_getxattr: bad xattr version %d expecting %d\n", - xattr->xattr_major_version, hfsmp->hfs_running_cp_major_vers); - error = EINVAL; - FREE (xattr, M_TEMP); - goto out; - } - /* - * Prevent a buffer overflow, and validate the key length obtained from the - * EA. If it's too big, then bail out, because the EA can't be trusted at this - * point. 
- */ - if (xattr->key_size > CP_MAX_WRAPPEDKEYSIZE) { - error = EINVAL; - FREE (xattr, M_TEMP); + if (!ISSET(options, CP_GET_XATTR_BASIC_INFO)) { + if (xattr->key_size) { + cpkp_set_pers_key_len(cpkp, xattr->key_size); + memcpy(cpkp_pers_key(cpkp), xattr->persistent_key, xattr->key_size); + } - goto out; - } + *pcpr = entry; + } + else if (xattr->key_size) { + SET(entry->cp_flags, CP_HAS_A_KEY); + } - /* - * Class F files have no backing key; their keylength should be 0, - * though they should have the proper flags set. - * - * A request to instantiate a CP for a class F file should result - * in a bzero'd cp that just says class F, with key_flushed set. - */ + return 0; +} - /* set up entry with information from xattr */ - entry = cp_entry_alloc(xattr->key_size); - if (!entry) { - FREE (xattr, M_TEMP); +int cp_read_xattr_v5(hfsmount_t *hfsmp, struct cp_xattr_v5 *xattr, + size_t xattr_len, cprotect_t *pcpr, cp_getxattr_options_t options) +{ + if (xattr->xattr_major_version == OSSwapHostToLittleConstInt16(CP_VERS_4)) { + return cp_read_xattr_v4(hfsmp, (struct cp_xattr_v4 *)xattr, xattr_len, pcpr, options); + } - return ENOMEM; - } + xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version); - entry->cp_pclass = xattr->persistent_class; + if (xattr->xattr_major_version != CP_VERS_5) { + printf("hfs: cp_getxattr: unsupported xattr version %d\n", + xattr->xattr_major_version); + return ENOTSUP; + } - /* - * Suppress invalid flags that should not be set. - * If we have gotten this far, then CP_NO_XATTR cannot possibly - * be valid; the EA exists. 
- */ - xattr->flags &= ~CP_NO_XATTR; + size_t min_len = offsetof(struct cp_xattr_v5, persistent_key); - entry->cp_flags = xattr->flags; - if (xattr->xattr_major_version >= CP_NEW_MAJOR_VERS) { - entry->cp_flags |= CP_OFF_IV_ENABLED; - } + if (xattr_len < min_len) + return HFS_EINCONSISTENT; - if (CP_CLASS(entry->cp_pclass) != PROTECTION_CLASS_F ) { - bcopy(xattr->persistent_key, entry->cp_persistent_key, xattr->key_size); - } + xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version); + xattr->flags = OSSwapLittleToHostInt32(xattr->flags); + xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class); + xattr->key_os_version = OSSwapLittleToHostInt32(xattr->key_os_version); + xattr->key_revision = OSSwapLittleToHostInt16(xattr->key_revision); + xattr->key_len = OSSwapLittleToHostInt16(xattr->key_len); - FREE (xattr, M_TEMP); + uint16_t pers_key_len = xattr->key_len; - break; - } - case CP_PREV_MAJOR_VERS: { - struct cp_xattr_v2 *xattr = NULL; - MALLOC (xattr, struct cp_xattr_v2*, sizeof(struct cp_xattr_v2), M_TEMP, M_WAITOK); - if (xattr == NULL) { - error = ENOMEM; - break; - } - bzero (xattr, sizeof (struct cp_xattr_v2)); - attrsize = sizeof(*xattr); + min_len += pers_key_len; + if (xattr_len < min_len) + return HFS_EINCONSISTENT; - uio_addiov(auio, CAST_USER_ADDR_T(xattr), attrsize); - args.a_size = &attrsize; - error = hfs_getxattr_internal(cp, &args, VTOHFS(cp->c_vp), 0); - if (error != 0) { - FREE (xattr, M_TEMP); - goto out; - } + cp_key_pair_t *cpkp; + cprotect_t entry; + + /* + * If option CP_GET_XATTR_BASIC_INFO is set, we only return basic + * information about the file's protection (and not the key) and + * we store the result in the structure the caller passed to us. 
+ */ + if (ISSET(options, CP_GET_XATTR_BASIC_INFO)) { + entry = *pcpr; + bzero(entry, offsetof(struct cprotect, cp_keys)); + } else { + entry = cp_entry_alloc(NULL, xattr->key_len, CP_MAX_CACHEBUFLEN, &cpkp); + } - /* Endian swap the multi-byte fields into host endianness from L.E. */ - xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version); - xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version); - xattr->key_size = OSSwapLittleToHostInt32(xattr->key_size); - xattr->flags = OSSwapLittleToHostInt32(xattr->flags); - xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class); + entry->cp_pclass = xattr->persistent_class; + entry->cp_key_os_version = xattr->key_os_version; + entry->cp_key_revision = xattr->key_revision; - if (xattr->xattr_major_version != hfsmp->hfs_running_cp_major_vers) { - printf("hfs: cp_getxattr: bad xattr version %d expecting %d\n", - xattr->xattr_major_version, hfsmp->hfs_running_cp_major_vers); - error = EINVAL; - FREE (xattr, M_TEMP); - goto out; - } + if (!ISSET(options, CP_GET_XATTR_BASIC_INFO)) { + if (xattr->key_len) { + cpkp_set_pers_key_len(cpkp, xattr->key_len); + memcpy(cpkp_pers_key(cpkp), xattr->persistent_key, xattr->key_len); + } - /* - * Prevent a buffer overflow, and validate the key length obtained from the - * EA. If it's too big, then bail out, because the EA can't be trusted at this - * point. - */ - if (xattr->key_size > CP_V2_WRAPPEDKEYSIZE) { - error = EINVAL; - FREE (xattr, M_TEMP); - goto out; - } - /* set up entry with information from xattr */ - entry = cp_entry_alloc(xattr->key_size); - if (!entry) { - FREE (xattr, M_TEMP); - return ENOMEM; - } - entry->cp_pclass = xattr->persistent_class; + *pcpr = entry; + } + else if (xattr->key_len) { + SET(entry->cp_flags, CP_HAS_A_KEY); + } - /* - * Suppress invalid flags that should not be set. - * If we have gotten this far, then CP_NO_XATTR cannot possibly - * be valid; the EA exists. 
- */ - xattr->flags &= ~CP_NO_XATTR; + return 0; +} - entry->cp_flags = xattr->flags; +/* + * Initializes a new cprotect entry with xattr data from the cnode. + * cnode lock held shared + */ +static int +cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, cprotect_t *outentry) +{ + size_t xattr_len = sizeof(struct cp_xattr_v5); + struct cp_xattr_v5 *xattr; - if (CP_CLASS(entry->cp_pclass) != PROTECTION_CLASS_F ) { - bcopy(xattr->persistent_key, entry->cp_persistent_key, xattr->key_size); - } + MALLOC (xattr, struct cp_xattr_v5 *, xattr_len, + M_TEMP, M_WAITOK); - FREE (xattr, M_TEMP); - break; - } + int error = hfs_xattr_read(cp->c_vp, CONTENT_PROTECTION_XATTR_NAME, + xattr, &xattr_len); + + if (!error) { + if (xattr_len < CP_XATTR_MIN_LEN) + error = HFS_EINCONSISTENT; + else + error = cp_read_xattr_v5(hfsmp, xattr, xattr_len, outentry, 0); } -out: - uio_free(auio); +#if DEBUG + if (error && error != ENOATTR) { + printf("cp_getxattr: bad cp xattr (%d):\n", error); + for (size_t i = 0; i < xattr_len; ++i) + printf("%02x ", ((uint8_t *)xattr)[i]); + printf("\n"); + } +#endif + + FREE(xattr, M_TEMP); - *outentry = entry; return error; } @@ -1645,15 +1918,9 @@ cp_restore_keys(struct cprotect *entry, struct hfsmount *hfsmp, struct cnode *cp error = cp_unwrap(hfsmp, entry, cp); if (error) { - entry->cp_flags |= CP_KEY_FLUSHED; - bzero(entry->cp_cache_key, entry->cp_cache_key_len); + cp_flush_cached_keys(entry); error = EPERM; } - else { - /* ready for business */ - entry->cp_flags &= ~CP_KEY_FLUSHED; - - } return error; } @@ -1819,9 +2086,7 @@ cp_lock_vnode_callback(struct vnode *vp, void *arg) * is no key leakage in that layer. */ - entry->cp_flags |= CP_KEY_FLUSHED; - bzero(&entry->cp_cache_key, entry->cp_cache_key_len); - bzero(&entry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx)); + cp_flush_cached_keys(entry); /* some write may have arrived in the mean time. 
dump those pages */ hfs_unlock(cp); @@ -1858,15 +2123,17 @@ cp_lock_vnode_callback(struct vnode *vp, void *arg) * Generate a new wrapped key based on the existing cache key. */ -static int -cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass) +int +cp_rewrap(struct cnode *cp, __unused hfsmount_t *hfsmp, + cp_key_class_t *newclass, cp_key_pair_t *cpkp, const void *old_holder, + cp_new_alloc_fn alloc_fn, void **pholder) { - struct cprotect *entry = cp->c_cpentry; + uint8_t new_persistent_key[CP_MAX_WRAPPEDKEYSIZE]; size_t keylen = CP_MAX_WRAPPEDKEYSIZE; int error = 0; - newclass = CP_CLASS(newclass); + const cp_key_class_t key_class = CP_CLASS(*newclass); /* Structures passed between HFS and AKS */ cp_cred_s access_in; @@ -1878,15 +2145,15 @@ cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass) * key that is only good as long as the file is open. There is no * wrapped key, so there isn't anything to wrap. */ - if (newclass == PROTECTION_CLASS_F) { + if (key_class == PROTECTION_CLASS_F) { return EINVAL; } cp_init_access(&access_in, cp); bzero(&wrapped_key_in, sizeof(wrapped_key_in)); - wrapped_key_in.key = entry->cp_persistent_key; - wrapped_key_in.key_len = entry->cp_persistent_key_len; + wrapped_key_in.key = cpkp_pers_key(cpkp); + wrapped_key_in.key_len = cpkp_pers_key_len(cpkp); /* Use the persistent class when talking to AKS */ wrapped_key_in.dp_class = entry->cp_pclass; @@ -1902,21 +2169,20 @@ cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass) * don't lose. */ error = g_cp_wrap_func.rewrapper(&access_in, - newclass, /* new class */ + key_class, /* new class */ &wrapped_key_in, &wrapped_key_out); keylen = wrapped_key_out.key_len; if (error == 0) { - struct cprotect *newentry = NULL; - /* + /* * Verify that AKS returned to us a wrapped key of the * target class requested. 
*/ /* Get the effective class here */ - int effective = CP_CLASS(wrapped_key_out.dp_class); - if (effective != newclass) { + cp_key_class_t effective = CP_CLASS(wrapped_key_out.dp_class); + if (effective != key_class) { /* * Fail the operation if defaults or some other enforcement * dictated that the class be wrapped differently. @@ -1926,29 +2192,16 @@ cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass) return EPERM; } - /* v2 EA's don't support the larger class B keys */ - if ((keylen != CP_V2_WRAPPEDKEYSIZE) && - (hfsmp->hfs_running_cp_major_vers == CP_PREV_MAJOR_VERS)) { - return EINVAL; - } - /* Allocate a new cpentry */ - newentry = cp_entry_alloc (keylen); - bcopy (entry, newentry, sizeof(struct cprotect)); + cp_key_pair_t *new_cpkp; + *pholder = alloc_fn(old_holder, keylen, CP_MAX_CACHEBUFLEN, &new_cpkp); /* copy the new key into the entry */ - bcopy (new_persistent_key, newentry->cp_persistent_key, keylen); - newentry->cp_persistent_key_len = keylen; - newentry->cp_backing_cnode = cp; + cpkp_set_pers_key_len(new_cpkp, keylen); + memcpy(cpkp_pers_key(new_cpkp), new_persistent_key, keylen); /* Actually record/store what AKS reported back, not the effective class stored in newclass */ - newentry->cp_pclass = wrapped_key_out.dp_class; - - /* Attach the new entry to the cnode */ - cp->c_cpentry = newentry; - - /* destroy the old entry */ - cp_entry_destroy (entry); + *newclass = wrapped_key_out.dp_class; } else { error = EPERM; @@ -1957,46 +2210,36 @@ cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass) return error; } - -static int -cp_unwrap(struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp) +static int cpkp_unwrap(cnode_t *cp, cp_key_class_t key_class, cp_key_pair_t *cpkp) { int error = 0; uint8_t iv_key[CP_IV_KEYSIZE]; + cpx_t cpx = cpkp_cpx(cpkp); /* Structures passed between HFS and AKS */ cp_cred_s access_in; cp_wrapped_key_s wrapped_key_in; cp_raw_key_s key_out; - /* - * PROTECTION_CLASS_F is in-use by VM 
swapfile; it represents a transient - * key that is only good as long as the file is open. There is no - * wrapped key, so there isn't anything to unwrap. - */ - if (CP_CLASS(entry->cp_pclass) == PROTECTION_CLASS_F) { - return EPERM; - } - cp_init_access(&access_in, cp); bzero(&wrapped_key_in, sizeof(wrapped_key_in)); - wrapped_key_in.key = entry->cp_persistent_key; - wrapped_key_in.key_len = entry->cp_persistent_key_len; + wrapped_key_in.key = cpkp_pers_key(cpkp); + wrapped_key_in.key_len = cpkp_max_pers_key_len(cpkp); /* Use the persistent class when talking to AKS */ - wrapped_key_in.dp_class = entry->cp_pclass; + wrapped_key_in.dp_class = key_class; bzero(&key_out, sizeof(key_out)); key_out.iv_key = iv_key; - key_out.key = entry->cp_cache_key; - /* - * The unwrapper should validate/set the key length for + key_out.key = cpx_key(cpx); + /* + * The unwrapper should validate/set the key length for * the IV key length and the cache key length, however we need * to supply the correct buffer length so that AKS knows how * many bytes it has to work with. */ key_out.iv_key_len = CP_IV_KEYSIZE; - key_out.key_len = CP_MAX_CACHEBUFLEN; + key_out.key_len = cpx_max_key_len(cpx); error = g_cp_wrap_func.unwrapper(&access_in, &wrapped_key_in, &key_out); if (!error) { @@ -2004,24 +2247,13 @@ cp_unwrap(struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp) panic ("cp_unwrap: invalid key length! (%ul)\n", key_out.key_len); } - if (key_out.iv_key_len == 0 || key_out.iv_key_len > CP_IV_KEYSIZE) { + if (key_out.iv_key_len != CP_IV_KEYSIZE) panic ("cp_unwrap: invalid iv key length! (%ul)\n", key_out.iv_key_len); - } - - entry->cp_cache_key_len = key_out.key_len; - - /* No need to go here for older EAs */ - if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) { - aes_encrypt_key128(iv_key, &entry->cp_cache_iv_ctx); - entry->cp_flags |= CP_OFF_IV_ENABLED; - } - /* Is the key a raw wrapped key? 
*/ - if (key_out.flags & CP_RAW_KEY_WRAPPEDKEY) { - /* OR in the right bit for the cprotect */ - entry->cp_flags |= CP_SEP_WRAPPEDKEY; - } + cpx_set_key_len(cpx, key_out.key_len); + cpx_set_aes_iv_key(cpx, iv_key); + cpx_set_is_sep_wrapped_key(cpx, ISSET(key_out.flags, CP_RAW_KEY_WRAPPEDKEY)); } else { error = EPERM; } @@ -2029,26 +2261,22 @@ cp_unwrap(struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp) return error; } -/* Setup AES context */ static int -cp_setup_aes_ctx(struct cprotect *entry) +cp_unwrap(__unused struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp) { - SHA1_CTX sha1ctxt; - uint8_t cp_cache_iv_key[CP_IV_KEYSIZE]; /* Kiv */ - - /* First init the cp_cache_iv_key[] */ - SHA1Init(&sha1ctxt); - /* - * We can only use this when the keys are generated in the AP; As a result - * we only use the first 32 bytes of key length in the cache key + * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient + * key that is only good as long as the file is open. There is no + * wrapped key, so there isn't anything to unwrap. */ - SHA1Update(&sha1ctxt, &entry->cp_cache_key[0], CP_MAX_KEYSIZE); - SHA1Final(&cp_cache_iv_key[0], &sha1ctxt); + if (CP_CLASS(entry->cp_pclass) == PROTECTION_CLASS_F) { + return EPERM; + } + + int error = cpkp_unwrap(cp, entry->cp_pclass, &entry->cp_keys); - aes_encrypt_key128(&cp_cache_iv_key[0], &entry->cp_cache_iv_ctx); - return 0; + return error; } /* @@ -2060,7 +2288,7 @@ cp_setup_aes_ctx(struct cprotect *entry) * on 'cp'. 
* */ -int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, +int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, cp_key_class_t targetclass, uint32_t keyflags, struct cprotect **newentry) { @@ -2085,13 +2313,16 @@ int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, } if (S_ISREG(cp->c_mode)) { - if ((cp->c_cpentry->cp_flags & CP_NEEDS_KEYS) == 0){ + if (!cp_needs_pers_key(cp->c_cpentry)) { error = EINVAL; goto out; } } - error = cp_new (targetclass, hfsmp, cp, cp->c_mode, keyflags, &newcp); + cp_key_revision_t key_revision = cp_initial_key_revision(hfsmp); + + error = cp_new (&targetclass, hfsmp, cp, cp->c_mode, keyflags, key_revision, + (cp_new_alloc_fn)cp_entry_alloc, (void **)&newcp); if (error) { /* * Key generation failed. This is not necessarily fatal @@ -2101,8 +2332,12 @@ int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, error = EPERM; goto out; } - - /* + + newcp->cp_pclass = targetclass; + newcp->cp_key_os_version = cp_os_version(); + newcp->cp_key_revision = key_revision; + + /* * If we got here, then we have a new cprotect. * Attempt to write the new one out. */ @@ -2112,7 +2347,7 @@ int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, /* Tear down the new cprotect; Tell MKB that it's invalid. Bail out */ /* TODO: rdar://12170074 needs to be fixed before we can tell MKB */ if (newcp) { - cp_entry_destroy(newcp); + cp_entry_destroy(hfsmp, newcp); } goto out; } @@ -2123,20 +2358,19 @@ int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, * 2) wrote the new keys to disk. * 3) cprotect is ready to go. 
*/ - - newcp->cp_flags &= ~CP_NEEDS_KEYS; + *newentry = newcp; - + out: return error; } -void cp_replace_entry (struct cnode *cp, struct cprotect *newentry) +void cp_replace_entry (hfsmount_t *hfsmp, struct cnode *cp, struct cprotect *newentry) { - if (cp->c_cpentry) { - cp_entry_destroy (cp->c_cpentry); + + cp_entry_destroy (hfsmp, cp->c_cpentry); } cp->c_cpentry = newentry; newentry->cp_backing_cnode = cp; @@ -2154,13 +2388,13 @@ void cp_replace_entry (struct cnode *cp, struct cprotect *newentry) * Additionally, decide if keys are even needed -- directories get cprotect data structures * but they do not have keys. * - */ + */ -static int -cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, - uint32_t keyflags, struct cprotect **output_entry) +int +cp_new(cp_key_class_t *newclass_eff, __unused struct hfsmount *hfsmp, struct cnode *cp, + mode_t cmode, int32_t keyflags, cp_key_revision_t key_revision, + cp_new_alloc_fn alloc_fn, void **pholder) { - struct cprotect *entry = NULL; int error = 0; uint8_t new_key[CP_MAX_CACHEBUFLEN]; size_t new_key_len = CP_MAX_CACHEBUFLEN; /* AKS tell us the proper key length, how much of this is used */ @@ -2169,18 +2403,13 @@ cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, uint8_t iv_key[CP_IV_KEYSIZE]; size_t iv_key_len = CP_IV_KEYSIZE; int iswrapped = 0; - - newclass_eff = CP_CLASS(newclass_eff); + cp_key_class_t key_class = CP_CLASS(*newclass_eff); /* Structures passed between HFS and AKS */ cp_cred_s access_in; cp_wrapped_key_s wrapped_key_out; cp_raw_key_s key_out; - if (*output_entry != NULL) { - panic ("cp_new with non-null entry!"); - } - if (are_wraps_initialized == false) { printf("hfs: cp_new: wrap/gen functions not yet set\n"); return ENXIO; @@ -2212,7 +2441,7 @@ cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, } else { /* Must be a file */ - if (newclass_eff == PROTECTION_CLASS_F) { + if (key_class == PROTECTION_CLASS_F) { /* 
class F files are not wrapped; they can still use the max key size */ new_key_len = CP_MAX_KEYSIZE; read_random (&new_key[0], new_key_len); @@ -2245,8 +2474,10 @@ cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, wrapped_key_out.key = new_persistent_key; wrapped_key_out.key_len = new_persistent_len; - error = g_cp_wrap_func.new_key(&access_in, - newclass_eff, + access_in.key_revision = key_revision; + + error = g_cp_wrap_func.new_key(&access_in, + key_class, &key_out, &wrapped_key_out); @@ -2261,7 +2492,7 @@ cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, panic ("cp_new: invalid key length! (%ul) \n", key_out.key_len); } - if (key_out.iv_key_len == 0 || key_out.iv_key_len > CP_IV_KEYSIZE) { + if (key_out.iv_key_len != CP_IV_KEYSIZE) { panic ("cp_new: invalid iv key length! (%ul) \n", key_out.iv_key_len); } @@ -2272,17 +2503,15 @@ cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, * if that occurred. Check that the effective class returned by * AKS is the same as our effective new class */ - if ((int)(CP_CLASS(wrapped_key_out.dp_class)) != newclass_eff) { - if (keyflags & CP_KEYWRAP_DIFFCLASS) { - newclass_eff = CP_CLASS(wrapped_key_out.dp_class); - } - else { - error = EPERM; + if (CP_CLASS(wrapped_key_out.dp_class) != key_class) { + if (!ISSET(keyflags, CP_KEYWRAP_DIFFCLASS)) { + error = EPERM; /* TODO: When 12170074 fixed, release/invalidate the key! */ goto cpnew_fail; } } + *newclass_eff = wrapped_key_out.dp_class; new_key_len = key_out.key_len; iv_key_len = key_out.iv_key_len; new_persistent_len = wrapped_key_out.key_len; @@ -2298,67 +2527,41 @@ cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, * Step 2: allocate cprotect and initialize it. 
*/ - - /* - * v2 EA's don't support the larger class B keys - */ - if ((new_persistent_len != CP_V2_WRAPPEDKEYSIZE) && - (hfsmp->hfs_running_cp_major_vers == CP_PREV_MAJOR_VERS)) { - return EINVAL; - } - - entry = cp_entry_alloc (new_persistent_len); - if (entry == NULL) { + cp_key_pair_t *cpkp; + *pholder = alloc_fn(NULL, new_persistent_len, new_key_len, &cpkp); + if (*pholder == NULL) { return ENOMEM; } - *output_entry = entry; - - /* - * For directories and class F files, just store the effective new class. - * AKS does not interact with us in generating keys for F files, and directories - * don't actually have keys. - */ - if ( S_ISDIR (cmode) || (newclass_eff == PROTECTION_CLASS_F)) { - entry->cp_pclass = newclass_eff; - } - else { - /* - * otherwise, store what AKS actually returned back to us. - * wrapped_key_out is only valid if we have round-tripped to AKS - */ - entry->cp_pclass = wrapped_key_out.dp_class; - } - /* Copy the cache key & IV keys into place if needed. */ if (new_key_len > 0) { - bcopy (new_key, entry->cp_cache_key, new_key_len); - entry->cp_cache_key_len = new_key_len; + cpx_t cpx = cpkp_cpx(cpkp); + cpx_set_key_len(cpx, new_key_len); + memcpy(cpx_key(cpx), new_key, new_key_len); /* Initialize the IV key */ - if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) { - if (newclass_eff == PROTECTION_CLASS_F) { - /* class F needs a full IV initialize */ - cp_setup_aes_ctx(entry); - } - else { - /* Key store gave us an iv key. 
Just need to wrap it.*/ - aes_encrypt_key128(iv_key, &entry->cp_cache_iv_ctx); - } - entry->cp_flags |= CP_OFF_IV_ENABLED; - } + if (key_class != PROTECTION_CLASS_F) + cpx_set_aes_iv_key(cpx, iv_key); + + cpx_set_is_sep_wrapped_key(cpx, iswrapped); } if (new_persistent_len > 0) { - bcopy(new_persistent_key, entry->cp_persistent_key, new_persistent_len); + cpkp_set_pers_key_len(cpkp, new_persistent_len); + memcpy(cpkp_pers_key(cpkp), new_persistent_key, new_persistent_len); } - /* Mark it as a wrapped key if necessary */ - if (iswrapped) { - entry->cp_flags |= CP_SEP_WRAPPEDKEY; +cpnew_fail: + +#if HFS_TMPDBG +#if !SECURE_KERNEL + if ((hfsmp->hfs_cp_verbose) && (error == EPERM)) { + /* Only introspect the data fork */ + cp_log_eperm (cp->c_vp, *newclass_eff, true); } +#endif +#endif -cpnew_fail: return error; } @@ -2376,20 +2579,261 @@ static void cp_init_access(cp_cred_t access, struct cnode *cp) access->pid = proc_pid(proc); access->uid = kauth_cred_getuid(cred); + if (cp->c_cpentry) + access->key_revision = cp->c_cpentry->cp_key_revision; + return; } -#else +/* + * Parses versions of the form 12A316, i.e. [major][letter][build] and + * returns a uint32_t in the form 0xaabbcccc where aa = [major], + * bb = [ASCII code of letter], cccc = [build].
+ */ +static cp_key_os_version_t parse_os_version(void) +{ + const char *p = osversion; + + int a = 0; + while (*p >= '0' && *p <= '9') { + a = a * 10 + *p - '0'; + ++p; + } + + if (!a) + return 0; + + int b = *p++; + if (!b) + return 0; + + int c = 0; + while (*p >= '0' && *p <= '9') { + c = c * 10 + *p - '0'; + ++p; + } + + if (!c) + return 0; + + return (a & 0xff) << 24 | b << 16 | (c & 0xffff); +} + +cp_key_os_version_t cp_os_version(void) +{ + static cp_key_os_version_t cp_os_version; + + if (cp_os_version) + return cp_os_version; + + if (!osversion[0]) + return 0; + + cp_os_version = parse_os_version(); + if (!cp_os_version) { + printf("cp_os_version: unable to parse osversion `%s'\n", osversion); + cp_os_version = 1; + } + + return cp_os_version; +} + + +errno_t cp_handle_strategy(buf_t bp) +{ + vnode_t vp = buf_vnode(bp); + cnode_t *cp = NULL; + + if (bufattr_rawencrypted(buf_attr(bp)) + || !(cp = cp_get_protected_cnode(vp)) + || !cp->c_cpentry) { + // Nothing to do + return 0; + } + + /* + * For filesystem resize, we may not have access to the underlying + * file's cache key for whatever reason (device may be locked). + * However, we do not need it since we are going to use the + * temporary HFS-wide resize key which is generated once we start + * relocating file content. If this file's I/O should be done + * using the resize key, it will have been supplied already, so do + * not attach the file's cp blob to the buffer. + */ + if (ISSET(cp->c_cpentry->cp_flags, CP_RELOCATION_INFLIGHT)) + return 0; + + { + // Fast path + cpx_t cpx = cpkp_cpx(&cp->c_cpentry->cp_keys); + + if (cpx_has_key(cpx)) { + bufattr_setcpx(buf_attr(bp), cpx); + return 0; + } + } + + /* + * We rely mostly (see note below) upon the truncate lock to + * protect the CP cache key from getting tossed prior to our IO + * finishing here. 
Nearly all cluster io calls to manipulate file + * payload from HFS take the truncate lock before calling into the + * cluster layer to ensure the file size does not change, or that + * they have exclusive right to change the EOF of the file. That + * same guarantee protects us here since the code that deals with + * CP lock events must now take the truncate lock before doing + * anything. + * + * If you want to change content protection structures, then the + * truncate lock is not sufficient; you must take the truncate + * lock and then wait for outstanding writes to complete. This is + * necessary because asynchronous I/O only holds the truncate lock + * whilst I/O is being queued. + * + * One exception should be the VM swapfile IO, because HFS will + * funnel the VNOP_PAGEOUT directly into a cluster_pageout call + * for the swapfile code only without holding the truncate lock. + * This is because individual swapfiles are maintained at + * fixed-length sizes by the VM code. In non-swapfile IO we use + * PAGEOUT_V2 semantics which allow us to create our own UPL and + * thus take the truncate lock before calling into the cluster + * layer. In that case, however, we are not concerned with the CP + * blob being wiped out in the middle of the IO because there + * isn't anything to toss; the VM swapfile key stays in-core as + * long as the file is open. + */ + + off_rsrc_t off_rsrc = off_rsrc_make(buf_lblkno(bp) * GetLogicalBlockSize(vp), + VNODE_IS_RSRC(vp)); + cp_io_params_t io_params; + + + /* + * We want to take the cnode lock here and because the vnode write + * count is a pseudo-lock, we need to do something to preserve + * lock ordering; the cnode lock comes before the write count. + * Ideally, the write count would be incremented after the + * strategy routine returns, but that becomes complicated if the + * strategy routine were to call buf_iodone before returning. + * For now, we drop the write count here and then pick it up again + * later.
+ */ + if (!ISSET(buf_flags(bp), B_READ) && !ISSET(buf_flags(bp), B_RAW)) + vnode_writedone(vp); + + hfs_lock_always(cp, HFS_SHARED_LOCK); + cp_io_params(VTOHFS(vp), cp->c_cpentry, off_rsrc, + ISSET(buf_flags(bp), B_READ) ? VNODE_READ : VNODE_WRITE, + &io_params); + hfs_unlock(cp); + + /* + * Last chance: If this data protected I/O does not have unwrapped + * keys present, then try to get them. We already know that it + * should, by this point. + */ + if (!cpx_has_key(io_params.cpx)) { + int io_op = ( (buf_flags(bp) & B_READ) ? CP_READ_ACCESS : CP_WRITE_ACCESS); + errno_t error = cp_handle_vnop(vp, io_op, 0); + if (error) { + /* + * We have to be careful here. By this point in the I/O + * path, VM or the cluster engine has prepared a buf_t + * with the proper file offsets and all the rest, so + * simply erroring out will result in us leaking this + * particular buf_t. We need to properly decorate the + * buf_t just as buf_strategy would so as to make it + * appear that the I/O errored out with the particular + * error code. + */ + if (!ISSET(buf_flags(bp), B_READ) && !ISSET(buf_flags(bp), B_RAW)) + vnode_startwrite(vp); + buf_seterror (bp, error); + buf_biodone(bp); + return error; + } + + hfs_lock_always(cp, HFS_SHARED_LOCK); + cp_io_params(VTOHFS(vp), cp->c_cpentry, off_rsrc, + ISSET(buf_flags(bp), B_READ) ? 
VNODE_READ : VNODE_WRITE, + &io_params); + hfs_unlock(cp); + } + + assert(buf_count(bp) <= io_params.max_len); + bufattr_setcpx(buf_attr(bp), io_params.cpx); + + if (!ISSET(buf_flags(bp), B_READ) && !ISSET(buf_flags(bp), B_RAW)) + vnode_startwrite(vp); + + return 0; +} + +#else // !CONFIG_PROTECT + +#include +#include +#include int cp_key_store_action(int action __unused) { return ENOTSUP; } - int cp_register_wraps(cp_wrap_func_t key_store_func __unused) { return ENOTSUP; } +size_t cpx_size(__unused size_t key_size) +{ + return 0; +} + +cpx_t cpx_alloc(__unused size_t key_size) +{ + return NULL; +} + +void cpx_free(__unused cpx_t cpx) +{ +} + +bool cpx_is_sep_wrapped_key(__unused const struct cpx *cpx) +{ + return false; +} + +void cpx_set_is_sep_wrapped_key(__unused struct cpx *cpx, __unused bool v) +{ +} + +bool cpx_use_offset_for_iv(__unused const struct cpx *cpx) +{ + return false; +} + +void cpx_set_use_offset_for_iv(__unused struct cpx *cpx, __unused bool v) +{ +} + +uint16_t cpx_key_len(__unused const struct cpx *cpx) +{ + return 0; +} + +void cpx_set_key_len(__unused struct cpx *cpx, __unused uint16_t key_len) +{ +} + +void *cpx_key(__unused const struct cpx *cpx) +{ + return NULL; +} + +aes_encrypt_ctx *cpx_iv_aes_ctx(__unused cpx_t cpx) +{ + return NULL; +} + #endif /* CONFIG_PROTECT */ diff --git a/bsd/hfs/hfs_cprotect.h b/bsd/hfs/hfs_cprotect.h new file mode 100644 index 000000000..b25ecc70c --- /dev/null +++ b/bsd/hfs/hfs_cprotect.h @@ -0,0 +1,423 @@ +/* + * Copyright (c) 2009-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef HFS_CPROTECT_H_ +#define HFS_CPROTECT_H_ + +#if KERNEL_PRIVATE + +#include + +#include +#include +#include +#include +#include + +#include "hfs.h" +#include "hfs_fsctl.h" + +__BEGIN_DECLS + +#define CP_IV_KEYSIZE 16 /* 16x8 = 128 */ +#define CP_MAX_KEYSIZE 32 /* 8x4 = 32, 32x8 = 256 */ +#define CP_MAX_CACHEBUFLEN 64 /* Maximum size of cp cache buffer/array */ + +#define CP_INITIAL_WRAPPEDKEYSIZE 40 +#define CP_V2_WRAPPEDKEYSIZE 40 /* Size of the wrapped key in a v2 EA */ +#define CP_V4_RESERVEDBYTES 16 /* Number of reserved bytes in EA still present */ + +#define CP_LOCKED_KEYCHAIN 0 +#define CP_UNLOCKED_KEYCHAIN 1 + +#define CONTENT_PROTECTION_XATTR_NAME "com.apple.system.cprotect" +#define CONTENT_PROTECTION_XATTR_NAME_CHARS \ + { 'c', 'o', 'm', '.', 'a', 'p', 'p', 'l', 'e', \ + '.', 's', 'y', 's', 't', 'e', 'm', \ + '.', 'c', 'p', 'r', 'o', 't', 'e', 'c', 't' } +#define CP_CURRENT_VERS CP_VERS_5 +#define CP_VERS_5 5 // iOS 8.1 +#define CP_VERS_4 4 // iOS 5 +#define CP_VERS_2 2 // iOS 4 
+#define CP_MINOR_VERS 0 + +/* the class occupies the lowest 5 bits, so there are 32 values (0-31) */ +#define CP_EFFECTIVE_CLASSMASK 0x0000001f + +typedef uint32_t cp_key_class_t; +typedef uint32_t cp_key_os_version_t; + +/* macros for quick access/typing to mask out the classmask */ +#define CP_CLASS(x) ((cp_key_class_t)(CP_EFFECTIVE_CLASSMASK & (x))) + +#define CP_CRYPTO_G1 0x00000020 + +typedef struct cp_xattr *cp_xattr_t; +typedef struct cnode * cnode_ptr_t; +//forward declare the struct. +struct hfsmount; + +/* + * Flags for Key Generation Behavior + * + * These are passed to cp_generate_keys() and cp_new() in the + * flags arguments + */ +#define CP_KEYWRAP_DIFFCLASS 0x00000001 /* wrapping with a different class bag is OK */ + +/* + * off_rsrc_t: this structure represents an offset and whether or not it's + * the resource fork. It's done this way so that we can easily do comparisons + * i.e. + * + * { 0, data-fork } < { 100, rsrc-fork } + */ + +enum { + OFF_RSRC_BIT = 0x4000000000000000, +}; + +typedef int64_t off_rsrc_t; + +static inline bool off_rsrc_is_rsrc(off_rsrc_t off_rsrc) +{ + return off_rsrc & OFF_RSRC_BIT; +} + +static inline off_t off_rsrc_get_off(off_rsrc_t off_rsrc) +{ + return off_rsrc & (OFF_RSRC_BIT - 1); +} + +static inline off_rsrc_t off_rsrc_make(off_t offset, bool is_rsrc) +{ + return offset | (is_rsrc ? OFF_RSRC_BIT : 0); +} + +// -- struct cpx -- + +/* + * This structure contains the unwrapped key and is passed to the lower layers. + * It is private so users must use the accessors declared in sys/cprotect.h + * to read/write it. 
+ */ + +// cpx_flags +typedef uint32_t cpx_flags_t; +enum { + CPX_SEP_WRAPPEDKEY = 0x01, + CPX_IV_AES_CTX_INITIALIZED = 0x02, + CPX_USE_OFFSET_FOR_IV = 0x04, + + // Using AES IV context generated from key + CPX_IV_AES_CTX_HFS = 0x08, +}; + +struct cpx { +#if DEBUG + uint32_t cpx_magic1; +#endif + cpx_flags_t cpx_flags; + uint16_t cpx_max_key_len; + uint16_t cpx_key_len; + aes_encrypt_ctx cpx_iv_aes_ctx; // Context used for generating the IV + uint8_t cpx_cached_key[]; +} __attribute__((packed)); + +// -- struct cp_key_pair -- + +/* + * This structure maintains the pair of keys; the persistent, wrapped key that + * is written to disk, and the unwrapped key (cpx_t) that we pass to lower + * layers. + */ + +typedef struct cp_key_pair { + uint16_t cpkp_max_pers_key_len; + uint16_t cpkp_pers_key_len; + struct cpx cpkp_cpx; + + // cpkp_cpx is variable length so the location of the persistent key varies + // uint8_t cpkp_persistent_key[]; +} cp_key_pair_t; + +// -- struct cprotect -- + +/* + * Runtime-only structure containing the content protection status for + * the given file. This is referenced by the cnode. It has the + * variable length key pair at the end. + */ + +typedef uint32_t cp_flags_t; +enum { + CP_NO_XATTR = 0x01, /* Key info has not been saved as EA to the FS */ + CP_RELOCATION_INFLIGHT = 0x02, /* File with offset IVs is in the process of being relocated. */ + + CP_HAS_A_KEY = 0x08, /* File has a non-zero length key */ +}; + +struct cprotect { +#if DEBUG + uint32_t cp_magic1; +#endif + cp_flags_t cp_flags; + cp_key_class_t cp_pclass; /* persistent class stored on-disk */ + void* cp_backing_cnode; + cp_key_os_version_t cp_key_os_version; + cp_key_revision_t cp_key_revision; + uint16_t cp_raw_open_count; + cp_key_pair_t cp_keys; // Variable length +}; + +// -- On-Disk Structures -- + +typedef uint32_t cp_xattr_flags_t; +enum { + /* + * Be careful about using flags 0x02 to 0x20. 
Older code used to write + * flags that were used for in-memory purposes to disk and therefore + * they might be used in V4 structures. Here's what they were: + * + * CP_KEY_FLUSHED 0x02 Should never have made it to disk + * CP_NO_XATTR 0x04 Should never have made it to disk + * CP_OFF_IV_ENABLED 0x08 Probably made it to disk + * CP_RELOCATION_INFLIGHT 0x10 Should never have made it to disk + * CP_SEP_WRAPPEDKEY 0x20 Probably made it to disk + * + */ + + CP_XAF_NEEDS_KEYS = 0x0001, /* V4 only: file needs persistent keys */ + +}; + +/* + * V2 structure written as the per-file EA payload + * All on-disk multi-byte fields for the CP XATTR must be stored + * little-endian on-disk. This means they must be endian swapped to + * L.E on getxattr() and converted to LE on setxattr(). + * + * This structure is a fixed length and is tightly packed. + * 56 bytes total. + */ +struct cp_xattr_v2 { + u_int16_t xattr_major_version; + u_int16_t xattr_minor_version; + cp_xattr_flags_t flags; + u_int32_t persistent_class; + u_int32_t key_size; + uint8_t persistent_key[CP_V2_WRAPPEDKEYSIZE]; +} __attribute__((aligned(2), packed)); + + +/* + * V4 Content Protection EA On-Disk Layout. + * + * This structure must be tightly packed, but the *size can vary* + * depending on the length of the key. At MOST, the key length will be + * CP_MAX_WRAPPEDKEYSIZE, but the length is defined by the key_size field. + * + * Either way, the packing must be applied to ensure that the key data is + * retrievable in the right location relative to the start of the struct. + * + * Fully packed, this structure can range from : + * MIN: 36 bytes (no key -- used with directories) + * MAX: 164 bytes (with 128 byte key) + * + * During runtime we always allocate with the full 128 byte key, but only + * use as much of the key buffer as needed. It must be tightly packed, though. 
+ */ + +struct cp_xattr_v4 { + u_int16_t xattr_major_version; + u_int16_t xattr_minor_version; + cp_xattr_flags_t flags; + cp_key_class_t persistent_class; + u_int32_t key_size; + // This field will be zero on older systems + cp_key_os_version_t key_os_version; + /* CP V4 Reserved Bytes == 16 */ + u_int8_t reserved[CP_V4_RESERVEDBYTES]; + /* All above fields are fixed regardless of key length (36 bytes) */ + /* Max Wrapped Size == 128 */ + uint8_t persistent_key[CP_MAX_WRAPPEDKEYSIZE]; +} __attribute__((aligned(2), packed)); + +// -- Version 5 -- + + +struct cp_xattr_v5 { + uint16_t xattr_major_version; + uint16_t xattr_minor_version; + cp_xattr_flags_t flags; + cp_key_class_t persistent_class; + cp_key_os_version_t key_os_version; + cp_key_revision_t key_revision; + uint16_t key_len; + + // 20 bytes to here + + // Variable length from here + uint8_t persistent_key[CP_MAX_WRAPPEDKEYSIZE]; + + + // Wouldn't be necessary if xattr routines returned just what we ask for + uint8_t spare[512]; +} __attribute__((aligned(2), packed)); + +enum { + CP_XATTR_MIN_LEN = 20, // Minimum length for all versions +}; + +/* + * The Root Directory's EA (fileid 1) is special; it defines information about + * what capabilities the filesystem is using. + * + * The data is still stored little endian. 
+ */ +struct cp_root_xattr { + u_int16_t major_version; + u_int16_t minor_version; + u_int64_t flags; +} __attribute__((aligned(2), packed)); + +enum { + CP_ROOT_XATTR_MIN_LEN = 12, +}; + + +// -- Function Prototypes -- + +int cp_entry_init(cnode_ptr_t, struct mount *); +int cpx_gentempkeys(cpx_t *pcpx, struct hfsmount *hfsmp); +void cp_entry_destroy(struct hfsmount *hfsmp, struct cprotect *entry_ptr); +void cp_replace_entry (struct hfsmount *hfsmp, struct cnode *cp, struct cprotect *newentry); +cnode_ptr_t cp_get_protected_cnode(vnode_t); +int cp_fs_protected (mount_t); +int cp_getrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *outxattr); +int cp_setrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *newxattr); +int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, + cp_key_class_t targetclass, uint32_t flags, + struct cprotect **newentry); +int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, + cp_key_class_t suppliedclass, mode_t cmode, + struct cprotect **tmpentry); +int cp_is_valid_class (int isdir, int32_t protectionclass); +int cp_set_trimmed(struct hfsmount*); +int cp_set_rewrapped(struct hfsmount *); +int cp_flop_generation (struct hfsmount*); +bool cp_is_supported_version(uint16_t version); + + +typedef struct cp_io_params { + // The key to use + cpx_t cpx; + + /* + * The physical offset for this I/O or -1 if unknown (i.e. caller must + * do a regular look up). 
+ */ + off_t phys_offset; + + // The maximum length allowed for this I/O + off_t max_len; +} cp_io_params_t; + +// Return the I/O parameters for this I/O +void cp_io_params(struct hfsmount *hfsmp, cprotect_t cpr, off_rsrc_t off_rsrc, + int direction, cp_io_params_t *io_params); + +int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, + uint32_t fileid, int xattr_opts); + +typedef void * (* cp_new_alloc_fn)(const void *old, uint16_t pers_key_len, + uint16_t cached_key_len, + cp_key_pair_t **pcpkp); + +int cp_new(cp_key_class_t *newclass_eff, struct hfsmount *hfsmp, + struct cnode *cp, mode_t cmode, int32_t keyflags, + cp_key_revision_t key_revision, + cp_new_alloc_fn alloc_fn, void **pholder); + +int cp_rewrap(struct cnode *cp, __unused struct hfsmount *hfsmp, + cp_key_class_t *newclass, cp_key_pair_t *cpkp, const void *old_holder, + cp_new_alloc_fn alloc_fn, void **pholder); + +cprotect_t cp_entry_alloc(cprotect_t old, uint16_t pers_keylen, + uint16_t cached_key_len, cp_key_pair_t **pcpkp); + +cp_key_os_version_t cp_os_version(void); + +cp_key_revision_t cp_next_key_revision(cp_key_revision_t rev); + +typedef uint32_t cp_getxattr_options_t; +enum { + // Return just basic information (not the key) + CP_GET_XATTR_BASIC_INFO = 1, +}; + +int cp_read_xattr_v5(struct hfsmount *hfsmp, struct cp_xattr_v5 *xattr, + size_t xattr_len, cprotect_t *pcpr, cp_getxattr_options_t options); + + +errno_t cp_handle_strategy(buf_t bp); + +// -- cp_key_pair_t functions -- + +size_t cpkp_size(uint16_t pers_key_len, uint16_t cached_key_len); +size_t cpkp_sizex(const cp_key_pair_t *cpkp); +void cpkp_init(cp_key_pair_t *cpkp, uint16_t max_pers_key_len, + uint16_t max_cached_key_len); +void cpkp_flush(cp_key_pair_t *cpkp); +void cpkp_copy(const cp_key_pair_t *src, cp_key_pair_t *dst); +uint16_t cpkp_max_pers_key_len(const cp_key_pair_t *cpkp); +uint16_t cpkp_pers_key_len(const cp_key_pair_t *cpkp); +bool cpkp_can_copy(const cp_key_pair_t *src, const cp_key_pair_t 
*dst); + +// -- Private cpx functions -- + +void cpx_init(cpx_t, size_t key_len); +bool cpx_has_key(const struct cpx *cpx); +uint16_t cpx_max_key_len(const struct cpx *cpx); +cpx_t cpkp_cpx(const cp_key_pair_t *cpkp); +void cpx_copy(const struct cpx *src, cpx_t dst); + +// -- Helper Functions -- + +static inline int cp_get_crypto_generation (cp_key_class_t protclass) { + if (protclass & CP_CRYPTO_G1) { + return 1; + } + else return 0; +} + +__END_DECLS + +#endif /* KERNEL_PRIVATE */ + +#endif /* !HFS_CPROTECT_H_ */ diff --git a/bsd/hfs/hfs_endian.c b/bsd/hfs/hfs_endian.c index 50fb1ddd9..eb242b37f 100644 --- a/bsd/hfs/hfs_endian.c +++ b/bsd/hfs/hfs_endian.c @@ -126,25 +126,43 @@ hfs_swap_BTNode ( */ if (btcb->totalNodes != 0) { if (srcDesc->fLink >= btcb->totalNodes) { +#if DEVELOPMENT || DEBUG + panic("hfs_swap_BTNode: invalid forward link (0x%08x >= 0x%08x)\n", srcDesc->fLink, btcb->totalNodes); +#else printf("hfs_swap_BTNode: invalid forward link (0x%08x >= 0x%08x)\n", srcDesc->fLink, btcb->totalNodes); +#endif error = fsBTInvalidHeaderErr; goto fail; } if (srcDesc->bLink >= btcb->totalNodes) { +#if DEVELOPMENT || DEBUG + panic("hfs_swap_BTNode: invalid backward link (0x%08x >= 0x%08x)\n", srcDesc->bLink, btcb->totalNodes); +#else printf("hfs_swap_BTNode: invalid backward link (0x%08x >= 0x%08x)\n", srcDesc->bLink, btcb->totalNodes); +#endif error = fsBTInvalidHeaderErr; goto fail; } if ((src->blockNum != 0) && (srcDesc->fLink == (u_int32_t) src->blockNum)) { +#if DEVELOPMENT || DEBUG + panic("hfs_swap_BTNode: invalid forward link (0x%08x == 0x%08x)\n", + srcDesc->fLink, (u_int32_t) src->blockNum); +#else printf("hfs_swap_BTNode: invalid forward link (0x%08x == 0x%08x)\n", srcDesc->fLink, (u_int32_t) src->blockNum); +#endif error = fsBTInvalidHeaderErr; goto fail; } if ((src->blockNum != 0) && (srcDesc->bLink == (u_int32_t) src->blockNum)) { +#if DEVELOPMENT || DEBUG + panic("hfs_swap_BTNode: invalid backward link (0x%08x == 0x%08x)\n", + srcDesc->bLink, 
(u_int32_t) src->blockNum); +#else printf("hfs_swap_BTNode: invalid backward link (0x%08x == 0x%08x)\n", srcDesc->bLink, (u_int32_t) src->blockNum); +#endif error = fsBTInvalidHeaderErr; goto fail; } diff --git a/bsd/hfs/hfs_extents.c b/bsd/hfs/hfs_extents.c new file mode 100644 index 000000000..509de326d --- /dev/null +++ b/bsd/hfs/hfs_extents.c @@ -0,0 +1,770 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#if HFS_EXTENTS_TEST + +#include "hfs_extents_test.h" +#include "hfs_extents.h" + +#else + +#include "hfs_extents.h" + +// In this file, group refers to a set of 8 extents + +static uint32_t hfs_total_blocks(const HFSPlusExtentDescriptor *ext, int count); +static errno_t hfs_ext_iter_next_group(struct hfs_ext_iter *iter); +static errno_t hfs_ext_iter_update(struct hfs_ext_iter *iter, + HFSPlusExtentDescriptor *extents, + int count, + HFSPlusExtentRecord cat_extents); +static errno_t hfs_ext_iter_check_group(hfs_ext_iter_t *iter); + +#endif + +#define CHECK(x, var, goto_label) \ + do { \ + var = (x); \ + if (var) { \ + printf("%s:%u error: %d\n", __func__, __LINE__, var); \ + goto goto_label; \ + } \ + } while (0) + +#define min(a,b) \ + ({ typeof (a) _a = (a); typeof (b) _b = (b); _a < _b ? _a : _b; }) + +static __attribute__((pure)) +const HFSPlusExtentKey *hfs_ext_iter_key(const hfs_ext_iter_t *iter) +{ + return (const HFSPlusExtentKey *)&iter->bt_iter.key; +} + +static __attribute__((pure)) +HFSPlusExtentKey *hfs_ext_iter_key_mut(hfs_ext_iter_t *iter) +{ + return (HFSPlusExtentKey *)&iter->bt_iter.key; +} + +// Returns the total number of blocks for the @count extents provided +uint32_t hfs_total_blocks(const HFSPlusExtentDescriptor *extents, int count) +{ + uint32_t block_count = 0; + for (int i = 0; i < count; ++i) + block_count += extents[i].blockCount; + return block_count; +} + +/* + * Checks a group of extents: makes sure that if it's the last group + * for a fork, that all the remaining extents are properly zeroed and + * if it's not then checks that all extents are set. This also sets + * @group_block_count and @last_in_fork. Returns ESTALE if + * inconsistent. 
+ */ +errno_t hfs_ext_iter_check_group(hfs_ext_iter_t *iter) +{ + filefork_t *ff = VTOF(iter->vp); + const HFSPlusExtentKey *key = hfs_ext_iter_key(iter); + uint32_t count = 0; + int i; + + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (!iter->group[i].blockCount) + break; + count += iter->group[i].blockCount; + } + + if (i < kHFSPlusExtentDensity) { + iter->last_in_fork = true; + if (key->startBlock + count != ff_allocblocks(ff)) + goto bad; + + // Check remainder of extents + for (++i; i < kHFSPlusExtentDensity; ++i) { + if (iter->group[i].blockCount) + goto bad; + } + } else { + if (key->startBlock + count > ff_allocblocks(ff)) + goto bad; + + iter->last_in_fork = (key->startBlock + count == ff_allocblocks(ff)); + } + + iter->group_block_count = count; + + return 0; + +bad: + +#if DEBUG + printf("hfs_ext_iter_check_group: bad group; start: %u, total blocks: %u\n", + key->startBlock, ff_allocblocks(ff)); + + for (int j = 0; j < kHFSPlusExtentDensity; ++j) { + printf("%s<%u, %u>", j ? ", " : "", + iter->group[j].startBlock, iter->group[j].blockCount); + } + + printf("\n"); +#endif + + return ESTALE; +} + +// NOTE: doesn't copy group data +static void hfs_ext_iter_copy(const hfs_ext_iter_t *src, hfs_ext_iter_t *dst) +{ + dst->vp = src->vp; + memcpy(&dst->bt_iter.key, &src->bt_iter.key, sizeof(HFSPlusExtentKey)); + + dst->file_block = src->file_block; + dst->ndx = src->ndx; + + dst->bt_iter.hint = src->bt_iter.hint; + dst->bt_iter.version = 0; + dst->bt_iter.reserved = 0; + dst->bt_iter.hitCount = 0; + dst->bt_iter.maxLeafRecs = 0; +} + +bool hfs_ext_iter_is_catalog_extents(hfs_ext_iter_t *iter) +{ + return hfs_ext_iter_key(iter)->startBlock == 0; +} + +#if !HFS_EXTENTS_TEST + +/* + * Finds the extent for offset. It might be in the catalog or the extents + * file. 
+ */ +errno_t hfs_ext_find(vnode_t vp, off_t offset, hfs_ext_iter_t *iter) +{ + errno_t ret; + hfsmount_t *hfsmp = VTOHFS(vp); + + iter->vp = vp; + + uint32_t end_block, index; + HFSPlusExtentKey *key = hfs_ext_iter_key_mut(iter); + + filefork_t *ff = VTOF(vp); + + CHECK(SearchExtentFile(hfsmp, ff, offset, + key, iter->group, &index, + &iter->bt_iter.hint.nodeNum, &end_block), ret, exit); + + iter->ndx = index; + iter->file_block = end_block - iter->group[index].blockCount; + + if (!key->keyLength) { + // We're pointing at the catalog record extents so fix up the key + key->keyLength = kHFSPlusExtentKeyMaximumLength; + key->forkType = (VNODE_IS_RSRC(iter->vp) + ? kHFSResourceForkType : kHFSDataForkType); + key->pad = 0; + key->fileID = VTOC(iter->vp)->c_fileid; + key->startBlock = 0; + } + + CHECK(hfs_ext_iter_check_group(iter), ret, exit); + + ret = 0; + +exit: + + return MacToVFSError(ret); +} + +static uint32_t hfs_ext_iter_next_group_block(const hfs_ext_iter_t *iter) +{ + const HFSPlusExtentKey *key = hfs_ext_iter_key(iter); + + return key->startBlock + iter->group_block_count; +} + +/* + * Move the iterator to the next group. Don't call if there's a chance + * there is no entry; the caller should check last_in_fork instead. 
+ */ +static errno_t hfs_ext_iter_next_group(hfs_ext_iter_t *iter) +{ + errno_t ret; + hfsmount_t *hfsmp = VTOHFS(iter->vp); + filefork_t * const tree = hfsmp->hfs_extents_cp->c_datafork; + HFSPlusExtentKey *key = hfs_ext_iter_key_mut(iter); + const bool catalog_extents = hfs_ext_iter_is_catalog_extents(iter); + const uint32_t next_block = hfs_ext_iter_next_group_block(iter); + + FSBufferDescriptor fbd = { + .bufferAddress = &iter->group, + .itemCount = 1, + .itemSize = sizeof(iter->group) + }; + + if (catalog_extents) { + key->startBlock = next_block; + + CHECK(BTSearchRecord(tree, &iter->bt_iter, &fbd, NULL, + &iter->bt_iter), ret, exit); + } else { + const uint32_t file_id = key->fileID; + const uint8_t fork_type = key->forkType; + + CHECK(BTIterateRecord(tree, kBTreeNextRecord, &iter->bt_iter, + &fbd, NULL), ret, exit); + + if (key->fileID != file_id + || key->forkType != fork_type + || key->startBlock != next_block) { + // This indicates an inconsistency + ret = ESTALE; + goto exit; + } + } + + iter->file_block = key->startBlock; + iter->ndx = 0; + + CHECK(hfs_ext_iter_check_group(iter), ret, exit); + + ret = 0; + +exit: + + return MacToVFSError(ret); +} + +/* + * Updates with the extents provided and sets the key up for the next group. + * It is assumed that any previous record that might collide has been deleted. + * NOTE: @extents must point to a buffer that can be zero padded to multiple + * of 8 extents. 
+ */ +errno_t hfs_ext_iter_update(hfs_ext_iter_t *iter, + HFSPlusExtentDescriptor *extents, + int count, + HFSPlusExtentRecord cat_extents) +{ + errno_t ret; + hfsmount_t *hfsmp = VTOHFS(iter->vp); + cnode_t *cp = VTOC(iter->vp); + HFSPlusExtentKey *key = hfs_ext_iter_key_mut(iter); + int ndx = 0; + + if (!extents) + extents = iter->group; + + if (count % kHFSPlusExtentDensity) { + // Zero out last group + bzero(&extents[count], (kHFSPlusExtentDensity + - (count % 8)) * sizeof(*extents)); + } + + if (hfs_ext_iter_is_catalog_extents(iter)) { + // Caller is responsible for in-memory updates + + if (cat_extents) + hfs_ext_copy_rec(extents, cat_extents); + + struct cat_fork fork; + + hfs_fork_copy(&fork, &VTOF(iter->vp)->ff_data, extents); + hfs_prepare_fork_for_update(VTOF(iter->vp), &fork, &fork, hfsmp->blockSize); + + bool is_rsrc = VNODE_IS_RSRC(iter->vp); + CHECK(cat_update(hfsmp, &cp->c_desc, &cp->c_attr, + is_rsrc ? NULL : &fork, + is_rsrc ? &fork : NULL), ret, exit); + + // Set the key to the next group + key->startBlock = hfs_total_blocks(extents, kHFSPlusExtentDensity); + + ndx += 8; + } + + // Deal with the remainder which must be overflow extents + for (; ndx < count; ndx += 8) { + filefork_t * const tree = hfsmp->hfs_extents_cp->c_datafork; + + FSBufferDescriptor fbd = { + .bufferAddress = &extents[ndx], + .itemCount = 1, + .itemSize = sizeof(HFSPlusExtentRecord) + }; + + CHECK(BTInsertRecord(tree, &iter->bt_iter, &fbd, + sizeof(HFSPlusExtentRecord)), ret, exit); + + // Set the key to the next group + key->startBlock += hfs_total_blocks(&extents[ndx], kHFSPlusExtentDensity); + } + + ret = 0; + +exit: + + return ret; +} + +#endif // !HFS_EXTENTS_TEST + +static void push_ext(HFSPlusExtentDescriptor *extents, int *count, + const HFSPlusExtentDescriptor *ext) +{ + if (!ext->blockCount) + return; + + if (*count && hfs_ext_end(&extents[*count - 1]) == ext->startBlock) + extents[*count - 1].blockCount += ext->blockCount; + else + extents[(*count)++] = *ext; +} + 
+/* + * NOTE: Here we rely on the replacement extents not being too big as + * otherwise the number of BTree records that we have to delete could be + * too large. + */ +errno_t hfs_ext_replace(hfsmount_t *hfsmp, vnode_t vp, + uint32_t file_block, + const HFSPlusExtentDescriptor *repl, + int repl_count, + HFSPlusExtentRecord catalog_extents) +{ + errno_t ret; + filefork_t * const tree = hfsmp->hfs_extents_cp->c_datafork; + hfs_ext_iter_t *iter_in = NULL, *iter_out; + HFSPlusExtentDescriptor *extents = NULL; + HFSPlusExtentDescriptor *roll_back_extents = NULL; + int roll_back_count = 0; + const uint32_t end_file_block = file_block + hfs_total_blocks(repl, repl_count); + filefork_t *ff = VTOF(vp); + + // Indicate we haven't touched catalog extents + catalog_extents[0].blockCount = 0; + + if (end_file_block > ff_allocblocks(ff)) { + ret = EINVAL; + goto exit; + } + + MALLOC(iter_in, hfs_ext_iter_t *, sizeof(*iter_in) * 2, M_TEMP, M_WAITOK); + iter_out = iter_in + 1; + HFSPlusExtentKey *key_in = hfs_ext_iter_key_mut(iter_in); + + // Get to where we want to start + off_t offset = hfs_blk_to_bytes(file_block, hfsmp->blockSize); + + /* + * If the replacement is at the start of a group, we want to pull in the + * group before so that we tidy up any padding that we might have done + * in a prior hfs_ext_replace call. 
+ */ + if (offset > 0) + --offset; + + CHECK(hfs_ext_find(vp, offset, iter_in), ret, exit); + + const uint32_t start_group_block = key_in->startBlock; + + const int max_roll_back_extents = 128 * 1024 / sizeof(HFSPlusExtentDescriptor); + MALLOC(roll_back_extents, HFSPlusExtentDescriptor *, 128 * 1024, M_TEMP, M_WAITOK); + + // Move to the first extent in this group + iter_in->ndx = 0; + + hfs_ext_iter_copy(iter_in, iter_out); + + // Create a buffer for our extents + const int buffered_extents = roundup(3 * kHFSPlusExtentDensity + repl_count, + kHFSPlusExtentDensity); + MALLOC(extents, HFSPlusExtentDescriptor *, + sizeof(*extents) * buffered_extents, M_TEMP, M_WAITOK); + int count = 0; + + /* + * Iterate through the extents that are affected by this replace operation. + * We cannot push more than 16 + repl_count extents here; 8 for the group + * containing the replacement start, repl_count for the replacements and 8 + * for the group containing the end. If we went back a group due to + * decrementing the offset above, it's still the same because we know in + * that case the replacement starts at the beginning of the next group. + */ + uint32_t block = start_group_block; + for (;;) { + if (!iter_in->ndx) { + hfs_ext_copy_rec(iter_in->group, &roll_back_extents[roll_back_count]); + roll_back_count += kHFSPlusExtentDensity; + + if (!hfs_ext_iter_is_catalog_extents(iter_in)) { + // Delete this extent group; we're going to replace it + CHECK(BTDeleteRecord(tree, &iter_in->bt_iter), ret, exit); + } + } + + HFSPlusExtentDescriptor *ext = &iter_in->group[iter_in->ndx]; + if (!ext->blockCount) { + /* + * We ran out of existing extents so we just write the + * extents and we're done. + */ + goto finish; + } + + // If the current extent does not overlap replacement... 
+ if (block + ext->blockCount <= file_block || block >= end_file_block) { + // Keep the current extent exactly as it is + push_ext(extents, &count, ext); + } else { + HFSPlusExtentDescriptor dealloc_ext = *ext; + + if (block <= file_block) { + /* + * The middle or tail of the current extent overlaps + * the replacement extents. Keep the non-overlapping + * head of the current extent. + */ + uint32_t trimmed_len = file_block - block; + + if (trimmed_len) { + // Push (keep) non-overlapping head of current extent + push_ext(extents, &count, + &(HFSPlusExtentDescriptor){ ext->startBlock, + trimmed_len }); + + /* + * Deallocate the part of the current extent that + * overlaps the replacement extents. That starts + * at @file_block. For now, assume it goes + * through the end of the current extent. (If the + * current extent extends beyond the end of the + * replacement extents, we'll update the + * blockCount below.) + */ + dealloc_ext.startBlock += trimmed_len; + dealloc_ext.blockCount -= trimmed_len; + } + + // Insert the replacements + for (int i = 0; i < repl_count; ++i) + push_ext(extents, &count, &repl[i]); + } + + if (block + ext->blockCount > end_file_block) { + /* + * The head or middle of the current extent overlaps + * the replacement extents. Keep the non-overlapping + * tail of the current extent. + */ + uint32_t overlap = end_file_block - block; + + // Push (keep) non-overlapping tail of current extent + push_ext(extents, &count, + &(HFSPlusExtentDescriptor){ ext->startBlock + overlap, + ext->blockCount - overlap }); + + /* + * Deallocate the part of current extent that overlaps + * the replacements. 
+ */ + dealloc_ext.blockCount = (ext->startBlock + overlap + - dealloc_ext.startBlock); + } + + CHECK(BlockDeallocate(hfsmp, dealloc_ext.startBlock, + dealloc_ext.blockCount, 0), ret, exit); + } + + // Move to next (existing) extent from iterator + block += ext->blockCount; + + if (++iter_in->ndx >= kHFSPlusExtentDensity) { + if (block >= end_file_block) { + if (iter_in->last_in_fork || !(count % kHFSPlusExtentDensity)) { + /* + * This is the easy case. We've hit the end or we have a + * multiple of 8, so we can just write out the extents we + * have and it should all fit within a transaction. + */ + + goto finish; + } + + if (count + kHFSPlusExtentDensity > buffered_extents + || (roll_back_count + + kHFSPlusExtentDensity > max_roll_back_extents)) { + /* + * We've run out of room for the next group, so drop out + * and take a different strategy. + */ + break; + } + } + + CHECK(hfs_ext_iter_next_group(iter_in), ret, exit); + } + } // for (;;) + + /* + * We're not at the end so we need to try and pad to a multiple of 8 + * so that we don't have to touch all the subsequent records. We pad + * by stealing single blocks. + */ + + int stop_at = 0; + + for (;;) { + // @in points to the record we're stealing from + int in = count - 1; + + count = roundup(count, kHFSPlusExtentDensity); + + // @out is where we put the stolen single blocks + int out = count - 1; + + do { + if (out <= in) { + // We suceeded in padding; we're done + goto finish; + } + + /* + * "Steal" a block, or move a one-block extent within the + * @extents array. + * + * If the extent we're "stealing" from (@in) is only one + * block long, we'll end up copying it to @out, setting + * @in's blockCount to zero, and decrementing @in. So, we + * either split a multi-block extent; or move it within + * the @extents array. 
+ */ + extents[out].blockCount = 1; + extents[out].startBlock = (extents[in].startBlock + + extents[in].blockCount - 1); + --out; + } while (--extents[in].blockCount || --in >= stop_at); + + // We ran out of extents + if (roll_back_count + kHFSPlusExtentDensity > max_roll_back_extents) { + ret = ENOSPC; + goto exit; + } + + // Need to shift extents starting at out + 1 + ++out; + memmove(&extents[stop_at], &extents[out], + (count - out) * sizeof(*extents)); + count -= out - stop_at; + + // Pull in the next group + CHECK(hfs_ext_iter_next_group(iter_in), ret, exit); + + // Take a copy of these extents for roll back purposes + hfs_ext_copy_rec(iter_in->group, &roll_back_extents[roll_back_count]); + roll_back_count += kHFSPlusExtentDensity; + + // Delete this group; we're going to replace it + CHECK(BTDeleteRecord(tree, &iter_in->bt_iter), ret, exit); + + if (iter_in->last_in_fork) { + // Great! We've hit the end. Coalesce and write out. + int old_count = count; + count = 0; + + /* + * First coalesce the extents we already have. Takes + * advantage of push_ext coalescing the input extent with + * the last extent in @extents. If the extents are not + * contiguous, then this just copies the extents over + * themselves and sets @count back to @old_count. 
+ */ + for (int i = 0; i < old_count; ++i) + push_ext(extents, &count, &extents[i]); + + // Make room if necessary + const int flush_count = buffered_extents - kHFSPlusExtentDensity; + if (count > flush_count) { + CHECK(hfs_ext_iter_update(iter_out, extents, + flush_count, catalog_extents), ret, exit); + + memmove(&extents[0], &extents[flush_count], + (count - flush_count) * sizeof(*extents)); + + count -= flush_count; + } + + // Add in the extents we just read in + for (int i = 0; i < kHFSPlusExtentDensity; ++i) { + HFSPlusExtentDescriptor *ext = &iter_in->group[i]; + if (!ext->blockCount) + break; + push_ext(extents, &count, ext); + } + + goto finish; + } // if (iter_in->last_in_fork) + + /* + * Otherwise, we're not at the end, so we add these extents and then + * try and pad out again to a multiple of 8. We start by making room. + */ + if (count > buffered_extents - kHFSPlusExtentDensity) { + // Only write out one group here + CHECK(hfs_ext_iter_update(iter_out, extents, + kHFSPlusExtentDensity, + catalog_extents), ret, exit); + + memmove(&extents[0], &extents[kHFSPlusExtentDensity], + (count - kHFSPlusExtentDensity) * sizeof(*extents)); + + count -= kHFSPlusExtentDensity; + } + + // Record where to stop when padding above + stop_at = count; + + // Copy in the new extents + hfs_ext_copy_rec(iter_in->group, &extents[count]); + count += kHFSPlusExtentDensity; + } // for (;;) + +finish: + + // Write the remaining extents + CHECK(hfs_ext_iter_update(iter_out, extents, count, + catalog_extents), ret, exit); + + CHECK(BTFlushPath(hfsmp->hfs_catalog_cp->c_datafork), ret, exit); + CHECK(BTFlushPath(hfsmp->hfs_extents_cp->c_datafork), ret, exit); + +exit: + + if (ret && roll_back_count) { + +#define RB_FAILED \ + do { \ + printf("hfs_ext_replace:%u: roll back failed\n", __LINE__); \ + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); \ + goto roll_back_failed; \ + } while (0) + + // First delete any groups we inserted + HFSPlusExtentKey *key_out = 
hfs_ext_iter_key_mut(iter_out); + + key_in->startBlock = start_group_block; + if (!key_in->startBlock && key_out->startBlock > key_in->startBlock) { + key_in->startBlock += hfs_total_blocks(catalog_extents, + kHFSPlusExtentDensity); + } + + if (key_out->startBlock > key_in->startBlock) { + FSBufferDescriptor fbd = { + .bufferAddress = &iter_in->group, + .itemCount = 1, + .itemSize = sizeof(iter_in->group) + }; + + if (BTSearchRecord(tree, &iter_in->bt_iter, &fbd, NULL, + &iter_in->bt_iter)) { + RB_FAILED; + } + + for (;;) { + if (BTDeleteRecord(tree, &iter_in->bt_iter)) + RB_FAILED; + + key_in->startBlock += hfs_total_blocks(iter_in->group, + kHFSPlusExtentDensity); + + if (key_in->startBlock >= key_out->startBlock) + break; + + if (BTSearchRecord(tree, &iter_in->bt_iter, &fbd, NULL, + &iter_in->bt_iter)) { + RB_FAILED; + } + } + } + + // Position iter_out + key_out->startBlock = start_group_block; + + // Roll back all the extents + if (hfs_ext_iter_update(iter_out, roll_back_extents, roll_back_count, + catalog_extents)) { + RB_FAILED; + } + + // And we need to reallocate the blocks we deallocated + const uint32_t end_block = min(block, end_file_block); + block = start_group_block; + for (int i = 0; i < roll_back_count && block < end_block; ++i) { + HFSPlusExtentDescriptor *ext = &roll_back_extents[i]; + + if (block + ext->blockCount <= file_block) + continue; + + HFSPlusExtentDescriptor alloc_ext = *ext; + + if (block <= file_block) { + uint32_t trimmed_len = file_block - block; + + alloc_ext.startBlock += trimmed_len; + alloc_ext.blockCount -= trimmed_len; + } + + if (block + ext->blockCount > end_file_block) { + uint32_t overlap = end_file_block - block; + + alloc_ext.blockCount = (ext->startBlock + overlap + - alloc_ext.startBlock); + } + + if (hfs_block_alloc(hfsmp, &alloc_ext, HFS_ALLOC_ROLL_BACK, NULL)) + RB_FAILED; + + block += ext->blockCount; + } + + if (BTFlushPath(hfsmp->hfs_catalog_cp->c_datafork) + || BTFlushPath(hfsmp->hfs_extents_cp->c_datafork)) { 
+ RB_FAILED; + } + } // if (ret && roll_back_count) + +roll_back_failed: + + FREE(iter_in, M_TEMP); + FREE(extents, M_TEMP); + FREE(roll_back_extents, M_TEMP); + + return MacToVFSError(ret); +} diff --git a/bsd/hfs/hfs_extents.h b/bsd/hfs/hfs_extents.h new file mode 100644 index 000000000..9dd6073dd --- /dev/null +++ b/bsd/hfs/hfs_extents.h @@ -0,0 +1,54 @@ +// +// hfs_extents.h +// hfs +// +// Created by csuter on 7/11/14. +// Copyright (c) 2014 Apple. All rights reserved. +// + +#ifndef HFS_EXTENTS_H_ +#define HFS_EXTENTS_H_ + +#include +#include + +#include "hfs_format.h" + +#if !HFS_EXTENTS_TEST && !HFS_ALLOC_TEST +#include "hfs_cnode.h" +#include "hfs.h" +#include "hfscommon/headers/BTreesInternal.h" +#endif + +typedef struct hfs_ext_iter { + struct vnode *vp; // If NULL, this is an xattr extent + BTreeIterator bt_iter; + uint8_t ndx; // Index in group + bool last_in_fork; + uint32_t file_block; + uint32_t group_block_count; + HFSPlusExtentRecord group; +} hfs_ext_iter_t; + +errno_t hfs_ext_find(vnode_t vp, off_t offset, hfs_ext_iter_t *iter); + +errno_t hfs_ext_replace(hfsmount_t *hfsmp, vnode_t vp, + uint32_t file_block, + const HFSPlusExtentDescriptor *repl, + int count, + HFSPlusExtentRecord catalog_extents); + +bool hfs_ext_iter_is_catalog_extents(hfs_ext_iter_t *iter); + +static inline void hfs_ext_copy_rec(const HFSPlusExtentRecord src, + HFSPlusExtentRecord dst) +{ + memcpy(dst, src, sizeof(HFSPlusExtentRecord)); +} + +static inline uint32_t hfs_ext_end(const HFSPlusExtentDescriptor *ext) +{ + return ext->startBlock + ext->blockCount; +} + +#endif // HFS_EXTENTS_H_ diff --git a/bsd/hfs/hfs_format.h b/bsd/hfs/hfs_format.h index ba00a272a..dcc180724 100644 --- a/bsd/hfs/hfs_format.h +++ b/bsd/hfs/hfs_format.h @@ -363,8 +363,23 @@ enum { kHFSHasChildLinkBit = 0x0006, /* folder has a child that's a dir link */ kHFSHasChildLinkMask = 0x0040, - kHFSHasDateAddedBit = 0x0007, /* File/Folder has the date-added stored in the finder info. 
*/ - kHFSHasDateAddedMask = 0x0080 + kHFSHasDateAddedBit = 0x0007, /* File/Folder has the date-added stored in the finder info. */ + kHFSHasDateAddedMask = 0x0080, + + kHFSFastDevPinnedBit = 0x0008, /* this file has been pinned to the fast-device by the hot-file code on cooperative fusion */ + kHFSFastDevPinnedMask = 0x0100, + + kHFSDoNotFastDevPinBit = 0x0009, /* this file can not be pinned to the fast-device */ + kHFSDoNotFastDevPinMask = 0x0200, + + kHFSFastDevCandidateBit = 0x000a, /* this item is a potential candidate for fast-dev pinning (as are any of its descendents */ + kHFSFastDevCandidateMask = 0x0400, + + kHFSAutoCandidateBit = 0x000b, /* this item was automatically marked as a fast-dev candidate by the kernel */ + kHFSAutoCandidateMask = 0x0800 + + // There are only 4 flag bits remaining: 0x1000, 0x2000, 0x4000, 0x8000 + }; diff --git a/bsd/hfs/hfs_fsctl.h b/bsd/hfs/hfs_fsctl.h index b90b722b5..0958179ea 100644 --- a/bsd/hfs/hfs_fsctl.h +++ b/bsd/hfs/hfs_fsctl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2015 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2014 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -273,8 +273,7 @@ enum { #define HFSIOC_CLRBACKINGSTOREINFO _IO('h', 8) #define HFS_CLRBACKINGSTOREINFO IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO) -#define HFSIOC_BULKACCESS _IOW('h', 9, struct user32_access_t) -#define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS) +// 'h', 9 used to be HFSIOC_BULKACCESS which is now deprecated /* Unsupported - Previously used to enable/disable ACLs */ #define HFSIOC_UNSUPPORTED _IOW('h', 10, int32_t) @@ -364,10 +363,28 @@ enum { #define HFSIOC_CS_FREESPACE_TRIM _IOWR('h', 39, u_int32_t) #define HFS_CS_FREESPACE_TRIM IOCBASECMD(HFSIOC_CS_FREESPACE_TRIM) + /* Get file system information for the given volume */ #define HFSIOC_GET_FSINFO _IOWR('h', 45, hfs_fsinfo) #define HFS_GET_FSINFO IOCBASECMD(HFSIOC_GET_FSINFO) +/* Re-pin hotfile data; argument controls what state gets repinned */ +#define HFSIOC_REPIN_HOTFILE_STATE _IOWR('h', 46, u_int32_t) +#define HFS_REPIN_HOTFILE_STATE IOCBASECMD(HFSIOC_REPIN_HOTFILE_STATE) + +#define HFS_REPIN_METADATA 0x0001 +#define HFS_REPIN_USERDATA 0x0002 + +/* Mark a directory or file as worth caching on any underlying "fast" device */ +#define HFSIOC_SET_HOTFILE_STATE _IOWR('h', 47, u_int32_t) +#define HFS_SET_HOTFILE_STATE IOCBASECMD(HFSIOC_SET_HOTFILE_STATE) + +/* flags to pass to SET_HOTFILE_STATE */ +#define HFS_MARK_FASTDEVCANDIDATE 0x0001 +#define HFS_UNMARK_FASTDEVCANDIDATE 0x0002 +#define HFS_NEVER_FASTDEVCANDIDATE 0x0004 + + #endif /* __APPLE_API_UNSTABLE */ #endif /* ! _HFS_FSCTL_H_ */ diff --git a/bsd/hfs/hfs_fsinfo.c b/bsd/hfs/hfs_fsinfo.c index d3071086a..ffb31575b 100644 --- a/bsd/hfs/hfs_fsinfo.c +++ b/bsd/hfs/hfs_fsinfo.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Apple Inc. All rights reserved. + * Copyright (c) 2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -40,9 +40,7 @@ #include "hfscommon/headers/BTreesPrivate.h" #include "hfscommon/headers/FileMgrInternal.h" -#if CONFIG_PROTECT #include -#endif union HFSPlusRecord { @@ -837,6 +835,10 @@ static int fsinfo_cprotect_count_callback(struct hfsmount *hfsmp, HFSPlusKey *ke { struct hfs_fsinfo_cprotect *fsinfo = (struct hfs_fsinfo_cprotect *)data; static const uint16_t cp_xattrname_utf16[] = CONTENT_PROTECTION_XATTR_NAME_CHARS; + /* + * NOTE: cp_xattrname_utf16_len is the number of UTF-16 code units in + * the EA name string. + */ static const size_t cp_xattrname_utf16_len = sizeof(cp_xattrname_utf16)/2; struct cp_xattr_v5 *xattr; size_t xattr_len = sizeof(struct cp_xattr_v5); @@ -850,7 +852,7 @@ static int fsinfo_cprotect_count_callback(struct hfsmount *hfsmp, HFSPlusKey *ke /* We only look at content protection xattrs */ if ((key->attr_key.attrNameLen != cp_xattrname_utf16_len) || - (bcmp(key->attr_key.attrName, cp_xattrname_utf16, cp_xattrname_utf16_len))) { + (bcmp(key->attr_key.attrName, cp_xattrname_utf16, 2 * cp_xattrname_utf16_len))) { return 0; } diff --git a/bsd/hfs/hfs_hotfiles.c b/bsd/hfs/hfs_hotfiles.c index 7ba80c737..4cecf72a9 100644 --- a/bsd/hfs/hfs_hotfiles.c +++ b/bsd/hfs/hfs_hotfiles.c @@ -89,13 +89,47 @@ typedef struct hotfile_entry { u_int32_t blocks; } hotfile_entry_t; + +// +// We cap the max temperature for non-system files to "MAX_NORMAL_TEMP" +// so that they will always have a lower temperature than system (aka +// "auto-cached") files. System files have MAX_NORMAL_TEMP added to +// their temperature which produces two bands of files (all non-system +// files will have a temp less than MAX_NORMAL_TEMP and all system +// files will have a temp greatern than MAX_NORMAL_TEMP). +// +// This puts non-system files on the left side of the hotfile btree +// (and we start evicting from the left-side of the tree). 
The idea is +// that we will evict non-system files more aggressively since their +// working set changes much more dynamically than system files (which +// are for the most part, static). +// +// NOTE: these values have to fit into a 32-bit int. We use a +// value of 1-billion which gives a pretty broad range +// and yet should not run afoul of any sign issues. +// +#define MAX_NORMAL_TEMP 1000000000 +#define HF_TEMP_RANGE MAX_NORMAL_TEMP + + +// +// These used to be defines of the hard coded values. But if +// we're on an cooperative fusion (CF) system we need to change +// the values (which happens in hfs_recording_init() +// +uint32_t hfc_default_file_count = 1000; +uint32_t hfc_default_duration = (3600 * 60); +uint32_t hfc_max_file_count = 5000; +uint64_t hfc_max_file_size = (10 * 1024 * 1024); + + /* * Hot File Recording Data (runtime). */ typedef struct hotfile_data { struct hfsmount *hfsmp; long refcount; - int activefiles; /* active number of hot files */ + u_int32_t activefiles; /* active number of hot files */ u_int32_t threshold; u_int32_t maxblocks; hotfile_entry_t *rootentry; @@ -107,11 +141,15 @@ typedef struct hotfile_data { static int hfs_recording_start (struct hfsmount *); static int hfs_recording_stop (struct hfsmount *); +/* Hotfiles pinning routines */ +static int hfs_getvnode_and_pin (struct hfsmount *hfsmp, uint32_t fileid, uint32_t *pinned); +static int hfs_pin_extent_record (struct hfsmount *hfsmp, HFSPlusExtentRecord extents, uint32_t *pinned); +static int hfs_pin_catalog_rec (struct hfsmount *hfsmp, HFSPlusCatalogFile *cfp, int rsrc); /* * Hot File Data recording functions (in-memory binary tree). 
*/ -static void hf_insert (hotfile_data_t *, hotfile_entry_t *); +static int hf_insert (hotfile_data_t *, hotfile_entry_t *); static void hf_delete (hotfile_data_t *, u_int32_t, u_int32_t); static hotfile_entry_t * hf_coldest (hotfile_data_t *); static hotfile_entry_t * hf_getnewentry (hotfile_data_t *); @@ -128,11 +166,12 @@ static void hf_printtree (hotfile_entry_t *); */ static int hotfiles_collect (struct hfsmount *); static int hotfiles_age (struct hfsmount *); -static int hotfiles_adopt (struct hfsmount *); +static int hotfiles_adopt (struct hfsmount *, vfs_context_t); static int hotfiles_evict (struct hfsmount *, vfs_context_t); static int hotfiles_refine (struct hfsmount *); static int hotextents(struct hfsmount *, HFSPlusExtentDescriptor *); static int hfs_addhotfile_internal(struct vnode *); +static int hfs_hotfile_cur_freeblks(hfsmount_t *hfsmp); /* @@ -140,7 +179,10 @@ static int hfs_addhotfile_internal(struct vnode *); */ static int hfc_btree_create (struct hfsmount *, unsigned int, unsigned int); static int hfc_btree_open (struct hfsmount *, struct vnode **); +static int hfc_btree_open_ext(struct hfsmount *hfsmp, struct vnode **vpp, int ignore_btree_errs); static int hfc_btree_close (struct hfsmount *, struct vnode *); +static int hfc_btree_delete_record(struct hfsmount *hfsmp, BTreeIterator *iterator, HotFileKey *key); +static int hfc_btree_delete(struct hfsmount *hfsmp); static int hfc_comparekeys (HotFileKey *, HotFileKey *); @@ -154,7 +196,7 @@ char hfc_tag[] = "CLUSTERED HOT FILES B-TREE "; */ /* - * Start recording the hotest files on a file system. + * Start recording the hottest files on a file system. * * Requires that the hfc_mutex be held. 
*/ @@ -206,16 +248,31 @@ hfs_recording_start(struct hfsmount *hfsmp) (SWAP_BE32 (hotfileinfo.magic) == HFC_MAGIC) && (SWAP_BE32 (hotfileinfo.timeleft) > 0) && (SWAP_BE32 (hotfileinfo.timebase) > 0)) { - hfsmp->hfc_maxfiles = SWAP_BE32 (hotfileinfo.maxfilecnt); + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + if (hfsmp->hfs_hotfile_freeblks == 0) { + hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - SWAP_BE32 (hotfileinfo.usedblocks); + } + hfsmp->hfc_maxfiles = 0x7fffffff; + printf("hfs: %s: %s: hotfile freeblocks: %d, max: %d\n", hfsmp->vcbVN, __FUNCTION__, + hfsmp->hfs_hotfile_freeblks, hfsmp->hfs_hotfile_maxblks); + } else { + hfsmp->hfc_maxfiles = SWAP_BE32 (hotfileinfo.maxfilecnt); + } hfsmp->hfc_timebase = SWAP_BE32 (hotfileinfo.timebase); - hfsmp->hfc_timeout = SWAP_BE32 (hotfileinfo.timeleft) + tv.tv_sec ; + int timeleft = (int)SWAP_BE32(hotfileinfo.timeleft); + if (timeleft < 0 || timeleft > (int)(HFC_DEFAULT_DURATION*2)) { + // in case this field got botched, don't let it screw things up + // printf("hfs: hotfiles: bogus looking timeleft: %d\n", timeleft); + timeleft = HFC_DEFAULT_DURATION; + } + hfsmp->hfc_timeout = timeleft + tv.tv_sec ; /* Fix up any bogus timebase values. 
*/ if (hfsmp->hfc_timebase < HFC_MIN_BASE_TIME) { hfsmp->hfc_timebase = hfsmp->hfc_timeout - HFC_DEFAULT_DURATION; } #if HFC_VERBOSE - printf("hfs: Resume recording hot files on %s (%d secs left)\n", - hfsmp->vcbVN, SWAP_BE32 (hotfileinfo.timeleft)); + printf("hfs: Resume recording hot files on %s (%d secs left (%d); timeout %ld)\n", + hfsmp->vcbVN, SWAP_BE32 (hotfileinfo.timeleft), timeleft, hfsmp->hfc_timeout - tv.tv_sec); #endif } else { hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT; @@ -240,7 +297,10 @@ hfs_recording_start(struct hfsmount *hfsmp) return (error); } #if HFC_VERBOSE - printf("hfs: begin recording hot files on %s\n", hfsmp->vcbVN); + printf("hfs: begin recording hot files on %s (hotfile start/end block: %d - %d; max/free: %d/%d; maxfiles: %d)\n", + hfsmp->vcbVN, + hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end, + hfsmp->hfs_hotfile_maxblks, hfsmp->hfs_hotfile_freeblks, hfsmp->hfc_maxfiles); #endif hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT; hfsmp->hfc_timeout = tv.tv_sec + HFC_DEFAULT_DURATION; @@ -391,7 +451,7 @@ hfs_recording_stop(struct hfsmount *hfsmp) /* * Compute the amount of space to reclaim... 
*/ - if (listp->hfl_totalblocks > hfsmp->hfs_hotfile_freeblks) { + if (listp->hfl_totalblocks > hfs_hotfile_cur_freeblks(hfsmp)) { listp->hfl_reclaimblks = MIN(listp->hfl_totalblocks, hfsmp->hfs_hotfile_maxblks) - hfsmp->hfs_hotfile_freeblks; @@ -425,15 +485,40 @@ hfs_recording_stop(struct hfsmount *hfsmp) return (error); } +static void +save_btree_user_info(struct hfsmount *hfsmp) +{ + HotFilesInfo hotfileinfo; + struct timeval tv; + + microtime(&tv); + hotfileinfo.magic = SWAP_BE32 (HFC_MAGIC); + hotfileinfo.version = SWAP_BE32 (HFC_VERSION); + hotfileinfo.duration = SWAP_BE32 (HFC_DEFAULT_DURATION); + hotfileinfo.timebase = SWAP_BE32 (hfsmp->hfc_timebase); + hotfileinfo.timeleft = SWAP_BE32 (hfsmp->hfc_timeout - tv.tv_sec); + hotfileinfo.threshold = SWAP_BE32 (HFC_MINIMUM_TEMPERATURE); + hotfileinfo.maxfileblks = SWAP_BE32 (HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize); + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + hotfileinfo.usedblocks = SWAP_BE32 (hfsmp->hfs_hotfile_maxblks - hfs_hotfile_cur_freeblks(hfsmp)); +#if HFC_VERBOSE + printf("hfs: %s: saving usedblocks = %d (timeleft: %d; timeout %ld)\n", hfsmp->vcbVN, (hfsmp->hfs_hotfile_maxblks - hfsmp->hfs_hotfile_freeblks), + SWAP_BE32(hotfileinfo.timeleft), hfsmp->hfc_timeout); +#endif + } else { + hotfileinfo.maxfilecnt = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT); + } + strlcpy((char *)hotfileinfo.tag, hfc_tag, sizeof hotfileinfo.tag); + (void) BTSetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo, sizeof(hotfileinfo)); +} + /* * Suspend recording the hotest files on a file system. 
*/ int hfs_recording_suspend(struct hfsmount *hfsmp) { - HotFilesInfo hotfileinfo; hotfile_data_t *hotdata = NULL; - struct timeval tv; int error; if (hfsmp->hfc_stage == HFC_DISABLED) @@ -465,25 +550,13 @@ hfs_recording_suspend(struct hfsmount *hfsmp) } if (hfs_start_transaction(hfsmp) != 0) { - error = EINVAL; goto out; } if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { - error = EPERM; goto end_transaction; } - microtime(&tv); - hotfileinfo.magic = SWAP_BE32 (HFC_MAGIC); - hotfileinfo.version = SWAP_BE32 (HFC_VERSION); - hotfileinfo.duration = SWAP_BE32 (HFC_DEFAULT_DURATION); - hotfileinfo.timebase = SWAP_BE32 (hfsmp->hfc_timebase); - hotfileinfo.timeleft = SWAP_BE32 (hfsmp->hfc_timeout - tv.tv_sec); - hotfileinfo.threshold = SWAP_BE32 (hotdata->threshold); - hotfileinfo.maxfileblks = SWAP_BE32 (hotdata->maxblocks); - hotfileinfo.maxfilecnt = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT); - strlcpy((char *)hotfileinfo.tag, hfc_tag, sizeof hotfileinfo.tag); - (void) BTSetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo, sizeof(hotfileinfo)); + save_btree_user_info(hfsmp); hfs_unlock(VTOC(hfsmp->hfc_filevp)); @@ -507,122 +580,233 @@ hfs_recording_suspend(struct hfsmount *hfsmp) } -/* - * - */ -int -hfs_recording_init(struct hfsmount *hfsmp) +static void +reset_file_ids(struct hfsmount *hfsmp, uint32_t *fileid_table, int num_ids) +{ + int i, error; + + for(i=0; i < num_ids; i++) { + struct vnode *vp; + + error = hfs_vget(hfsmp, fileid_table[i], &vp, 0, 0); + if (error) { + if (error == ENOENT) { + error = 0; + continue; /* stale entry, go to next */ + } + continue; + } + + // hfs_vget returns a locked cnode so no need to lock here + + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) { + error = hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, NULL, vfs_context_kernel()); + } + + /* + * The updates to the catalog must be journaled + */ + hfs_start_transaction(hfsmp); + + // + // turn off _all_ the 
hotfile related bits since we're resetting state + // + if (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevCandidateMask) { + vnode_clearfastdevicecandidate(vp); + } + + VTOC(vp)->c_attr.ca_recflags &= ~(kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask|kHFSFastDevCandidateMask|kHFSAutoCandidateMask); + VTOC(vp)->c_flag |= C_MODIFIED; + + hfs_update(vp, 0); + + hfs_end_transaction(hfsmp); + + hfs_unlock(VTOC(vp)); + vnode_put(vp); + } +} + +static int +flag_hotfile(struct hfsmount *hfsmp, const char *filename) +{ + struct vnode *dvp = NULL, *fvp = NULL; + vfs_context_t ctx = vfs_context_kernel(); + struct componentname cname; + int error=0; + size_t fname_len; + const char *orig_fname = filename; + + if (filename == NULL) { + return EINVAL; + } + + fname_len = strlen(filename); // do NOT include the trailing '\0' so that we break out of the loop below + + error = VFS_ROOT(HFSTOVFS(hfsmp), &dvp, ctx); + if (error) { + return (error); + } + + /* At this point, 'dvp' must be considered iocounted */ + const char *ptr; + ptr = filename; + + while (ptr < (orig_fname + fname_len - 1)) { + for(; ptr < (orig_fname + fname_len) && *ptr && *ptr != '/'; ptr++) { + /* just keep advancing till we reach the end of the string or a slash */ + } + + cname.cn_nameiop = LOOKUP; + cname.cn_flags = ISLASTCN; + cname.cn_context = ctx; + cname.cn_ndp = NULL; + cname.cn_pnbuf = __DECONST(char *, orig_fname); + cname.cn_nameptr = __DECONST(char *, filename); + cname.cn_pnlen = fname_len; + cname.cn_namelen = ptr - filename; + cname.cn_hash = 0; + cname.cn_consume = 0; + + error = VNOP_LOOKUP(dvp, &fvp, &cname, ctx); + if (error) { + /* + * If 'dvp' is non-NULL, then it has an iocount. Make sure to release it + * before bailing out. VNOP_LOOKUP could legitimately return ENOENT + * if the item didn't exist or if we raced with a delete. 
+ */ + if (dvp) { + vnode_put(dvp); + dvp = NULL; + } + return error; + } + + if (ptr < orig_fname + fname_len - 1) { + // + // we've got a multi-part pathname so drop the ref on the dir, + // make dvp become what we just looked up, and advance over + // the slash character in the pathname to get to the next part + // of the component + // + vnode_put(dvp); + dvp = fvp; + fvp = NULL; + + filename = ++ptr; // skip the slash character + } + } + + if (fvp == NULL) { + error = ENOENT; + goto out; + } + + struct cnode *cp = VTOC(fvp); + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) { + goto out; + } + + hfs_start_transaction(hfsmp); + + cp->c_attr.ca_recflags |= (kHFSFastDevCandidateMask|kHFSAutoCandidateMask); + cp->c_flag |= C_MODIFIED; + + hfs_update(fvp, 0); + + hfs_end_transaction(hfsmp); + + hfs_unlock(cp); + //printf("hfs: flagged /%s with the fast-dev-candidate|auto-candidate flags\n", filename); + + +out: + if (fvp) { + vnode_put(fvp); + fvp = NULL; + } + + if (dvp) { + vnode_put(dvp); + dvp = NULL; + } + + return error; +} + + +static void +hfs_setup_default_cf_hotfiles(struct hfsmount *hfsmp) +{ + const char *system_default_hotfiles[] = { + "usr", + "System", + "Applications", + "private/var/db/dyld" + }; + int i; + + for(i=0; i < (int)(sizeof(system_default_hotfiles)/sizeof(char *)); i++) { + flag_hotfile(hfsmp, system_default_hotfiles[i]); + } +} + + +#define NUM_FILE_RESET_IDS 4096 // so we allocate 16k to hold file-ids + +static void +hfs_hotfile_reset(struct hfsmount *hfsmp) { CatalogKey * keyp; CatalogRecord * datap; u_int32_t dataSize; - HFSPlusCatalogFile *filep; BTScanState scanstate; BTreeIterator * iterator = NULL; FSBufferDescriptor record; - HotFileKey * key; - filefork_t * filefork; u_int32_t data; - struct cat_attr cattr; u_int32_t cnid; int error = 0; + uint32_t *fileids=NULL; + int cur_id_index = 0; - int inserted = 0; /* debug variables */ + int cleared = 0; /* debug variables */ int filecount = 0; + int dircount = 
0; - /* - * For now, only the boot volume is supported. - */ - if ((vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) == 0) { - hfsmp->hfc_stage = HFC_DISABLED; - return (EPERM); - } - - /* - * Tracking of hot files requires up-to-date access times. - * So if access time updates are disabled, then we disable - * hot files, too. - */ - if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_NOATIME) { - hfsmp->hfc_stage = HFC_DISABLED; - return EPERM; - } - - /* - * If the Hot File btree exists then metadata zone is ready. - */ - cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL); - if (cnid != 0 && S_ISREG(cattr.ca_mode)) { - if (hfsmp->hfc_stage == HFC_DISABLED) - hfsmp->hfc_stage = HFC_IDLE; - return (0); - } - - if (hfs_start_transaction(hfsmp) != 0) { - return EINVAL; - } - - error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT); - if (error) { -#if HFC_VERBOSE - printf("hfs: Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN); -#endif - goto out2; - } - /* - * Open the Hot File B-tree file for writing. 
- */ - if (hfsmp->hfc_filevp) - panic("hfs_recording_init: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp); - error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); - if (error) { #if HFC_VERBOSE - printf("hfs: Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN); + printf("hfs: %s: %s\n", hfsmp->vcbVN, __FUNCTION__); #endif - goto out2; - } + MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); if (iterator == NULL) { error = ENOMEM; - (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); - hfsmp->hfc_filevp = NULL; - goto out2; + goto out; } bzero(iterator, sizeof(*iterator)); - key = (HotFileKey*) &iterator->key; - key->keyLength = HFC_KEYLENGTH; + + MALLOC(fileids, uint32_t *, NUM_FILE_RESET_IDS * sizeof(uint32_t), M_TEMP, M_WAITOK); + if (fileids == NULL) { + error = ENOMEM; + goto out; + } record.bufferAddress = &data; record.itemSize = sizeof(u_int32_t); record.itemCount = 1; -#if HFC_VERBOSE - printf("hfs: Evaluating space for \"%s\" metadata zone...\n", HFSTOVCB(hfsmp)->vcbVN); -#endif + /* * Get ready to scan the Catalog file. */ error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0, kCatSearchBufferSize, &scanstate); if (error) { - printf("hfs_recording_init: err %d BTScanInit\n", error); - goto out2; - } - - /* - * The writes to Hot File B-tree file are journaled. - */ - if (hfs_start_transaction(hfsmp) != 0) { - error = EINVAL; - goto out1; - } - if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { - error = EPERM; - goto out0; + printf("hfs_hotfile_reset: err %d BTScanInit\n", error); + goto out; } - filefork = VTOF(hfsmp->hfc_filevp); /* - * Visit all the catalog btree leaf records. + * Visit all the catalog btree leaf records, clearing any that have the + * HotFileCached bit set. 
*/ for (;;) { error = BTScanNextRecord(&scanstate, 0, (void **)&keyp, (void **)&datap, &dataSize); @@ -630,56 +814,955 @@ hfs_recording_init(struct hfsmount *hfsmp) if (error == btNotFound) error = 0; else - printf("hfs_recording_init: err %d BTScanNext\n", error); + printf("hfs_hotfile_reset: err %d BTScanNext\n", error); break; } - if ((datap->recordType != kHFSPlusFileRecord) || - (dataSize != sizeof(HFSPlusCatalogFile))) { - continue; - } - filep = (HFSPlusCatalogFile *)datap; - filecount++; - if (filep->dataFork.totalBlocks == 0) { - continue; - } - /* - * Any file that has blocks inside the hot file - * space is recorded for later eviction. - * - * For now, resource forks are ignored. - */ - if (!hotextents(hfsmp, &filep->dataFork.extents[0])) { + + if (datap->recordType == kHFSPlusFolderRecord && (dataSize == sizeof(HFSPlusCatalogFolder))) { + HFSPlusCatalogFolder *dirp = (HFSPlusCatalogFolder *)datap; + + dircount++; + + if ((dirp->flags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask|kHFSFastDevCandidateMask|kHFSAutoCandidateMask)) == 0) { + continue; + } + + cnid = dirp->folderID; + } else if ((datap->recordType == kHFSPlusFileRecord) && (dataSize == sizeof(HFSPlusCatalogFile))) { + HFSPlusCatalogFile *filep = (HFSPlusCatalogFile *)datap; + + filecount++; + + /* + * If the file doesn't have any of the HotFileCached bits set, ignore it. + */ + if ((filep->flags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask|kHFSFastDevCandidateMask|kHFSAutoCandidateMask)) == 0) { + continue; + } + + cnid = filep->fileID; + } else { continue; } - cnid = filep->fileID; /* Skip over journal files. */ if (cnid == hfsmp->hfs_jnlfileid || cnid == hfsmp->hfs_jnlinfoblkid) { continue; } - /* - * XXX - need to skip quota files as well. - */ - - /* Insert a hot file entry. 
*/ - key->keyLength = HFC_KEYLENGTH; - key->temperature = HFC_MINIMUM_TEMPERATURE; - key->fileID = cnid; - key->forkType = 0; - data = 0x3f3f3f3f; - error = BTInsertRecord(filefork, iterator, &record, record.itemSize); - if (error) { - printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); - error = MacToVFSError(error); - break; - } + + // + // Just record the cnid of the file for now. We will modify it separately + // because we can't modify the catalog while we're scanning it. + // + fileids[cur_id_index++] = cnid; + if (cur_id_index >= NUM_FILE_RESET_IDS) { + // + // We're over the limit of file-ids so we have to terminate this + // scan, go modify all the catalog records, then restart the scan. + // This is required because it's not permissible to modify the + // catalog while scanning it. + // + (void) BTScanTerminate(&scanstate, &data, &data, &data); + + reset_file_ids(hfsmp, fileids, cur_id_index); + cleared += cur_id_index; + cur_id_index = 0; + + // restart the scan + error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0, + kCatSearchBufferSize, &scanstate); + if (error) { + printf("hfs_hotfile_reset: err %d BTScanInit\n", error); + goto out; + } + continue; + } + } + + if (cur_id_index) { + reset_file_ids(hfsmp, fileids, cur_id_index); + cleared += cur_id_index; + cur_id_index = 0; + } + + printf("hfs: cleared HotFileCache related bits on %d files out of %d (dircount %d)\n", cleared, filecount, dircount); + + (void) BTScanTerminate(&scanstate, &data, &data, &data); + +out: + if (fileids) + FREE(fileids, M_TEMP); + + if (iterator) + FREE(iterator, M_TEMP); + + // + // If the hotfile btree exists, delete it. We need to open + // it to be able to delete it because we need the hfc_filevp + // for deletion. 
+ // + error = hfc_btree_open_ext(hfsmp, &hfsmp->hfc_filevp, 1); + if (!error) { + printf("hfs: hotfile_reset: deleting existing hotfile btree\n"); + hfc_btree_delete(hfsmp); + } + + if (hfsmp->hfc_filevp) { + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + } + + hfsmp->hfs_hotfile_blk_adjust = 0; + hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks; +} + + +// +// This should ONLY be called by hfs_recording_init() and the special fsctl. +// +// We assume that the hotfile btree is already opened. +// +static int +hfs_hotfile_repin_files(struct hfsmount *hfsmp) +{ + BTreeIterator * iterator = NULL; + HotFileKey * key; + filefork_t * filefork; + int error = 0; + int bt_op; + enum hfc_stage stage; + uint32_t pinned_blocks; + uint32_t num_files=0, nrsrc=0; + uint32_t total_pinned=0; + + if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) || !hfsmp->hfc_filevp) { + // + // this is only meaningful if we're pinning hotfiles + // (as opposed to the regular form of hotfiles that + // get relocated to the hotfile zone) + // + return 0; + } + +#if HFC_VERBOSE + printf("hfs: %s: %s\n", hfsmp->vcbVN, __FUNCTION__); +#endif + + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + return (EPERM); + } + + + MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); + if (iterator == NULL) { + hfs_unlock(VTOC(hfsmp->hfc_filevp)); + return (ENOMEM); + } + + stage = hfsmp->hfc_stage; + hfsmp->hfc_stage = HFC_BUSY; + + bt_op = kBTreeFirstRecord; + + bzero(iterator, sizeof(*iterator)); + key = (HotFileKey*) &iterator->key; + + filefork = VTOF(hfsmp->hfc_filevp); + int lockflags; + + while (1) { + + lockflags = 0; + /* + * Obtain the first record (ie the coldest one). 
+ */ + if (BTIterateRecord(filefork, bt_op, iterator, NULL, NULL) != 0) { + // no more records + error = 0; + break; + } + if (key->keyLength != HFC_KEYLENGTH) { + // printf("hfs: hotfiles_repin_files: invalid key length %d\n", key->keyLength); + error = EFTYPE; + break; + } + if (key->temperature == HFC_LOOKUPTAG) { + // ran into thread records in the hotfile btree + error = 0; + break; + } + + // + // Just lookup the records in the catalog and pin the direct + // mapped extents. Faster than instantiating full vnodes + // (and thereby thrashing the system vnode cache). + // + struct cat_desc fdesc; + struct cat_attr attr; + struct cat_fork fork; + uint8_t forktype = 0; + + lockflags = hfs_systemfile_lock(hfsmp, (SFL_CATALOG | SFL_EXTENTS), HFS_SHARED_LOCK); + /* + * Snoop the cnode hash to find out if the item we want is in-core already. + * + * We largely expect this function to fail (the items we want are probably not in the hash). + * we use the special variant which bails out as soon as it finds a vnode (even if it is + * marked as open-unlinked or actually removed on-disk. If we find a vnode, then we + * release the systemfile locks and go through the pin-vnode path instead. + */ + if (hfs_chash_snoop (hfsmp, key->fileID, 1, NULL, NULL) == 0) { + pinned_blocks = 0; + + /* unlock immediately and go through the in-core path */ + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + + error = hfs_getvnode_and_pin (hfsmp, key->fileID, &pinned_blocks); + if (error) { + /* if ENOENT, then it was deleted in the catalog. 
Remove from our hotfiles tracking */ + if (error == ENOENT) { + hfc_btree_delete_record(hfsmp, iterator, key); + } + /* other errors, just ignore and move on with life */ + } + else { //!error + total_pinned += pinned_blocks; + num_files++; + } + + goto next; + } + + /* If we get here, we're still holding the systemfile locks */ + error = cat_idlookup(hfsmp, key->fileID, 1, 0, &fdesc, &attr, &fork); + if (error) { + // + // this file system could have been mounted while booted from a + // different partition and thus the hotfile btree would not have + // been maintained. thus a file that was hotfile cached could + // have been deleted while booted from a different partition which + // means we need to delete it from the hotfile btree. + // + // block accounting is taken care of at the end: we re-assign + // hfsmp->hfs_hotfile_freeblks based on how many blocks we actually + // pinned. + // + hfc_btree_delete_record(hfsmp, iterator, key); + + goto next; + } + + if (fork.cf_size == 0) { + // hmmm, the data is probably in the resource fork (aka a compressed file) + error = cat_idlookup(hfsmp, key->fileID, 1, 1, &fdesc, &attr, &fork); + if (error) { + hfc_btree_delete_record(hfsmp, iterator, key); + goto next; + } + forktype = 0xff; + nrsrc++; + } + + pinned_blocks = 0; + + /* Can't release the catalog /extents lock yet, we may need to go find the overflow blocks */ + error = hfs_pin_extent_record (hfsmp, fork.cf_extents, &pinned_blocks); + if (error) { + goto next; //skip to next + } + /* add in the blocks from the inline 8 */ + total_pinned += pinned_blocks; + pinned_blocks = 0; + + /* Could this file have overflow extents? 
*/ + if (fork.cf_extents[kHFSPlusExtentDensity-1].startBlock) { + /* better pin them, too */ + error = hfs_pin_overflow_extents (hfsmp, key->fileID, forktype, &pinned_blocks); + if (error) { + /* If we fail to pin all of the overflow extents, then just skip to the next file */ + goto next; + } + } + + num_files++; + if (pinned_blocks) { + /* now add in any overflow also */ + total_pinned += pinned_blocks; + } + + next: + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + } + bt_op = kBTreeNextRecord; + + } /* end while */ + +#if HFC_VERBOSE + printf("hfs: hotfiles_repin_files: re-pinned %d files (nrsrc %d, total pinned %d blks; freeblock %d, maxblocks %d, calculated free: %d)\n", + num_files, nrsrc, total_pinned, hfsmp->hfs_hotfile_freeblks, hfsmp->hfs_hotfile_maxblks, + hfsmp->hfs_hotfile_maxblks - total_pinned); +#endif + // + // make sure this is accurate based on how many blocks we actually pinned + // + hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - total_pinned; + + hfs_unlock(VTOC(hfsmp->hfc_filevp)); + + FREE(iterator, M_TEMP); + hfsmp->hfc_stage = stage; + wakeup((caddr_t)&hfsmp->hfc_stage); + return (error); +} + +void +hfs_repin_hotfiles(struct hfsmount *hfsmp) +{ + int error, need_close; + + lck_mtx_lock(&hfsmp->hfc_mutex); + + if (hfsmp->hfc_filevp == NULL) { + error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); + if (!error) { + need_close = 1; + } else { + printf("hfs: failed to open the btree err=%d. Unable to re-pin hotfiles.\n", error); + lck_mtx_unlock(&hfsmp->hfc_mutex); + return; + } + } else { + need_close = 0; + } + + hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL, vfs_context_kernel()); + + hfs_hotfile_repin_files(hfsmp); + + if (need_close) { + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + } + + lck_mtx_unlock(&hfsmp->hfc_mutex); +} + +/* + * For a given file ID, find and pin all of its overflow extents to the underlying CS + * device. 
Assumes that the extents overflow b-tree is locked for the duration of this call. + * + * Emit the number of blocks pinned in output argument 'pinned' + * + * Return success or failure (errno) in return value. + * + */ +int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid, + uint8_t forktype, uint32_t *pinned) { + + struct BTreeIterator *ext_iter = NULL; + ExtentKey *ext_key_ptr = NULL; + ExtentRecord ext_data; + FSBufferDescriptor btRecord; + uint16_t btRecordSize; + int error = 0; + + uint32_t pinned_blocks = 0; + + + MALLOC (ext_iter, struct BTreeIterator*, sizeof (struct BTreeIterator), M_TEMP, M_WAITOK); + if (ext_iter == NULL) { + return ENOMEM; + } + bzero (ext_iter, sizeof(*ext_iter)); + + BTInvalidateHint (ext_iter); + ext_key_ptr = (ExtentKey*)&ext_iter->key; + btRecord.bufferAddress = &ext_data; + btRecord.itemCount = 1; + + /* + * This is like when you delete a file; we don't actually need most of the search machinery because + * we are going to need all of the extent records that belong to this file (for a given fork type), + * so we might as well use a straight-up iterator. + * + * Position the B-Tree iterator at the first record with this file ID + */ + btRecord.itemSize = sizeof (HFSPlusExtentRecord); + ext_key_ptr->hfsPlus.keyLength = kHFSPlusExtentKeyMaximumLength; + ext_key_ptr->hfsPlus.forkType = forktype; + ext_key_ptr->hfsPlus.pad = 0; + ext_key_ptr->hfsPlus.fileID = fileid; + ext_key_ptr->hfsPlus.startBlock = 0; + + error = BTSearchRecord (VTOF(hfsmp->hfs_extents_vp), ext_iter, &btRecord, &btRecordSize, ext_iter); + if (error == btNotFound) { + /* empty b-tree, so that's ok. we'll fall out during error check below. 
*/ + error = 0; + } + + while (1) { + uint32_t found_fileid; + uint32_t pblocks; + + error = BTIterateRecord (VTOF(hfsmp->hfs_extents_vp), kBTreeNextRecord, ext_iter, &btRecord, &btRecordSize); + if (error) { + /* swallow it if it's btNotFound, otherwise just bail out */ + if (error == btNotFound) + error = 0; + break; + } + + found_fileid = ext_key_ptr->hfsPlus.fileID; + /* + * We only do one fork type at a time. So if either the fork-type doesn't + * match what we are looking for (resource or data), OR the file id doesn't match + * which indicates that there's nothing more with this file ID as the key, then bail out + */ + if ((found_fileid != fileid) || (ext_key_ptr->hfsPlus.forkType != forktype)) { + error = 0; + break; + } + + /* Otherwise, we now have an extent record. Process and pin all of the file extents. */ + pblocks = 0; + error = hfs_pin_extent_record (hfsmp, ext_data.hfsPlus, &pblocks); + + if (error) { + break; + } + pinned_blocks += pblocks; + + /* if 8th extent is empty, then bail out */ + if (ext_data.hfsPlus[kHFSPlusExtentDensity-1].startBlock == 0) { + error = 0; + break; + } + + } // end extent-getting loop + + /* dump the iterator */ + FREE (ext_iter, M_TEMP); + + if (error == 0) { + /* + * In the event that the file has no overflow extents, pinned_blocks + * will never be updated, so we'll properly export 0 pinned blocks to caller + */ + *pinned = pinned_blocks; + } + + return error; + +} + + +static int +hfs_getvnode_and_pin (struct hfsmount *hfsmp, uint32_t fileid, uint32_t *pinned) { + struct vnode *vp; + int error = 0; + *pinned = 0; + uint32_t pblocks; + + /* + * Acquire the vnode for this file. This returns a locked cnode on success + */ + error = hfs_vget(hfsmp, fileid, &vp, 0, 0); + if (error) { + /* It's possible the file was open-unlinked. In this case, we'll get ENOENT back. */ + return error; + } + + /* + * Symlinks that may have been inserted into the hotfile zone during a previous OS are now stuck + * here. 
We do not want to move them. + */ + if (!vnode_isreg(vp)) { + hfs_unlock(VTOC(vp)); + vnode_put(vp); + return EPERM; + } + + if (!(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) { + hfs_unlock(VTOC(vp)); + vnode_put(vp); + return EINVAL; + } + + error = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, &pblocks, vfs_context_kernel()); + if (error == 0) { + *pinned = pblocks; + } + + hfs_unlock(VTOC(vp)); + vnode_put(vp); + + return error; + +} + +/* + * Pins an HFS Extent record to the underlying CoreStorage. Assumes that Catalog & Extents overflow + * B-trees are held locked, as needed. + * + * Returns the number of blocks pinned in the output argument 'pinned' + * + * Returns error status (0 || errno) in return value. + */ +static int hfs_pin_extent_record (struct hfsmount *hfsmp, HFSPlusExtentRecord extents, uint32_t *pinned) { + uint32_t pb = 0; + int i; + int error; + + if (pinned == NULL) { + return EINVAL; + } + *pinned = 0; + + + + /* iterate through the extents */ + for ( i = 0; i < kHFSPlusExtentDensity; i++) { + if (extents[i].startBlock == 0) { + break; + } + + error = hfs_pin_block_range (hfsmp, HFS_PIN_IT, extents[i].startBlock, + extents[i].blockCount, vfs_context_kernel()); + + if (error) { + break; + } + pb += extents[i].blockCount; + } + + *pinned = pb; + + return error; +} + +/* + * Consume an HFS Plus on-disk catalog record and pin its blocks + * to the underlying CS devnode. + * + * NOTE: This is an important distinction! + * This function takes in an HFSPlusCatalogFile* which is the actual + * 200-some-odd-byte on-disk representation in the Catalog B-Tree (not + * one of the run-time structs that we normally use. 
+ * + * This assumes that the catalog and extents-overflow btrees + * are locked, at least in shared mode + */ +static int hfs_pin_catalog_rec (struct hfsmount *hfsmp, HFSPlusCatalogFile *cfp, int rsrc) { + uint32_t pinned_blocks = 0; + HFSPlusForkData *forkdata; + int error = 0; + uint8_t forktype = 0; + + if (rsrc) { + forkdata = &cfp->resourceFork; + forktype = 0xff; + } + else { + forkdata = &cfp->dataFork; + } + + uint32_t pblocks = 0; + + /* iterate through the inline extents */ + error = hfs_pin_extent_record (hfsmp, forkdata->extents, &pblocks); + if (error) { + return error; + } + + pinned_blocks += pblocks; + pblocks = 0; + + /* it may have overflow extents */ + if (forkdata->extents[kHFSPlusExtentDensity-1].startBlock != 0) { + error = hfs_pin_overflow_extents (hfsmp, cfp->fileID, forktype, &pblocks); + } + pinned_blocks += pblocks; + + hfsmp->hfs_hotfile_freeblks -= pinned_blocks; + + return error; +} + + +/* + * + */ +int +hfs_recording_init(struct hfsmount *hfsmp) +{ + CatalogKey * keyp; + CatalogRecord * datap; + u_int32_t dataSize; + HFSPlusCatalogFile *filep; + BTScanState scanstate; + BTreeIterator * iterator = NULL; + FSBufferDescriptor record; + HotFileKey * key; + filefork_t * filefork; + u_int32_t data; + struct cat_attr cattr; + u_int32_t cnid; + int error = 0; + long starting_temp; + + int started_tr = 0; + int started_scan = 0; + + int inserted = 0; /* debug variables */ + int filecount = 0; + int uncacheable = 0; + + /* + * For now, only the boot volume is supported. + */ + if ((vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) == 0) { + hfsmp->hfc_stage = HFC_DISABLED; + return (EPERM); + } + + /* We grab the HFC mutex even though we're not fully mounted yet, just for orderliness */ + lck_mtx_lock (&hfsmp->hfc_mutex); + + /* + * Tracking of hot files requires up-to-date access times. + * So if access time updates are disabled, then we disable + * hot files, too. 
+ */ + if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_NOATIME) { + hfsmp->hfc_stage = HFC_DISABLED; + lck_mtx_unlock (&hfsmp->hfc_mutex); + return EPERM; + } + + // + // Check if we've been asked to suspend operation + // + cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, ".hotfile-suspend", &cattr, NULL); + if (cnid != 0) { + printf("hfs: %s: %s: hotfiles explicitly disabled! remove /.hotfiles-suspend to re-enable\n", hfsmp->vcbVN, __FUNCTION__); + hfsmp->hfc_stage = HFC_DISABLED; + lck_mtx_unlock (&hfsmp->hfc_mutex); + return EPERM; + } + + // + // Check if we've been asked to reset our state. + // + cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, ".hotfile-reset", &cattr, NULL); + if (cnid != 0) { + hfs_hotfile_reset(hfsmp); + } + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // + // Cooperative Fusion (CF) systems use different constants + // than traditional hotfile systems. These were picked after a bit of + // experimentation - we can cache many more files on the + // ssd in an CF system and we can do so more rapidly + // so bump the limits considerably (and turn down the + // duration so that it doesn't take weeks to adopt all + // the files). + // + hfc_default_file_count = 20000; + hfc_default_duration = 300; // 5min + hfc_max_file_count = 50000; + hfc_max_file_size = (512ULL * 1024ULL * 1024ULL); + } + + /* + * If the Hot File btree exists then metadata zone is ready. 
+ */ + cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL); + if (cnid != 0 && S_ISREG(cattr.ca_mode)) { + int recreate = 0; + + if (hfsmp->hfc_stage == HFC_DISABLED) + hfsmp->hfc_stage = HFC_IDLE; + hfsmp->hfs_hotfile_freeblks = 0; + + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && cattr.ca_blocks > 0) { + // + // make sure the hotfile btree is pinned + // + error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); + if (!error) { + /* XXX: must fix hfs_pin_vnode too */ + hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL, vfs_context_kernel()); + + } else { + printf("hfs: failed to open the btree err=%d. Recreating hotfile btree.\n", error); + recreate = 1; + } + + hfs_hotfile_repin_files(hfsmp); + + if (hfsmp->hfc_filevp) { + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + } + + } else if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // hmmm, the hotfile btree is zero bytes long? how odd. let's recreate it. + printf("hfs: hotfile btree is zero bytes long?! recreating it.\n"); + recreate = 1; + } + + if (!recreate) { + /* don't forget to unlock the mutex */ + lck_mtx_unlock (&hfsmp->hfc_mutex); + return (0); + } else { + // + // open the hotfile btree file ignoring errors because + // we need the vnode pointer for hfc_btree_delete() to + // be able to do its work + // + error = hfc_btree_open_ext(hfsmp, &hfsmp->hfc_filevp, 1); + if (!error) { + // and delete it! 
+ error = hfc_btree_delete(hfsmp); + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + } + } + } + + printf("hfs: %s: %s: creating the hotfile btree\n", hfsmp->vcbVN, __FUNCTION__); + if (hfs_start_transaction(hfsmp) != 0) { + lck_mtx_unlock (&hfsmp->hfc_mutex); + return EINVAL; + } + + /* B-tree creation must be journaled */ + started_tr = 1; + + error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT); + if (error) { +#if HFC_VERBOSE + printf("hfs: Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN); +#endif + goto recording_init_out; + } + + hfs_end_transaction (hfsmp); + started_tr = 0; + /* + * Do a journal flush + flush track cache. We have to ensure that the async I/Os have been issued to the media + * before proceeding. + */ + hfs_flush (hfsmp, HFS_FLUSH_FULL); + + /* now re-start a new transaction */ + if (hfs_start_transaction (hfsmp) != 0) { + lck_mtx_unlock (&hfsmp->hfc_mutex); + return EINVAL; + } + started_tr = 1; + + /* + * Open the Hot File B-tree file for writing. + */ + if (hfsmp->hfc_filevp) + panic("hfs_recording_init: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp); + + error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); + if (error) { +#if HFC_VERBOSE + printf("hfs: Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN); +#endif + goto recording_init_out; + } + + /* + * This function performs work similar to namei; we must NOT hold the catalog lock while + * calling it. This will decorate catalog records as being pinning candidates. (no hotfiles work) + */ + hfs_setup_default_cf_hotfiles(hfsmp); + + /* + * now grab the hotfiles b-tree vnode/cnode lock first, as it is not classified as a systemfile. 
+ */ + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + error = EPERM; + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + /* zero it out to avoid pinning later on */ + hfsmp->hfc_filevp = NULL; + goto recording_init_out; + } + + MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); + if (iterator == NULL) { + error = ENOMEM; + hfs_unlock (VTOC(hfsmp->hfc_filevp)); + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + /* zero it out to avoid pinning */ + hfsmp->hfc_filevp = NULL; + goto recording_init_out; + } + + bzero(iterator, sizeof(*iterator)); + key = (HotFileKey*) &iterator->key; + key->keyLength = HFC_KEYLENGTH; + + record.bufferAddress = &data; + record.itemSize = sizeof(u_int32_t); + record.itemCount = 1; + +#if HFC_VERBOSE + printf("hfs: Evaluating space for \"%s\" metadata zone... (freeblks %d)\n", HFSTOVCB(hfsmp)->vcbVN, + hfsmp->hfs_hotfile_freeblks); +#endif + + /* + * Get ready to scan the Catalog file. We explicitly do NOT grab the catalog lock because + * we're fully single-threaded at the moment (by virtue of being called during mount()), + * and if we have to grow the hotfile btree, then we would need to grab the catalog lock + * and if we take a shared lock here, it would deadlock (see ) + * + * We already started a transaction so we should already be holding the journal lock at this point. + * Note that we have to hold the journal lock / start a txn BEFORE the systemfile locks. 
+ */ + + error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0, + kCatSearchBufferSize, &scanstate); + if (error) { + printf("hfs_recording_init: err %d BTScanInit\n", error); + + /* drop the systemfile locks */ + hfs_unlock(VTOC(hfsmp->hfc_filevp)); + + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + + /* zero it out to avoid pinning */ + hfsmp->hfc_filevp = NULL; + goto recording_init_out; + } + + started_scan = 1; + + filefork = VTOF(hfsmp->hfc_filevp); + + starting_temp = random() % HF_TEMP_RANGE; + + /* + * Visit all the catalog btree leaf records. We have to hold the catalog lock to do this. + * + * NOTE: The B-Tree scanner reads from the media itself. Under normal circumstances it would be + * fine to simply use b-tree routines to read blocks that correspond to b-tree nodes, because the + * block cache is going to ensure you always get the cached copy of a block (even if a journal + * txn has modified one of those blocks). That is NOT true when + * using the scanner. In particular, it will always read whatever is on-disk. So we have to ensure + * that the journal has flushed and that the async I/Os to the metadata files have been issued. + */ + for (;;) { + error = BTScanNextRecord(&scanstate, 0, (void **)&keyp, (void **)&datap, &dataSize); + if (error) { + if (error == btNotFound) + error = 0; + else + printf("hfs_recording_init: err %d BTScanNext\n", error); + break; + } + if ((datap->recordType != kHFSPlusFileRecord) || + (dataSize != sizeof(HFSPlusCatalogFile))) { + continue; + } + filep = (HFSPlusCatalogFile *)datap; + filecount++; + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + if (filep->flags & kHFSDoNotFastDevPinMask) { + uncacheable++; + } + + // + // If the file does not have the FastDevPinnedMask set, we + // can ignore it and just go to the next record. 
+ // + if ((filep->flags & kHFSFastDevPinnedMask) == 0) { + continue; + } + } else if (filep->dataFork.totalBlocks == 0) { + continue; + } + + /* + * On a regular hdd, any file that has blocks inside + * the hot file space is recorded for later eviction. + * + * For now, resource forks are ignored. + * + * We don't do this on CF systems as there is no real + * hotfile area - we just pin/unpin blocks belonging to + * interesting files. + */ + if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && !hotextents(hfsmp, &filep->dataFork.extents[0])) { + continue; + } + cnid = filep->fileID; + + /* Skip over journal files. */ + if (cnid == hfsmp->hfs_jnlfileid || cnid == hfsmp->hfs_jnlinfoblkid) { + continue; + } + /* + * XXX - need to skip quota files as well. + */ + + uint32_t temp; + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + int rsrc = 0; + + temp = (uint32_t)starting_temp++; + if (filep->flags & kHFSAutoCandidateMask) { + temp += MAX_NORMAL_TEMP; + } + + /* use the data fork by default */ + if (filep->dataFork.totalBlocks == 0) { + /* + * but if empty, switch to rsrc as its likely + * a compressed file + */ + rsrc = 1; + } + + error = hfs_pin_catalog_rec (hfsmp, filep, rsrc); + if (error) + break; + + } else { + temp = HFC_MINIMUM_TEMPERATURE; + } + + /* Insert a hot file entry. */ + key->keyLength = HFC_KEYLENGTH; + key->temperature = temp; + key->fileID = cnid; + key->forkType = 0; + data = 0x3f3f3f3f; + error = BTInsertRecord(filefork, iterator, &record, record.itemSize); + if (error) { + printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); + error = MacToVFSError(error); + break; + } /* Insert the corresponding thread record. 
*/ key->keyLength = HFC_KEYLENGTH; key->temperature = HFC_LOOKUPTAG; key->fileID = cnid; key->forkType = 0; - data = HFC_MINIMUM_TEMPERATURE; + data = temp; error = BTInsertRecord(filefork, iterator, &record, record.itemSize); if (error) { printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); @@ -687,29 +1770,50 @@ hfs_recording_init(struct hfsmount *hfsmp) break; } inserted++; - } + } // end catalog iteration loop + + save_btree_user_info(hfsmp); (void) BTFlushPath(filefork); - hfs_unlock(VTOC(hfsmp->hfc_filevp)); -out0: - hfs_end_transaction(hfsmp); +recording_init_out: + + /* Unlock first, then pin after releasing everything else */ + if (hfsmp->hfc_filevp) { + hfs_unlock (VTOC(hfsmp->hfc_filevp)); + } + + if (started_scan) { + (void) BTScanTerminate (&scanstate, &data, &data, &data); + } + + if (started_tr) { + hfs_end_transaction(hfsmp); + } + #if HFC_VERBOSE - printf("hfs: %d files identified out of %d\n", inserted, filecount); + printf("hfs: %d files identified out of %d (freeblocks is now: %d)\n", inserted, filecount, hfsmp->hfs_hotfile_freeblks); + if (uncacheable) { + printf("hfs: %d files were marked as uncacheable\n", uncacheable); + } #endif -out1: - (void) BTScanTerminate(&scanstate, &data, &data, &data); -out2: - hfs_end_transaction(hfsmp); if (iterator) FREE(iterator, M_TEMP); + if (hfsmp->hfc_filevp) { + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL, vfs_context_kernel()); + } (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); hfsmp->hfc_filevp = NULL; } + if (error == 0) hfsmp->hfc_stage = HFC_IDLE; + /* Finally, unlock the HFC mutex */ + lck_mtx_unlock (&hfsmp->hfc_mutex); + return (error); } @@ -740,7 +1844,7 @@ hfs_hotfilesync(struct hfsmount *hfsmp, vfs_context_t ctx) break; case HFC_ADOPTION: - (void) hotfiles_adopt(hfsmp); + (void) hotfiles_adopt(hfsmp, ctx); break; default: break; @@ -778,6 +1882,20 @@ hfs_addhotfile(struct vnode *vp) return 
(error); } +static int +hf_ignore_process(const char *pname, size_t maxlen) +{ + if ( strncmp(pname, "mds", maxlen) == 0 + || strncmp(pname, "mdworker", maxlen) == 0 + || strncmp(pname, "mds_stores", maxlen) == 0 + || strncmp(pname, "makewhatis", maxlen) == 0) { + return 1; + } + + return 0; + +} + static int hfs_addhotfile_internal(struct vnode *vp) { @@ -813,20 +1931,59 @@ hfs_addhotfile_internal(struct vnode *vp) ffp = VTOF(vp); cp = VTOC(vp); - if ((ffp->ff_bytesread == 0) || - (ffp->ff_blocks == 0) || - (ffp->ff_size == 0) || - (ffp->ff_blocks > hotdata->maxblocks) || - (cp->c_flag & (C_DELETED | C_NOEXISTS)) || - (cp->c_bsdflags & UF_NODUMP) || - (cp->c_atime < hfsmp->hfc_timebase)) { - return (0); + if (cp->c_attr.ca_recflags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask)) { + // it's already a hotfile or can't be a hotfile... + return 0; } - temperature = ffp->ff_bytesread / ffp->ff_size; - if (temperature < hotdata->threshold) { - return (0); + if (vnode_isdir(vp) || vnode_issystem(vp) || (cp->c_flag & (C_DELETED | C_NOEXISTS))) { + return 0; + } + + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && vnode_isfastdevicecandidate(vp)) { + // + // On cooperative fusion (CF) systems we have different criteria for whether something + // can be pinned to the ssd. + // + if (cp->c_flag & (C_DELETED|C_NOEXISTS)) { + // + // dead files are definitely not worth caching + // + return 0; + } else if (ffp->ff_blocks == 0 && !(cp->c_bsdflags & UF_COMPRESSED) && !(cp->c_attr.ca_recflags & kHFSFastDevCandidateMask)) { + // + // empty files aren't worth caching but compressed ones might be, as are + // newly created files that live in WorthCaching directories... 
+ // + return 0; + } + + char pname[256]; + pname[0] = '\0'; + proc_selfname(pname, sizeof(pname)); + if (hf_ignore_process(pname, sizeof(pname))) { + // ignore i/o's from certain system daemons + return 0; + } + + temperature = cp->c_fileid; // in memory we just keep it sorted by file-id + } else { + // the normal hard drive based hotfile checks + if ((ffp->ff_bytesread == 0) || + (ffp->ff_blocks == 0) || + (ffp->ff_size == 0) || + (ffp->ff_blocks > hotdata->maxblocks) || + (cp->c_bsdflags & (UF_NODUMP | UF_COMPRESSED)) || + (cp->c_atime < hfsmp->hfc_timebase)) { + return (0); + } + + temperature = ffp->ff_bytesread / ffp->ff_size; + if (temperature < hotdata->threshold) { + return (0); + } } + /* * If there is room or this file is hotter than * the coldest one then add it to the list. @@ -834,72 +1991,222 @@ hfs_addhotfile_internal(struct vnode *vp) */ if ((hotdata->activefiles < hfsmp->hfc_maxfiles) || (hotdata->coldest == NULL) || - (temperature > hotdata->coldest->temperature)) { + (temperature >= hotdata->coldest->temperature)) { + ++hotdata->refcount; + entry = hf_getnewentry(hotdata); + entry->temperature = temperature; + entry->fileid = cp->c_fileid; + // + // if ffp->ff_blocks is zero, it might be compressed so make sure we record + // that there's at least one block. + // + entry->blocks = ffp->ff_blocks ? ffp->ff_blocks : 1; + if (hf_insert(hotdata, entry) == EEXIST) { + // entry is already present, don't need to add it again + entry->right = hotdata->freelist; + hotdata->freelist = entry; + } + --hotdata->refcount; + } + + return (0); +} + +/* + * Remove a hot file from the recording list. + * + * This can happen when a hot file becomes + * an active vnode (active hot files are + * not kept in the recording list until the + * end of the recording period). + * + * Note: the cnode is locked on entry. 
+ */ +int +hfs_removehotfile(struct vnode *vp) +{ + hotfile_data_t *hotdata; + hfsmount_t *hfsmp; + cnode_t *cp; + filefork_t *ffp; + u_int32_t temperature; + + hfsmp = VTOHFS(vp); + if (hfsmp->hfc_stage != HFC_RECORDING) + return (0); + + if ((!vnode_isreg(vp)) || vnode_issystem(vp)) { + return (0); + } + + ffp = VTOF(vp); + cp = VTOC(vp); + + if ((ffp->ff_bytesread == 0) || (ffp->ff_blocks == 0) || + (ffp->ff_size == 0) || (cp->c_atime < hfsmp->hfc_timebase)) { + return (0); + } + + lck_mtx_lock(&hfsmp->hfc_mutex); + if (hfsmp->hfc_stage != HFC_RECORDING) + goto out; + if ((hotdata = (hotfile_data_t *)hfsmp->hfc_recdata) == NULL) + goto out; + + temperature = ffp->ff_bytesread / ffp->ff_size; + if (temperature < hotdata->threshold) + goto out; + + if (hotdata->coldest && (temperature >= hotdata->coldest->temperature)) { + ++hotdata->refcount; + hf_delete(hotdata, VTOC(vp)->c_fileid, temperature); + --hotdata->refcount; + } +out: + lck_mtx_unlock(&hfsmp->hfc_mutex); + return (0); +} + +int +hfs_hotfile_deleted(__unused struct vnode *vp) +{ +#if 1 + return 0; +#else + // + // XXXdbg - this code, while it would work, would introduce a huge inefficiency + // to deleting files as the way it's written would require us to open + // the hotfile btree on every open, delete two records in it and then + // close the hotfile btree (which involves more writes). + // + // We actually can be lazy about deleting hotfile records for files + // that get deleted. When it's time to evict things, if we encounter + // a record that references a dead file (i.e. a fileid which no + // longer exists), the eviction code will remove the records. Likewise + // the code that scans the HotFile B-Tree at boot time to re-pin files + // will remove dead records. 
+ // + + hotfile_data_t *hotdata; + hfsmount_t *hfsmp; + cnode_t *cp; + filefork_t *filefork; + u_int32_t temperature; + BTreeIterator * iterator = NULL; + FSBufferDescriptor record; + HotFileKey *key; + u_int32_t data; + int error=0; + + cp = VTOC(vp); + if (cp == NULL || !(cp->c_attr.ca_recflags & kHFSFastDevPinnedMask)) { + return 0; + } + + hfsmp = VTOHFS(vp); + if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) { + return 0; + } + + if (hfc_btree_open(hfsmp, &hfsmp->hfc_filevp) != 0 || hfsmp->hfc_filevp == NULL) { + // either there is no hotfile info or it's damaged + return EINVAL; + } + + filefork = VTOF(hfsmp->hfc_filevp); + if (filefork == NULL) { + return 0; + } + + MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); + if (iterator == NULL) { + return ENOMEM; + } + bzero(iterator, sizeof(*iterator)); + key = (HotFileKey*) &iterator->key; + + record.bufferAddress = &data; + record.itemSize = sizeof(u_int32_t); + record.itemCount = 1; + + key->keyLength = HFC_KEYLENGTH; + key->temperature = HFC_LOOKUPTAG; + key->fileID = cp->c_fileid; + key->forkType = 0; + + lck_mtx_lock(&hfsmp->hfc_mutex); + (void) BTInvalidateHint(iterator); + if (BTSearchRecord(filefork, iterator, &record, NULL, iterator) == 0) { + temperature = key->temperature; + hfc_btree_delete_record(hfsmp, iterator, key); + } else { + //printf("hfs: hotfile_deleted: did not find fileid %d\n", cp->c_fileid); + error = ENOENT; + } + + if ((hotdata = (hotfile_data_t *)hfsmp->hfc_recdata) != NULL) { + // just in case, also make sure it's removed from the in-memory list as well ++hotdata->refcount; - entry = hf_getnewentry(hotdata); - entry->temperature = temperature; - entry->fileid = cp->c_fileid; - entry->blocks = ffp->ff_blocks; - hf_insert(hotdata, entry); + hf_delete(hotdata, cp->c_fileid, cp->c_fileid); --hotdata->refcount; } - return (0); + lck_mtx_unlock(&hfsmp->hfc_mutex); + FREE(iterator, M_TEMP); + + hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + + return error; +#endif } -/* - 
* Remove a hot file from the recording list. - * - * This can happen when a hot file becomes - * an active vnode (active hot files are - * not kept in the recording list until the - * end of the recording period). - * - * Note: the cnode is locked on entry. - */ int -hfs_removehotfile(struct vnode *vp) +hfs_hotfile_adjust_blocks(struct vnode *vp, int64_t num_blocks) { - hotfile_data_t *hotdata; hfsmount_t *hfsmp; - cnode_t *cp; - filefork_t *ffp; - u_int32_t temperature; + + if (vp == NULL) { + return 0; + } hfsmp = VTOHFS(vp); - if (hfsmp->hfc_stage != HFC_RECORDING) - return (0); - if ((!vnode_isreg(vp)) || vnode_issystem(vp)) { - return (0); + if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) || num_blocks == 0 || vp == NULL) { + return 0; } - ffp = VTOF(vp); - cp = VTOC(vp); - - if ((ffp->ff_bytesread == 0) || (ffp->ff_blocks == 0) || - (ffp->ff_size == 0) || (cp->c_atime < hfsmp->hfc_timebase)) { - return (0); + // + // if file is not HotFileCached or it has the CanNotHotFile cache + // bit set then there is nothing to do + // + if (!(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask) || (VTOC(vp)->c_attr.ca_recflags & kHFSDoNotFastDevPinMask)) { + // it's not a hot file or can't be one so don't bother tracking + return 0; } + + OSAddAtomic(num_blocks, &hfsmp->hfs_hotfile_blk_adjust); - lck_mtx_lock(&hfsmp->hfc_mutex); - if (hfsmp->hfc_stage != HFC_RECORDING) - goto out; - if ((hotdata = (hotfile_data_t *)hfsmp->hfc_recdata) == NULL) - goto out; + return (0); +} - temperature = ffp->ff_bytesread / ffp->ff_size; - if (temperature < hotdata->threshold) - goto out; +// +// Assumes hfsmp->hfc_mutex is LOCKED +// +static int +hfs_hotfile_cur_freeblks(hfsmount_t *hfsmp) +{ + if (hfsmp->hfc_stage < HFC_IDLE) { + return 0; + } + + int cur_blk_adjust = hfsmp->hfs_hotfile_blk_adjust; // snap a copy of this value - if (hotdata->coldest && (temperature >= hotdata->coldest->temperature)) { - ++hotdata->refcount; - hf_delete(hotdata, VTOC(vp)->c_fileid, temperature); - 
--hotdata->refcount; + if (cur_blk_adjust) { + OSAddAtomic(-cur_blk_adjust, &hfsmp->hfs_hotfile_blk_adjust); + hfsmp->hfs_hotfile_freeblks += cur_blk_adjust; } -out: - lck_mtx_unlock(&hfsmp->hfc_mutex); - return (0); + + return hfsmp->hfs_hotfile_freeblks; } @@ -971,10 +2278,15 @@ hotfiles_refine(struct hfsmount *hfsmp) int i; int error = 0; - if ((listp = (hotfilelist_t *)hfsmp->hfc_recdata) == NULL) return (0); + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // on ssd's we don't refine the temperature since the + // replacement algorithm is simply random + return 0; + } + mp = HFSTOVFS(hfsmp); MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); @@ -1016,12 +2328,12 @@ hotfiles_refine(struct hfsmount *hfsmp) * Update thread entry with latest temperature. */ error = BTUpdateRecord(filefork, iterator, - (IterateCallBackProcPtr)update_callback, - &listp->hfl_hotfile[i].hf_temperature); + (IterateCallBackProcPtr)update_callback, + &listp->hfl_hotfile[i].hf_temperature); if (error) { printf("hfs: hotfiles_refine: BTUpdateRecord failed %d (file %d)\n", error, key->fileID); error = MacToVFSError(error); - // break; + // break; } /* * Re-key entry with latest temperature. @@ -1049,7 +2361,6 @@ hotfiles_refine(struct hfsmount *hfsmp) error = MacToVFSError(error); break; } - /* * Invalidate this entry in the list. */ @@ -1075,7 +2386,7 @@ hotfiles_refine(struct hfsmount *hfsmp) * Requires that the hfc_mutex be held. */ static int -hotfiles_adopt(struct hfsmount *hfsmp) +hotfiles_adopt(struct hfsmount *hfsmp, vfs_context_t ctx) { BTreeIterator * iterator = NULL; struct vnode *vp; @@ -1091,6 +2402,14 @@ hotfiles_adopt(struct hfsmount *hfsmp) int last; int error = 0; int startedtrans = 0; + // + // all files in a given adoption phase have a temperature + // that starts at a random value and then increases linearly. 
+ // the idea is that during eviction, files that were adopted + // together will be evicted together + // + long starting_temp = random() % HF_TEMP_RANGE; + long temp_adjust = 0; if ((listp = (hotfilelist_t *)hfsmp->hfc_recdata) == NULL) return (0); @@ -1108,6 +2427,14 @@ hotfiles_adopt(struct hfsmount *hfsmp) return (ENOMEM); } +#if HFC_VERBOSE + printf("hfs:%s: hotfiles_adopt: (hfl_next: %d, hotfile start/end block: %d - %d; max/free: %d/%d; maxfiles: %d)\n", + hfsmp->vcbVN, + listp->hfl_next, + hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end, + hfsmp->hfs_hotfile_maxblks, hfsmp->hfs_hotfile_freeblks, hfsmp->hfc_maxfiles); +#endif + stage = hfsmp->hfc_stage; hfsmp->hfc_stage = HFC_BUSY; @@ -1128,17 +2455,30 @@ hotfiles_adopt(struct hfsmount *hfsmp) for (i = listp->hfl_next; (i < last) && (blksmoved < HFC_BLKSPERSYNC); ++i) { /* - * Skip invalid entries (already in hot area). + * Skip entries that aren't going to work. */ if (listp->hfl_hotfile[i].hf_temperature == 0) { - listp->hfl_next++; - continue; + //printf("hfs: zero temp on file-id %d\n", listp->hfl_hotfile[i].hf_fileid); + listp->hfl_next++; + continue; + } + if (listp->hfl_hotfile[i].hf_fileid == VTOC(hfsmp->hfc_filevp)->c_fileid) { + //printf("hfs: cannot adopt the hotfile b-tree itself! (file-id %d)\n", listp->hfl_hotfile[i].hf_fileid); + listp->hfl_next++; + continue; + } + if (listp->hfl_hotfile[i].hf_fileid < kHFSFirstUserCatalogNodeID) { + //printf("hfs: cannot adopt system files (file-id %d)\n", listp->hfl_hotfile[i].hf_fileid); + listp->hfl_next++; + continue; } + /* * Acquire a vnode for this file. 
*/ error = hfs_vget(hfsmp, listp->hfl_hotfile[i].hf_fileid, &vp, 0, 0); if (error) { + //printf("failed to get fileid %d (err %d)\n", listp->hfl_hotfile[i].hf_fileid, error); if (error == ENOENT) { error = 0; listp->hfl_next++; @@ -1146,16 +2486,24 @@ hotfiles_adopt(struct hfsmount *hfsmp) } break; } + + //printf("hfs: examining hotfile entry w/fileid %d, temp %d, blocks %d (HotFileCached: %s)\n", + // listp->hfl_hotfile[i].hf_fileid, listp->hfl_hotfile[i].hf_temperature, + // listp->hfl_hotfile[i].hf_blocks, + // (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask) ? "YES" : "NO"); + if (!vnode_isreg(vp)) { /* Symlinks are ineligible for adoption into the hotfile zone. */ - printf("hfs: hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid); + //printf("hfs: hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid); hfs_unlock(VTOC(vp)); vnode_put(vp); listp->hfl_hotfile[i].hf_temperature = 0; listp->hfl_next++; continue; /* stale entry, go to next */ } - if (hotextents(hfsmp, &VTOF(vp)->ff_extents[0])) { + if ( (VTOC(vp)->c_flag & (C_DELETED | C_NOEXISTS)) + || (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && hotextents(hfsmp, &VTOF(vp)->ff_extents[0])) + || (VTOC(vp)->c_attr.ca_recflags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask))) { hfs_unlock(VTOC(vp)); vnode_put(vp); listp->hfl_hotfile[i].hf_temperature = 0; @@ -1163,8 +2511,35 @@ hotfiles_adopt(struct hfsmount *hfsmp) listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks; continue; /* stale entry, go to next */ } + fileblocks = VTOF(vp)->ff_blocks; - if (fileblocks > hfsmp->hfs_hotfile_freeblks) { + + // + // for CF, if the file is empty (and not compressed) or it is too large, + // do not try to pin it. (note: if fileblocks == 0 but the file is marked + // as compressed, we may still be able to cache it). 
+ // + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && + ((fileblocks == 0 && !(VTOC(vp)->c_bsdflags & UF_COMPRESSED)) || + (unsigned int)fileblocks > (HFC_MAXIMUM_FILESIZE / (uint64_t)HFSTOVCB(hfsmp)->blockSize))) { + // don't try to cache something too large or that's zero-bytes + + vnode_clearfastdevicecandidate(vp); // turn off the fast-dev-candidate flag so we don't keep trying to cache it. + + hfs_unlock(VTOC(vp)); + vnode_put(vp); + listp->hfl_hotfile[i].hf_temperature = 0; + listp->hfl_next++; + listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks; + continue; /* entry is too big, just carry on with the next guy */ + } + + if (fileblocks > hfs_hotfile_cur_freeblks(hfsmp)) { + // + // No room for this file. Although eviction should have made space + // it's best that we check here as well since writes to existing + // hotfiles may have eaten up space since we performed eviction + // hfs_unlock(VTOC(vp)); vnode_put(vp); listp->hfl_next++; @@ -1174,6 +2549,10 @@ hotfiles_adopt(struct hfsmount *hfsmp) if ((blksmoved > 0) && (blksmoved + fileblocks) > HFC_BLKSPERSYNC) { + // + // we've done enough work, let's be nice to the system and + // stop until the next iteration + // hfs_unlock(VTOC(vp)); vnode_put(vp); break; /* adopt this entry the next time around */ @@ -1183,10 +2562,76 @@ hotfiles_adopt(struct hfsmount *hfsmp) else data = 0x3f3f3f3f; - error = hfs_relocate(vp, hfsmp->hfs_hotfile_start, kauth_cred_get(), current_proc()); + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // + // For CF we pin the blocks belonging to the file + // to the "fast" (aka ssd) media + // + uint32_t pinned_blocks; + + if (vnode_isautocandidate(vp)) { + VTOC(vp)->c_attr.ca_recflags |= kHFSAutoCandidateMask; + } + if (VTOC(vp)->c_attr.ca_recflags & kHFSAutoCandidateMask) { + // + // this moves auto-cached files to the higher tier + // of "temperatures" which means they are less likely + // to get evicted (user selected hotfiles will get + // evicted first in the theory that 
they change more + // frequently compared to system files) + // + temp_adjust = MAX_NORMAL_TEMP; + } else { + temp_adjust = 0; + } + + hfs_unlock(VTOC(vp)); // don't need an exclusive lock for this + hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + error = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, &pinned_blocks, ctx); + + fileblocks = pinned_blocks; + + // go back to an exclusive lock since we're going to modify the cnode again + hfs_unlock(VTOC(vp)); + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + } else { + // + // Old style hotfiles moves the data to the center (aka "hot") + // region of the disk + // + error = hfs_relocate(vp, hfsmp->hfs_hotfile_start, kauth_cred_get(), current_proc()); + } + + if (!error) { + VTOC(vp)->c_attr.ca_recflags |= kHFSFastDevPinnedMask; + VTOC(vp)->c_flag |= C_MODIFIED; + } else if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && error == EALREADY) { + // + // If hfs_pin_vnode() returned EALREADY then this file is not + // ever able to be hotfile cached the normal way. This can + // happen with compressed files which have their data stored + // in an extended attribute. We flag them so that we won't + // bother to try and hotfile cache them again the next time + // they're read. + // + VTOC(vp)->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask; + VTOC(vp)->c_flag |= C_MODIFIED; + } + hfs_unlock(VTOC(vp)); vnode_put(vp); if (error) { +#if HFC_VERBOSE + if (error != EALREADY) { + printf("hfs: hotfiles_adopt: could not relocate file %d (err %d)\n", listp->hfl_hotfile[i].hf_fileid, error); + } +#endif + + if (last < listp->hfl_count) { + last++; + } /* Move on to next item. 
*/ listp->hfl_next++; continue; @@ -1197,6 +2642,22 @@ hotfiles_adopt(struct hfsmount *hfsmp) /* Insert hot file entry */ key->keyLength = HFC_KEYLENGTH; + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // + // The "temperature" for a CF hotfile is simply a random + // number that we sequentially increment for each file in + // the set of files we're currently adopting. This has the + // nice property that all of the files we pin to the ssd + // in the current phase will sort together in the hotfile + // btree. When eviction time comes we will evict them + // together as well. This gives the eviction phase temporal + // locality - things written together get evicted together + // which is what ssd's like. + // + listp->hfl_hotfile[i].hf_temperature = (uint32_t)temp_adjust + starting_temp++; + } + key->temperature = listp->hfl_hotfile[i].hf_temperature; key->fileID = listp->hfl_hotfile[i].hf_fileid; key->forkType = 0; @@ -1210,8 +2671,9 @@ hotfiles_adopt(struct hfsmount *hfsmp) error = BTInsertRecord(filefork, iterator, &record, record.itemSize); if (error) { - printf("hfs: hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); + int orig_error = error; error = MacToVFSError(error); + printf("hfs: hotfiles_adopt:1: BTInsertRecord failed %d/%d (fileid %d)\n", error, orig_error, key->fileID); stage = HFC_IDLE; break; } @@ -1224,12 +2686,20 @@ hotfiles_adopt(struct hfsmount *hfsmp) data = listp->hfl_hotfile[i].hf_temperature; error = BTInsertRecord(filefork, iterator, &record, record.itemSize); if (error) { - printf("hfs: hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); + int orig_error = error; error = MacToVFSError(error); + printf("hfs: hotfiles_adopt:2: BTInsertRecord failed %d/%d (fileid %d)\n", error, orig_error, key->fileID); stage = HFC_IDLE; break; + } else { + (void) BTFlushPath(filefork); + blksmoved += fileblocks; + } + + listp->hfl_next++; + if (listp->hfl_next >= listp->hfl_count) { + break; } - (void) 
BTFlushPath(filefork); /* Transaction complete. */ if (startedtrans) { @@ -1237,12 +2707,7 @@ hotfiles_adopt(struct hfsmount *hfsmp) startedtrans = 0; } - blksmoved += fileblocks; - listp->hfl_next++; - if (listp->hfl_next >= listp->hfl_count) { - break; - } - if (hfsmp->hfs_hotfile_freeblks <= 0) { + if (hfs_hotfile_cur_freeblks(hfsmp) <= 0) { #if HFC_VERBOSE printf("hfs: hotfiles_adopt: free space exhausted (%d)\n", hfsmp->hfs_hotfile_freeblks); #endif @@ -1251,10 +2716,19 @@ hotfiles_adopt(struct hfsmount *hfsmp) } /* end for */ #if HFC_VERBOSE - printf("hfs: hotfiles_adopt: [%d] adopted %d blocks (%d left)\n", listp->hfl_next, blksmoved, listp->hfl_totalblocks); + printf("hfs: hotfiles_adopt: [%d] adopted %d blocks (%d files left)\n", listp->hfl_next, blksmoved, listp->hfl_count - i); #endif + if (!startedtrans) { + // start a txn so we'll save the btree summary info + if (hfs_start_transaction(hfsmp) == 0) { + startedtrans = 1; + } + } + /* Finish any outstanding transactions. */ if (startedtrans) { + save_btree_user_info(hfsmp); + (void) BTFlushPath(filefork); hfs_end_transaction(hfsmp); startedtrans = 0; @@ -1312,6 +2786,13 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) return (EPERM); } +#if HFC_VERBOSE + printf("hfs:%s: hotfiles_evict (hotfile start/end block: %d - %d; max/free: %d/%d; maxfiles: %d)\n", + hfsmp->vcbVN, + hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end, + hfsmp->hfs_hotfile_maxblks, hfsmp->hfs_hotfile_freeblks, hfsmp->hfc_maxfiles); +#endif + MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); if (iterator == NULL) { hfs_unlock(VTOC(hfsmp->hfc_filevp)); @@ -1329,6 +2810,10 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) filefork = VTOF(hfsmp->hfc_filevp); +#if HFC_VERBOSE + printf("hfs: hotfiles_evict: reclaim blks %d\n", listp->hfl_reclaimblks); +#endif + while (listp->hfl_reclaimblks > 0 && blksmoved < HFC_BLKSPERSYNC && filesmoved < HFC_FILESPERSYNC) { @@ -1376,7 +2861,7 @@ 
hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) * here. We do not want to move them. */ if (!vnode_isreg(vp)) { - printf("hfs: hotfiles_evict: huh, not a file %d\n", key->fileID); + //printf("hfs: hotfiles_evict: huh, not a file %d\n", key->fileID); hfs_unlock(VTOC(vp)); vnode_put(vp); goto delete; /* invalid entry, go to next */ @@ -1392,7 +2877,7 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) /* * Make sure file is in the hot area. */ - if (!hotextents(hfsmp, &VTOF(vp)->ff_extents[0])) { + if (!hotextents(hfsmp, &VTOF(vp)->ff_extents[0]) && !(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) { #if HFC_VERBOSE printf("hfs: hotfiles_evict: file %d isn't hot!\n", key->fileID); #endif @@ -1402,15 +2887,38 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) } /* - * Relocate file out of hot area. + * Relocate file out of hot area. On cooperative fusion (CF) that just + * means un-pinning the data from the ssd. For traditional hotfiles that means moving + * the file data out of the hot region of the disk. 
*/ - error = hfs_relocate(vp, HFSTOVCB(hfsmp)->nextAllocation, vfs_context_ucred(ctx), vfs_context_proc(ctx)); + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + uint32_t pinned_blocks; + + hfs_unlock(VTOC(vp)); // don't need an exclusive lock for this + hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + error = hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &pinned_blocks, ctx); + fileblocks = pinned_blocks; + + if (!error) { + // go back to an exclusive lock since we're going to modify the cnode again + hfs_unlock(VTOC(vp)); + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + } + } else { + error = hfs_relocate(vp, HFSTOVCB(hfsmp)->nextAllocation, vfs_context_ucred(ctx), vfs_context_proc(ctx)); + } if (error) { +#if HFC_VERBOSE printf("hfs: hotfiles_evict: err %d relocating file %d\n", error, key->fileID); +#endif hfs_unlock(VTOC(vp)); vnode_put(vp); bt_op = kBTreeNextRecord; goto next; /* go to next */ + } else { + VTOC(vp)->c_attr.ca_recflags &= ~kHFSFastDevPinnedMask; + VTOC(vp)->c_flag |= C_MODIFIED; } // @@ -1466,6 +2974,8 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) #endif /* Finish any outstanding transactions. 
*/ if (startedtrans) { + save_btree_user_info(hfsmp); + (void) BTFlushPath(filefork); hfs_end_transaction(hfsmp); startedtrans = 0; @@ -1511,6 +3021,13 @@ hotfiles_age(struct hfsmount *hfsmp) u_int16_t reclen; + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // + // hotfiles don't age on CF + // + return 0; + } + MALLOC(iterator, BTreeIterator *, 2 * sizeof(*iterator), M_TEMP, M_WAITOK); if (iterator == NULL) { error = ENOMEM; @@ -1690,6 +3207,12 @@ hotextents(struct hfsmount *hfsmp, HFSPlusExtentDescriptor * extents) */ static int hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp) +{ + return hfc_btree_open_ext(hfsmp, vpp, 0); +} + +static int +hfc_btree_open_ext(struct hfsmount *hfsmp, struct vnode **vpp, int ignore_btree_errs) { proc_t p; struct vnode *vp; @@ -1745,8 +3268,12 @@ hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp) /* Open the B-tree file for writing... */ error = BTOpenPath(VTOF(vp), (KeyCompareProcPtr) hfc_comparekeys); if (error) { - printf("hfs: hfc_btree_open: BTOpenPath error %d\n", error); - error = MacToVFSError(error); + if (!ignore_btree_errs) { + printf("hfs: hfc_btree_open: BTOpenPath error %d; filesize %lld\n", error, VTOF(vp)->ff_size); + error = MacToVFSError(error); + } else { + error = 0; + } } hfs_unlock(VTOC(vp)); @@ -1759,6 +3286,18 @@ hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp) if (!vnode_issystem(vp)) panic("hfs: hfc_btree_open: not a system file (vp = %p)", vp); + HotFilesInfo hotfileinfo; + + if (error == 0 && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) { + if ((BTGetUserData(VTOF(vp), &hotfileinfo, sizeof(hotfileinfo)) == 0) && (SWAP_BE32 (hotfileinfo.magic) == HFC_MAGIC)) { + if (hfsmp->hfs_hotfile_freeblks == 0) { + hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - SWAP_BE32 (hotfileinfo.usedblocks); + } + + hfs_hotfile_cur_freeblks(hfsmp); // factors in any adjustments that happened at run-time + } + } + return (error); } @@ -1775,7 +3314,7 @@ hfc_btree_close(struct hfsmount *hfsmp, 
struct vnode *vp) if (hfsmp->jnl) { - hfs_journal_flush(hfsmp, FALSE); + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL); } if (vnode_get(vp) == 0) { @@ -1793,6 +3332,106 @@ hfc_btree_close(struct hfsmount *hfsmp, struct vnode *vp) return (error); } +// +// Assumes that hfsmp->hfc_filevp points to the hotfile btree vnode +// (i.e. you called hfc_btree_open() ahead of time) +// +static int +hfc_btree_delete_record(struct hfsmount *hfsmp, BTreeIterator *iterator, HotFileKey *key) +{ + int error; + filefork_t *filefork=VTOF(hfsmp->hfc_filevp); + + /* Start a new transaction before calling BTree code. */ + if (hfs_start_transaction(hfsmp) != 0) { + return EINVAL; + } + + error = BTDeleteRecord(filefork, iterator); + if (error) { + error = MacToVFSError(error); + printf("hfs: failed to delete record for file-id %d : err %d\n", key->fileID, error); + goto out; + } + + int savedtemp; + savedtemp = key->temperature; + key->temperature = HFC_LOOKUPTAG; + error = BTDeleteRecord(filefork, iterator); + if (error) { + error = MacToVFSError(error); + printf("hfs:2: failed to delete record for file-id %d : err %d\n", key->fileID, error); + } + key->temperature = savedtemp; + + (void) BTFlushPath(filefork); + +out: + /* Transaction complete. */ + hfs_end_transaction(hfsmp); + + return error; +} + +// +// You have to have already opened the hotfile btree so +// that hfsmp->hfc_filevp is filled in. 
+// +static int +hfc_btree_delete(struct hfsmount *hfsmp) +{ + struct vnode *dvp = NULL; + vfs_context_t ctx = vfs_context_current(); + struct vnode_attr va; + struct componentname cname; + static char filename[] = HFC_FILENAME; + int error; + + error = VFS_ROOT(HFSTOVFS(hfsmp), &dvp, ctx); + if (error) { + return (error); + } + cname.cn_nameiop = DELETE; + cname.cn_flags = ISLASTCN; + cname.cn_context = ctx; + cname.cn_pnbuf = filename; + cname.cn_pnlen = sizeof(filename); + cname.cn_nameptr = filename; + cname.cn_namelen = strlen(filename); + cname.cn_hash = 0; + cname.cn_consume = 0; + + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VREG); + VATTR_SET(&va, va_mode, S_IFREG | S_IRUSR | S_IWUSR); + VATTR_SET(&va, va_uid, 0); + VATTR_SET(&va, va_gid, 0); + + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + + /* call ourselves directly, ignore the higher-level VFS file creation code */ + error = VNOP_REMOVE(dvp, hfsmp->hfc_filevp, &cname, 0, ctx); + if (error) { + printf("hfs: error %d removing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); + } + + hfs_end_transaction(hfsmp); + +out: + if (dvp) { + vnode_put(dvp); + dvp = NULL; + } + + return 0; +} + + + + /* * Create a hot files btree file. 
* @@ -1877,7 +3516,7 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent ((FndrFileInfo *)&cp->c_finderinfo[0])->fdFlags |= SWAP_BE16 (kIsInvisible + kNameLocked); - if (kmem_alloc(kernel_map, (vm_offset_t *)&buffer, nodesize)) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&buffer, nodesize, VM_KERN_MEMORY_FILE)) { error = ENOMEM; goto out; } @@ -1918,7 +3557,14 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent hotfileinfo->timeleft = 0; hotfileinfo->threshold = SWAP_BE32 (HFC_MINIMUM_TEMPERATURE); hotfileinfo->maxfileblks = SWAP_BE32 (HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize); - hotfileinfo->maxfilecnt = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT); + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + if (hfsmp->hfs_hotfile_freeblks == 0) { + hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks; + } + hotfileinfo->usedblocks = SWAP_BE32 (hfsmp->hfs_hotfile_maxblks - hfsmp->hfs_hotfile_freeblks); + } else { + hotfileinfo->maxfilecnt = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT); + } strlcpy((char *)hotfileinfo->tag, hfc_tag, sizeof hotfileinfo->tag); offset += kBTreeHeaderUserBytes; @@ -2049,7 +3695,7 @@ hf_lookup(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature) /* * Insert a hot file entry into the tree. 
*/ -static void +static int hf_insert(hotfile_data_t *hotdata, hotfile_entry_t *newentry) { hotfile_entry_t *entry = hotdata->rootentry; @@ -2060,44 +3706,48 @@ hf_insert(hotfile_data_t *hotdata, hotfile_entry_t *newentry) hotdata->rootentry = newentry; hotdata->coldest = newentry; hotdata->activefiles++; - return; + return 0; } while (entry) { if (temperature > entry->temperature) { - if (entry->right) + if (entry->right) { entry = entry->right; - else { + } else { entry->right = newentry; break; } } else if (temperature < entry->temperature) { - if (entry->left) + if (entry->left) { entry = entry->left; - else { + } else { entry->left = newentry; break; } } else if (fileid > entry->fileid) { - if (entry->right) + if (entry->right) { entry = entry->right; - else { + } else { if (entry->fileid != fileid) entry->right = newentry; break; } } else { - if (entry->left) + if (entry->left) { entry = entry->left; - else { - if (entry->fileid != fileid) + } else { + if (entry->fileid != fileid) { entry->left = newentry; + } else { + return EEXIST; + } break; } } } hotdata->activefiles++; + return 0; } /* @@ -2158,7 +3808,7 @@ hf_delete(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature) if (entry) { /* - * Reorginize the sub-trees spanning from our entry. + * Reorganize the sub-trees spanning from our entry. */ if ((next = entry->right)) { hotfile_entry_t *pnextl, *psub; @@ -2254,7 +3904,7 @@ hf_getsortedlist(hotfile_data_t * hotdata, hotfilelist_t *sortedlist) sortedlist->hfl_count = i; #if HFC_VERBOSE - printf("hfs: hf_getsortedlist returned %d entries\n", i); + printf("hfs: hf_getsortedlist returning %d entries w/%d total blocks\n", i, sortedlist->hfl_totalblocks); #endif } diff --git a/bsd/hfs/hfs_hotfiles.h b/bsd/hfs/hfs_hotfiles.h index 5c1ac29bb..7d8681954 100644 --- a/bsd/hfs/hfs_hotfiles.h +++ b/bsd/hfs/hfs_hotfiles.h @@ -40,11 +40,11 @@ /* * Temperature measurement constraints. 
*/ -#define HFC_DEFAULT_FILE_COUNT 1000 -#define HFC_DEFAULT_DURATION (3600 * 60) +#define HFC_DEFAULT_FILE_COUNT hfc_default_file_count +#define HFC_DEFAULT_DURATION hfc_default_duration #define HFC_CUMULATIVE_CYCLES 3 -#define HFC_MAXIMUM_FILE_COUNT 5000 -#define HFC_MAXIMUM_FILESIZE (10 * 1024 * 1024) +#define HFC_MAXIMUM_FILE_COUNT hfc_max_file_count +#define HFC_MAXIMUM_FILESIZE hfc_max_file_size #define HFC_MINIMUM_TEMPERATURE 24 @@ -95,9 +95,16 @@ struct HotFilesInfo { u_int32_t timeleft; /* time remaining in recording period (secs) */ u_int32_t threshold; u_int32_t maxfileblks; - u_int32_t maxfilecnt; + union { + u_int32_t _maxfilecnt; // on hdd's we track the max # of files + u_int32_t _usedblocks; // on ssd's we track how many blocks are used + } _u; u_int8_t tag[32]; }; + +#define usedblocks _u._usedblocks +#define maxfilecnt _u._maxfilecnt + typedef struct HotFilesInfo HotFilesInfo; #define HFC_MAGIC 0xFF28FF26 @@ -118,6 +125,11 @@ int hfs_recording_suspend (struct hfsmount *); int hfs_addhotfile (struct vnode *); int hfs_removehotfile (struct vnode *); +int hfs_hotfile_deleted(struct vnode *vp); // called when a file is deleted +void hfs_repin_hotfiles(struct hfsmount *); + +// call this to adjust the number of used hotfile blocks either up/down +int hfs_hotfile_adjust_blocks(struct vnode *vp, int64_t num_blocks); #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/hfs/hfs_kdebug.h b/bsd/hfs/hfs_kdebug.h index 324a15f16..827fc4f29 100644 --- a/bsd/hfs/hfs_kdebug.h +++ b/bsd/hfs/hfs_kdebug.h @@ -1,3 +1,33 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef HFS_KDEBUG_H_ +#define HFS_KDEBUG_H_ + #include /* @@ -22,9 +52,9 @@ enum { HFSDBG_BLOCK_DEALLOCATE = HFSDBG_CODE(5), /* 0x03080014 */ HFSDBG_READ_BITMAP_BLOCK = HFSDBG_CODE(6), /* 0x03080018 */ HFSDBG_RELEASE_BITMAP_BLOCK = HFSDBG_CODE(7), /* 0x0308001C */ - HFSDBG_ALLOC_CONTIG_BITMAP = HFSDBG_CODE(8), /* 0x03080020 */ + HFSDBG_FIND_CONTIG_BITMAP = HFSDBG_CODE(8), /* 0x03080020 */ HFSDBG_ALLOC_ANY_BITMAP = HFSDBG_CODE(9), /* 0x03080024 */ - HFSDBG_ALLOC_KNOWN_BITMAP = HFSDBG_CODE(10), /* 0x03080028 */ + HFSDBG_ALLOC_FIND_KNOWN = HFSDBG_CODE(10), /* 0x03080028 */ HFSDBG_MARK_ALLOC_BITMAP = HFSDBG_CODE(11), /* 0x0308002C */ HFSDBG_MARK_FREE_BITMAP = HFSDBG_CODE(12), /* 0x03080030 */ HFSDBG_BLOCK_FIND_CONTIG = HFSDBG_CODE(13), /* 0x03080034 */ @@ -38,7 +68,7 @@ enum { HFSDBG_SYNCER = HFSDBG_CODE(21), /* 0x03080054 */ HFSDBG_SYNCER_TIMED = HFSDBG_CODE(22), /* 0x03080058 */ HFSDBG_UNMAP_SCAN = HFSDBG_CODE(23), /* 0x0308005C */ - HFSDBG_UNMAP_SCAN_TRIM = HFSDBG_CODE(24) /* 0x03080060 */ + 
HFSDBG_UNMAP_SCAN_TRIM = HFSDBG_CODE(24), /* 0x03080060 */ }; /* @@ -62,10 +92,10 @@ enum { 5 HFSDBG_BLOCK_DEALLOCATE startBlock, blockCount, flags, 0, 0 ... err, 0, 0, 0, 0 6 HFSDBG_READ_BITMAP_BLOCK startBlock, 0, 0, 0, 0 ... err, 0, 0, 0, 0 7 HFSDBG_RELEASE_BITMAP_BLOCK dirty, 0, 0, 0, 0 ... 0, 0, 0, 0, 0 - 8 HFSDBG_ALLOC_CONTIG_BITMAP startBlock, minBlocks, maxBlocks, useMeta, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 + 8 HFSDBG_FIND_CONTIG_BITMAP startBlock, minBlocks, maxBlocks, useMeta, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 9 HFSDBG_ALLOC_ANY_BITMAP startBlock, endBlock, maxBlocks, useMeta, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 - 10 HFSDBG_ALLOC_KNOWN_BITMAP 0, 0, maxBlocks, 0, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 - 11 HFSDBG_MARK_ALLOC_BITMAP startBlock, blockCount, 0, 0, 0 ... err, 0, 0, 0, 0 + 10 HFSDBG_ALLOC_FIND_KNOWN 0, 0, maxBlocks, 0, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 + 11 HFSDBG_MARK_ALLOC_BITMAP startBlock, blockCount, flags, 0, 0 ... err, 0, 0, 0, 0 12 HFSDBG_MARK_FREE_BITMAP startBlock, blockCount, valid, 0, 0 ... err, 0, 0, 0, 0 13 HFSDBG_BLOCK_FIND_CONTIG startBlock, endBlock, minBlocks, maxBlocks, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 14 HFSDBG_IS_ALLOCATED startBlock, blockCount, stop, 0, 0 ... err, 0, actualBlockCount, 0, 0 @@ -80,3 +110,5 @@ enum { 23 HFSDBG_UNMAP_SCAN hfs_raw_dev, 0, 0, 0, 0 ... hfs_raw_dev, error, 0, 0, 0 24 HFSDBG_UNMAP_TRIM hfs_raw_dev, 0, 0, 0, 0 ... hfs_raw_dev, error, 0, 0, 0 */ + +#endif // HFS_KDEBUG_H_ diff --git a/bsd/hfs/hfs_link.c b/bsd/hfs/hfs_link.c index 667bad9c6..2dd7fda4b 100644 --- a/bsd/hfs/hfs_link.c +++ b/bsd/hfs/hfs_link.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2014 Apple Inc. All rights reserved. + * Copyright (c) 1999-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -388,10 +388,9 @@ hfs_makelink(struct hfsmount *hfsmp, struct vnode *src_vp, struct cnode *cp, } } } + cp->c_flag |= C_MODIFIED; cp->c_touch_chgtime = TRUE; - cp->c_flag |= C_FORCEUPDATE; } - dcp->c_flag |= C_FORCEUPDATE; } out: hfs_systemfile_unlock(hfsmp, lockflags); @@ -450,6 +449,8 @@ hfs_vnop_link(struct vnop_link_args *ap) if (v_type == VLNK) return (ENOTSUP); + cp = VTOC(vp); + if (v_type == VDIR) { #if CONFIG_HFS_DIRLINK /* Make sure our private directory exists. */ @@ -464,8 +465,10 @@ hfs_vnop_link(struct vnop_link_args *ap) if (hfsmp->jnl == NULL) { return (EPERM); } + /* Directory hardlinks also need the parent of the original directory. */ - if ((error = hfs_vget(hfsmp, hfs_currentparent(VTOC(vp)), &fdvp, 1, 0))) { + if ((error = hfs_vget(hfsmp, hfs_currentparent(cp, /* have_lock: */ false), + &fdvp, 1, 0))) { return (error); } #else @@ -503,9 +506,8 @@ hfs_vnop_link(struct vnop_link_args *ap) } } tdcp = VTOC(tdvp); - cp = VTOC(vp); /* grab the parent CNID from originlist after grabbing cnode locks */ - parentcnid = hfs_currentparent(cp); + parentcnid = hfs_currentparent(cp, /* have_lock: */ true); /* * Make sure we didn't race the src or dst parent directories with rmdir. 
@@ -607,6 +609,7 @@ hfs_vnop_link(struct vnop_link_args *ap) lockflags = 0; cp->c_linkcount++; + cp->c_flag |= C_MODIFIED; cp->c_touch_chgtime = TRUE; error = hfs_makelink(hfsmp, vp, cp, tdcp, cnp); if (error) { @@ -633,10 +636,10 @@ hfs_vnop_link(struct vnop_link_args *ap) } } tdcp->c_dirchangecnt++; + tdcp->c_flag |= C_MODIFIED; hfs_incr_gencount(tdcp); tdcp->c_touch_chgtime = TRUE; tdcp->c_touch_modtime = TRUE; - tdcp->c_flag |= C_FORCEUPDATE; error = hfs_update(tdvp, 0); if (error) { @@ -652,8 +655,8 @@ hfs_vnop_link(struct vnop_link_args *ap) ((fdcp->c_attr.ca_recflags & kHFSHasChildLinkMask) == 0)) { fdcp->c_attr.ca_recflags |= kHFSHasChildLinkMask; + fdcp->c_flag |= C_MODIFIED; fdcp->c_touch_chgtime = TRUE; - fdcp->c_flag |= C_FORCEUPDATE; error = hfs_update(fdvp, 0); if (error) { if (error != EIO && error != ENXIO) { @@ -673,10 +676,8 @@ hfs_vnop_link(struct vnop_link_args *ap) hfs_volupdate(hfsmp, VOL_MKFILE, (tdcp->c_cnid == kHFSRootFolderID)); } - /* Make sure update occurs inside transaction */ - cp->c_flag |= C_FORCEUPDATE; - if (error == 0 && (ret = hfs_update(vp, TRUE)) != 0) { + if (error == 0 && (ret = hfs_update(vp, 0)) != 0) { if (ret != EIO && ret != ENXIO) printf("hfs_vnop_link: error %d updating vp @ %p\n", ret, vp); hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); @@ -794,9 +795,9 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c dcp->c_dirchangecnt++; hfs_incr_gencount(dcp); microtime(&tv); - dcp->c_ctime = tv.tv_sec; - dcp->c_mtime = tv.tv_sec; - (void ) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + dcp->c_touch_chgtime = dcp->c_touch_modtime = true; + dcp->c_flag |= C_MODIFIED; + hfs_update(dcp->c_vp, 0); /* * If this is the last link then we need to process the inode. 
@@ -877,7 +878,7 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c firstlink == cndesc.cd_cnid) { if (setfirstlink(hfsmp, cp->c_fileid, nextlinkid) == 0) cp->c_attr.ca_recflags |= kHFSHasAttributesMask; - } else if (vnode_isreg(vp) && cp->c_attr.ca_firstlink == cndesc.cd_cnid) { + } else if (cp->c_attr.ca_firstlink == cndesc.cd_cnid) { cp->c_attr.ca_firstlink = nextlinkid; } /* Update previous link. */ @@ -888,22 +889,23 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c if (nextlinkid) { (void) cat_update_siblinglinks(hfsmp, nextlinkid, prevlinkid, HFS_IGNORABLE_LINK); } - - /* - * The call to cat_releasedesc below will only release the name buffer; - * it does not zero out the rest of the fields in the 'cat_desc' data structure. - * - * As a result, since there are still other links at this point, we need - * to make the current cnode descriptor point to the raw inode. If a path-based - * system call comes along first, it will replace the descriptor with a valid link - * ID. If a userland process already has a file descriptor open, then they will - * bypass that lookup, though. Replacing the descriptor CNID with the raw - * inode will force it to generate a new full path. - */ - cp->c_cnid = cp->c_fileid; - } + /* + * The call to cat_releasedesc below will only release the name + * buffer; it does not zero out the rest of the fields in the + * 'cat_desc' data structure. + * + * As a result, since there are still other links at this point, + * we need to make the current cnode descriptor point to the raw + * inode. If a path-based system call comes along first, it will + * replace the descriptor with a valid link ID. If a userland + * process already has a file descriptor open, then they will + * bypass that lookup, though. Replacing the descriptor CNID with + * the raw inode will force it to generate a new full path. + */ + cp->c_cnid = cp->c_fileid; + /* Push new link count to disk. 
*/ cp->c_ctime = tv.tv_sec; (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); @@ -1198,11 +1200,22 @@ hfs_relorigin(struct cnode *cp, cnid_t parentcnid) thread_t thread = current_thread(); TAILQ_FOREACH_SAFE(origin, &cp->c_originlist, lo_link, prev) { - if ((origin->lo_thread == thread) || - (origin->lo_parentcnid == parentcnid)) { + if (origin->lo_thread == thread) { TAILQ_REMOVE(&cp->c_originlist, origin, lo_link); FREE(origin, M_TEMP); break; + } else if (origin->lo_parentcnid == parentcnid) { + /* + * If the threads don't match, then we don't want to + * delete the entry because that might cause other threads + * to fall back and use whatever happens to be in + * c_parentcnid or the wrong link ID. By setting the + * values to zero here, it should serve as an indication + * that the path is no longer valid and that's better than + * using a random parent ID or link ID. + */ + origin->lo_parentcnid = 0; + origin->lo_cnid = 0; } } } @@ -1222,7 +1235,7 @@ hfs_haslinkorigin(cnode_t *cp) TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) { if (origin->lo_thread == thread) { - return (1); + return origin->lo_cnid != 0; } } } @@ -1236,17 +1249,25 @@ hfs_haslinkorigin(cnode_t *cp) */ __private_extern__ cnid_t -hfs_currentparent(cnode_t *cp) +hfs_currentparent(cnode_t *cp, bool have_lock) { if (cp->c_flag & C_HARDLINK) { + if (!have_lock) + hfs_lock_always(cp, HFS_SHARED_LOCK); + linkorigin_t *origin; thread_t thread = current_thread(); - + TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) { if (origin->lo_thread == thread) { + if (!have_lock) + hfs_unlock(cp); return (origin->lo_parentcnid); } } + + if (!have_lock) + hfs_unlock(cp); } return (cp->c_parentcnid); } @@ -1387,3 +1408,24 @@ getfirstlink(struct hfsmount * hfsmp, cnid_t fileid, cnid_t *firstlink) return MacToVFSError(result); } +errno_t hfs_first_link(hfsmount_t *hfsmp, cnode_t *cp, cnid_t *link_id) +{ + errno_t error = 0; + + if (S_ISDIR(cp->c_mode)) { + int lockf = hfs_systemfile_lock(hfsmp, 
SFL_ATTRIBUTE, HFS_SHARED_LOCK); + + error = getfirstlink(hfsmp, cp->c_fileid, link_id); + + hfs_systemfile_unlock(hfsmp, lockf); + } else { + if (cp->c_attr.ca_firstlink) + *link_id = cp->c_attr.ca_firstlink; + else { + // This can happen if the cnode has been deleted + error = ENOENT; + } + } + + return error; +} diff --git a/bsd/hfs/hfs_lookup.c b/bsd/hfs/hfs_lookup.c index e198d3190..c46bce7c7 100644 --- a/bsd/hfs/hfs_lookup.c +++ b/bsd/hfs/hfs_lookup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2012 Apple Inc. All rights reserved. + * Copyright (c) 1999-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include @@ -333,7 +334,8 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int * Directory hard links can have multiple parents so * find the appropriate parent for the current thread. */ - if ((retval = hfs_vget(hfsmp, hfs_currentparent(VTOC(dvp)), &tvp, 0, 0))) { + if ((retval = hfs_vget(hfsmp, hfs_currentparent(VTOC(dvp), + /* have_lock: */ false), &tvp, 0, 0))) { goto exit; } *cnode_locked = 1; @@ -419,12 +421,9 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int * Save the origin info for file and directory hardlinks. Directory hardlinks * need the origin for '..' lookups, and file hardlinks need it to ensure that * competing lookups do not cause us to vend different hardlinks than the ones requested. - * We want to restrict saving the cache entries to LOOKUP namei operations, since - * we're really doing this to protect getattr. 
*/ - if ((nameiop == LOOKUP) && (VTOC(tvp)->c_flag & C_HARDLINK)) { + if (ISSET(VTOC(tvp)->c_flag, C_HARDLINK)) hfs_savelinkorigin(VTOC(tvp), VTOC(dvp)->c_fileid); - } *cnode_locked = 1; *vpp = tvp; } @@ -473,12 +472,19 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap) int flags = cnp->cn_flags; int force_casesensitive_lookup = proc_is_forcing_hfs_case_sensitivity(p); int cnode_locked; + int fastdev_candidate = 0; + int auto_candidate = 0; *vpp = NULL; dcp = VTOC(dvp); - hfsmp = VTOHFS(dvp); + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && (vnode_isfastdevicecandidate(dvp) || (dcp->c_attr.ca_recflags & kHFSFastDevCandidateMask)) ){ + fastdev_candidate = 1; + auto_candidate = (vnode_isautocandidate(dvp) || (dcp->c_attr.ca_recflags & kHFSAutoCandidateMask)); + } + + /* * Lookup an entry in the cache * @@ -513,7 +519,10 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap) goto exit; } - + if (cp->c_attr.ca_recflags & kHFSDoNotFastDevPinMask) { + fastdev_candidate = 0; + } + /* * If this is a hard-link vnode then we need to update * the name (of the link), the parent ID, the cnid, the @@ -603,12 +612,8 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap) * Save the origin info for file and directory hardlinks. Directory hardlinks * need the origin for '..' lookups, and file hardlinks need it to ensure that * competing lookups do not cause us to vend different hardlinks than the ones requested. - * We want to restrict saving the cache entries to LOOKUP namei operations, since - * we're really doing this to protect getattr. */ - if (cnp->cn_nameiop == LOOKUP) { - hfs_savelinkorigin(cp, dcp->c_fileid); - } + hfs_savelinkorigin(cp, dcp->c_fileid); } else { /* If the fileID does not match then do NOT replace the descriptor! 
*/ @@ -650,9 +655,25 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap) error = hfs_lookup(dvp, vpp, cnp, &cnode_locked, force_casesensitive_lookup); + if (*vpp && (VTOC(*vpp)->c_attr.ca_recflags & kHFSDoNotFastDevPinMask)) { + fastdev_candidate = 0; + } + + if (*vpp && (VTOC(*vpp)->c_attr.ca_recflags & kHFSAutoCandidateMask)) { + //printf("vp %s / %d is an auto-candidate\n", (*vpp)->v_name ? (*vpp)->v_name : "no-name", VTOC(*vpp)->c_fileid); + auto_candidate = 1; + } + if (cnode_locked) hfs_unlock(VTOC(*vpp)); exit: + if (*vpp && fastdev_candidate && (*vpp)->v_parent == dvp && !(vnode_isfastdevicecandidate(*vpp))) { + vnode_setfastdevicecandidate(*vpp); + if (auto_candidate) { + vnode_setautocandidate(*vpp); + } + } + { uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread()); diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index f09bdc7d2..78719c069 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -56,6 +56,8 @@ #include #include +#include + #include #include @@ -64,6 +66,8 @@ #include #include +#include + #include #include "hfs.h" @@ -76,6 +80,7 @@ #include "hfs_cnode.h" #include "hfs_dbg.h" + #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2))) enum { @@ -85,9 +90,12 @@ enum { /* from bsd/hfs/hfs_vfsops.c */ extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); +/* from hfs_hotfiles.c */ +extern int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid, + uint8_t forktype, uint32_t *pinned); + static int hfs_clonefile(struct vnode *, int, int, int); static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *); -static int hfs_minorupdate(struct vnode *vp); static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context); /* from bsd/hfs/hfs_vnops.c */ @@ -141,13 +149,13 @@ hfs_vnop_read(struct vnop_read_args *ap) if (offset < 0) return (EINVAL); /* cant read from a 
negative offset */ +#if SECURE_KERNEL if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) == (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) { /* Don't allow unencrypted io request from user space */ return EPERM; } - - +#endif #if HFS_COMPRESSION if (VNODE_IS_RSRC(vp)) { @@ -159,12 +167,19 @@ hfs_vnop_read(struct vnop_read_args *ap) int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ if (compressed) { retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp)); + if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) { + (void) hfs_addhotfile(vp); + } if (compressed) { if (retval == 0) { /* successful read, update the access time */ VTOC(vp)->c_touch_acctime = TRUE; - /* compressed files are not hot file candidates */ + // + // compressed files are not traditional hot file candidates + // but they may be for CF (which ignores the ff_bytesread + // field) + // if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { VTOF(vp)->ff_bytesread = 0; } @@ -193,7 +208,8 @@ hfs_vnop_read(struct vnop_read_args *ap) if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) { goto exit; } -#endif + +#endif // CONFIG_PROTECT /* * If this read request originated from a syscall (as opposed to @@ -264,6 +280,16 @@ hfs_vnop_read(struct vnop_read_args *ap) } else { fp->ff_bytesread += bytesread; } + + if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) { + // + // We don't add hotfiles for processes doing IO_EVTONLY I/O + // on the assumption that they're system processes such as + // mdworker which scan everything in the system (and thus + // do not represent user-initiated access to files) + // + (void) hfs_addhotfile(vp); + } if (took_cnode_lock) hfs_unlock(cp); } @@ -284,6 +310,30 @@ hfs_vnop_read(struct vnop_read_args *ap) return (retval); } +/* + * Ideally, this wouldn't be necessary; the cluster code should be + * able to handle this on the read-side. See . 
+ */ +static errno_t hfs_zero_eof_page(vnode_t vp, off_t zero_up_to) +{ + assert(VTOC(vp)->c_lockowner != current_thread()); + assert(VTOC(vp)->c_truncatelockowner == current_thread()); + + struct filefork *fp = VTOF(vp); + + if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) { + // Nothing to do + return 0; + } + + zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size)); + + /* N.B. At present, @zero_up_to is not important because the cluster + code will always zero up to the end of the page anyway. */ + return cluster_write(vp, NULL, fp->ff_size, zero_up_to, + fp->ff_size, 0, IO_HEADZEROFILL); +} + /* * Write data to a file. */ @@ -314,7 +364,6 @@ hfs_vnop_write(struct vnop_write_args *ap) int took_truncate_lock = 0; int io_return_on_throttle = 0; int throttled_count = 0; - struct rl_entry *invalid_range; #if HFS_COMPRESSION if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */ @@ -347,12 +396,13 @@ hfs_vnop_write(struct vnop_write_args *ap) #endif +#if SECURE_KERNEL if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) == (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) { /* Don't allow unencrypted io request from user space */ return EPERM; } - +#endif resid = uio_resid(uio); offset = uio_offset(uio); @@ -419,9 +469,12 @@ hfs_vnop_write(struct vnop_write_args *ap) goto exit; } + cred = vfs_context_ucred(ap->a_context); + if (cred && suser(cred, NULL) != 0) + eflags |= kEFReserveMask; + origFileSize = fp->ff_size; writelimit = offset + resid; - filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; /* * We may need an exclusive truncate lock for several reasons, all @@ -439,16 +492,11 @@ hfs_vnop_write(struct vnop_write_args *ap) * old EOF and new EOF are in the same block, we still need to * protect that range of bytes until they are written for the * first time. - * 3. The write overlaps some invalid ranges (delayed zero fill; that - * part of the file has been allocated, but not yet written). 
* * If we had a shared lock with the above cases, we need to try to upgrade * to an exclusive lock. If the upgrade fails, we will lose the shared * lock, and will need to take the truncate lock again; the took_truncate_lock * flag will still be set, causing us to try for an exclusive lock next time. - * - * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode - * lock is held, since it protects the range lists. */ if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) && ((fp->ff_unallocblocks != 0) || @@ -471,26 +519,16 @@ hfs_vnop_write(struct vnop_write_args *ap) goto exit; } cnode_locked = 1; - - /* - * Now that we have the cnode lock, see if there are delayed zero fill ranges - * overlapping our write. If so, we need the truncate lock exclusive (see above). - */ - if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) && - (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) { - /* - * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes - * a deadlock, rather than simply returning failure. (That is, it apparently does - * not behave like a "try_lock"). Since this condition is rare, just drop the - * cnode lock and try again. Since took_truncate_lock is set, we will - * automatically take the truncate lock exclusive. 
- */ - hfs_unlock(cp); - cnode_locked = 0; - hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); - goto again; + + filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize); + + if (offset > filebytes + && (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)), + hfsmp->blockSize) < offset - filebytes)) { + retval = ENOSPC; + goto exit; } - + KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START, (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); @@ -500,7 +538,6 @@ hfs_vnop_write(struct vnop_write_args *ap) goto sizeok; } - cred = vfs_context_ucred(ap->a_context); bytesToAdd = writelimit - filebytes; #if QUOTA @@ -517,8 +554,6 @@ hfs_vnop_write(struct vnop_write_args *ap) while (writelimit > filebytes) { bytesToAdd = writelimit - filebytes; - if (cred && suser(cred, NULL) != 0) - eflags |= kEFReserveMask; /* Protect extents b-tree and allocation bitmap */ lockflags = SFL_BITMAP; @@ -543,7 +578,7 @@ hfs_vnop_write(struct vnop_write_args *ap) KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE, (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); } - (void) hfs_update(vp, TRUE); + (void) hfs_update(vp, 0); (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); (void) hfs_end_transaction(hfsmp); @@ -561,133 +596,45 @@ hfs_vnop_write(struct vnop_write_args *ap) sizeok: if (retval == E_NONE) { off_t filesize; - off_t zero_off; - off_t tail_off; - off_t inval_start; - off_t inval_end; - off_t io_start; + off_t head_off; int lflag; - if (writelimit > fp->ff_size) + if (writelimit > fp->ff_size) { filesize = writelimit; - else + struct timeval tv; + rl_add(fp->ff_size, writelimit - 1 , &fp->ff_invalidranges); + microuptime(&tv); + cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; + } else filesize = fp->ff_size; lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY); - if (offset <= fp->ff_size) { - zero_off = offset & ~PAGE_MASK_64; - - /* Check to see whether the area between the zero_offset and the start - of the transfer to see whether 
is invalid and should be zero-filled - as part of the transfer: - */ - if (offset > zero_off) { - if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP) - lflag |= IO_HEADZEROFILL; - } - } else { - off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64; - - /* The bytes between fp->ff_size and uio->uio_offset must never be - read without being zeroed. The current last block is filled with zeroes - if it holds valid data but in all cases merely do a little bookkeeping - to track the area from the end of the current last page to the start of - the area actually written. For the same reason only the bytes up to the - start of the page where this write will start is invalidated; any remainder - before uio->uio_offset is explicitly zeroed as part of the cluster_write. - - Note that inval_start, the start of the page after the current EOF, - may be past the start of the write, in which case the zeroing - will be handled by the cluser_write of the actual data. - */ - inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64; - inval_end = offset & ~PAGE_MASK_64; - zero_off = fp->ff_size; - - if ((fp->ff_size & PAGE_MASK_64) && - (rl_scan(&fp->ff_invalidranges, - eof_page_base, - fp->ff_size - 1, - &invalid_range) != RL_NOOVERLAP)) { - /* The page containing the EOF is not valid, so the - entire page must be made inaccessible now. If the write - starts on a page beyond the page containing the eof - (inval_end > eof_page_base), add the - whole page to the range to be invalidated. Otherwise - (i.e. 
if the write starts on the same page), zero-fill - the entire page explicitly now: - */ - if (inval_end > eof_page_base) { - inval_start = eof_page_base; - } else { - zero_off = eof_page_base; - }; - }; - - if (inval_start < inval_end) { - struct timeval tv; - /* There's some range of data that's going to be marked invalid */ - - if (zero_off < inval_start) { - /* The pages between inval_start and inval_end are going to be invalidated, - and the actual write will start on a page past inval_end. Now's the last - chance to zero-fill the page containing the EOF: - */ - hfs_unlock(cp); - cnode_locked = 0; - retval = cluster_write(vp, (uio_t) 0, - fp->ff_size, inval_start, - zero_off, (off_t)0, - lflag | IO_HEADZEROFILL | IO_NOZERODIRTY); - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cnode_locked = 1; - if (retval) goto ioerr_exit; - offset = uio_offset(uio); - }; - - /* Mark the remaining area of the newly allocated space as invalid: */ - rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges); - microuptime(&tv); - cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; - zero_off = fp->ff_size = inval_end; - }; - - if (offset > zero_off) lflag |= IO_HEADZEROFILL; - }; - - /* Check to see whether the area between the end of the write and the end of - the page it falls in is invalid and should be zero-filled as part of the transfer: - */ - tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64; - if (tail_off > filesize) tail_off = filesize; - if (tail_off > writelimit) { - if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) { - lflag |= IO_TAILZEROFILL; - }; - }; - /* - * if the write starts beyond the current EOF (possibly advanced in the - * zeroing of the last block, above), then we'll zero fill from the current EOF - * to where the write begins: - * - * NOTE: If (and ONLY if) the portion of the file about to be written is - * before the current EOF it might be marked as invalid now and must be - * made 
readable (removed from the invalid ranges) before cluster_write - * tries to write it: + * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except + * for one case below). For the regions that lie before the + * beginning and after the end of this write that are in the + * same page, we let the cluster code handle zeroing that out + * if necessary. If those areas are not cached, the cluster + * code will try and read those areas in, and in the case + * where those regions have never been written to, + * hfs_vnop_blockmap will consult the invalid ranges and then + * indicate that. The cluster code will zero out those areas. */ - io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset; - if (io_start < fp->ff_size) { - off_t io_end; - io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit; - rl_remove(io_start, io_end - 1, &fp->ff_invalidranges); - }; + head_off = trunc_page_64(offset); + + if (head_off < offset && head_off >= fp->ff_size) { + /* + * The first page is beyond current EOF, so as an + * optimisation, we can pass IO_HEADZEROFILL. + */ + lflag |= IO_HEADZEROFILL; + } hfs_unlock(cp); cnode_locked = 0; - + /* * We need to tell UBC the fork's new size BEFORE calling * cluster_write, in case any of the new pages need to be @@ -708,11 +655,14 @@ hfs_vnop_write(struct vnop_write_args *ap) * zero, unless we are extending the file via write. 
*/ if (filesize > fp->ff_size) { + retval = hfs_zero_eof_page(vp, offset); + if (retval) + goto exit; fp->ff_new_size = filesize; ubc_setsize(vp, filesize); } - retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off, - tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle); + retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off, + 0, lflag | IO_NOZERODIRTY | io_return_on_throttle); if (retval) { fp->ff_new_size = 0; /* no longer extending; use ff_size */ @@ -770,17 +720,17 @@ hfs_vnop_write(struct vnop_write_args *ap) // XXXdbg - see radar 4871353 for more info { if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) { - VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); + hfs_flush(hfsmp, HFS_FLUSH_CACHE); } } ioerr_exit: - if (resid > uio_resid(uio)) { - if (!cnode_locked) { - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cnode_locked = 1; - } + if (!cnode_locked) { + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + cnode_locked = 1; + } + if (resid > uio_resid(uio)) { cp->c_touch_chgtime = TRUE; cp->c_touch_modtime = TRUE; hfs_incr_gencount(cp); @@ -806,7 +756,7 @@ hfs_vnop_write(struct vnop_write_args *ap) filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; } } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) - retval = hfs_update(vp, TRUE); + retval = hfs_update(vp, 0); /* Updating vcbWrCnt doesn't need to be atomic. 
*/ hfsmp->vcbWrCnt++; @@ -814,9 +764,15 @@ hfs_vnop_write(struct vnop_write_args *ap) KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END, (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); exit: + if (retval && took_truncate_lock + && cp->c_truncatelockowner == current_thread()) { + fp->ff_new_size = 0; + rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges); + } + if (cnode_locked) hfs_unlock(cp); - + if (took_truncate_lock) { hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); } @@ -1744,8 +1700,8 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { cp->c_bsdflags &= ~UF_TRACKED; // mark the cnodes dirty - cp->c_flag |= C_MODIFIED | C_FORCEUPDATE; - to_cp->c_flag |= C_MODIFIED | C_FORCEUPDATE; + cp->c_flag |= C_MODIFIED; + to_cp->c_flag |= C_MODIFIED; int lockflags; if ((error = hfs_start_transaction(hfsmp)) == 0) { @@ -1849,6 +1805,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { case HFS_RESIZE_VOLUME: { u_int64_t newsize; u_int64_t cursize; + int ret; vfsp = vfs_statfs(HFSTOVFS(hfsmp)); if (suser(cred, NULL) && @@ -1865,14 +1822,18 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { } newsize = *(u_int64_t *)ap->a_data; cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; - + + if (newsize == cursize) { + return (0); + } + IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize); if (newsize > cursize) { - return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context); - } else if (newsize < cursize) { - return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context); + ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context); } else { - return (0); + ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context); } + IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize); + return (ret); } case HFS_CHANGE_NEXT_ALLOCATION: { int error = 0; /* Assume success */ @@ -2105,22 +2066,6 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { return hfs_thaw(hfsmp, current_proc()); } - case HFS_BULKACCESS_FSCTL: { - int size; - - if (hfsmp->hfs_flags & 
HFS_STANDARD) { - return EINVAL; - } - - if (is64bit) { - size = sizeof(struct user64_access_t); - } else { - size = sizeof(struct user32_access_t); - } - - return do_bulk_access_check(hfsmp, vp, ap, size, context); - } - case HFS_EXT_BULKACCESS_FSCTL: { int size; @@ -2427,7 +2372,22 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { } error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); if (error == 0) { - error = hfs_fsync(vp, MNT_WAIT, TRUE, p); + error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p); + hfs_unlock(VTOC(vp)); + } + + return error; + } + + case F_BARRIERFSYNC: { + int error; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error == 0) { + error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p); hfs_unlock(VTOC(vp)); } @@ -2562,7 +2522,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { hfs_lock_mount (hfsmp); bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo)); hfs_unlock_mount (hfsmp); - (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); + (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); break; case HFS_GET_BOOT_INFO: @@ -2604,8 +2564,8 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { jnl_start = 0; jnl_size = 0; } else { - jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset; - jnl_size = (off_t)hfsmp->jnl_size; + jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset; + jnl_size = hfsmp->jnl_size; } jip->jstart = jnl_start; @@ -2735,7 +2695,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { hfs_journal_lock(hfsmp); /* Flush the journal and wait for all I/Os to finish up */ - error = hfs_journal_flush(hfsmp, TRUE); + error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); if (error) { hfs_journal_unlock(hfsmp); return error; @@ -2775,6 +2735,91 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { break; } + case HFS_SET_HOTFILE_STATE: { + int error; + struct 
cnode *cp = VTOC(vp); + uint32_t hf_state = *((uint32_t*)ap->a_data); + uint32_t num_unpinned = 0; + + error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error) { + return error; + } + + // printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name); + if (hf_state == HFS_MARK_FASTDEVCANDIDATE) { + vnode_setfastdevicecandidate(vp); + + cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask; + cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask; + cp->c_flag |= C_MODIFIED; + } else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) { + vnode_clearfastdevicecandidate(vp); + hfs_removehotfile(vp); + + if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) { + hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned, ap->a_context); + } + + if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) { + cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask; + } + cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask); + cp->c_flag |= C_MODIFIED; + + } else { + error = EINVAL; + } + + if (num_unpinned != 0) { + lck_mtx_lock(&hfsmp->hfc_mutex); + hfsmp->hfs_hotfile_freeblks += num_unpinned; + lck_mtx_unlock(&hfsmp->hfc_mutex); + } + + hfs_unlock(cp); + return error; + break; + } + + case HFS_REPIN_HOTFILE_STATE: { + int error=0; + uint32_t repin_what = *((uint32_t*)ap->a_data); + + /* Only root allowed */ + if (!kauth_cred_issuser(kauth_cred_get())) { + return EACCES; + } + + if (!(hfsmp->hfs_flags & (HFS_CS_METADATA_PIN | HFS_CS_HOTFILE_PIN))) { + // this system is neither regular Fusion or Cooperative Fusion + // so this fsctl makes no sense. + return EINVAL; + } + + // + // After a converting a CoreStorage volume to be encrypted, the + // extents could have moved around underneath us. This call + // allows corestoraged to re-pin everything that should be + // pinned (it would happen on the next reboot too but that could + // be a long time away). 
+ // + if ((repin_what & HFS_REPIN_METADATA) && (hfsmp->hfs_flags & HFS_CS_METADATA_PIN)) { + hfs_pin_fs_metadata(hfsmp); + } + if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) { + hfs_repin_hotfiles(hfsmp); + } + if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_SWAPFILE_PIN)) { + //XXX Swapfiles (marked SWAP_PINNED) may have moved too. + //XXX Do we care? They have a more transient/dynamic nature/lifetime. + } + + return error; + break; + } + + default: return (ENOTTY); } @@ -2916,6 +2961,62 @@ hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap) * searched for mapping. * * System file cnodes are expected to be locked (shared or exclusive). + * + * -- INVALID RANGES -- + * + * Invalid ranges are used to keep track of where we have extended a + * file, but have not yet written that data to disk. In the past we + * would clear up the invalid ranges as we wrote to those areas, but + * before data was actually flushed to disk. The problem with that + * approach is that the data can be left in the cache and is therefore + * still not valid on disk. So now we clear up the ranges here, when + * the flags field has VNODE_WRITE set, indicating a write is about to + * occur. This isn't ideal (ideally we want to clear them up when we + * know the data has been successfully written), but it's the best we + * can do. + * + * For reads, we use the invalid ranges here in block map to indicate + * to the caller that the data should be zeroed (a_bpn == -1). We + * have to be careful about what ranges we return to the cluster code. + * Currently the cluster code can only handle non-rounded values for + * the EOF; it cannot handle funny sized ranges in the middle of the + * file (the main problem is that it sends down odd sized I/Os to the + * disk).
Our code currently works because whilst the very first + * offset and the last offset in the invalid ranges are not aligned, + * gaps in the invalid ranges between the first and last, have to be + * aligned (because we always write page sized blocks). For example, + * consider this arrangement: + * + * +-------------+-----+-------+------+ + * | |XXXXX| |XXXXXX| + * +-------------+-----+-------+------+ + * a b c d + * + * This shows two invalid ranges <a, b> and <c, d>. Whilst a and d + * are not necessarily aligned, b and c *must* be. + * + * Zero-filling occurs in a number of ways: + * + * 1. When a read occurs and we return with a_bpn == -1. + * + * 2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges + * which will cause us to iterate over the ranges bringing in + * pages that are not present in the cache and zeroing them. Any + * pages that are already in the cache are left untouched. Note + * that hfs_fsync does not always flush invalid ranges. + * + * 3. When we extend a file we zero out from the old EOF to the end + * of the page. It would be nice if we didn't have to do this if + * the page wasn't present (and could defer it), but because of + * the problem described above, we have to. + * + * The invalid ranges are also used to restrict the size that we write + * out on disk: see hfs_prepare_fork_for_update. + * + * Note that invalid ranges are ignored when neither the VNODE_READ nor + * the VNODE_WRITE flag is specified. This is useful for the + * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they + * just want to know whether blocks are physically allocated or not.
*/ int hfs_vnop_blockmap(struct vnop_blockmap_args *ap) @@ -2936,7 +3037,7 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) struct cnode *cp; struct filefork *fp; struct hfsmount *hfsmp; - size_t bytesContAvail = 0; + size_t bytesContAvail = ap->a_size; int retval = E_NONE; int syslocks = 0; int lockflags = 0; @@ -2977,17 +3078,110 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) if (ap->a_bpn == NULL) return (0); + hfsmp = VTOHFS(vp); + cp = VTOC(vp); + fp = VTOF(vp); + if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) { - if (VTOC(vp)->c_lockowner != current_thread()) { + if (cp->c_lockowner != current_thread()) { hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); tooklock = 1; } + + // For reads, check the invalid ranges + if (ISSET(ap->a_flags, VNODE_READ)) { + if (ap->a_foffset >= fp->ff_size) { + retval = ERANGE; + goto exit; + } + + overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset, + ap->a_foffset + (off_t)bytesContAvail - 1, + &invalid_range); + switch(overlaptype) { + case RL_MATCHINGOVERLAP: + case RL_OVERLAPCONTAINSRANGE: + case RL_OVERLAPSTARTSBEFORE: + /* There's no valid block for this byte offset */ + *ap->a_bpn = (daddr64_t)-1; + /* There's no point limiting the amount to be returned + * if the invalid range that was hit extends all the way + * to the EOF (i.e. 
there's no valid bytes between the + * end of this range and the file's EOF): + */ + if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) && + ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) { + bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; + } + + retval = 0; + goto exit; + + case RL_OVERLAPISCONTAINED: + case RL_OVERLAPENDSAFTER: + /* The range of interest hits an invalid block before the end: */ + if (invalid_range->rl_start == ap->a_foffset) { + /* There's actually no valid information to be had starting here: */ + *ap->a_bpn = (daddr64_t)-1; + if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) && + ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) { + bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; + } + + retval = 0; + goto exit; + } else { + /* + * Sadly, the lower layers don't like us to + * return unaligned ranges, so we skip over + * any invalid ranges here that are less than + * a page: zeroing of those bits is not our + * responsibility (it's dealt with elsewhere). + */ + do { + off_t rounded_start = round_page_64(invalid_range->rl_start); + if ((off_t)bytesContAvail < rounded_start - ap->a_foffset) + break; + if (rounded_start < invalid_range->rl_end + 1) { + bytesContAvail = rounded_start - ap->a_foffset; + break; + } + } while ((invalid_range = TAILQ_NEXT(invalid_range, + rl_link))); + } + break; + + case RL_NOOVERLAP: + break; + } // switch + } } - hfsmp = VTOHFS(vp); - cp = VTOC(vp); - fp = VTOF(vp); + +#if CONFIG_PROTECT + if (cp->c_cpentry) { + const int direction = (ISSET(ap->a_flags, VNODE_WRITE) + ? 
VNODE_WRITE : VNODE_READ); + + cp_io_params_t io_params; + cp_io_params(hfsmp, cp->c_cpentry, + off_rsrc_make(ap->a_foffset, VNODE_IS_RSRC(vp)), + direction, &io_params); + + if (io_params.max_len < (off_t)bytesContAvail) + bytesContAvail = io_params.max_len; + + if (io_params.phys_offset != -1) { + *ap->a_bpn = ((io_params.phys_offset + hfsmp->hfsPlusIOPosOffset) + / hfsmp->hfs_logical_block_size); + + retval = 0; + goto exit; + } + } +#endif retry: + /* Check virtual blocks only when performing write operation */ if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) { if (hfs_start_transaction(hfsmp) != 0) { @@ -3049,7 +3243,7 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) hfs_systemfile_unlock(hfsmp, lockflags); cp->c_flag |= C_MODIFIED; if (started_tr) { - (void) hfs_update(vp, TRUE); + (void) hfs_update(vp, 0); (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); hfs_end_transaction(hfsmp); @@ -3059,19 +3253,13 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) } } - retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset, + retval = MapFileBlockC(hfsmp, (FCB *)fp, bytesContAvail, ap->a_foffset, ap->a_bpn, &bytesContAvail); if (syslocks) { hfs_systemfile_unlock(hfsmp, lockflags); syslocks = 0; } - if (started_tr) { - (void) hfs_update(vp, TRUE); - (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); - hfs_end_transaction(hfsmp); - started_tr = 0; - } if (retval) { /* On write, always return error because virtual blocks, if any, * should have been allocated in ExtendFileC(). We do not @@ -3083,42 +3271,28 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) (ap->a_flags & VNODE_WRITE) || ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) { goto exit; - } - + } + /* Validate if the start offset is within logical file size */ if (ap->a_foffset >= fp->ff_size) { goto exit; } /* - * At this point, we have encountered a failure during - * MapFileBlockC that resulted in ERANGE, and we are not servicing - * a write, and there are borrowed blocks. 
- * - * However, the cluster layer will not call blockmap for - * blocks that are borrowed and in-cache. We have to assume that - * because we observed ERANGE being emitted from MapFileBlockC, this - * extent range is not valid on-disk. So we treat this as a - * mapping that needs to be zero-filled prior to reading. + * At this point, we have encountered a failure during + * MapFileBlockC that resulted in ERANGE, and we are not + * servicing a write, and there are borrowed blocks. * - * Note that under certain circumstances (such as non-contiguous - * userland VM mappings in the calling process), cluster_io - * may be forced to split a large I/O driven by hfs_vnop_write - * into multiple sub-I/Os that necessitate a RMW cycle. If this is - * the case here, then we have already removed the invalid range list - * mapping prior to getting to this blockmap call, so we should not - * search the invalid rangelist for this byte range. - */ - - bytesContAvail = fp->ff_size - ap->a_foffset; - /* - * Clip the contiguous available bytes to, at most, the allowable - * maximum or the amount requested. + * However, the cluster layer will not call blockmap for + * blocks that are borrowed and in-cache. We have to assume + * that because we observed ERANGE being emitted from + * MapFileBlockC, this extent range is not valid on-disk. So + * we treat this as a mapping that needs to be zero-filled + * prior to reading. */ - if (bytesContAvail > ap->a_size) { - bytesContAvail = ap->a_size; - } + if (fp->ff_size - ap->a_foffset < (off_t)bytesContAvail) + bytesContAvail = fp->ff_size - ap->a_foffset; *ap->a_bpn = (daddr64_t) -1; retval = 0; @@ -3126,54 +3300,42 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) goto exit; } - /* MapFileC() found a valid extent in the filefork. 
Search the - * mapping information further for invalid file ranges - */ - overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset, - ap->a_foffset + (off_t)bytesContAvail - 1, - &invalid_range); - if (overlaptype != RL_NOOVERLAP) { - switch(overlaptype) { - case RL_MATCHINGOVERLAP: - case RL_OVERLAPCONTAINSRANGE: - case RL_OVERLAPSTARTSBEFORE: - /* There's no valid block for this byte offset */ - *ap->a_bpn = (daddr64_t)-1; - /* There's no point limiting the amount to be returned - * if the invalid range that was hit extends all the way - * to the EOF (i.e. there's no valid bytes between the - * end of this range and the file's EOF): - */ - if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) && - ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) { - bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; - } - break; - - case RL_OVERLAPISCONTAINED: - case RL_OVERLAPENDSAFTER: - /* The range of interest hits an invalid block before the end: */ - if (invalid_range->rl_start == ap->a_foffset) { - /* There's actually no valid information to be had starting here: */ - *ap->a_bpn = (daddr64_t)-1; - if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) && - ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) { - bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; +exit: + if (retval == 0) { + if (ISSET(ap->a_flags, VNODE_WRITE)) { + struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges); + + // See if we might be overlapping invalid ranges... + if (r && (ap->a_foffset + (off_t)bytesContAvail) > r->rl_start) { + /* + * Mark the file as needing an update if we think the + * on-disk EOF has changed. + */ + if (ap->a_foffset <= r->rl_start) + SET(cp->c_flag, C_MODIFIED); + + /* + * This isn't the ideal place to put this. Ideally, we + * should do something *after* we have successfully + * written to the range, but that's difficult to do + * because we cannot take locks in the callback. 
At + * present, the cluster code will call us with VNODE_WRITE + * set just before it's about to write the data so we know + * that data is about to be written. If we get an I/O + * error at this point then chances are the metadata + * update to follow will also have an I/O error so the + * risk here is small. + */ + rl_remove(ap->a_foffset, ap->a_foffset + bytesContAvail - 1, + &fp->ff_invalidranges); + + if (!TAILQ_FIRST(&fp->ff_invalidranges)) { + cp->c_flag &= ~C_ZFWANTSYNC; + cp->c_zftimeout = 0; } - } else { - bytesContAvail = invalid_range->rl_start - ap->a_foffset; } - break; + } - case RL_NOOVERLAP: - break; - } /* end switch */ - if (bytesContAvail > ap->a_size) - bytesContAvail = ap->a_size; - } - -exit: - if (retval == 0) { if (ap->a_run) *ap->a_run = bytesContAvail; @@ -3181,6 +3343,13 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) *(int *)ap->a_poff = 0; } + if (started_tr) { + hfs_update(vp, TRUE); + hfs_volupdate(hfsmp, VOL_UPDATE, 0); + hfs_end_transaction(hfsmp); + started_tr = 0; + } + if (tooklock) hfs_unlock(cp); @@ -3216,86 +3385,17 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap) } #if CONFIG_PROTECT - cnode_t *cp = NULL; - - if ((!bufattr_rawencrypted(&bp->b_attr)) && - ((cp = cp_get_protected_cnode(vp)) != NULL)) { - /* - * We rely upon the truncate lock to protect the - * CP cache key from getting tossed prior to our IO finishing here. - * Nearly all cluster io calls to manipulate file payload from HFS - * take the truncate lock before calling into the cluster - * layer to ensure the file size does not change, or that they - * have exclusive right to change the EOF of the file. - * That same guarantee protects us here since the code that - * deals with CP lock events must now take the truncate lock - * before doing anything. 
- * - * There is 1 exception here: - * 1) One exception should be the VM swapfile IO, because HFS will - * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the - * swapfile code only without holding the truncate lock. This is because - * individual swapfiles are maintained at fixed-length sizes by the VM code. - * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to - * create our own UPL and thus take the truncate lock before calling - * into the cluster layer. In that case, however, we are not concerned - * with the CP blob being wiped out in the middle of the IO - * because there isn't anything to toss; the VM swapfile key stays - * in-core as long as the file is open. - */ - - - /* - * Last chance: If this data protected I/O does not have unwrapped keys - * present, then try to get them. We already know that it should, by this point. - */ - if (cp->c_cpentry->cp_flags & (CP_KEY_FLUSHED | CP_NEEDS_KEYS)) { - int io_op = ( (buf_flags(bp) & B_READ) ? CP_READ_ACCESS : CP_WRITE_ACCESS); - if ((error = cp_handle_vnop(vp, io_op, 0)) != 0) { - /* - * We have to be careful here. By this point in the I/O path, VM or the cluster - * engine has prepared a buf_t with the proper file offsets and all the rest, - * so simply erroring out will result in us leaking this particular buf_t. - * We need to properly decorate the buf_t just as buf_strategy would so as - * to make it appear that the I/O errored out with the particular error code. - */ - buf_seterror (bp, error); - buf_biodone(bp); - return error; - } - } - - /* - *NB: - * For filesystem resize, we may not have access to the underlying - * file's cache key for whatever reason (device may be locked). However, - * we do not need it since we are going to use the temporary HFS-wide resize key - * which is generated once we start relocating file content. 
If this file's I/O - * should be done using the resize key, it will have been supplied already, so - * do not attach the file's cp blob to the buffer. - */ - if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) { - buf_setcpaddr(bp, cp->c_cpentry); - } - } -#endif /* CONFIG_PROTECT */ + error = cp_handle_strategy(bp); + + if (error) + return error; +#endif error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap); return error; } -static int -hfs_minorupdate(struct vnode *vp) { - struct cnode *cp = VTOC(vp); - cp->c_flag &= ~C_MODIFIED; - cp->c_touch_acctime = 0; - cp->c_touch_chgtime = 0; - cp->c_touch_modtime = 0; - - return 0; -} - int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context) { @@ -3310,7 +3410,6 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf int blksize; struct hfsmount *hfsmp; int lockflags; - int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE); int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES); blksize = VTOVCB(vp)->blockSize; @@ -3419,13 +3518,8 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf hfs_systemfile_unlock(hfsmp, lockflags); if (hfsmp->jnl) { - if (skipupdate) { - (void) hfs_minorupdate(vp); - } - else { - (void) hfs_update(vp, TRUE); - (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); - } + hfs_update(vp, 0); + hfs_volupdate(hfsmp, VOL_UPDATE, 0); } hfs_end_transaction(hfsmp); @@ -3443,47 +3537,22 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf rl_remove_all(&fp->ff_invalidranges); } else { if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) { - struct rl_entry *invalid_range; - off_t zero_limit; - - zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64; - if (length < zero_limit) zero_limit = length; - if (length > (off_t)fp->ff_size) { struct timeval tv; /* Extending the file: time to fill out the current last page w. zeroes? 
*/ - if ((fp->ff_size & PAGE_MASK_64) && - (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64, - fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) { - - /* There's some valid data at the start of the (current) last page + if (fp->ff_size & PAGE_MASK_64) { + /* There might be some valid data at the start of the (current) last page of the file, so zero out the remainder of that page to ensure the - entire page contains valid data. Since there is no invalid range - possible past the (current) eof, there's no need to remove anything - from the invalid range list before calling cluster_write(): */ + entire page contains valid data. */ hfs_unlock(cp); - retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit, - fp->ff_size, (off_t)0, - (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY); + retval = hfs_zero_eof_page(vp, length); hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); if (retval) goto Err_Exit; - - /* Merely invalidate the remaining area, if necessary: */ - if (length > zero_limit) { - microuptime(&tv); - rl_add(zero_limit, length - 1, &fp->ff_invalidranges); - cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; - } - } else { - /* The page containing the (current) eof is invalid: just add the - remainder of the page to the invalid list, along with the area - being newly allocated: - */ + } microuptime(&tv); rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges); cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; - }; } } else { panic("hfs_truncate: invoked on non-UBC object?!"); @@ -3533,9 +3602,7 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf hfs_unlock_mount (hfsmp); } -#if QUOTA off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize); -#endif /* QUOTA */ if (hfs_start_transaction(hfsmp) != 0) { retval = EINVAL; goto Err_Exit; @@ -3557,13 +3624,8 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf if (retval == 0) { fp->ff_size = length; } - if (skipupdate) { - (void) 
hfs_minorupdate(vp); - } - else { - (void) hfs_update(vp, TRUE); - (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); - } + hfs_update(vp, 0); + hfs_volupdate(hfsmp, VOL_UPDATE, 0); } hfs_end_transaction(hfsmp); @@ -3575,6 +3637,15 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0); #endif /* QUOTA */ + // + // Unlike when growing a file, we adjust the hotfile block count here + // instead of deeper down in the block allocation code because we do + // not necessarily have a vnode or "fcb" at the time we're deleting + // the file and so we wouldn't know if it was hotfile cached or not + // + hfs_hotfile_adjust_blocks(vp, (int64_t)((savedbytes - filebytes) / blksize)); + + /* * Only set update flag if the logical length changes & we aren't * suppressing modtime updates. @@ -3585,30 +3656,24 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf fp->ff_size = length; } if (cp->c_mode & (S_ISUID | S_ISGID)) { - if (!vfs_context_issuser(context)) { + if (!vfs_context_issuser(context)) cp->c_mode &= ~(S_ISUID | S_ISGID); - skipupdate = 0; - } } - if (skipupdate) { - retval = hfs_minorupdate(vp); - } - else { - cp->c_touch_chgtime = TRUE; /* status changed */ - if (suppress_times == 0) { - cp->c_touch_modtime = TRUE; /* file data was modified */ - - /* - * If we are not suppressing the modtime update, then - * update the gen count as well. - */ - if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) { - hfs_incr_gencount(cp); - } - } + cp->c_flag |= C_MODIFIED; + cp->c_touch_chgtime = TRUE; /* status changed */ + if (suppress_times == 0) { + cp->c_touch_modtime = TRUE; /* file data was modified */ - retval = hfs_update(vp, MNT_WAIT); + /* + * If we are not suppressing the modtime update, then + * update the gen count as well. 
+ */ + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) { + hfs_incr_gencount(cp); + } } + + retval = hfs_update(vp, 0); if (retval) { KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE, -1, -1, -1, retval, 0); @@ -3728,10 +3793,12 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, /* Data Fork */ if (datafork) { + off_t prev_filebytes; datafork->ff_size = 0; fileblocks = datafork->ff_blocks; - filebytes = (off_t)fileblocks * (off_t)blksize; + filebytes = (off_t)fileblocks * (off_t)blksize; + prev_filebytes = filebytes; /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ @@ -3761,6 +3828,12 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, } (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + struct cnode *cp = datafork ? FTOC(datafork) : NULL; + struct vnode *vp; + vp = cp ? CTOV(cp, 0) : NULL; + hfs_hotfile_adjust_blocks(vp, (int64_t)((prev_filebytes - filebytes) / blksize)); + prev_filebytes = filebytes; + /* Finish the transaction and start over if necessary */ hfs_end_transaction(hfsmp); @@ -3864,6 +3937,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int blksize; errno_t error = 0; struct cnode *cp = VTOC(vp); + hfsmount_t *hfsmp = VTOHFS(vp); /* Cannot truncate an HFS directory! 
*/ if (vnode_isdir(vp)) { @@ -3874,7 +3948,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, return (EPERM); } - blksize = VTOVCB(vp)->blockSize; + blksize = hfsmp->blockSize; fileblocks = fp->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; @@ -3901,22 +3975,28 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, } else { filebytes = length; } - cp->c_flag |= C_FORCEUPDATE; error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context); if (error) break; } } else if (length > filebytes) { - while (filebytes < length) { - if ((length - filebytes) > HFS_BIGFILE_SIZE) { - filebytes += HFS_BIGFILE_SIZE; - } else { - filebytes = length; + kauth_cred_t cred = vfs_context_ucred(context); + const bool keep_reserve = cred && suser(cred, NULL) != 0; + + if (hfs_freeblks(hfsmp, keep_reserve) + < howmany(length - filebytes, blksize)) { + error = ENOSPC; + } else { + while (filebytes < length) { + if ((length - filebytes) > HFS_BIGFILE_SIZE) { + filebytes += HFS_BIGFILE_SIZE; + } else { + filebytes = length; + } + error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context); + if (error) + break; } - cp->c_flag |= C_FORCEUPDATE; - error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context); - if (error) - break; } } else /* Same logical size */ { @@ -3927,6 +4007,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, fp->ff_bytesread = 0; } + if (!caller_has_cnode_lock) hfs_unlock(cp); @@ -4032,6 +4113,13 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { * value of filebytes is 0, length will be at least 1. 
*/ if (length > filebytes) { + if (ISSET(extendFlags, kEFAllMask) + && (hfs_freeblks(hfsmp, ISSET(extendFlags, kEFReserveMask)) + < howmany(length - filebytes, hfsmp->blockSize))) { + retval = ENOSPC; + goto Err_Exit; + } + off_t total_bytes_added = 0, orig_request_size; orig_request_size = moreBytesRequested = length - filebytes; @@ -4109,7 +4197,7 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { hfs_systemfile_unlock(hfsmp, lockflags); if (hfsmp->jnl) { - (void) hfs_update(vp, TRUE); + (void) hfs_update(vp, 0); (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); } @@ -4169,9 +4257,10 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { } Std_Exit: + cp->c_flag |= C_MODIFIED; cp->c_touch_chgtime = TRUE; cp->c_touch_modtime = TRUE; - retval2 = hfs_update(vp, MNT_WAIT); + retval2 = hfs_update(vp, 0); if (retval == 0) retval = retval2; @@ -4416,12 +4505,19 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap) * compressed once the compression lock is successfully taken * i.e. we would block on that lock while the file is being inflated */ + if (error == 0 && vnode_isfastdevicecandidate(vp)) { + (void) hfs_addhotfile(vp); + } if (compressed) { if (error == 0) { /* successful page-in, update the access time */ VTOC(vp)->c_touch_acctime = TRUE; - /* compressed files are not hot file candidates */ + // + // compressed files are not traditional hot file candidates + // but they may be for CF (which ignores the ff_bytesread + // field) + // if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { fp->ff_bytesread = 0; } @@ -4514,6 +4610,10 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap) fp->ff_bytesread += bytesread; } cp->c_touch_acctime = TRUE; + + if (vnode_isfastdevicecandidate(vp)) { + (void) hfs_addhotfile(vp); + } if (took_cnode_lock) hfs_unlock(cp); } @@ -4567,16 +4667,6 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) cp = VTOC(vp); fp = VTOF(vp); - /* - * Figure out where the file ends, for pageout purposes. 
If - * ff_new_size > ff_size, then we're in the middle of extending the - * file via a write, so it is safe (and necessary) that we be able - * to pageout up to that point. - */ - filesize = fp->ff_size; - if (fp->ff_new_size > filesize) - filesize = fp->ff_new_size; - a_flags = ap->a_flags; a_pl_offset = ap->a_pl_offset; @@ -4628,6 +4718,16 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) * it was either passed in or we succesfully created it */ + /* + * Figure out where the file ends, for pageout purposes. If + * ff_new_size > ff_size, then we're in the middle of extending the + * file via a write, so it is safe (and necessary) that we be able + * to pageout up to that point. + */ + filesize = fp->ff_size; + if (fp->ff_new_size > filesize) + filesize = fp->ff_new_size; + /* * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own * UPL instead of relying on the UPL passed into us. We go ahead and do that here, @@ -4718,42 +4818,6 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) } xsize = num_of_pages * PAGE_SIZE; - if (!vnode_isswap(vp)) { - off_t end_of_range; - int tooklock; - - tooklock = 0; - - if (cp->c_lockowner != current_thread()) { - if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { - /* - * we're in the v2 path, so we are the - * owner of the UPL... 
we may have already - * processed some of the UPL, so abort it - * from the current working offset to the - * end of the UPL - */ - ubc_upl_abort_range(upl, - offset, - ap->a_size - offset, - UPL_ABORT_FREE_ON_EMPTY); - goto pageout_done; - } - tooklock = 1; - } - end_of_range = f_offset + xsize - 1; - - if (end_of_range >= filesize) { - end_of_range = (off_t)(filesize - 1); - } - if (f_offset < filesize) { - rl_remove(f_offset, end_of_range, &fp->ff_invalidranges); - cp->c_flag |= C_MODIFIED; /* leof is dirty */ - } - if (tooklock) { - hfs_unlock(cp); - } - } if ((error = cluster_pageout(vp, upl, offset, f_offset, xsize, filesize, a_flags))) { if (error_ret == 0) @@ -4770,36 +4834,6 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) } } /* end block for v2 pageout behavior */ else { - if (!vnode_isswap(vp)) { - off_t end_of_range; - int tooklock = 0; - - if (cp->c_lockowner != current_thread()) { - if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { - if (!(a_flags & UPL_NOCOMMIT)) { - ubc_upl_abort_range(upl, - a_pl_offset, - ap->a_size, - UPL_ABORT_FREE_ON_EMPTY); - } - goto pageout_done; - } - tooklock = 1; - } - end_of_range = ap->a_f_offset + ap->a_size - 1; - - if (end_of_range >= filesize) { - end_of_range = (off_t)(filesize - 1); - } - if (ap->a_f_offset < filesize) { - rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges); - cp->c_flag |= C_MODIFIED; /* leof is dirty */ - } - - if (tooklock) { - hfs_unlock(cp); - } - } /* * just call cluster_pageout for old pre-v2 behavior */ @@ -4911,6 +4945,168 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap) return (retval); } + +int +hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks, vfs_context_t ctx) +{ + _dk_cs_pin_t pin; + unsigned ioc; + int err; + + memset(&pin, 0, sizeof(pin)); + pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize; + pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize; + switch 
(pin_state) { + case HFS_PIN_IT: + ioc = _DKIOCCSPINEXTENT; + pin.cp_flags = _DKIOCCSPINTOFASTMEDIA; + break; + case HFS_PIN_IT | HFS_TEMP_PIN: + ioc = _DKIOCCSPINEXTENT; + pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN; + break; + case HFS_PIN_IT | HFS_DATALESS_PIN: + ioc = _DKIOCCSPINEXTENT; + pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE; + break; + case HFS_UNPIN_IT: + ioc = _DKIOCCSUNPINEXTENT; + pin.cp_flags = 0; + break; + case HFS_UNPIN_IT | HFS_EVICT_PIN: + ioc = _DKIOCCSPINEXTENT; + pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA; + break; + default: + return EINVAL; + } + err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, ctx); + return err; +} + +// +// The cnode lock should already be held on entry to this function +// +int +hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned, vfs_context_t ctx) +{ + struct filefork *fp = VTOF(vp); + int i, err=0, need_put=0; + struct vnode *rsrc_vp=NULL; + uint32_t npinned = 0; + off_t offset; + + if (num_blocks_pinned) { + *num_blocks_pinned = 0; + } + + if (vnode_vtype(vp) != VREG) { + /* Not allowed to pin directories or symlinks */ + printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp)); + return (EPERM); + } + + if (fp->ff_unallocblocks) { + printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks); + return (EINVAL); + } + + /* + * It is possible that if the caller unlocked/re-locked the cnode after checking + * for C_NOEXISTS|C_DELETED that the file could have been deleted while the + * cnode was unlocked. So check the condition again and return ENOENT so that + * the caller knows why we failed to pin the vnode. 
+ */ + if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) { + // makes no sense to pin something that's pending deletion + return ENOENT; + } + + if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) { + if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) { + //printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid, + // VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size); + + fp = VTOC(rsrc_vp)->c_rsrcfork; + need_put = 1; + } + } + if (fp->ff_blocks == 0) { + if (need_put) { + // + // use a distinct error code for a compressed file that has no resource fork; + // we return EALREADY to indicate that the data is already probably hot file + // cached because it's in an EA and the attributes btree is on the ssd + // + err = EALREADY; + } else { + err = EINVAL; + } + goto out; + } + + offset = 0; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (fp->ff_extents[i].startBlock == 0) { + break; + } + + err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, ctx); + if (err) { + break; + } else { + npinned += fp->ff_extents[i].blockCount; + } + } + + if (err || npinned == 0) { + goto out; + } + + if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) { + uint32_t pblocks; + uint8_t forktype = 0; + + if (fp == VTOC(vp)->c_rsrcfork) { + forktype = 0xff; + } + /* + * The file could have overflow extents, better pin them. + * + * We assume that since we are holding the cnode lock for this cnode, + * the files extents cannot be manipulated, but the tree could, so we + * need to ensure that it doesn't change behind our back as we iterate it. 
+ */ + int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK); + err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks); + hfs_systemfile_unlock (hfsmp, lockflags); + + if (err) { + goto out; + } + npinned += pblocks; + } + +out: + if (num_blocks_pinned) { + *num_blocks_pinned = npinned; + } + + if (need_put && rsrc_vp) { + // + // have to unlock the cnode since it's shared between the + // resource fork vnode and the data fork vnode (and the + // vnode_put() may need to re-acquire the cnode lock to + // reclaim the resource fork vnode) + // + hfs_unlock(VTOC(vp)); + vnode_put(rsrc_vp); + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + } + return err; +} + + /* * Relocate a file to a new location on disk * cnode must be locked on entry @@ -5170,13 +5366,13 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, /* Push cnode's new extent data to disk. */ if (retval == 0) { - (void) hfs_update(vp, MNT_WAIT); + hfs_update(vp, 0); } if (hfsmp->jnl) { if (cp->c_cnid < kHFSFirstUserCatalogNodeID) - (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); else - (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); + (void) hfs_flushvolumeheader(hfsmp, 0); } exit: if (started_tr) @@ -5242,7 +5438,7 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) } #endif /* CONFIG_PROTECT */ - if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize, VM_KERN_MEMORY_FILE)) { hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); return (ENOMEM); } @@ -5336,7 +5532,7 @@ hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize, bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1); breadcnt = bufsize / iosize; - if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { + if (kmem_alloc(kernel_map, (vm_offset_t 
*)&bufp, bufsize, VM_KERN_MEMORY_FILE)) { return (ENOMEM); } start_blk = ((daddr64_t)blkstart * blksize) / iosize; @@ -5397,3 +5593,90 @@ hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize, return (error); } + +errno_t hfs_flush_invalid_ranges(vnode_t vp) +{ + cnode_t *cp = VTOC(vp); + + assert(cp->c_lockowner == current_thread()); + assert(cp->c_truncatelockowner == current_thread()); + + if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout) + return 0; + + filefork_t *fp = VTOF(vp); + + /* + * We can't hold the cnode lock whilst we call cluster_write so we + * need to copy the extents into a local buffer. + */ + int max_exts = 16; + struct ext { + off_t start, end; + } exts_buf[max_exts]; // 256 bytes + struct ext *exts = exts_buf; + int ext_count = 0; + errno_t ret; + + struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges); + + while (r) { + /* If we have more than can fit in our stack buffer, switch + to a heap buffer. */ + if (exts == exts_buf && ext_count == max_exts) { + max_exts = 256; + MALLOC(exts, struct ext *, sizeof(struct ext) * max_exts, + M_TEMP, M_WAITOK); + memcpy(exts, exts_buf, ext_count * sizeof(struct ext)); + } + + struct rl_entry *next = TAILQ_NEXT(r, rl_link); + + exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end }; + + if (!next || (ext_count == max_exts && exts != exts_buf)) { + hfs_unlock(cp); + for (int i = 0; i < ext_count; ++i) { + ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1, + exts[i].start, 0, + IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE); + if (ret) { + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + goto exit; + } + } + + if (!next) { + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + break; + } + + /* Push any existing clusters which should clean up our invalid + ranges as they go through hfs_vnop_blockmap. */ + cluster_push(vp, 0); + + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + + /* + * Get back to where we were (given we dropped the lock). 
+ * This shouldn't be many because we pushed above. + */ + TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) { + if (r->rl_end > exts[ext_count - 1].end) + break; + } + + ext_count = 0; + } else + r = next; + } + + ret = 0; + +exit: + + if (exts != exts_buf) + FREE(exts, M_TEMP); + + return ret; +} diff --git a/bsd/hfs/hfs_resize.c b/bsd/hfs/hfs_resize.c index ceaa4d572..f5dc27ad5 100644 --- a/bsd/hfs/hfs_resize.c +++ b/bsd/hfs/hfs_resize.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 Apple Inc. All rights reserved. + * Copyright (c) 2013-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,6 +30,7 @@ #include #include #include + #include #include #include @@ -39,15 +40,13 @@ #include "hfs_cnode.h" #include "hfs_endian.h" #include "hfs_btreeio.h" - -#if CONFIG_PROTECT -#include -#endif +#include "hfs_cprotect.h" /* Enable/disable debugging code for live volume resizing */ int hfs_resize_debug = 0; -static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec); +static errno_t hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, + struct HFSPlusCatalogFile *filerec, bool *overlaps); static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context); static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context); @@ -170,9 +169,9 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; hfs_unlock_mount (hfsmp); - + /* Start with a clean journal. */ - hfs_journal_flush(hfsmp, TRUE); + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); /* * Enclose changes inside a transaction. 
@@ -371,7 +370,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) vcb->totalBlocks += addblks; vcb->freeBlocks += addblks; MarkVCBDirty(vcb); - error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + error = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); if (error) { printf("hfs_extendfs: couldn't flush volume headers (%d)", error); /* @@ -469,7 +468,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } if (transaction_begun) { hfs_end_transaction(hfsmp); - hfs_journal_flush(hfsmp, TRUE); + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); transaction_begun = 0; } @@ -501,9 +500,10 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } if (transaction_begun) { hfs_end_transaction(hfsmp); - hfs_journal_flush(hfsmp, FALSE); /* Just to be sure, sync all data to the disk */ - (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); + int flush_error = hfs_flush(hfsmp, HFS_FLUSH_FULL); + if (flush_error && !error) + error = flush_error; } if (error) { printf ("hfs_extentfs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN); @@ -567,7 +567,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) error = EINVAL; goto out; } - + /* * Make sure that the file system has enough free blocks reclaim. * @@ -614,9 +614,9 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) error = ENOSPC; goto out; } - + /* Start with a clean journal. */ - hfs_journal_flush(hfsmp, TRUE); + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); if (hfs_start_transaction(hfsmp) != 0) { error = EINVAL; @@ -674,6 +674,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) * an extent being relocated is more than the free blocks that * will exist after the volume is resized. 
*/ + hfsmp->reclaimBlocks = reclaimblks; hfsmp->freeBlocks -= reclaimblks; updateFreeBlocks = true; hfs_unlock_mount(hfsmp); @@ -706,7 +707,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) */ hfs_end_transaction(hfsmp); transaction_begun = 0; - + /* Attempt to reclaim some space. */ error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context); if (error != 0) { @@ -714,6 +715,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) error = ENOSPC; goto out; } + if (hfs_start_transaction(hfsmp) != 0) { error = EINVAL; goto out; @@ -770,7 +772,8 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) hfsmp->totalBlocks = newblkcnt; hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size; hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size; - + hfsmp->reclaimBlocks = 0; + /* * At this point, a smaller HFS file system exists in a larger volume. 
* As per volume format, the alternate volume header is located 1024 bytes @@ -793,7 +796,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } MarkVCBDirty(hfsmp); - error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + error = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); if (error) { panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error); } @@ -839,7 +842,8 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) if (error && (updateFreeBlocks == true)) { hfsmp->freeBlocks += reclaimblks; } - + hfsmp->reclaimBlocks = 0; + if (hfsmp->nextAllocation >= hfsmp->allocLimit) { hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1; } @@ -856,9 +860,10 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } if (transaction_begun) { hfs_end_transaction(hfsmp); - hfs_journal_flush(hfsmp, FALSE); /* Just to be sure, sync all data to the disk */ - (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); + int flush_error = hfs_flush(hfsmp, HFS_FLUSH_FULL); + if (flush_error && !error) + error = flush_error; } if (error) { @@ -918,7 +923,7 @@ hfs_copy_extent( u_int32_t oldStart, /* The start of the source extent. */ u_int32_t newStart, /* The start of the destination extent. */ u_int32_t blockCount, /* The number of allocation blocks to copy. */ - vfs_context_t context) + __unused vfs_context_t context) { int err = 0; size_t bufferSize; @@ -955,36 +960,10 @@ hfs_copy_extent( * a special cpentry to the IOMedia/LwVM code for handling. */ if (!vnode_issystem (vp) && vnode_isreg(vp) && cp_fs_protected (hfsmp->hfs_mp)) { - int cp_err = 0; - /* - * Ideally, the file whose extents we are about to manipulate is using the - * newer offset-based IVs so that we can manipulate it regardless of the - * current lock state. However, we must maintain support for older-style - * EAs. 
- * - * For the older EA case, the IV was tied to the device LBA for file content. - * This means that encrypted data cannot be moved from one location to another - * in the filesystem without garbling the IV data. As a result, we need to - * access the file's plaintext because we cannot do our AES-symmetry trick - * here. This requires that we attempt a key-unwrap here (via cp_handle_relocate) - * to make forward progress. If the keys are unavailable then we will - * simply stop the resize in its tracks here since we cannot move - * this extent at this time. - */ - if ((cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) == 0) { - cp_err = cp_handle_relocate(cp, hfsmp); - } - - if (cp_err) { - printf ("hfs_copy_extent: cp_handle_relocate failed (%d) \n", cp_err); - return cp_err; - } - cpenabled = 1; } #endif - - + /* * Determine the I/O size to use * @@ -994,7 +973,7 @@ hfs_copy_extent( */ vfs_ioattr(hfsmp->hfs_mp, &ioattr); bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt); - if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize)) + if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize, VM_KERN_MEMORY_FILE)) return ENOMEM; /* Get a buffer for doing the I/O */ @@ -1025,24 +1004,15 @@ hfs_copy_extent( /* Attach the new CP blob to the buffer if needed */ #if CONFIG_PROTECT if (cpenabled) { - if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) { - /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */ - cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT; - buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry); - } - else { - /* - * Use the cnode's cp key. This file is tied to the - * LBAs of the physical blocks that it occupies. 
- */ - buf_setcpaddr (bp, cp->c_cpentry); - } - + /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */ + cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT; + bufattr_setcpx(buf_attr(bp), hfsmp->hfs_resize_cpx); + /* Initialize the content protection file offset to start at 0 */ buf_setcpoff (bp, 0); } #endif - + /* Do the read */ err = VNOP_STRATEGY(bp); if (!err) @@ -1070,16 +1040,7 @@ hfs_copy_extent( #if CONFIG_PROTECT /* Attach the CP to the buffer if needed */ if (cpenabled) { - if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) { - buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry); - } - else { - /* - * Use the cnode's CP key. This file is still tied - * to the LBAs of the physical blocks that it occupies. - */ - buf_setcpaddr (bp, cp->c_cpentry); - } + bufattr_setcpx(buf_attr(bp), hfsmp->hfs_resize_cpx); /* * The last STRATEGY call may have updated the cp file offset behind our * back, so we cannot trust it. Re-initialize the content protection @@ -1117,9 +1078,10 @@ hfs_copy_extent( /* Make sure all writes have been flushed to disk. */ if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) { - err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); + + err = hfs_flush(hfsmp, HFS_FLUSH_CACHE); if (err) { - printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err); + printf("hfs_copy_extent: hfs_flush failed (%d)\n", err); err = 0; /* Don't fail the copy. 
*/ } } @@ -1901,7 +1863,7 @@ hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_r cp->c_flag |= C_MODIFIED; /* If this is a system file, sync volume headers on disk */ if (extent_info->is_sysfile) { - error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + error = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); } } } else { @@ -1944,7 +1906,7 @@ hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_r */ if ((extent_info->catalog_fp) && (extent_info->is_sysfile == false)) { - (void) hfs_update(extent_info->vp, MNT_WAIT); + hfs_update(extent_info->vp, 0); } hfs_end_transaction(hfsmp); @@ -2072,7 +2034,7 @@ hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, /* If the current vnode is system vnode, flush journal * to make sure that all data is written to the disk. */ - error = hfs_journal_flush(hfsmp, TRUE); + error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); if (error) { printf ("hfs_reclaim_file: journal_flush returned %d\n", error); goto out; @@ -2266,7 +2228,7 @@ hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, FREE(extent_info->dirlink_fork, M_TEMP); } if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) { - (void) hfs_update(vp, MNT_WAIT); + hfs_update(vp, 0); } if (took_truncate_lock) { hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); @@ -2311,7 +2273,7 @@ hfs_journal_relocate_callback(void *_args) JournalInfoBlock *jibp; error = buf_meta_bread(hfsmp->hfs_devvp, - hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), + (uint64_t)hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), hfsmp->blockSize, vfs_context_ucred(args->context), &bp); if (error) { printf("hfs_journal_relocate_callback: failed to read JIB (%d)\n", error); @@ -2331,9 +2293,9 @@ hfs_journal_relocate_callback(void *_args) return error; } if (!journal_uses_fua(hfsmp->jnl)) { - error = 
VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context); + error = hfs_flush(hfsmp, HFS_FLUSH_CACHE); if (error) { - printf("hfs_journal_relocate_callback: DKIOCSYNCHRONIZECACHE failed (%d)\n", error); + printf("hfs_journal_relocate_callback: hfs_flush failed (%d)\n", error); error = 0; /* Don't fail the operation. */ } } @@ -2429,7 +2391,7 @@ hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize } /* Update the catalog record for .journal */ - journal_fork.cf_size = newBlockCount * hfsmp->blockSize; + journal_fork.cf_size = hfs_blk_to_bytes(newBlockCount, hfsmp->blockSize); journal_fork.cf_extents[0].startBlock = newStartBlock; journal_fork.cf_extents[0].blockCount = newBlockCount; journal_fork.cf_blocks = newBlockCount; @@ -2539,7 +2501,8 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_conte return 0; } - error = hfs_relocate_journal_file(hfsmp, blockCount * hfsmp->blockSize, HFS_RESIZE_TRUNCATE, context); + error = hfs_relocate_journal_file(hfsmp, hfs_blk_to_bytes(blockCount, hfsmp->blockSize), + HFS_RESIZE_TRUNCATE, context); if (error == 0) { hfsmp->hfs_resize_blocksmoved += blockCount; hfs_truncatefs_progress(hfsmp); @@ -2596,7 +2559,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs /* Copy the old journal info block content to the new location */ error = buf_meta_bread(hfsmp->hfs_devvp, - hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), + (uint64_t)hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), hfsmp->blockSize, vfs_context_ucred(context), &old_bp); if (error) { printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error); @@ -2606,7 +2569,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs goto free_fail; } new_bp = buf_getblk(hfsmp->hfs_devvp, - newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), + (uint64_t)newBlock * 
(hfsmp->blockSize/hfsmp->hfs_logical_block_size), hfsmp->blockSize, 0, 0, BLK_META); bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize); buf_brelse(old_bp); @@ -2618,9 +2581,9 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs goto free_fail; } if (!journal_uses_fua(hfsmp->jnl)) { - error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); + error = hfs_flush(hfsmp, HFS_FLUSH_CACHE); if (error) { - printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error); + printf("hfs_reclaim_journal_info_block: hfs_flush failed (%d)\n", error); /* Don't fail the operation. */ } } @@ -2653,7 +2616,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs /* Update the pointer to the journal info block in the volume header. */ hfsmp->vcbJinfoBlock = newBlock; - error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + error = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); if (error) { printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error); goto fail; @@ -2663,7 +2626,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs if (error) { printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error); } - error = hfs_journal_flush(hfsmp, FALSE); + error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL); if (error) { printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error); } @@ -2964,7 +2927,7 @@ hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context /* Store the value to print total blocks moved by this function in end */ prev_blocksmoved = hfsmp->hfs_resize_blocksmoved; - if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator), VM_KERN_MEMORY_FILE)) { return ENOMEM; } bzero(iterator, sizeof(*iterator)); @@ -3095,7 
+3058,7 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_ /* Store the value to print total blocks moved by this function at the end */ prev_blocksmoved = hfsmp->hfs_resize_blocksmoved; - if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator), VM_KERN_MEMORY_FILE)) { error = ENOMEM; goto reclaim_filespace_done; } @@ -3109,16 +3072,11 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_ * end of the function. */ if (cp_fs_protected (hfsmp->hfs_mp)) { - int needs = 0; - error = cp_needs_tempkeys(hfsmp, &needs); - - if ((error == 0) && (needs)) { - error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp); - if (error == 0) { - keys_generated = 1; - } + error = cpx_gentempkeys(&hfsmp->hfs_resize_cpx, hfsmp); + if (error == 0) { + keys_generated = 1; } - + if (error) { printf("hfs_reclaimspace: Error generating temporary keys for resize (%d)\n", error); goto reclaim_filespace_done; @@ -3151,10 +3109,14 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_ } /* Check if any of the extents require relocation */ - if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) { + bool overlaps; + error = hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec, &overlaps); + if (error) + break; + + if (!overlaps) continue; - } - + /* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */ if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) { if (hfs_resize_debug) { @@ -3226,8 +3188,8 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_ #if CONFIG_PROTECT if (keys_generated) { - cp_entry_destroy(hfsmp->hfs_resize_cpentry); - hfsmp->hfs_resize_cpentry = NULL; + cpx_free(hfsmp->hfs_resize_cpx); + hfsmp->hfs_resize_cpx = NULL; } #endif return error; @@ -3263,7 +3225,7 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, 
u_int32_t reclaim } /* Just to be safe, sync the content of the journal to the disk before we proceed */ - hfs_journal_flush(hfsmp, TRUE); + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); /* First, relocate journal file blocks if they're in the way. * Doing this first will make sure that journal relocate code @@ -3340,7 +3302,7 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaim * strictly required, but shouldn't hurt. */ if (hfsmp->hfs_resize_blocksmoved) { - hfs_journal_flush(hfsmp, TRUE); + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); } /* Reclaim extents from catalog file records */ @@ -3356,7 +3318,25 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaim printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error); return error; } - + + /* + * Make sure reserved ranges in the region we're to allocate don't + * overlap. + */ + struct rl_entry *range; +again:; + int lockf = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_SHARED_LOCK); + TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) { + if (rl_overlap(range, hfsmp->allocLimit, RL_INFINITY) != RL_NOOVERLAP) { + // Wait 100ms + hfs_systemfile_unlock(hfsmp, lockf); + msleep(hfs_reclaimspace, NULL, PINOD, "waiting on reserved blocks", + &(struct timespec){ 0, 100 * 1000000 }); + goto again; + } + } + hfs_systemfile_unlock(hfsmp, lockf); + return error; } @@ -3369,20 +3349,21 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaim * true - One of the extents need to be relocated * false - No overflow extents need to be relocated, or there was an error */ -static int -hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec) +static errno_t +hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, + struct HFSPlusCatalogFile *filerec, bool *overlaps) { struct BTreeIterator * iterator = NULL; struct FSBufferDescriptor btdata; 
HFSPlusExtentRecord extrec; HFSPlusExtentKey *extkeyptr; FCB *fcb; - int overlapped = false; int i, j; int error; int lockflags = 0; u_int32_t endblock; - + errno_t ret = 0; + /* Check if data fork overlaps the target space */ for (i = 0; i < kHFSPlusExtentDensity; ++i) { if (filerec->dataFork.extents[i].blockCount == 0) { @@ -3391,7 +3372,7 @@ hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HF endblock = filerec->dataFork.extents[i].startBlock + filerec->dataFork.extents[i].blockCount; if (endblock > allocLimit) { - overlapped = true; + *overlaps = true; goto out; } } @@ -3404,19 +3385,19 @@ hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HF endblock = filerec->resourceFork.extents[j].startBlock + filerec->resourceFork.extents[j].blockCount; if (endblock > allocLimit) { - overlapped = true; + *overlaps = true; goto out; } } /* Return back if there are no overflow extents for this file */ if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) { + *overlaps = false; goto out; } - if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { - return 0; - } + MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); + bzero(iterator, sizeof(*iterator)); extkeyptr = (HFSPlusExtentKey *)&iterator->key; extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength; @@ -3438,9 +3419,10 @@ hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HF */ error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator); if (error && (error != btNotFound)) { + ret = MacToVFSError(error); goto out; } - + /* BTIterateRecord() might return error if the btree is empty, and * therefore we return that the extent does not overflow to the caller */ @@ -3457,22 +3439,29 @@ hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HF } endblock = extrec[i].startBlock + extrec[i].blockCount; if (endblock > allocLimit) { - overlapped = true; + *overlaps = true; 
goto out; } } /* Look for more records. */ error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); } - + + if (error && error != btNotFound) { + ret = MacToVFSError(error); + goto out; + } + + *overlaps = false; + out: if (lockflags) { hfs_systemfile_unlock(hfsmp, lockflags); } - if (iterator) { - kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); - } - return overlapped; + + FREE(iterator, M_TEMP); + + return ret; } diff --git a/bsd/hfs/hfs_search.c b/bsd/hfs/hfs_search.c index a76a9a9e5..45cd1a22d 100644 --- a/bsd/hfs/hfs_search.c +++ b/bsd/hfs/hfs_search.c @@ -324,7 +324,7 @@ hfs_vnop_search(ap) (void) hfs_fsync(vcb->catalogRefNum, MNT_WAIT, 0, p); if (hfsmp->jnl) { hfs_systemfile_unlock(hfsmp, lockflags); - hfs_journal_flush(hfsmp, FALSE); + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL); lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); } @@ -332,6 +332,7 @@ hfs_vnop_search(ap) bzero((caddr_t)myCatPositionPtr, sizeof(*myCatPositionPtr)); err = BTScanInitialize(catalogFCB, 0, 0, 0, kCatSearchBufferSize, &myBTScanState); if (err) { + hfs_systemfile_unlock(hfsmp, lockflags); goto ExitThisRoutine; } } else { diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c index 9df531ab8..a819362bb 100644 --- a/bsd/hfs/hfs_vfsops.c +++ b/bsd/hfs/hfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2014 Apple Inc. All rights reserved. + * Copyright (c) 1999-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -113,14 +113,11 @@ #include "hfs_quota.h" #include "hfs_btreeio.h" #include "hfs_kdebug.h" +#include "hfs_cprotect.h" #include "hfscommon/headers/FileMgrInternal.h" #include "hfscommon/headers/BTreesInternal.h" -#if CONFIG_PROTECT -#include -#endif - #define HFS_MOUNT_DEBUG 1 #if HFS_DIAGNOSTIC @@ -208,11 +205,6 @@ hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context) vfsp = vfs_statfs(mp); (void)hfs_statfs(mp, vfsp, NULL); - /* Invoke ioctl that asks if the underlying device is Core Storage or not */ - error = VNOP_IOCTL(rvp, _DKIOCCORESTORAGE, NULL, 0, context); - if (error == 0) { - hfsmp->hfs_flags |= HFS_CS; - } return (0); } @@ -300,7 +292,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte /* mark the volume cleanly unmounted */ hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask; - retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); + retval = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); hfsmp->hfs_flags |= HFS_READ_ONLY; /* @@ -412,7 +404,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte * for metadata writes. 
*/ hfsmp->jnl = journal_open(hfsmp->jvp, - (hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset, + hfs_blk_to_bytes(hfsmp->jnl_start, HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset, hfsmp->jnl_size, hfsmp->hfs_devvp, hfsmp->hfs_logical_block_size, @@ -463,7 +455,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte /* mark the volume dirty (clear clean unmount bit) */ hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask; - retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); + retval = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); if (retval != E_NONE) { if (HFS_MOUNT_DEBUG) { printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN); @@ -483,12 +475,24 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte hfs_remove_orphans(hfsmp); /* - * Allow hot file clustering if conditions allow. + * Since we're upgrading to a read-write mount, allow + * hot file clustering if conditions allow. + * + * Note: this normally only would happen if you booted + * single-user and upgraded the mount to read-write + * + * Note: at this point we are not allowed to fail the + * mount operation because the HotFile init code + * in hfs_recording_init() will lookup vnodes with + * VNOP_LOOKUP() which hangs vnodes off the mount + * (and if we were to fail, VFS is not prepared to + * clean that up at this point. Since HotFiles are + * optional, this is not a big deal. */ if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && - ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) { + (((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0) || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) ) { (void) hfs_recording_init(hfsmp); - } + } /* Force ACLs on HFS+ file systems. 
*/ if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) { vfs_setextendedsecurity(HFSTOVFS(hfsmp)); @@ -520,21 +524,6 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte /* After hfs_mountfs succeeds, we should have valid hfsmp */ hfsmp = VFSTOHFS(mp); - /* - * Check to see if the file system exists on CoreStorage. - * - * This must be done after examining the root folder's CP EA since - * hfs_vfs_root will create a vnode (which must not occur until after - * we've established the CP level of the FS). - */ - if (retval == 0) { - errno_t err; - /* Invoke ioctl that asks if the underlying device is Core Storage or not */ - err = VNOP_IOCTL(devvp, _DKIOCCORESTORAGE, NULL, 0, context); - if (err == 0) { - hfsmp->hfs_flags |= HFS_CS; - } - } } out: @@ -1008,10 +997,8 @@ static boolean_t hfs_has_elapsed (const struct timeval *a, } static void -hfs_syncer(void *arg0, void *unused) +hfs_syncer(void *arg0, __unused void *unused) { -#pragma unused(unused) - struct hfsmount *hfsmp = arg0; struct timeval now; @@ -1077,9 +1064,6 @@ hfs_syncer(void *arg0, void *unused) hfsmp->hfs_syncer_thread = current_thread(); - if (hfs_start_transaction(hfsmp) != 0) // so we hold off any new writes - goto out; - /* * We intentionally do a synchronous flush (of the journal or entire volume) here. * For journaled volumes, this means we wait until the metadata blocks are written @@ -1098,7 +1082,7 @@ hfs_syncer(void *arg0, void *unused) * user data to be written. 
*/ if (hfsmp->jnl) { - hfs_journal_flush(hfsmp, TRUE); + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); } else { hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel()); } @@ -1109,10 +1093,6 @@ hfs_syncer(void *arg0, void *unused) tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_issued_timestamp), hfsmp->hfs_mp->mnt_pending_write_size, 0); - hfs_end_transaction(hfsmp); - -out: - hfsmp->hfs_syncer_thread = NULL; hfs_syncer_lock(hfsmp); @@ -1192,9 +1172,16 @@ void hfs_scan_blocks (struct hfsmount *hfsmp) { */ (void) ScanUnmapBlocks(hfsmp); + (void) hfs_lock_mount (hfsmp); + hfsmp->scan_var &= ~HFS_ALLOCATOR_SCAN_INFLIGHT; hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_COMPLETED; + wakeup((caddr_t) &hfsmp->scan_var); + hfs_unlock_mount (hfsmp); + buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0); + hfs_systemfile_unlock(hfsmp, flags); + } static int hfs_root_unmounted_cleanly = 0; @@ -1409,7 +1396,10 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, if (device_features & DK_FEATURE_UNMAP) { hfsmp->hfs_flags |= HFS_UNMAP; } - } + + if(device_features & DK_FEATURE_BARRIER) + hfsmp->hfs_flags |= HFS_FEATURE_BARRIER; + } /* * See if the disk is a solid state device, too. 
We need this to decide what to do about @@ -1421,6 +1411,25 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, } } + /* See if the underlying device is Core Storage or not */ + dk_corestorage_info_t cs_info; + memset(&cs_info, 0, sizeof(dk_corestorage_info_t)); + if (VNOP_IOCTL(devvp, DKIOCCORESTORAGE, (caddr_t)&cs_info, 0, context) == 0) { + hfsmp->hfs_flags |= HFS_CS; + if (isroot && (cs_info.flags & DK_CORESTORAGE_PIN_YOUR_METADATA)) { + hfsmp->hfs_flags |= HFS_CS_METADATA_PIN; + } + if (isroot && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES)) { + hfsmp->hfs_flags |= HFS_CS_HOTFILE_PIN; + hfsmp->hfs_cs_hotfile_size = cs_info.hotfile_size; + } + if ((cs_info.flags & DK_CORESTORAGE_PIN_YOUR_SWAPFILE)) { + hfsmp->hfs_flags |= HFS_CS_SWAPFILE_PIN; + + mp->mnt_ioflags |= MNT_IOFLAGS_SWAPPIN_SUPPORTED; + mp->mnt_max_swappin_available = cs_info.swapfile_pinning; + } + } /* * Init the volume information structure @@ -1490,6 +1499,10 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA; } + // Reservations + rl_init(&hfsmp->hfs_reserved_ranges[0]); + rl_init(&hfsmp->hfs_reserved_ranges[1]); + // record the current time at which we're mounting this volume struct timeval tv; microtime(&tv); @@ -1572,6 +1585,12 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) * (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz); + /* + * Cooperative Fusion is not allowed on embedded HFS+ + * filesystems (HFS+ inside HFS standard wrapper) + */ + hfsmp->hfs_flags &= ~HFS_CS_METADATA_PIN; + /* * If the embedded volume doesn't start on a block * boundary, then switch the device to a 512-byte @@ -2003,7 +2022,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, hfs_generate_volume_notifications(hfsmp); if (ronly == 0) { - (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 
0); + (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); } FREE(mdbp, M_TEMP); return (0); @@ -2178,7 +2197,7 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) } } - retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); + retval = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); if (retval) { HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask; if (!force) @@ -2192,7 +2211,7 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) } if (hfsmp->jnl) { - hfs_journal_flush(hfsmp, FALSE); + hfs_flush(hfsmp, HFS_FLUSH_FULL); } /* @@ -2246,6 +2265,11 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) hfs_locks_destroy(hfsmp); hfs_delete_chash(hfsmp); hfs_idhash_destroy(hfsmp); + + assert(TAILQ_EMPTY(&hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS]) + && TAILQ_EMPTY(&hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS])); + assert(!hfsmp->lockedBlocks); + FREE(hfsmp, M_HFSMNT); return (0); @@ -2481,35 +2505,49 @@ hfs_sync_metadata(void *arg) struct hfs_sync_cargs { - kauth_cred_t cred; - struct proc *p; - int waitfor; - int error; + kauth_cred_t cred; + struct proc *p; + int waitfor; + int error; + int atime_only_syncs; + time_t sync_start_time; }; static int hfs_sync_callback(struct vnode *vp, void *cargs) { - struct cnode *cp; + struct cnode *cp = VTOC(vp); struct hfs_sync_cargs *args; int error; args = (struct hfs_sync_cargs *)cargs; - if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { return (VNODE_RETURNED); } - cp = VTOC(vp); - if ((cp->c_flag & C_MODIFIED) || - (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) || - vnode_hasdirtyblks(vp)) { - error = hfs_fsync(vp, args->waitfor, 0, args->p); + hfs_dirty_t dirty_state = hfs_is_dirty(cp); + + bool sync = dirty_state == HFS_DIRTY || vnode_hasdirtyblks(vp); + + if (!sync && dirty_state == HFS_DIRTY_ATIME + && args->atime_only_syncs < 256) { + // We only update if the atime changed 
more than 60s ago + if (args->sync_start_time - cp->c_attr.ca_atime > 60) { + sync = true; + ++args->atime_only_syncs; + } + } + + if (sync) { + error = hfs_fsync(vp, args->waitfor, 0, args->p); if (error) args->error = error; - } + } else if (cp->c_touch_acctime) + hfs_touchtimes(VTOHFS(vp), cp); + hfs_unlock(cp); return (VNODE_RETURNED); } @@ -2557,6 +2595,13 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) args.waitfor = waitfor; args.p = p; args.error = 0; + args.atime_only_syncs = 0; + + struct timeval tv; + microtime(&tv); + + args.sync_start_time = tv.tv_sec; + /* * hfs_sync_callback will be called for each vnode * hung off of this mount point... the vnode will be @@ -2586,11 +2631,7 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); cp = VTOC(btvp); - if (((cp->c_flag & C_MODIFIED) == 0) && - (cp->c_touch_acctime == 0) && - (cp->c_touch_chgtime == 0) && - (cp->c_touch_modtime == 0) && - vnode_hasdirtyblks(btvp) == 0) { + if (!hfs_is_dirty(cp) && !vnode_hasdirtyblks(btvp)) { hfs_unlock(VTOC(btvp)); continue; } @@ -2628,13 +2669,13 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) * Write back modified superblock. */ if (IsVCBDirty(vcb)) { - error = hfs_flushvolumeheader(hfsmp, waitfor, 0); + error = hfs_flushvolumeheader(hfsmp, waitfor == MNT_WAIT ? 
HFS_FVH_WAIT : 0); if (error) allerror = error; } if (hfsmp->jnl) { - hfs_journal_flush(hfsmp, FALSE); + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL); } hfs_lock_mount(hfsmp); @@ -3018,11 +3059,10 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, bcopy(&local_jib, buf_ptr, sizeof(local_jib)); if (buf_bwrite (jib_buf)) { return EIO; - } + } /* Force a flush track cache */ - (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); - + hfs_flush(hfsmp, HFS_FLUSH_CACHE); /* Now proceed with full volume sync */ hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context); @@ -3093,7 +3133,7 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); hfs_unlock_global (hfsmp); - hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1); + hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); { fsid_t fsid; @@ -3148,7 +3188,7 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, hfs_unlock_global (hfsmp); - hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1); + hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); { fsid_t fsid; @@ -3174,8 +3214,8 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, jnl_start = 0; jnl_size = 0; } else { - jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset; - jnl_size = (off_t)hfsmp->jnl_size; + jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, HFSTOVCB(hfsmp)->blockSize) + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset; + jnl_size = hfsmp->jnl_size; } if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) { @@ -3257,35 +3297,56 @@ hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_con error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0); if (error) - return (error); + return error; /* - * ADLs may need to have their origin state updated - * since build_path 
needs a valid parent. The same is true - * for hardlinked files as well. There isn't a race window here - * in re-acquiring the cnode lock since we aren't pulling any data - * out of the cnode; instead, we're going to the catalog. + * If the look-up was via the object ID (rather than the link ID), + * then we make sure there's a parent here. We can't leave this + * until hfs_vnop_getattr because if there's a problem getting the + * parent at that point, all the caller will do is call + * hfs_vfs_vget again and we'll end up in an infinite loop. */ - if ((VTOC(*vpp)->c_flag & C_HARDLINK) && - (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0)) { - cnode_t *cp = VTOC(*vpp); - struct cat_desc cdesc; - + + cnode_t *cp = VTOC(*vpp); + + if (ISSET(cp->c_flag, C_HARDLINK) && ino == cp->c_fileid) { + hfs_lock_always(cp, HFS_SHARED_LOCK); + if (!hfs_haslinkorigin(cp)) { - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_findname(hfsmp, (cnid_t)ino, &cdesc); - hfs_systemfile_unlock(hfsmp, lockflags); - if (error == 0) { - if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) && - (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) { - hfs_savelinkorigin(cp, cdesc.cd_parentcnid); + if (!hfs_lock_upgrade(cp)) + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + + if (cp->c_cnid == cp->c_fileid) { + /* + * Descriptor is stale, so we need to refresh it. We + * pick the first link. 
+ */ + cnid_t link_id; + + error = hfs_first_link(hfsmp, cp, &link_id); + + if (!error) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_findname(hfsmp, link_id, &cp->c_desc); + hfs_systemfile_unlock(hfsmp, lockflags); } - cat_releasedesc(&cdesc); + } else { + // We'll use whatever link the descriptor happens to have + error = 0; } + if (!error) + hfs_savelinkorigin(cp, cp->c_parentcnid); } + hfs_unlock(cp); + + if (error) { + vnode_put(*vpp); + *vpp = NULL; + } } - return (0); + + return error; } @@ -3626,7 +3687,7 @@ hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot) hfs_unlock_mount (hfsmp); if (hfsmp->jnl) { - hfs_flushvolumeheader(hfsmp, 0, 0); + hfs_flushvolumeheader(hfsmp, 0); } return (0); @@ -3737,7 +3798,8 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) * are always stored in-memory as "H+". */ int -hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) +hfs_flushvolumeheader(struct hfsmount *hfsmp, + hfs_flush_volume_header_options_t options) { ExtendedVCB *vcb = HFSTOVCB(hfsmp); struct filefork *fp; @@ -3746,20 +3808,25 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) struct buf *bp, *alt_bp; int i; daddr64_t priIDSector; - int critical; + bool critical = false; u_int16_t signature; u_int16_t hfsversion; daddr64_t avh_sector; + bool altflush = ISSET(options, HFS_FVH_WRITE_ALT); + + if (ISSET(options, HFS_FVH_FLUSH_IF_DIRTY) + && !hfs_header_needs_flushing(hfsmp)) { + return 0; + } if (hfsmp->hfs_flags & HFS_READ_ONLY) { return(0); } #if CONFIG_HFS_STD if (hfsmp->hfs_flags & HFS_STANDARD) { - return hfs_flushMDB(hfsmp, waitfor, altflush); + return hfs_flushMDB(hfsmp, ISSET(options, HFS_FVH_WAIT) ? 
MNT_WAIT : 0, altflush); } #endif - critical = altflush; priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size)); @@ -3939,7 +4006,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt); volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt); volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks); - volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks); + volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks + vcb->reclaimBlocks); volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation); volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz); volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz); @@ -3949,13 +4016,10 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) { bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)); - critical = 1; + critical = true; } - /* - * System files are only dirty when altflush is set. 
- */ - if (altflush == 0) { + if (!altflush && !ISSET(options, HFS_FVH_FLUSH_IF_DIRTY)) { goto done; } @@ -3972,6 +4036,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); FTOC(fp)->c_flag &= ~C_MODIFIED; + altflush = true; } /* Sync Catalog file meta data */ @@ -3987,6 +4052,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); FTOC(fp)->c_flag &= ~C_MODIFIED; + altflush = true; } /* Sync Allocation file meta data */ @@ -4002,6 +4068,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); FTOC(fp)->c_flag &= ~C_MODIFIED; + altflush = true; } /* Sync Attribute file meta data */ @@ -4013,7 +4080,10 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) volumeHeader->attributesFile.extents[i].blockCount = SWAP_BE32 (fp->ff_extents[i].blockCount); } - FTOC(fp)->c_flag &= ~C_MODIFIED; + if (ISSET(FTOC(fp)->c_flag, C_MODIFIED)) { + FTOC(fp)->c_flag &= ~C_MODIFIED; + altflush = true; + } volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size); volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); @@ -4033,9 +4103,13 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); FTOC(fp)->c_flag &= ~C_MODIFIED; + altflush = true; } } + if (altflush) + critical = true; + done: MarkVCBClean(hfsmp); hfs_unlock_mount (hfsmp); @@ -4150,8 
+4224,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) * may now appear to be beyond the device EOF. */ (void) VNOP_BWRITE(alt_bp); - (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, - NULL, FWRITE, NULL); + hfs_flush(hfsmp, HFS_FLUSH_CACHE); } } else if (alt_bp) { buf_brelse(alt_bp); @@ -4163,14 +4236,13 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) if (hfsmp->jnl) { journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); } else { - if (waitfor != MNT_WAIT) { + if (!ISSET(options, HFS_FVH_WAIT)) { buf_bawrite(bp); } else { retval = VNOP_BWRITE(bp); /* When critical data changes, flush the device cache */ - if (critical && (retval == 0)) { - (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, - NULL, FWRITE, NULL); + if (critical && (retval == 0)) { + hfs_flush(hfsmp, HFS_FLUSH_CACHE); } } } @@ -4505,11 +4577,10 @@ hfs_rename_volume(struct vnode *vp, const char *name, proc_t p) strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN)); volname_length = strlen ((const char*)vcb->vcbVN); -#define DKIOCCSSETLVNAME _IOW('d', 198, char[256]) /* Send the volume name down to CoreStorage if necessary */ error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED); if (error == 0) { - (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current()); + (void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current()); } error = 0; } @@ -4519,7 +4590,7 @@ hfs_rename_volume(struct vnode *vp, const char *name, proc_t p) if (error) MarkVCBDirty(vcb); - (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); + (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); } hfs_end_transaction(hfsmp); } diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c index ca011c652..1015fbd91 100644 --- a/bsd/hfs/hfs_vfsutils.c +++ b/bsd/hfs/hfs_vfsutils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -56,10 +56,6 @@ /* for parsing boot-args */ #include -#if CONFIG_PROTECT -#include -#endif - #include "hfs.h" #include "hfs_catalog.h" #include "hfs_dbg.h" @@ -67,6 +63,7 @@ #include "hfs_endian.h" #include "hfs_cnode.h" #include "hfs_fsctl.h" +#include "hfs_cprotect.h" #include "hfscommon/headers/FileMgrInternal.h" #include "hfscommon/headers/BTreesInternal.h" @@ -610,6 +607,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, } goto ErrorExit; } + hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp); hfs_unlock(hfsmp->hfs_extents_cp); @@ -800,13 +798,10 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, volname_length = strlen ((const char*)vcb->vcbVN); cat_releasedesc(&cndesc); -#define DKIOCCSSETLVNAME _IOW('d', 198, char[256]) - - /* Send the volume name down to CoreStorage if necessary */ retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED); if (retval == 0) { - (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current()); + (void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current()); } /* reset retval == 0. 
we don't care about errors in volname conversion */ @@ -826,23 +821,19 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, hfs_lock_mount (hfsmp); kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner); - /* Wait until it registers that it's got the appropriate locks */ - while ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) == 0) { - (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, (PDROP | PINOD), "hfs_scan_blocks", 0); - if (hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) { - break; - } - else { - hfs_lock_mount (hfsmp); - } + /* Wait until it registers that it's got the appropriate locks (or that it is finished) */ + while ((hfsmp->scan_var & (HFS_ALLOCATOR_SCAN_INFLIGHT|HFS_ALLOCATOR_SCAN_COMPLETED)) == 0) { + msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_scan_blocks", 0); } + hfs_unlock_mount(hfsmp); + thread_deallocate (allocator_scanner); /* mark the volume dirty (clear clean unmount bit) */ vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { - hfs_flushvolumeheader(hfsmp, TRUE, 0); + hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); } /* kHFSHasFolderCount is only supported/updated on HFSX volumes */ @@ -947,6 +938,9 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, MarkVCBDirty( vcb ); // mark VCB dirty so it will be written } + if (hfsmp->hfs_flags & HFS_CS_METADATA_PIN) { + hfs_pin_fs_metadata(hfsmp); + } /* * Distinguish 3 potential cases involving content protection: * 1. mount point bit set; vcbAtrb does not support it. Fail. @@ -975,17 +969,8 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, #if CONFIG_PROTECT /* Get the EAs as needed. 
*/ int cperr = 0; - uint16_t majorversion; - uint16_t minorversion; - uint64_t flags; - uint8_t cryptogen = 0; struct cp_root_xattr *xattr = NULL; MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK); - if (xattr == NULL) { - retval = ENOMEM; - goto ErrorExit; - } - bzero (xattr, sizeof(struct cp_root_xattr)); /* go get the EA to get the version information */ cperr = cp_getrootxattr (hfsmp, xattr); @@ -997,56 +982,54 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, if (cperr == 0) { /* Have to run a valid CP version. */ - if ((xattr->major_version < CP_PREV_MAJOR_VERS) || (xattr->major_version > CP_NEW_MAJOR_VERS)) { + if (!cp_is_supported_version(xattr->major_version)) { cperr = EINVAL; } } else if (cperr == ENOATTR) { - printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS); + printf("No root EA set, creating new EA with new version: %d\n", CP_CURRENT_VERS); bzero(xattr, sizeof(struct cp_root_xattr)); - xattr->major_version = CP_NEW_MAJOR_VERS; + xattr->major_version = CP_CURRENT_VERS; xattr->minor_version = CP_MINOR_VERS; cperr = cp_setrootxattr (hfsmp, xattr); } - majorversion = xattr->major_version; - minorversion = xattr->minor_version; - flags = xattr->flags; - if (xattr->flags & CP_ROOT_CRYPTOG1) { - cryptogen = 1; - } - if (xattr) { + if (cperr) { FREE(xattr, M_TEMP); + retval = EPERM; + goto ErrorExit; } - /* Recheck for good status */ - if (cperr == 0) { - /* If we got here, then the CP version is valid. Set it in the mount point */ - hfsmp->hfs_running_cp_major_vers = majorversion; - printf("Running with CP root xattr: %d.%d\n", majorversion, minorversion); - hfsmp->cproot_flags = flags; - hfsmp->cp_crypto_generation = cryptogen; + /* If we got here, then the CP version is valid. 
Set it in the mount point */ + hfsmp->hfs_running_cp_major_vers = xattr->major_version; + printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version); + hfsmp->cproot_flags = xattr->flags; + hfsmp->cp_crypto_generation = ISSET(xattr->flags, CP_ROOT_CRYPTOG1) ? 1 : 0; - /* - * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree. - * Ensure that the boot-arg's value is valid for FILES (not directories), - * since only files are actually protected for now. - */ - - PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class)); - - if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) { - PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class)); - } - - if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) { - hfsmp->default_cp_class = PROTECTION_CLASS_C; - } + FREE(xattr, M_TEMP); + + /* + * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree. + * Ensure that the boot-arg's value is valid for FILES (not directories), + * since only files are actually protected for now. + */ + + PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class)); + + if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) { + PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class)); } - else { - retval = EPERM; - goto ErrorExit; + +#if HFS_TMPDBG +#if !SECURE_KERNEL + PE_parse_boot_argn("aks_verbose", &hfsmp->hfs_cp_verbose, sizeof(hfsmp->hfs_cp_verbose)); +#endif +#endif + + if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) { + hfsmp->default_cp_class = PROTECTION_CLASS_C; } + #else /* If CONFIG_PROTECT not built, ignore CP */ vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT); @@ -1097,8 +1080,30 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, /* * Allow hot file clustering if conditions allow. 
*/ - if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && - ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) { + if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && !(hfsmp->hfs_flags & HFS_READ_ONLY) && + ((hfsmp->hfs_flags & HFS_SSD) == 0 || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN))) { + // + // Wait until the bitmap scan completes before we initializes the + // hotfile area so that we do not run into any issues with the + // bitmap being read while hotfiles is initializing itself. On + // some older/slower machines, without this interlock, the bitmap + // would sometimes get corrupted at boot time. + // + hfs_lock_mount(hfsmp); + while(!(hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED)) { + (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_hotfile_bitmap_interlock", 0); + } + hfs_unlock_mount(hfsmp); + + /* + * Note: at this point we are not allowed to fail the + * mount operation because the HotFile init code + * in hfs_recording_init() will lookup vnodes with + * VNOP_LOOKUP() which hangs vnodes off the mount + * (and if we were to fail, VFS is not prepared to + * clean that up at this point. Since HotFiles are + * optional, this is not a big deal. 
+ */ (void) hfs_recording_init(hfsmp); } @@ -1123,6 +1128,53 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, return (retval); } +static int +_pin_metafile(struct hfsmount *hfsmp, vnode_t vp) +{ + int err; + + err = hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + if (err == 0) { + err = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, NULL, vfs_context_kernel()); + hfs_unlock(VTOC(vp)); + } + + return err; +} + +void +hfs_pin_fs_metadata(struct hfsmount *hfsmp) +{ + ExtendedVCB *vcb; + int err; + + vcb = HFSTOVCB(hfsmp); + + err = _pin_metafile(hfsmp, hfsmp->hfs_extents_vp); + if (err != 0) { + printf("hfs: failed to pin extents overflow file %d\n", err); + } + err = _pin_metafile(hfsmp, hfsmp->hfs_catalog_vp); + if (err != 0) { + printf("hfs: failed to pin catalog file %d\n", err); + } + err = _pin_metafile(hfsmp, hfsmp->hfs_allocation_vp); + if (err != 0) { + printf("hfs: failed to pin bitmap file %d\n", err); + } + err = _pin_metafile(hfsmp, hfsmp->hfs_attribute_vp); + if (err != 0) { + printf("hfs: failed to pin extended attr file %d\n", err); + } + + hfs_pin_block_range(hfsmp, HFS_PIN_IT, 0, 1, vfs_context_kernel()); + hfs_pin_block_range(hfsmp, HFS_PIN_IT, vcb->totalBlocks-1, 1, vfs_context_kernel()); + + if (vfs_flags(hfsmp->hfs_mp) & MNT_JOURNALED) { + // and hey, if we've got a journal, let's pin that too! + hfs_pin_block_range(hfsmp, HFS_PIN_IT, hfsmp->jnl_start, howmany(hfsmp->jnl_size, vcb->blockSize), vfs_context_kernel()); + } +} /* * ReleaseMetaFileVNode @@ -1363,6 +1415,19 @@ void hfs_unlock_mount (struct hfsmount *hfsmp) { /* * Lock HFS system file(s). + * + * This function accepts a @flags parameter which indicates which + * system file locks are required. The value it returns should be + * used in a subsequent call to hfs_systemfile_unlock. The caller + * should treat this value as opaque; it may or may not have a + * relation to the @flags field that is passed in. 
The *only* + * guarantee that we make is that a value of zero means that no locks + * were taken and that there is no need to call hfs_systemfile_unlock + * (although it is harmless to do so). Recursion is supported but + * care must still be taken to ensure correct lock ordering. Note + * that requests for certain locks may cause other locks to also be + * taken, including locks that are not possible to ask for via the + * @flags parameter. */ int hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype) @@ -1371,19 +1436,20 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file */ if (flags & SFL_CATALOG) { + if (hfsmp->hfs_catalog_cp + && hfsmp->hfs_catalog_cp->c_lockowner != current_thread()) { #ifdef HFS_CHECK_LOCK_ORDER - if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) { - panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)"); - } - if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { - panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)"); - } - if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { - panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)"); - } + if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)"); + } + if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)"); + } + if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)"); + } #endif /* HFS_CHECK_LOCK_ORDER */ - if (hfsmp->hfs_catalog_cp) { (void) hfs_lock(hfsmp->hfs_catalog_cp, 
locktype, HFS_LOCK_DEFAULT); /* * When the catalog file has overflow extents then @@ -1401,16 +1467,17 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp } if (flags & SFL_ATTRIBUTE) { + if (hfsmp->hfs_attribute_cp + && hfsmp->hfs_attribute_cp->c_lockowner != current_thread()) { #ifdef HFS_CHECK_LOCK_ORDER - if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { - panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)"); - } - if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { - panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)"); - } + if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)"); + } + if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)"); + } #endif /* HFS_CHECK_LOCK_ORDER */ - - if (hfsmp->hfs_attribute_cp) { + (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT); /* * When the attribute file has overflow extents then @@ -1428,13 +1495,14 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp } if (flags & SFL_STARTUP) { + if (hfsmp->hfs_startup_cp + && hfsmp->hfs_startup_cp->c_lockowner != current_thread()) { #ifdef HFS_CHECK_LOCK_ORDER - if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { - panic("hfs_systemfile_lock: bad lock order (Extents before Startup)"); - } + if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Extents before Startup)"); + } #endif /* HFS_CHECK_LOCK_ORDER */ - if (hfsmp->hfs_startup_cp) { (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT); /* * When the startup file has overflow extents then @@ -1508,6 
+1576,9 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp void hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags) { + if (!flags) + return; + struct timeval tv; u_int32_t lastfsync; int numOfLockedBuffs; @@ -1739,7 +1810,7 @@ hfs_remove_orphans(struct hfsmount * hfsmp) cat_cookie_t cookie; int catlock = 0; int catreserve = 0; - int started_tr = 0; + bool started_tr = false; int lockflags; int result; int orphaned_files = 0; @@ -1798,159 +1869,177 @@ hfs_remove_orphans(struct hfsmount * hfsmp) * where xxx is the file's cnid in decimal. * */ - if (bcmp(tempname, filename, namelen) == 0) { - struct filefork dfork; - struct filefork rfork; - struct cnode cnode; - int mode = 0; - - bzero(&dfork, sizeof(dfork)); - bzero(&rfork, sizeof(rfork)); - bzero(&cnode, sizeof(cnode)); - - /* Delete any attributes, ignore errors */ - (void) hfs_removeallattr(hfsmp, filerec.fileID); - - if (hfs_start_transaction(hfsmp) != 0) { - printf("hfs_remove_orphans: failed to start transaction\n"); - goto exit; - } - started_tr = 1; - - /* - * Reserve some space in the Catalog file. 
- */ - if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) { - printf("hfs_remove_orphans: cat_preflight failed\n"); - goto exit; - } - catreserve = 1; + if (bcmp(tempname, filename, namelen) != 0) + continue; - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - catlock = 1; + struct filefork dfork; + struct filefork rfork; + struct cnode cnode; + int mode = 0; - /* Build a fake cnode */ - cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr, - &dfork.ff_data, &rfork.ff_data); - cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; - cnode.c_desc.cd_nameptr = (const u_int8_t *)filename; - cnode.c_desc.cd_namelen = namelen; - cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid; - cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks; - - /* Position iterator at previous entry */ - if (BTIterateRecord(fcb, kBTreePrevRecord, iterator, - NULL, NULL) != 0) { - break; - } + bzero(&dfork, sizeof(dfork)); + bzero(&rfork, sizeof(rfork)); + bzero(&cnode, sizeof(cnode)); + + if (hfs_start_transaction(hfsmp) != 0) { + printf("hfs_remove_orphans: failed to start transaction\n"); + goto exit; + } + started_tr = true; + + /* + * Reserve some space in the Catalog file. 
+ */ + if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) { + printf("hfs_remove_orphans: cat_preflight failed\n"); + goto exit; + } + catreserve = 1; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + catlock = 1; + + /* Build a fake cnode */ + cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr, + &dfork.ff_data, &rfork.ff_data); + cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + cnode.c_desc.cd_nameptr = (const u_int8_t *)filename; + cnode.c_desc.cd_namelen = namelen; + cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid; + cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks; + + /* Position iterator at previous entry */ + if (BTIterateRecord(fcb, kBTreePrevRecord, iterator, + NULL, NULL) != 0) { + break; + } - /* Truncate the file to zero (both forks) */ - if (dfork.ff_blocks > 0) { - u_int64_t fsize; + /* Truncate the file to zero (both forks) */ + if (dfork.ff_blocks > 0) { + u_int64_t fsize; - dfork.ff_cp = &cnode; - cnode.c_datafork = &dfork; - cnode.c_rsrcfork = NULL; - fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize; - while (fsize > 0) { - if (fsize > HFS_BIGFILE_SIZE) { - fsize -= HFS_BIGFILE_SIZE; - } else { - fsize = 0; - } - - if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0, - cnode.c_attr.ca_fileid, false) != 0) { - printf("hfs: error truncating data fork!\n"); - break; - } - - // - // if we're iteratively truncating this file down, - // then end the transaction and start a new one so - // that no one transaction gets too big. - // - if (fsize > 0 && started_tr) { - /* Drop system file locks before starting - * another transaction to preserve lock order. 
- */ - hfs_systemfile_unlock(hfsmp, lockflags); - catlock = 0; - hfs_end_transaction(hfsmp); - - if (hfs_start_transaction(hfsmp) != 0) { - started_tr = 0; - break; - } - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - catlock = 1; - } + dfork.ff_cp = &cnode; + cnode.c_datafork = &dfork; + cnode.c_rsrcfork = NULL; + fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize; + while (fsize > 0) { + if (fsize > HFS_BIGFILE_SIZE) { + fsize -= HFS_BIGFILE_SIZE; + } else { + fsize = 0; } - } - if (rfork.ff_blocks > 0) { - rfork.ff_cp = &cnode; - cnode.c_datafork = NULL; - cnode.c_rsrcfork = &rfork; - if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) { - printf("hfs: error truncating rsrc fork!\n"); + if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0, + cnode.c_attr.ca_fileid, false) != 0) { + printf("hfs: error truncating data fork!\n"); break; } + + // + // if we're iteratively truncating this file down, + // then end the transaction and start a new one so + // that no one transaction gets too big. + // + if (fsize > 0) { + /* Drop system file locks before starting + * another transaction to preserve lock order. 
+ */ + hfs_systemfile_unlock(hfsmp, lockflags); + catlock = 0; + hfs_end_transaction(hfsmp); + + if (hfs_start_transaction(hfsmp) != 0) { + started_tr = false; + goto exit; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + catlock = 1; + } } + } - /* Remove the file or folder record from the Catalog */ - if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) { - printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid); - hfs_systemfile_unlock(hfsmp, lockflags); - catlock = 0; - hfs_volupdate(hfsmp, VOL_UPDATE, 0); + if (rfork.ff_blocks > 0) { + rfork.ff_cp = &cnode; + cnode.c_datafork = NULL; + cnode.c_rsrcfork = &rfork; + if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) { + printf("hfs: error truncating rsrc fork!\n"); break; } - - mode = cnode.c_attr.ca_mode & S_IFMT; + } - if (mode == S_IFDIR) { - orphaned_dirs++; - } - else { - orphaned_files++; - } + // Deal with extended attributes + if (ISSET(cnode.c_attr.ca_recflags, kHFSHasAttributesMask)) { + // hfs_removeallattr uses its own transactions + hfs_systemfile_unlock(hfsmp, lockflags); + catlock = false; + hfs_end_transaction(hfsmp); - /* Update parent and volume counts */ - hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; - if (mode == S_IFDIR) { - DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + hfs_removeallattr(hfsmp, cnode.c_attr.ca_fileid, &started_tr); + + if (!started_tr) { + if (hfs_start_transaction(hfsmp) != 0) { + printf("hfs_remove_orphans: failed to start transaction\n"); + goto exit; + } + started_tr = true; } - (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], - &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + catlock = 1; + } - /* Drop locks and end the transaction */ + /* Remove the file or 
folder record from the Catalog */ + if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) { + printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid); hfs_systemfile_unlock(hfsmp, lockflags); - cat_postflight(hfsmp, &cookie, p); - catlock = catreserve = 0; + catlock = 0; + hfs_volupdate(hfsmp, VOL_UPDATE, 0); + break; + } - /* - Now that Catalog is unlocked, update the volume info, making - sure to differentiate between files and directories - */ - if (mode == S_IFDIR) { - hfs_volupdate(hfsmp, VOL_RMDIR, 0); - } - else{ - hfs_volupdate(hfsmp, VOL_RMFILE, 0); - } + mode = cnode.c_attr.ca_mode & S_IFMT; - if (started_tr) { - hfs_end_transaction(hfsmp); - started_tr = 0; - } + if (mode == S_IFDIR) { + orphaned_dirs++; + } + else { + orphaned_files++; + } + + /* Update parent and volume counts */ + hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; + if (mode == S_IFDIR) { + DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + } - } /* end if */ + (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], + &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); + + /* Drop locks and end the transaction */ + hfs_systemfile_unlock(hfsmp, lockflags); + cat_postflight(hfsmp, &cookie, p); + catlock = catreserve = 0; + + /* + Now that Catalog is unlocked, update the volume info, making + sure to differentiate between files and directories + */ + if (mode == S_IFDIR) { + hfs_volupdate(hfsmp, VOL_RMDIR, 0); + } + else{ + hfs_volupdate(hfsmp, VOL_RMFILE, 0); + } + + hfs_end_transaction(hfsmp); + started_tr = false; } /* end for */ + +exit: + if (orphaned_files > 0 || orphaned_dirs > 0) printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs); -exit: if (catlock) { hfs_systemfile_unlock(hfsmp, lockflags); } @@ -2029,7 +2118,7 @@ static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks) return true; } - uint32_t loanedblks = hfsmp->loanedBlocks; + 
uint32_t loanedblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks; uint32_t bandblks = hfsmp->hfs_sparsebandblks; uint64_t maxblks = hfsmp->hfs_backingfs_maxblocks; @@ -2097,7 +2186,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) */ freeblks = hfsmp->freeBlocks; rsrvblks = hfsmp->reserveBlocks; - loanblks = hfsmp->loanedBlocks; + loanblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks; if (wantreserve) { if (freeblks > rsrvblks) freeblks -= rsrvblks; @@ -2119,20 +2208,6 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) freeblks = MIN(freeblks, vfreeblks); #endif /* HFS_SPARSE_DEV */ - if (hfsmp->hfs_flags & HFS_CS) { - uint64_t cs_free_bytes; - uint64_t cs_free_blks; - if (VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSGETFREEBYTES, - (caddr_t)&cs_free_bytes, 0, vfs_context_kernel()) == 0) { - cs_free_blks = cs_free_bytes / hfsmp->blockSize; - if (cs_free_blks > loanblks) - cs_free_blks -= loanblks; - else - cs_free_blks = 0; - freeblks = MIN(cs_free_blks, freeblks); - } - } - return (freeblks); } @@ -3051,7 +3126,7 @@ hfs_metadatazone_init(struct hfsmount *hfsmp, int disable) * Add the existing size of the Extents Overflow B-tree. * (It rarely grows, so don't bother reserving additional room for it.) */ - zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize; + zonesize += hfs_blk_to_bytes(hfsmp->hfs_extents_cp->c_datafork->ff_blocks, hfsmp->blockSize); /* * If there is an Attributes B-tree, leave room for 11 clumps worth. @@ -3166,7 +3241,11 @@ hfs_metadatazone_init(struct hfsmount *hfsmp, int disable) filesize += temp / 3; hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize; - hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize; + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + hfsmp->hfs_hotfile_maxblks = (uint32_t) (hfsmp->hfs_cs_hotfile_size / HFSTOVCB(hfsmp)->blockSize); + } else { + hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize; + } /* Convert to allocation blocks. 
*/ blk = zonesize / vcb->blockSize; @@ -3186,11 +3265,12 @@ hfs_metadatazone_init(struct hfsmount *hfsmp, int disable) hfsmp->hfs_hotfile_end = 0; hfsmp->hfs_hotfile_freeblks = 0; } -#if 0 - printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end); - printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end); - printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks); +#if DEBUG + printf("hfs:%s: metadata zone is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end); + printf("hfs:%s: hot file band is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end); + printf("hfs:%s: hot file band free blocks = %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_freeblks); #endif + hfsmp->hfs_flags |= HFS_METADATA_ZONE; } @@ -3202,19 +3282,33 @@ hfs_hotfile_freeblocks(struct hfsmount *hfsmp) int lockflags; int freeblocks; + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // + // This is only used at initialization time and on an ssd + // we'll get the real info from the hotfile btree user + // info + // + return 0; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); freeblocks = MetaZoneFreeBlocks(vcb); hfs_systemfile_unlock(hfsmp, lockflags); /* Minus Extents overflow file reserve. */ - freeblocks -= - hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks; + if ((uint32_t)hfsmp->hfs_overflow_maxblks >= VTOF(hfsmp->hfs_extents_vp)->ff_blocks) { + freeblocks -= hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks; + } + /* Minus catalog file reserve. 
*/ - freeblocks -= - hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks; + if ((uint32_t)hfsmp->hfs_catalog_maxblks >= VTOF(hfsmp->hfs_catalog_vp)->ff_blocks) { + freeblocks -= hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks; + } + if (freeblocks < 0) freeblocks = 0; + // printf("hfs: hotfile_freeblocks: MIN(%d, %d) = %d\n", freeblocks, hfsmp->hfs_hotfile_maxblks, MIN(freeblocks, hfsmp->hfs_hotfile_maxblks)); return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks); } @@ -3347,21 +3441,46 @@ hfs_start_transaction(struct hfsmount *hfsmp) } #endif /* HFS_CHECK_LOCK_ORDER */ - if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) { - /* - * The global lock should be held shared if journal is - * active to prevent disabling. If we're not the owner - * of the journal lock, verify that we're not already - * holding the global lock exclusive before moving on. - */ - if (hfsmp->hfs_global_lockowner == thread) { - ret = EBUSY; - goto out; +again: + + if (hfsmp->jnl) { + if (journal_owner(hfsmp->jnl) != thread) { + /* + * The global lock should be held shared if journal is + * active to prevent disabling. If we're not the owner + * of the journal lock, verify that we're not already + * holding the global lock exclusive before moving on. 
+ */ + if (hfsmp->hfs_global_lockowner == thread) { + ret = EBUSY; + goto out; + } + + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + + // Things could have changed + if (!hfsmp->jnl) { + hfs_unlock_global(hfsmp); + goto again; + } + + OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); + unlock_on_err = 1; } + } else { + // No journal + if (hfsmp->hfs_global_lockowner != thread) { + hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK); + + // Things could have changed + if (hfsmp->jnl) { + hfs_unlock_global(hfsmp); + goto again; + } - hfs_lock_global (hfsmp, HFS_SHARED_LOCK); - OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); - unlock_on_err = 1; + OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); + unlock_on_err = 1; + } } /* If a downgrade to read-only mount is in progress, no other @@ -3376,13 +3495,13 @@ hfs_start_transaction(struct hfsmount *hfsmp) if (hfsmp->jnl) { ret = journal_start_transaction(hfsmp->jnl); - if (ret == 0) { - OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting); - } } else { ret = 0; } + if (ret == 0) + ++hfsmp->hfs_transaction_nesting; + out: if (ret != 0 && unlock_on_err) { hfs_unlock_global (hfsmp); @@ -3395,12 +3514,15 @@ hfs_start_transaction(struct hfsmount *hfsmp) int hfs_end_transaction(struct hfsmount *hfsmp) { - int need_unlock=0, ret; + int ret; + + assert(!hfsmp->jnl || journal_owner(hfsmp->jnl) == current_thread()); + assert(hfsmp->hfs_transaction_nesting > 0); - if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread() - && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) { - need_unlock = 1; - } + if (hfsmp->jnl && hfsmp->hfs_transaction_nesting == 1) + hfs_flushvolumeheader(hfsmp, HFS_FVH_FLUSH_IF_DIRTY); + + bool need_unlock = !--hfsmp->hfs_transaction_nesting; if (hfsmp->jnl) { ret = journal_end_transaction(hfsmp->jnl); @@ -3440,49 +3562,105 @@ hfs_journal_unlock(struct hfsmount *hfsmp) hfs_unlock_global (hfsmp); } -/* - * Flush the contents of the journal to the disk. 
+/* + * Flush the contents of the journal to the disk. * - * Input: - * wait_for_IO - - * If TRUE, wait to write in-memory journal to the disk - * consistently, and also wait to write all asynchronous - * metadata blocks to its corresponding locations - * consistently on the disk. This means that the journal - * is empty at this point and does not contain any - * transactions. This is overkill in normal scenarios - * but is useful whenever the metadata blocks are required - * to be consistent on-disk instead of just the journal - * being consistent; like before live verification - * and live volume resizing. + * - HFS_FLUSH_JOURNAL + * Wait to write in-memory journal to the disk consistently. + * This means that the journal still contains uncommitted + * transactions and the file system metadata blocks in + * the journal transactions might be written asynchronously + * to the disk. But there is no guarantee that they are + * written to the disk before returning to the caller. + * Note that this option is sufficient for file system + * data integrity as it guarantees consistent journal + * content on the disk. + * + * - HFS_FLUSH_JOURNAL_META + * Wait to write in-memory journal to the disk + * consistently, and also wait to write all asynchronous + * metadata blocks to its corresponding locations + * consistently on the disk. This is overkill in normal + * scenarios but is useful whenever the metadata blocks + * are required to be consistent on-disk instead of + * just the journalbeing consistent; like before live + * verification and live volume resizing. The update of the + * metadata doesn't include a barrier of track cache flush. + * + * - HFS_FLUSH_FULL + * HFS_FLUSH_JOURNAL + force a track cache flush to media + * + * - HFS_FLUSH_CACHE + * Force a track cache flush to media. + * + * - HFS_FLUSH_BARRIER + * Barrier-only flush to ensure write order * - * If FALSE, only wait to write in-memory journal to the - * disk consistently. 
This means that the journal still - * contains uncommitted transactions and the file system - * metadata blocks in the journal transactions might be - * written asynchronously to the disk. But there is no - * guarantee that they are written to the disk before - * returning to the caller. Note that this option is - * sufficient for file system data integrity as it - * guarantees consistent journal content on the disk. */ -int -hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO) +errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode) { - int ret; + errno_t error = 0; + journal_flush_options_t options = 0; + dk_synchronize_t sync_req = { .options = DK_SYNCHRONIZE_OPTION_BARRIER }; - /* Only peek at hfsmp->jnl while holding the global lock */ - hfs_lock_global (hfsmp, HFS_SHARED_LOCK); - if (hfsmp->jnl) { - ret = journal_flush(hfsmp->jnl, wait_for_IO); - } else { - ret = 0; + switch (mode) { + case HFS_FLUSH_JOURNAL_META: + // wait for journal, metadata blocks and previous async flush to finish + SET(options, JOURNAL_WAIT_FOR_IO); + + // no break + + case HFS_FLUSH_JOURNAL: + case HFS_FLUSH_JOURNAL_BARRIER: + case HFS_FLUSH_FULL: + + if (mode == HFS_FLUSH_JOURNAL_BARRIER && + !(hfsmp->hfs_flags & HFS_FEATURE_BARRIER)) + mode = HFS_FLUSH_FULL; + + if (mode == HFS_FLUSH_FULL) + SET(options, JOURNAL_FLUSH_FULL); + + /* Only peek at hfsmp->jnl while holding the global lock */ + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + + if (hfsmp->jnl) + error = journal_flush(hfsmp->jnl, options); + + hfs_unlock_global (hfsmp); + + /* + * This may result in a double barrier as + * journal_flush may have issued a barrier itself + */ + if (mode == HFS_FLUSH_JOURNAL_BARRIER) + error = VNOP_IOCTL(hfsmp->hfs_devvp, + DKIOCSYNCHRONIZE, (caddr_t)&sync_req, + FWRITE, vfs_context_kernel()); + + break; + + case HFS_FLUSH_CACHE: + // Do a full sync + sync_req.options = 0; + + // no break + + case HFS_FLUSH_BARRIER: + // If barrier only flush doesn't support, fall back to use 
full flush. + if (!(hfsmp->hfs_flags & HFS_FEATURE_BARRIER)) + sync_req.options = 0; + + error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZE, (caddr_t)&sync_req, + FWRITE, vfs_context_kernel()); + break; + + default: + error = EINVAL; } - hfs_unlock_global (hfsmp); - - return ret; -} + return error; +} /* * hfs_erase_unused_nodes @@ -3679,8 +3857,8 @@ hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid) *docid = extinfo->document_id++; // mark the root cnode dirty - cp->c_flag |= C_MODIFIED | C_FORCEUPDATE; - (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); + cp->c_flag |= C_MODIFIED; + hfs_update(cp->c_vp, 0); hfs_systemfile_unlock (hfsmp, lockflags); (void) hfs_end_transaction(hfsmp); @@ -3799,7 +3977,7 @@ int hfs_freeze(struct hfsmount *hfsmp) might have the global lock at the moment and also so we can flush the journal. */ hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK); - journal_flush(hfsmp->jnl, TRUE); + journal_flush(hfsmp->jnl, JOURNAL_WAIT_FOR_IO); hfs_unlock_global(hfsmp); // don't need to iterate on all vnodes, we just need to diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c index 0c327a792..dac4b088f 100644 --- a/bsd/hfs/hfs_vnops.c +++ b/bsd/hfs/hfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include @@ -71,6 +70,8 @@ #include "hfs_quota.h" #include "hfs_endian.h" #include "hfs_kdebug.h" +#include "hfs_cprotect.h" + #include "hfscommon/headers/BTreesInternal.h" #include "hfscommon/headers/FileMgrInternal.h" @@ -107,10 +108,6 @@ static int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp, hfs_move_data_options_t options); static int hfs_move_fork(filefork_t *srcfork, cnode_t *src, filefork_t *dstfork, cnode_t *dst); -static const struct cat_fork * -hfs_prepare_fork_for_update(const filefork_t *pfork, - struct cat_fork *fork_buf, - uint32_t block_size); #if HFS_COMPRESSION static int hfs_move_compressed(cnode_t *from_vp, cnode_t *to_vp); @@ -501,7 +498,7 @@ clear_tombstone_docid(struct doc_tombstone *ut, __unused struct hfsmount *hfsmp // printf("clearing doc-id from ino %d\n", ocp->c_desc.cd_cnid); ofip->document_id = 0; ocp->c_bsdflags &= ~UF_TRACKED; - ocp->c_flag |= C_MODIFIED | C_FORCEUPDATE; // mark it dirty + ocp->c_flag |= C_MODIFIED; /* cat_update(hfsmp, &ocp->c_desc, &ocp->c_attr, NULL, NULL); */ } @@ -693,6 +690,38 @@ hfs_vnop_open(struct vnop_open_args *ap) if (hfs_is_journal_file(hfsmp, cp)) return (EPERM); + bool have_lock = false; + +#if CONFIG_PROTECT + if (ISSET(ap->a_mode, FENCRYPTED) && cp->c_cpentry && vnode_isreg(vp)) { + bool have_trunc_lock = false; + + + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + if (have_trunc_lock) + hfs_unlock_truncate(cp, 0); + return error; + } + + have_lock = true; + + if (cp->c_cpentry->cp_raw_open_count + 1 + < cp->c_cpentry->cp_raw_open_count) { + // Overflow; too many raw opens on this file + hfs_unlock(cp); + if (have_trunc_lock) + hfs_unlock_truncate(cp, 0); + return ENFILE; + } + + + if (have_trunc_lock) + hfs_unlock_truncate(cp, 0); + + ++cp->c_cpentry->cp_raw_open_count; + } +#endif + if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (hfsmp->jnl == NULL) || #if 
NAMEDSTREAMS @@ -700,10 +729,16 @@ hfs_vnop_open(struct vnop_open_args *ap) #else !vnode_isreg(vp) || vnode_isinuse(vp, 0)) { #endif + +#if CONFIG_PROTECT + if (have_lock) + hfs_unlock(cp); +#endif + return (0); } - if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) + if (!have_lock && (error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); #if QUOTA @@ -778,6 +813,13 @@ hfs_vnop_close(ap) cp = VTOC(vp); hfsmp = VTOHFS(vp); +#if CONFIG_PROTECT + if (cp->c_cpentry && ISSET(ap->a_fflag, FENCRYPTED) && vnode_isreg(vp)) { + assert(cp->c_cpentry->cp_raw_open_count > 0); + --cp->c_cpentry->cp_raw_open_count; + } +#endif + /* * If the rsrc fork is a named stream, it can cause the data fork to * stay around, preventing de-allocation of these blocks. @@ -1008,10 +1050,13 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); hfs_touchtimes(hfsmp, cp); - } - else { - if ((error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) - return (error); + + // downgrade to a shared lock since that's all we need from here on out + cp->c_lockowner = HFS_SHARED_OWNER; + lck_rw_lock_exclusive_to_shared(&cp->c_rwlock); + + } else if ((error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { + return (error); } if (v_type == VDIR) { @@ -1172,19 +1217,9 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) VATTR_SET_SUPPORTED(vap, va_acl); } } - if (VATTR_IS_ACTIVE(vap, va_access_time)) { - /* Access times are lazily updated, get current time if needed */ - if (cp->c_touch_acctime) { - struct timeval tv; - - microtime(&tv); - vap->va_access_time.tv_sec = tv.tv_sec; - } else { - vap->va_access_time.tv_sec = cp->c_atime; - } - vap->va_access_time.tv_nsec = 0; - VATTR_SET_SUPPORTED(vap, va_access_time); - } + + vap->va_access_time.tv_sec = cp->c_atime; + vap->va_access_time.tv_nsec = 0; vap->va_create_time.tv_sec = cp->c_itime; vap->va_create_time.tv_nsec = 0; 
vap->va_modify_time.tv_sec = cp->c_mtime; @@ -1233,7 +1268,7 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) */ if (cp->c_flag & C_HARDLINK) { vap->va_linkid = (u_int64_t)hfs_currentcnid(cp); - vap->va_parentid = (u_int64_t)hfs_currentparent(cp); + vap->va_parentid = (u_int64_t)hfs_currentparent(cp, /* have_lock: */ true); } else { vap->va_linkid = (u_int64_t)cp->c_cnid; vap->va_parentid = (u_int64_t)cp->c_parentcnid; @@ -1256,7 +1291,6 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) } } else vap->va_data_size = data_size; -// vap->va_supported |= VNODE_ATTR_va_data_size; VATTR_SET_SUPPORTED(vap, va_data_size); } #else @@ -1266,7 +1300,7 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) #if CONFIG_PROTECT if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) { - vap->va_dataprotect_class = cp->c_cpentry ? cp->c_cpentry->cp_pclass : 0; + vap->va_dataprotect_class = cp->c_cpentry ? CP_CLASS(cp->c_cpentry->cp_pclass) : 0; VATTR_SET_SUPPORTED(vap, va_dataprotect_class); } #endif @@ -1288,7 +1322,8 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) } /* Mark them all at once instead of individual VATTR_SET_SUPPORTED calls. */ - vap->va_supported |= VNODE_ATTR_va_create_time | VNODE_ATTR_va_modify_time | + vap->va_supported |= VNODE_ATTR_va_access_time | + VNODE_ATTR_va_create_time | VNODE_ATTR_va_modify_time | VNODE_ATTR_va_change_time| VNODE_ATTR_va_backup_time | VNODE_ATTR_va_iosize | VNODE_ATTR_va_uid | VNODE_ATTR_va_gid | VNODE_ATTR_va_mode | @@ -1421,8 +1456,15 @@ hfs_vnop_setattr(ap) } #if CONFIG_PROTECT - if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { - return (error); + /* + * All metadata changes should be allowed except a size-changing setattr, which + * has effects on file content and requires calling into cp_handle_vnop + * to have content protection check. 
+ */ + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { + return (error); + } } #endif /* CONFIG_PROTECT */ @@ -1612,6 +1654,7 @@ hfs_vnop_setattr(ap) } cp->c_bsdflags = vap->va_flags; + cp->c_flag |= C_MODIFIED; cp->c_touch_chgtime = TRUE; @@ -1662,13 +1705,14 @@ hfs_vnop_setattr(ap) */ if ((VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) && (cp->c_cnid != kHFSRootFolderID) && + !VATTR_IS_ACTIVE(vap, va_create_time) && (cp->c_mtime < cp->c_itime)) { cp->c_itime = cp->c_mtime; } } if (VATTR_IS_ACTIVE(vap, va_backup_time)) cp->c_btime = vap->va_backup_time.tv_sec; - cp->c_flag |= C_MODIFIED; + cp->c_flag |= C_MINOR_MOD; } /* @@ -1677,10 +1721,11 @@ hfs_vnop_setattr(ap) VATTR_SET_SUPPORTED(vap, va_encoding); if (VATTR_IS_ACTIVE(vap, va_encoding)) { cp->c_encoding = vap->va_encoding; + cp->c_flag |= C_MODIFIED; hfs_setencodingbits(hfsmp, cp->c_encoding); } - if ((error = hfs_update(vp, TRUE)) != 0) + if ((error = hfs_update(vp, 0)) != 0) goto out; out: if (cp) { @@ -1741,8 +1786,12 @@ hfs_chmod(struct vnode *vp, int mode, __unused kauth_cred_t cred, __unused struc return (0); }; #endif - cp->c_mode &= ~ALLPERMS; - cp->c_mode |= (mode & ALLPERMS); + + mode_t new_mode = (cp->c_mode & ~ALLPERMS) | (mode & ALLPERMS); + if (new_mode != cp->c_mode) { + cp->c_mode = new_mode; + cp->c_flag |= C_MINOR_MOD; + } cp->c_touch_chgtime = TRUE; return (0); } @@ -1841,6 +1890,13 @@ hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, ogid = cp->c_gid; ouid = cp->c_uid; + + if (ouid == uid && ogid == gid) { + // No change, just set change time + cp->c_touch_chgtime = TRUE; + return 0; + } + #if QUOTA if ((error = hfs_getinoquota(cp))) return (error); @@ -1912,6 +1968,11 @@ hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, panic("hfs_chown: lost quota"); #endif /* QUOTA */ + /* + * Without quotas, we could probably make this a minor + * modification. 
+ */ + cp->c_flag |= C_MODIFIED; /* According to the SUSv3 Standard, chown() shall mark @@ -2342,7 +2403,7 @@ hfs_vnop_exchange(ap) * 2) Drop the special bits from the current flags * 3) swap the special flag bits to their destination */ - from_cp->c_flag |= to_flag_special; + from_cp->c_flag |= to_flag_special | C_MODIFIED; from_cp->c_attr.ca_recflags = to_cp->c_attr.ca_recflags; bcopy(to_cp->c_finderinfo, from_cp->c_finderinfo, 32); @@ -2371,7 +2432,7 @@ hfs_vnop_exchange(ap) * Only OR in the "from" flags into our cnode flags below. * Leave the rest of the flags alone. */ - to_cp->c_flag |= from_flag_special; + to_cp->c_flag |= from_flag_special | C_MODIFIED; to_cp->c_attr.ca_recflags = tempattr.ca_recflags; bcopy(tempattr.ca_finderinfo, to_cp->c_finderinfo, 32); @@ -2403,15 +2464,15 @@ hfs_vnop_exchange(ap) hfs_end_transaction(hfsmp); } + if (have_cnode_locks) + hfs_unlockpair(from_cp, to_cp); + if (have_from_trunc_lock) hfs_unlock_truncate(from_cp, 0); if (have_to_trunc_lock) hfs_unlock_truncate(to_cp, 0); - if (have_cnode_locks) - hfs_unlockpair(from_cp, to_cp); - return (error); } @@ -2738,10 +2799,10 @@ int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp, struct cat_fork dfork_buf; const struct cat_fork *dfork, *rfork; - dfork = hfs_prepare_fork_for_update(to_cp->c_datafork, &dfork_buf, - hfsmp->blockSize); - rfork = hfs_prepare_fork_for_update(from_rfork, &rfork_buf.ff_data, - hfsmp->blockSize); + dfork = hfs_prepare_fork_for_update(to_cp->c_datafork, NULL, + &dfork_buf, hfsmp->blockSize); + rfork = hfs_prepare_fork_for_update(from_rfork, NULL, + &rfork_buf.ff_data, hfsmp->blockSize); // Update the catalog nodes, to_cp first if ((error = cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, @@ -2749,7 +2810,7 @@ int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp, goto exit; } - CLR(to_cp->c_flag, C_MODIFIED); + CLR(to_cp->c_flag, C_MODIFIED | C_MINOR_MOD); // Update in-memory resource fork data here if (from_rfork) { @@ -2778,9 +2839,9 @@ int 
hfs_move_data(cnode_t *from_cp, cnode_t *to_cp, } // Currently unnecessary, but might be useful in future... - dfork = hfs_prepare_fork_for_update(from_cp->c_datafork, &dfork_buf, + dfork = hfs_prepare_fork_for_update(from_cp->c_datafork, NULL, &dfork_buf, hfsmp->blockSize); - rfork = hfs_prepare_fork_for_update(from_rfork, &rfork_buf.ff_data, + rfork = hfs_prepare_fork_for_update(from_rfork, NULL, &rfork_buf.ff_data, hfsmp->blockSize); // Update from_cp @@ -2789,7 +2850,7 @@ int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp, goto exit; } - CLR(from_cp->c_flag, C_MODIFIED); + CLR(from_cp->c_flag, C_MODIFIED | C_MINOR_MOD); exit: if (lockflags) { @@ -2820,8 +2881,9 @@ static int hfs_move_fork(filefork_t *srcfork, cnode_t *src_cp, filefork_t *dstfork, cnode_t *dst_cp) { // Move the invalid ranges - dstfork->ff_invalidranges = srcfork->ff_invalidranges; - rl_init(&srcfork->ff_invalidranges); + TAILQ_SWAP(&dstfork->ff_invalidranges, &srcfork->ff_invalidranges, + rl_entry, rl_link); + rl_remove_all(&srcfork->ff_invalidranges); // Move the fork data (copy whole structure) dstfork->ff_data = srcfork->ff_data; @@ -2868,19 +2930,19 @@ static void hfs_fsync_panic_hook(panic_hook_t *hook_) * cnode must be locked */ int -hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) +hfs_fsync(struct vnode *vp, int waitfor, hfs_fsync_mode_t fsyncmode, struct proc *p) { struct cnode *cp = VTOC(vp); struct filefork *fp = NULL; int retval = 0; struct hfsmount *hfsmp = VTOHFS(vp); - struct rl_entry *invalid_range; struct timeval tv; int waitdata; /* attributes necessary for data retrieval */ int wait; /* all other attributes (e.g. atime, etc.) 
*/ int lockflag; int took_trunc_lock = 0; int locked_buffers = 0; + int fsync_default = 1; /* * Applications which only care about data integrity rather than full @@ -2889,8 +2951,11 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) */ wait = (waitfor == MNT_WAIT); waitdata = (waitfor == MNT_DWAIT) | wait; + if (always_do_fullfsync) - fullsync = 1; + fsyncmode = HFS_FSYNC_FULL; + if (fsyncmode != HFS_FSYNC) + fsync_default = 0; /* HFS directories don't have any data blocks. */ if (vnode_isdir(vp)) @@ -2944,7 +3009,7 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) cp->c_zftimeout != 0))) { microuptime(&tv); - if ((cp->c_flag & C_ALWAYS_ZEROFILL) == 0 && !fullsync && tv.tv_sec < (long)cp->c_zftimeout) { + if ((cp->c_flag & C_ALWAYS_ZEROFILL) == 0 && fsync_default && tv.tv_sec < (long)cp->c_zftimeout) { /* Remember that a force sync was requested. */ cp->c_flag |= C_ZFWANTSYNC; goto datasync; @@ -2959,30 +3024,11 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); took_trunc_lock = 1; } - while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) { - off_t start = invalid_range->rl_start; - off_t end = invalid_range->rl_end; - - /* The range about to be written must be validated - * first, so that VNOP_BLOCKMAP() will return the - * appropriate mapping for the cluster code: - */ - rl_remove(start, end, &fp->ff_invalidranges); - - /* Don't hold cnode lock when calling into cluster layer. */ - hfs_unlock(cp); - (void) cluster_write(vp, (struct uio *) 0, - fp->ff_size, end + 1, start, (off_t)0, - IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE); - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cp->c_flag |= C_MODIFIED; - } + hfs_flush_invalid_ranges(vp); hfs_unlock(cp); (void) cluster_push(vp, waitdata ? 
IO_SYNC : 0); hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); } - cp->c_flag &= ~C_ZFWANTSYNC; - cp->c_zftimeout = 0; } datasync: if (took_trunc_lock) { @@ -3029,7 +3075,7 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) * if the vnode is in the middle of a recycle (VL_TERMINATE or VL_DEAD is set). */ if (vnode_isrecycled(vp)) { - fullsync = 1; + fsync_default = 0; } } @@ -3043,7 +3089,7 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) cp->c_touch_chgtime = FALSE; cp->c_touch_modtime = FALSE; } else if ( !(vp->v_flag & VSWAP) ) /* User file */ { - retval = hfs_update(vp, wait); + retval = hfs_update(vp, HFS_UPDATE_FORCE); /* * When MNT_WAIT is requested push out the catalog record for @@ -3051,7 +3097,7 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) * because the journal_flush or hfs_metasync_all will push out * all of the metadata changes. */ - if ((retval == 0) && wait && !fullsync && cp->c_hint && + if ((retval == 0) && wait && fsync_default && cp->c_hint && !ISSET(cp->c_flag, C_DELETED | C_NOEXISTS)) { hfs_metasync(VTOHFS(vp), (daddr64_t)cp->c_hint, p); } @@ -3060,27 +3106,24 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) * If this was a full fsync, make sure all metadata * changes get to stable storage. */ - if (fullsync) { + if (!fsync_default) { if (hfsmp->jnl) { - hfs_journal_flush(hfsmp, FALSE); - - if (journal_uses_fua(hfsmp->jnl)) { - /* - * the journal_flush did NOT issue a sync track cache command, - * and the fullsync indicates we are supposed to flush all cached - * data to the media, so issue the sync track cache command - * explicitly - */ - VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); - } + if (fsyncmode == HFS_FSYNC_FULL) + hfs_flush(hfsmp, HFS_FLUSH_FULL); + else + hfs_flush(hfsmp, + HFS_FLUSH_JOURNAL_BARRIER); } else { retval = hfs_metasync_all(hfsmp); /* XXX need to pass context! 
*/ - VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); + hfs_flush(hfsmp, HFS_FLUSH_CACHE); } } } + if (!hfs_is_dirty(cp) && !ISSET(cp->c_flag, C_DELETED)) + vnode_cleardirty(vp); + return (retval); } @@ -3459,9 +3502,9 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, dcp->c_touch_chgtime = TRUE; dcp->c_touch_modtime = TRUE; - hfs_touchtimes(hfsmp, cp); - (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); - cp->c_flag &= ~(C_MODIFIED | C_FORCEUPDATE); + dcp->c_flag |= C_MODIFIED; + + hfs_update(dcp->c_vp, 0); } hfs_systemfile_unlock(hfsmp, lockflags); @@ -3634,6 +3677,8 @@ hfs_vnop_remove(ap) * more work. */ if (error == 0) { + hfs_hotfile_deleted(vp); + if (rvp) { recycle_rsrc = 1; } @@ -3758,23 +3803,6 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, return (EPERM); } - /* - * If removing a symlink, then we need to ensure that the - * data blocks for the symlink are not still in-flight or pending. - * If so, we will unlink the symlink here, making its blocks - * available for re-allocation by a subsequent transaction. That is OK, but - * then the I/O for the data blocks could then go out before the journal - * transaction that created it was flushed, leading to I/O ordering issues. - */ - if (vp->v_type == VLNK) { - /* - * This will block if the asynchronous journal flush is in progress. - * If this symlink is not being renamed over and doesn't have any open FDs, - * then we'll remove it from the journal's bufs below in kill_block. - */ - buf_wait_for_shadow_io (vp, 0); - } - /* * Hard links require special handling. */ @@ -4199,7 +4227,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, cp->c_flag |= C_NOEXISTS; cp->c_flag &= ~C_DELETED; - cp->c_touch_chgtime = TRUE; /* XXX needed ? 
*/ + cp->c_touch_chgtime = TRUE; --cp->c_linkcount; /* @@ -5155,15 +5183,22 @@ hfs_vnop_rename(ap) } tdcp->c_entries++; tdcp->c_dirchangecnt++; + tdcp->c_flag |= C_MODIFIED; hfs_incr_gencount(tdcp); if (fdcp->c_entries > 0) fdcp->c_entries--; fdcp->c_dirchangecnt++; + fdcp->c_flag |= C_MODIFIED; fdcp->c_touch_chgtime = TRUE; fdcp->c_touch_modtime = TRUE; - fdcp->c_flag |= C_FORCEUPDATE; // XXXdbg - force it out! + if (ISSET(fcp->c_flag, C_HARDLINK)) { + hfs_relorigin(fcp, fdcp->c_fileid); + if (fdcp->c_fileid != fdcp->c_cnid) + hfs_relorigin(fcp, fdcp->c_cnid); + } + (void) hfs_update(fdvp, 0); } hfs_incr_gencount(fdcp); @@ -5172,7 +5207,6 @@ hfs_vnop_rename(ap) tdcp->c_touch_chgtime = TRUE; tdcp->c_touch_modtime = TRUE; - tdcp->c_flag |= C_FORCEUPDATE; // XXXdbg - force it out! (void) hfs_update(tdvp, 0); /* Update the vnode's name now that the rename has completed. */ @@ -5686,7 +5720,7 @@ hfs_vnop_readdir(ap) /* disk corruption */ cp->c_entries++; /* Mark the cnode as dirty. */ - cp->c_flag |= (C_MODIFIED | C_FORCEUPDATE); + cp->c_flag |= C_MODIFIED; printf("hfs_vnop_readdir: repairing valence to non-zero! \n"); bump_valence++; } @@ -5911,48 +5945,50 @@ hfs_vnop_pathconf(ap) /* * Prepares a fork for cat_update by making sure ff_size and ff_blocks * are no bigger than the valid data on disk thus reducing the chance - * of exposing unitialised data in the event of a non clean unmount. + * of exposing uninitialised data in the event of a non clean unmount. * fork_buf is where to put the temporary copy if required. (It can * be inside pfork.) 
*/ -static const struct cat_fork * -hfs_prepare_fork_for_update(const filefork_t *pfork, - struct cat_fork *fork_buf, +const struct cat_fork * +hfs_prepare_fork_for_update(filefork_t *ff, + const struct cat_fork *cf, + struct cat_fork *cf_buf, uint32_t block_size) { - if (!pfork) + if (!ff) return NULL; - off_t max_size = pfork->ff_size; + if (!cf) + cf = &ff->ff_data; + if (!cf_buf) + cf_buf = &ff->ff_data; + + off_t max_size = ff->ff_size; // Check first invalid range - if (!TAILQ_EMPTY(&pfork->ff_invalidranges)) - max_size = TAILQ_FIRST(&pfork->ff_invalidranges)->rl_start; + if (!TAILQ_EMPTY(&ff->ff_invalidranges)) + max_size = TAILQ_FIRST(&ff->ff_invalidranges)->rl_start; - if (!pfork->ff_unallocblocks && pfork->ff_size <= max_size) - return &pfork->ff_data; // Nothing to do + if (!ff->ff_unallocblocks && ff->ff_size <= max_size) + return cf; // Nothing to do - if (pfork->ff_blocks < pfork->ff_unallocblocks) { + if (ff->ff_blocks < ff->ff_unallocblocks) { panic("hfs: ff_blocks %d is less than unalloc blocks %d\n", - pfork->ff_blocks, pfork->ff_unallocblocks); + ff->ff_blocks, ff->ff_unallocblocks); } - struct cat_fork *out = fork_buf; + struct cat_fork *out = cf_buf; - if (out != &pfork->ff_data) - bcopy(&pfork->ff_data, out, sizeof(*out)); + if (out != cf) + bcopy(cf, out, sizeof(*cf)); // Adjust cf_blocks for cf_vblocks out->cf_blocks -= out->cf_vblocks; /* - * We have to trim the size with the updated cf_blocks. You might - * think that this is unnecessary because the invalid ranges - * should catch this, but we update invalid ranges *before* we do - * I/O whereas cf_vblocks is updated later in hfs_vnop_blockmap. - * There might still be a chance that we will be exposing - * unitialised data because the metadata takes a different path to - * data but the window should be tiny (if it exists at all). + * Here we trim the size with the updated cf_blocks. 
This is + * probably unnecessary now because the invalid ranges should + * catch this (but that wasn't always the case). */ off_t alloc_bytes = hfs_blk_to_bytes(out->cf_blocks, block_size); if (out->cf_size > alloc_bytes) @@ -5968,13 +6004,11 @@ hfs_prepare_fork_for_update(const filefork_t *pfork, /* * Update a cnode's on-disk metadata. * - * If waitfor is set, then wait for the disk write of - * the node to complete. - * - * The cnode must be locked exclusive + * The cnode must be locked exclusive. See declaration for possible + * options. */ int -hfs_update(struct vnode *vp, __unused int waitfor) +hfs_update(struct vnode *vp, int options) { struct cnode *cp = VTOC(vp); struct proc *p; @@ -5987,6 +6021,9 @@ hfs_update(struct vnode *vp, __unused int waitfor) int error; uint32_t tstate = 0; + if (ISSET(cp->c_flag, C_NOEXISTS)) + return 0; + p = current_proc(); hfsmp = VTOHFS(vp); @@ -5995,14 +6032,14 @@ hfs_update(struct vnode *vp, __unused int waitfor) return (0); } if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (cp->c_mode == 0)) { - cp->c_flag &= ~C_MODIFIED; + CLR(cp->c_flag, C_MODIFIED | C_MINOR_MOD | C_NEEDS_DATEADDED); cp->c_touch_acctime = 0; cp->c_touch_chgtime = 0; cp->c_touch_modtime = 0; return (0); } if (kdebug_enable) { - if (cp->c_touch_acctime) + if (cp->c_touch_acctime || cp->c_atime != cp->c_attr.ca_atimeondisk) tstate |= DBG_HFS_UPDATE_ACCTIME; if (cp->c_touch_modtime) tstate |= DBG_HFS_UPDATE_MODTIME; @@ -6011,58 +6048,65 @@ hfs_update(struct vnode *vp, __unused int waitfor) if (cp->c_flag & C_MODIFIED) tstate |= DBG_HFS_UPDATE_MODIFIED; - if (cp->c_flag & C_FORCEUPDATE) + if (ISSET(options, HFS_UPDATE_FORCE)) tstate |= DBG_HFS_UPDATE_FORCE; if (cp->c_flag & C_NEEDS_DATEADDED) tstate |= DBG_HFS_UPDATE_DATEADDED; + if (cp->c_flag & C_MINOR_MOD) + tstate |= DBG_HFS_UPDATE_MINOR; } hfs_touchtimes(hfsmp, cp); - /* Nothing to update. 
*/ - if ((cp->c_flag & (C_MODIFIED | C_FORCEUPDATE)) == 0) { - return (0); + if (!ISSET(cp->c_flag, C_MODIFIED | C_MINOR_MOD) + && !hfs_should_save_atime(cp)) { + // Nothing to update + return 0; } - - if (cp->c_datafork) - dataforkp = &cp->c_datafork->ff_data; - if (cp->c_rsrcfork) - rsrcforkp = &cp->c_rsrcfork->ff_data; - /* - * For delayed allocations updates are - * postponed until an fsync or the file - * gets written to disk. - * - * Deleted files can defer meta data updates until inactive. - * - * If we're ever called with the C_FORCEUPDATE flag though - * we have to do the update. - */ - if (ISSET(cp->c_flag, C_FORCEUPDATE) == 0 && - (ISSET(cp->c_flag, C_DELETED) || - (dataforkp && cp->c_datafork->ff_unallocblocks) || - (rsrcforkp && cp->c_rsrcfork->ff_unallocblocks))) { - // cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_UPDATE); - cp->c_flag |= C_MODIFIED; + KDBG(HFSDBG_UPDATE | DBG_FUNC_START, VM_KERNEL_ADDRPERM(vp), tstate); - return (0); + bool check_txn = false; + + if (!ISSET(options, HFS_UPDATE_FORCE) && !ISSET(cp->c_flag, C_MODIFIED)) { + /* + * This must be a minor modification. If the current + * transaction already has an update for this node, then we + * bundle in the modification. 
+ */ + if (hfsmp->jnl + && journal_current_txn(hfsmp->jnl) == cp->c_update_txn) { + check_txn = true; + } else { + tstate |= DBG_HFS_UPDATE_SKIPPED; + error = 0; + goto exit; + } } - KERNEL_DEBUG_CONSTANT(HFSDBG_UPDATE | DBG_FUNC_START, VM_KERNEL_ADDRPERM(vp), tstate, 0, 0, 0); + if ((error = hfs_start_transaction(hfsmp)) != 0) + goto exit; - if ((error = hfs_start_transaction(hfsmp)) != 0) { - KERNEL_DEBUG_CONSTANT(HFSDBG_UPDATE | DBG_FUNC_END, VM_KERNEL_ADDRPERM(vp), tstate, error, -1, 0); - return error; + if (check_txn + && journal_current_txn(hfsmp->jnl) != cp->c_update_txn) { + hfs_end_transaction(hfsmp); + tstate |= DBG_HFS_UPDATE_SKIPPED; + error = 0; + goto exit; } - /* + if (cp->c_datafork) + dataforkp = &cp->c_datafork->ff_data; + if (cp->c_rsrcfork) + rsrcforkp = &cp->c_rsrcfork->ff_data; + + /* * Modify the values passed to cat_update based on whether or not * the file has invalid ranges or borrowed blocks. */ - dataforkp = hfs_prepare_fork_for_update(cp->c_datafork, &datafork, hfsmp->blockSize); - rsrcforkp = hfs_prepare_fork_for_update(cp->c_rsrcfork, &rsrcfork, hfsmp->blockSize); + dataforkp = hfs_prepare_fork_for_update(cp->c_datafork, NULL, &datafork, hfsmp->blockSize); + rsrcforkp = hfs_prepare_fork_for_update(cp->c_rsrcfork, NULL, &rsrcfork, hfsmp->blockSize); - if (kdebug_enable) { + if (__improbable(kdebug_enable & KDEBUG_TRACE)) { long dbg_parms[NUMPARMS]; int dbg_namelen; @@ -6080,19 +6124,22 @@ hfs_update(struct vnode *vp, __unused int waitfor) */ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); - /* XXX - waitfor is not enforced */ error = cat_update(hfsmp, &cp->c_desc, &cp->c_attr, dataforkp, rsrcforkp); + if (hfsmp->jnl) + cp->c_update_txn = journal_current_txn(hfsmp->jnl); + hfs_systemfile_unlock(hfsmp, lockflags); - /* After the updates are finished, clear the flags */ - cp->c_flag &= ~(C_MODIFIED | C_FORCEUPDATE); + CLR(cp->c_flag, C_MODIFIED | C_MINOR_MOD); hfs_end_transaction(hfsmp); - 
KERNEL_DEBUG_CONSTANT(HFSDBG_UPDATE | DBG_FUNC_END, VM_KERNEL_ADDRPERM(vp), tstate, error, 0, 0); +exit: + + KDBG(HFSDBG_UPDATE | DBG_FUNC_END, VM_KERNEL_ADDRPERM(vp), tstate, error); - return (error); + return error; } /* @@ -6337,9 +6384,10 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, dcp->c_dirchangecnt++; hfs_incr_gencount(dcp); - dcp->c_ctime = tv.tv_sec; - dcp->c_mtime = tv.tv_sec; - (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + dcp->c_touch_chgtime = dcp->c_touch_modtime = true; + dcp->c_flag |= C_MODIFIED; + + hfs_update(dcp->c_vp, 0); #if CONFIG_PROTECT /* @@ -6378,7 +6426,9 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, hfs_systemfile_unlock(hfsmp, lockflags); if (error) goto exit; - + + uint32_t txn = hfsmp->jnl ? journal_current_txn(hfsmp->jnl) : 0; + /* Invalidate negative cache entries in the directory */ if (dcp->c_flag & C_NEG_ENTRIES) { cache_purge_negatives(dvp); @@ -6412,7 +6462,7 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, */ if ((attr.ca_fileid != 0) && (protected_mount) && (protectable_target)) { - cp_entry_destroy (entry); + cp_entry_destroy (hfsmp, entry); entry = NULL; } #endif @@ -6444,20 +6494,21 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, cp = VTOC(tvp); + cp->c_update_txn = txn; + struct doc_tombstone *ut; ut = get_uthread_doc_tombstone(); if ( ut->t_lastop_document_id != 0 && ut->t_lastop_parent == dvp && ut->t_lastop_parent_vid == vnode_vid(dvp) - && strcmp((char *)ut->t_lastop_filename, (char *)cp->c_desc.cd_nameptr) == 0) { + && strcmp((char *)ut->t_lastop_filename, (const char *)cp->c_desc.cd_nameptr) == 0) { struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16); //printf("CREATE: preserving doc-id %lld on %s\n", ut->t_lastop_document_id, ut->t_lastop_filename); fip->document_id = 
(uint32_t)(ut->t_lastop_document_id & 0xffffffff); cp->c_bsdflags |= UF_TRACKED; - // mark the cnode dirty - cp->c_flag |= C_MODIFIED | C_FORCEUPDATE; + cp->c_flag |= C_MODIFIED; if ((error = hfs_start_transaction(hfsmp)) == 0) { lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); @@ -6484,6 +6535,36 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, } } + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && (vnode_isfastdevicecandidate(dvp) && !vnode_isautocandidate(dvp))) { + + //printf("hfs: flagging %s (fileid: %d) as VFASTDEVCANDIDATE (dvp name: %s)\n", + // cnp->cn_nameptr ? cnp->cn_nameptr : "", + // cp->c_fileid, + // dvp->v_name ? dvp->v_name : "no-dir-name"); + + // + // On new files we set the FastDevCandidate flag so that + // any new blocks allocated to it will be pinned. + // + cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask; + vnode_setfastdevicecandidate(tvp); + + // + // properly inherit auto-cached flags + // + if (vnode_isautocandidate(dvp)) { + cp->c_attr.ca_recflags |= kHFSAutoCandidateMask; + vnode_setautocandidate(tvp); + } + + + // + // We also want to add it to the hotfile adoption list so + // that it will eventually land in the hotfile btree + // + (void) hfs_addhotfile(tvp); + } + *vpp = tvp; #if CONFIG_PROTECT @@ -6507,7 +6588,7 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, * Upon success, the keys were generated and written out. * Update the cp pointer in the cnode. */ - cp_replace_entry (cp, keyed_entry); + cp_replace_entry (hfsmp, cp, keyed_entry); if (nocache) { cache_enter (dvp, tvp, cnp); } @@ -6578,7 +6659,7 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, * out the pointer if it was called already. 
*/ if (entry) { - cp_entry_destroy (entry); + cp_entry_destroy (hfsmp, entry); entry = NULL; } #endif @@ -6608,6 +6689,11 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, * hfs_vgetrsrc acquires a resource fork vnode corresponding to the * cnode that is found in 'vp'. The cnode should be locked upon entry * and will be returned locked, but it may be dropped temporarily. + * + * If the resource fork vnode does not exist, HFS will attempt to acquire an + * empty (uninitialized) vnode from VFS so as to avoid deadlocks with + * jetsam. If we let the normal getnewvnode code produce the vnode for us + * we would be doing so while holding the cnode lock of our cnode. * * On success, *rvpp wlll hold the resource fork vnode with an * iocount. *Don't* forget the vnode_put. @@ -6615,7 +6701,8 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp) { - struct vnode *rvp; + struct vnode *rvp = NULLVP; + struct vnode *empty_rvp = NULLVP; struct vnode *dvp = NULLVP; struct cnode *cp = VTOC(vp); int error; @@ -6662,22 +6749,49 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp) int lockflags; int newvnode_flags = 0; - /* - * Make sure cnode lock is exclusive, if not upgrade it. + /* + * In this case, we don't currently see a resource fork vnode attached + * to this cnode. In most cases, we were called from a read-only VNOP + * like getattr, so it should be safe to drop the cnode lock and then + * re-acquire it. + * + * Here, we drop the lock so that we can acquire an empty/husk + * vnode so that we don't deadlock against jetsam. * - * We assume that we were called from a read-only VNOP (getattr) - * and that its safe to have the cnode lock dropped and reacquired. + * It does not currently appear possible to hold the truncate lock via + * FS re-entrancy when we get to this point. 
(8/2014) */ - if (cp->c_lockowner != current_thread()) { - /* - * If the upgrade fails we lose the lock and - * have to take the exclusive lock on our own. - */ - if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock) == FALSE) - lck_rw_lock_exclusive(&cp->c_rwlock); - cp->c_lockowner = current_thread(); + hfs_unlock (cp); + + error = vnode_create_empty (&empty_rvp); + + hfs_lock_always (cp, HFS_EXCLUSIVE_LOCK); + + if (error) { + /* If acquiring the 'empty' vnode failed, then nothing to clean up */ + return error; } + /* + * We could have raced with another thread here while we dropped our cnode + * lock. See if the cnode now has a resource fork vnode and restart if appropriate. + * + * Note: We just released the cnode lock, so there is a possibility that the + * cnode that we just acquired has been deleted or even removed from disk + * completely, though this is unlikely. If the file is open-unlinked, the + * check below will resolve it for us. If it has been completely + * removed (even from the catalog!), then when we examine the catalog + * directly, below, while holding the catalog lock, we will not find the + * item and we can fail out properly. + */ + if (cp->c_rsrc_vp) { + /* Drop the empty vnode before restarting */ + vnode_put (empty_rvp); + empty_rvp = NULL; + rvp = NULL; + goto restart; + } + /* * hfs_vgetsrc may be invoked for a cnode that has already been marked * C_DELETED. This is because we need to continue to provide rsrc @@ -6760,6 +6874,8 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp) hfs_systemfile_unlock(hfsmp, lockflags); if (error) { + /* Drop our 'empty' vnode ! */ + vnode_put (empty_rvp); return (error); } /* @@ -6781,20 +6897,33 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp) // Should never happen because cn.cn_nameptr won't ever be long... if (cn.cn_namelen >= MAXPATHLEN) { FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI); + /* Drop our 'empty' vnode ! 
*/ + vnode_put (empty_rvp); return ENAMETOOLONG; + } } dvp = vnode_getparent(vp); + + /* + * We are about to call hfs_getnewvnode and pass in the vnode that we acquired + * earlier when we were not holding any locks. The semantics of GNV_USE_VP require that + * either hfs_getnewvnode consume the vnode and vend it back to us, properly initialized, + * or it will consume/dispose of it properly if it errors out. + */ + rvp = empty_rvp; + error = hfs_getnewvnode(hfsmp, dvp, cn.cn_pnbuf ? &cn : NULL, - descptr, GNV_WANTRSRC | GNV_SKIPLOCK, &cp->c_attr, - &rsrcfork, &rvp, &newvnode_flags); + descptr, (GNV_WANTRSRC | GNV_SKIPLOCK | GNV_USE_VP), + &cp->c_attr, &rsrcfork, &rvp, &newvnode_flags); + if (dvp) vnode_put(dvp); if (cn.cn_pnbuf) FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI); if (error) return (error); - } + } /* End 'else' for rsrc fork not existing */ *rvpp = rvp; return (0); @@ -6941,17 +7070,17 @@ hfsfifo_close(ap) static u_int32_t hfs_get_document_id_internal(const uint8_t *finderinfo, mode_t mode) { - u_int8_t *finfo = NULL; + const uint8_t *finfo = NULL; u_int32_t doc_id = 0; /* overlay the FinderInfo to the correct pointer, and advance */ - finfo = ((uint8_t *)finderinfo) + 16; + finfo = finderinfo + 16; if (S_ISDIR(mode) || S_ISREG(mode)) { - struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo; doc_id = extinfo->document_id; } else if (S_ISDIR(mode)) { - struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)finderinfo + 16); + const struct FndrExtendedDirInfo *extinfo = (const struct FndrExtendedDirInfo *)finfo; doc_id = extinfo->document_id; } diff --git a/bsd/hfs/hfs_xattr.c b/bsd/hfs/hfs_xattr.c index 3a989c132..c63dce8ea 100644 --- a/bsd/hfs/hfs_xattr.c +++ b/bsd/hfs/hfs_xattr.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include "hfs.h" @@ -48,6 +47,7 @@ #include "hfs_endian.h" #include 
"hfs_btreeio.h" #include "hfs_fsctl.h" +#include "hfs_cprotect.h" #include "hfscommon/headers/BTreesInternal.h" @@ -495,7 +495,7 @@ int hfs_getxattr_internal (struct cnode *cp, struct vnop_getxattr_args *ap, btdata.bufferAddress = recp; btdata.itemSize = sizeof(HFSPlusAttrRecord); btdata.itemCount = 1; - + result = hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key); if (result) { goto exit; @@ -856,7 +856,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) cp->c_bsdflags &= ~UF_HIDDEN; } - result = hfs_update(vp, FALSE); + result = hfs_update(vp, 0); hfs_unlock(cp); return (result); @@ -1032,21 +1032,11 @@ int hfs_setxattr_internal (struct cnode *cp, const void *data_ptr, size_t attrsi int exists = 0; int allocatedblks = 0; u_int32_t target_id; - int takelock = 1; if (cp) { target_id = cp->c_fileid; } else { target_id = fileid; - if (target_id != 1) { - /* - * If we are manipulating something other than - * the root folder (id 1), and do not have a cnode-in-hand, - * then we must already hold the requisite b-tree locks from - * earlier up the call stack. (See hfs_makenode) - */ - takelock = 0; - } } /* Start a transaction for our changes. */ @@ -1079,10 +1069,7 @@ int hfs_setxattr_internal (struct cnode *cp, const void *data_ptr, size_t attrsi hfsmp->hfs_max_inline_attrsize = getmaxinlineattrsize(hfsmp->hfs_attribute_vp); } - if (takelock) { - /* Take exclusive access to the attributes b-tree. */ - lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); - } + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); /* Build the b-tree key. 
*/ MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); @@ -1277,9 +1264,7 @@ int hfs_setxattr_internal (struct cnode *cp, const void *data_ptr, size_t attrsi if (btfile && started_transaction) { (void) BTFlushPath(btfile); } - if (lockflags) { - hfs_systemfile_unlock(hfsmp, lockflags); - } + hfs_systemfile_unlock(hfsmp, lockflags); if (result == 0) { if (vp) { cp = VTOC(vp); @@ -1287,6 +1272,7 @@ int hfs_setxattr_internal (struct cnode *cp, const void *data_ptr, size_t attrsi * modified time of the file. */ cp->c_touch_chgtime = TRUE; + cp->c_flag |= C_MODIFIED; cp->c_attr.ca_recflags |= kHFSHasAttributesMask; if ((bcmp(ap->a_name, KAUTH_FILESEC_XATTR, sizeof(KAUTH_FILESEC_XATTR)) == 0)) { cp->c_attr.ca_recflags |= kHFSHasSecurityMask; @@ -1401,7 +1387,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) if (result == 0) { cp->c_touch_chgtime = TRUE; cp->c_flag |= C_MODIFIED; - result = hfs_update(vp, FALSE); + result = hfs_update(vp, 0); } hfs_end_transaction(hfsmp); @@ -1490,7 +1476,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) /* Updating finderInfo updates change time and modified time */ cp->c_touch_chgtime = TRUE; cp->c_flag |= C_MODIFIED; - hfs_update(vp, FALSE); + hfs_update(vp, 0); hfs_unlock(cp); @@ -1540,6 +1526,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) result = file_attribute_exist(hfsmp, cp->c_fileid); if (result == 0) { cp->c_attr.ca_recflags &= ~kHFSHasAttributesMask; + cp->c_flag |= C_MODIFIED; } if (result == EEXIST) { result = 0; @@ -1550,6 +1537,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) /* If ACL was removed, clear security bit */ if ((bcmp(ap->a_name, KAUTH_FILESEC_XATTR, sizeof(KAUTH_FILESEC_XATTR)) == 0)) { cp->c_attr.ca_recflags &= ~kHFSHasSecurityMask; + cp->c_flag |= C_MODIFIED; } (void) hfs_update(vp, 0); } @@ -1963,18 +1951,28 @@ listattr_callback(const HFSPlusAttrKey *key, __unused const HFSPlusAttrData *dat * * This function takes the necessary locks on the 
attribute * b-tree file and the allocation (bitmap) file. + * + * NOTE: Upon success, this function will return with an open + * transaction. The reason we do it this way is because when we + * delete the last attribute, we must make sure the flag in the + * catalog record that indicates there are no more records is cleared. + * The caller is responsible for doing this and *must* do it before + * ending the transaction. */ int -hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid) +hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid, + bool *open_transaction) { BTreeIterator *iterator = NULL; HFSPlusAttrKey *key; struct filefork *btfile; - int result, lockflags; + int result, lockflags = 0; + + *open_transaction = false; + + if (hfsmp->hfs_attribute_vp == NULL) + return 0; - if (hfsmp->hfs_attribute_vp == NULL) { - return (0); - } btfile = VTOF(hfsmp->hfs_attribute_vp); MALLOC(iterator, BTreeIterator *, sizeof(BTreeIterator), M_TEMP, M_WAITOK); @@ -1985,25 +1983,32 @@ hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid) key = (HFSPlusAttrKey *)&iterator->key; /* Loop until there are no more attributes for this file id */ - for(;;) { + do { + if (!*open_transaction) + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + + (void) hfs_buildattrkey(fileid, NULL, key); + result = BTIterateRecord(btfile, kBTreeNextRecord, iterator, NULL, NULL); + if (result || key->fileID != fileid) + goto exit; + + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + + if (*open_transaction) { + hfs_end_transaction(hfsmp); + *open_transaction = false; + } + if (hfs_start_transaction(hfsmp) != 0) { result = EINVAL; goto exit; } - /* Lock the attribute b-tree and the allocation (bitmap) files */ + *open_transaction = true; + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - /* - * Go to first possible attribute key/record pair - */ - (void) hfs_buildattrkey(fileid, NULL, key); - result =
BTIterateRecord(btfile, kBTreeNextRecord, iterator, NULL, NULL); - if (result || key->fileID != fileid) { - hfs_systemfile_unlock(hfsmp, lockflags); - hfs_end_transaction(hfsmp); - goto exit; - } result = remove_attribute_records(hfsmp, iterator); #if HFS_XATTR_VERBOSE @@ -2011,14 +2016,22 @@ hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid) printf("hfs_removeallattr: unexpected err %d\n", result); } #endif + } while (!result); + +exit: + FREE(iterator, M_TEMP); + + if (lockflags) hfs_systemfile_unlock(hfsmp, lockflags); + + result = result == btNotFound ? 0 : MacToVFSError(result); + + if (result && *open_transaction) { hfs_end_transaction(hfsmp); - if (result) - break; + *open_transaction = false; } -exit: - FREE(iterator, M_TEMP); - return (result == btNotFound ? 0: MacToVFSError(result)); + + return result; } __private_extern__ diff --git a/bsd/hfs/hfscommon/BTree/BTree.c b/bsd/hfs/hfscommon/BTree/BTree.c index 99888cafd..a8a057e64 100644 --- a/bsd/hfs/hfscommon/BTree/BTree.c +++ b/bsd/hfs/hfscommon/BTree/BTree.c @@ -1722,14 +1722,22 @@ OSStatus BTDeleteRecord (FCB *filePtr, /////////////////////// Extend File If Necessary //////////////////////////// - if ((btreePtr->treeDepth + 1UL) > btreePtr->totalNodes) + /* + * Worst case: we delete the first record in the tree and + * following key is sufficiently larger to cause all parents to + * require splitting and we need a new root node and a new map + * node. 
+ */ + if (index == 0 && btreePtr->treeDepth + 1 > btreePtr->freeNodes) { - nodesNeeded = btreePtr->treeDepth + 1 + btreePtr->totalNodes; + nodesNeeded = btreePtr->treeDepth + btreePtr->totalNodes; if (nodesNeeded > CalcMapBits (btreePtr)) ++nodesNeeded; - err = ExtendBTree (btreePtr, nodesNeeded); - M_ExitOnError (err); + if (nodesNeeded - btreePtr->totalNodes > btreePtr->freeNodes) { + err = ExtendBTree (btreePtr, nodesNeeded); + M_ExitOnError (err); + } } ///////////////////////////// Delete Record ///////////////////////////////// diff --git a/bsd/hfs/hfscommon/BTree/BTreeAllocate.c b/bsd/hfs/hfscommon/BTree/BTreeAllocate.c index fe2f91714..dbd0a8a54 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeAllocate.c +++ b/bsd/hfs/hfscommon/BTree/BTreeAllocate.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003, 2005-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2003, 2005-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -196,7 +196,7 @@ OSStatus AllocateNode (BTreeControlBlockPtr btreePtr, u_int32_t *nodeNum) M_ExitOnError (err); --btreePtr->freeNodes; - btreePtr->flags |= kBTHeaderDirty; + M_BTreeHeaderDirty(btreePtr); /* Account for allocations from node reserve */ BTUpdateReserve(btreePtr, 1); @@ -273,7 +273,7 @@ OSStatus FreeNode (BTreeControlBlockPtr btreePtr, u_int32_t nodeNum) M_ExitOnError (err); ++btreePtr->freeNodes; - btreePtr->flags |= kBTHeaderDirty; // how about a macro for this + M_BTreeHeaderDirty(btreePtr); return noErr; @@ -494,7 +494,7 @@ OSStatus ExtendBTree (BTreeControlBlockPtr btreePtr, btreePtr->totalNodes = newTotalNodes; btreePtr->freeNodes += (newTotalNodes - oldTotalNodes) - newMapNodes; - btreePtr->flags |= kBTHeaderDirty; //�� how about a macro for this + M_BTreeHeaderDirty(btreePtr); /* Force the b-tree header changes to disk */ (void) UpdateHeader (btreePtr, true); diff --git a/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c b/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c index 45456569f..34bd8e41b 100644 --- 
a/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c +++ b/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -315,7 +315,7 @@ OSStatus SearchTree (BTreeControlBlockPtr btreePtr, { goto ErrorExit; } - + // The child node should be at a level one less than the parent. --level; } @@ -1318,8 +1318,8 @@ static OSStatus AddNewRootNode (BTreeControlBlockPtr btreePtr, // update BTreeInfoRec btreePtr->rootNode = rootNum; - btreePtr->flags |= kBTHeaderDirty; - + M_BTreeHeaderDirty(btreePtr); + return noErr; diff --git a/bsd/hfs/hfscommon/Catalog/FileIDsServices.c b/bsd/hfs/hfscommon/Catalog/FileIDsServices.c index 909ab5c1d..fa7e210d0 100644 --- a/bsd/hfs/hfscommon/Catalog/FileIDsServices.c +++ b/bsd/hfs/hfscommon/Catalog/FileIDsServices.c @@ -675,21 +675,11 @@ static OSErr DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly, u_int16_t btRecordSize; OSErr err; - + MALLOC (btIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), + M_TEMP, M_WAITOK | M_ZERO); - MALLOC (btIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK); - if (btIterator == NULL) { - return memFullErr; // translates to ENOMEM - } - - MALLOC (tmpIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK); - if (tmpIterator == NULL) { - FREE (btIterator, M_TEMP); - return memFullErr; // translates to ENOMEM - } - - bzero(btIterator, sizeof(*btIterator)); - bzero (tmpIterator, sizeof(*tmpIterator)); + MALLOC (tmpIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), + M_TEMP, M_WAITOK | M_ZERO); fcb = GetFileControlBlock(vcb->extentsRefNum); @@ -721,7 +711,10 @@ static OSErr DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly, extentKeyPtr->hfs.startBlock = 0; } #else - else return cmBadNews; + else { + err = cmBadNews; + goto exit; + } #endif err = 
BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator); @@ -730,8 +723,8 @@ static OSErr DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly, if (err == noErr) { // Did we find a bogus extent record? err = cmBadNews; // Yes, so indicate things are messed up. } - - return err; // Got some unexpected error, so return it + + goto exit; } do @@ -770,6 +763,8 @@ static OSErr DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly, if (err != noErr) break; } while ( true ); + +exit: FREE (tmpIterator, M_TEMP); FREE (btIterator, M_TEMP); diff --git a/bsd/hfs/hfscommon/Misc/BTreeWrapper.c b/bsd/hfs/hfscommon/Misc/BTreeWrapper.c index 92b49c840..bd4b905ad 100644 --- a/bsd/hfs/hfscommon/Misc/BTreeWrapper.c +++ b/bsd/hfs/hfscommon/Misc/BTreeWrapper.c @@ -147,8 +147,8 @@ static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb, case kHFSFileRecord: { - HFSExtentDescriptor *dataExtent; - HFSExtentDescriptor *rsrcExtent; + const HFSExtentDescriptor *dataExtent; + const HFSExtentDescriptor *rsrcExtent; if ( recordSize != sizeof(HFSCatalogFile) ) return false; @@ -171,8 +171,8 @@ static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb, if ( catalogRecord->hfsFile.rsrcPhysicalSize < catalogRecord->hfsFile.rsrcLogicalSize ) return false; - dataExtent = (HFSExtentDescriptor*) &catalogRecord->hfsFile.dataExtents; - rsrcExtent = (HFSExtentDescriptor*) &catalogRecord->hfsFile.rsrcExtents; + dataExtent = (const HFSExtentDescriptor*) &catalogRecord->hfsFile.dataExtents; + rsrcExtent = (const HFSExtentDescriptor*) &catalogRecord->hfsFile.rsrcExtents; #if 0 for (i = 0; i < kHFSExtentDensity; ++i) @@ -222,8 +222,8 @@ static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb, case kHFSPlusFileRecord: { // u_int16_t i; - HFSPlusExtentDescriptor *dataExtent; - HFSPlusExtentDescriptor *rsrcExtent; + const HFSPlusExtentDescriptor *dataExtent; + const HFSPlusExtentDescriptor *rsrcExtent; if ( 
recordSize != sizeof(HFSPlusCatalogFile) ) return false; @@ -237,8 +237,8 @@ static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb, // make sure 0 � LEOF � PEOF for both forks - dataExtent = (HFSPlusExtentDescriptor*) &catalogRecord->hfsPlusFile.dataFork.extents; - rsrcExtent = (HFSPlusExtentDescriptor*) &catalogRecord->hfsPlusFile.resourceFork.extents; + dataExtent = (const HFSPlusExtentDescriptor*) &catalogRecord->hfsPlusFile.dataFork.extents; + rsrcExtent = (const HFSPlusExtentDescriptor*) &catalogRecord->hfsPlusFile.resourceFork.extents; #if 0 for (i = 0; i < kHFSPlusExtentDensity; ++i) diff --git a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c index 018a8701e..117475101 100644 --- a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c +++ b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -35,6 +35,7 @@ #include "../headers/BTreesInternal.h" #include +#include /* ============================================================ @@ -66,7 +67,9 @@ Public (Exported) Routines: FlushExtentFile Flush the extents file for a given volume. - + SearchExtentFile + Search the FCB and extents file for an extent record that + contains a given file position (in bytes). ============================================================ @@ -74,9 +77,6 @@ Internal Routines: ============================================================ FindExtentRecord Search the extents BTree for a particular extent record. - SearchExtentFile - Search the FCB and extents file for an extent record that - contains a given file position (in bytes). SearchExtentRecord Search a given extent record to see if it contains a given file position (in bytes). Used by SearchExtentFile. 
@@ -143,16 +143,6 @@ static OSErr GetFCBExtentRecord( const FCB *fcb, HFSPlusExtentRecord extents); -static OSErr SearchExtentFile( - ExtendedVCB *vcb, - const FCB *fcb, - int64_t filePosition, - HFSPlusExtentKey *foundExtentKey, - HFSPlusExtentRecord foundExtentData, - u_int32_t *foundExtentDataIndex, - u_int32_t *extentBTreeHint, - u_int32_t *endingFABNPlusOne ); - static OSErr SearchExtentRecord( ExtendedVCB *vcb, u_int32_t searchFABN, @@ -877,6 +867,64 @@ int32_t CompareExtentKeysPlus( const HFSPlusExtentKey *searchKey, const HFSPlusE return( result ); } +static int +should_pin_blocks(hfsmount_t *hfsmp, FCB *fcb) +{ + if (!ISSET(hfsmp->hfs_flags, HFS_CS_HOTFILE_PIN) + || fcb->ff_cp == NULL || fcb->ff_cp->c_vp == NULL) { + return 0; + } + + int pin_blocks; + + // + // File system metadata should get pinned + // + if (vnode_issystem(fcb->ff_cp->c_vp)) { + return 1; + } + + // + // If a file is AutoCandidate, we should not pin its blocks because + // it was an automatically added file and this function is intended + // to pin new blocks being added to user-generated content. + // + // If a file is marked FastDevPinned or FastDevCandidate it is an + // existing pinned file or a new file that should be pinned. 
+ // + if (fcb->ff_cp->c_attr.ca_recflags & kHFSAutoCandidateMask) { + return 0; + } + + if ((fcb->ff_cp->c_attr.ca_recflags & (kHFSFastDevPinnedMask|kHFSFastDevCandidateMask)) != 0) { + pin_blocks = 1; + } else { + pin_blocks = 0; + } + + return pin_blocks; +} + +/* When should_pin_blocks() approves, ask CoreStorage to pin blocks [startBlock, startBlock + blockCount) + * and, if that succeeds, subtract blockCount from the hot-file accounting via hfs_hotfile_adjust_blocks(). */ +static void +pin_blocks_if_needed(ExtendedVCB *vcb, FCB *fcb, u_int32_t startBlock, u_int32_t blockCount) +{ + if (!should_pin_blocks(vcb, fcb)) { + return; + } + + // ask CoreStorage to pin the new blocks being added to this file + if (hfs_pin_block_range((struct hfsmount *)vcb, HFS_PIN_IT, startBlock, blockCount, vfs_context_kernel()) == 0) { + struct vnode *vp = fcb->ff_cp->c_vp; + + // and make sure to keep our accounting in order; negate in 64 bits, because -blockCount on an unsigned 32-bit value wraps to a large positive number + hfs_hotfile_adjust_blocks(vp, -(int64_t)blockCount); + } +} + + + /* * Add a file extent to a file. * @@ -928,8 +976,12 @@ AddFileExtent(ExtendedVCB *vcb, FCB *fcb, u_int32_t startBlock, u_int32_t blockC foundIndex = 0; error = CreateExtentRecord(vcb, &foundKey, foundData, &hint); - if (error == fxOvFlErr) + if (error == fxOvFlErr) { error = dskFulErr; + } else if (error == 0) { + pin_blocks_if_needed(vcb, fcb, startBlock, blockCount); + } + } else { /* * Add a new extent into existing record.
@@ -937,6 +989,9 @@ AddFileExtent(ExtendedVCB *vcb, FCB *fcb, u_int32_t startBlock, u_int32_t blockC foundData[foundIndex].startBlock = startBlock; foundData[foundIndex].blockCount = blockCount; error = UpdateExtentRecord(vcb, fcb, 0, &foundKey, foundData, hint); + if (error == 0) { + pin_blocks_if_needed(vcb, fcb, startBlock, blockCount); + } } (void) FlushExtentFile(vcb); @@ -983,6 +1038,8 @@ OSErr ExtendFileC ( int64_t availbytes; int64_t peof; u_int32_t prevblocks; + uint32_t fastdev = 0; + struct hfsmount *hfsmp = (struct hfsmount*)vcb; allowFlushTxns = 0; needsFlush = false; @@ -1030,7 +1087,12 @@ OSErr ExtendFileC ( FTOC(fcb)->c_blocks += blocksToAdd; fcb->ff_blocks += blocksToAdd; - FTOC(fcb)->c_flag |= C_MODIFIED | C_FORCEUPDATE; + /* + * We haven't touched the disk here; no blocks have been + * allocated and the volume will not be inconsistent if we + * don't update the catalog record immediately. + */ + FTOC(fcb)->c_flag |= C_MINOR_MOD; *actualBytesAdded = bytesToAdd; return (0); } @@ -1100,7 +1162,7 @@ OSErr ExtendFileC ( // Enough blocks are already allocated. Just update the FCB to reflect the new length. fcb->ff_blocks = peof / volumeBlockSize; FTOC(fcb)->c_blocks += (bytesToAdd / volumeBlockSize); - FTOC(fcb)->c_flag |= C_MODIFIED | C_FORCEUPDATE; + FTOC(fcb)->c_flag |= C_MODIFIED; goto Exit; } if (err != fxRangeErr) // Any real error? 
@@ -1172,6 +1234,8 @@ OSErr ExtendFileC ( wantContig = true; } + if (should_pin_blocks(hfsmp, fcb)) + fastdev = HFS_ALLOC_FAST_DEV; useMetaZone = flags & kEFMetadataMask; do { @@ -1193,7 +1257,7 @@ OSErr ExtendFileC ( err = dskFulErr; } else { - uint32_t ba_flags = 0; + uint32_t ba_flags = fastdev; if (wantContig) { ba_flags |= HFS_ALLOC_FORCECONTIG; @@ -1253,12 +1317,6 @@ OSErr ExtendFileC ( } if (err == noErr) { - if (actualNumBlocks != 0) { - // this catalog entry *must* get forced to disk when - // hfs_update() is called - FTOC(fcb)->c_flag |= C_FORCEUPDATE; - } - // Add the new extent to the existing extent record, or create a new one. if ((actualStartBlock == startBlock) && (blockHint == 0)) { // We grew the file's last extent, so just adjust the number of blocks. @@ -1321,7 +1379,7 @@ OSErr ExtendFileC ( if (err != noErr) break; } } - + // Figure out how many bytes were actually allocated. // NOTE: BlockAllocate could have allocated more than we asked for. // Don't set the PEOF beyond what our client asked for. @@ -1336,7 +1394,7 @@ OSErr ExtendFileC ( } fcb->ff_blocks += (bytesThisExtent / volumeBlockSize); FTOC(fcb)->c_blocks += (bytesThisExtent / volumeBlockSize); - FTOC(fcb)->c_flag |= C_MODIFIED | C_FORCEUPDATE; + FTOC(fcb)->c_flag |= C_MODIFIED; // If contiguous allocation was requested, then we've already got one contiguous // chunk. If we didn't get all we wanted, then adjust the error to disk full. @@ -1366,6 +1424,11 @@ OSErr ExtendFileC ( *actualBytesAdded = 0; } + if (fastdev) { + hfs_hotfile_adjust_blocks(fcb->ff_cp->c_vp, + (int64_t)prevblocks - fcb->ff_blocks); + } + if (needsFlush) (void) FlushExtentFile(vcb); @@ -1474,7 +1537,7 @@ OSErr TruncateFileC ( * has been removed from disk already. 
We wouldn't need to force * another update */ - FTOC(fcb)->c_flag |= (C_MODIFIED | C_FORCEUPDATE); + FTOC(fcb)->c_flag |= C_MODIFIED; } // // If the new PEOF is 0, then truncateToExtent has no meaning (we should always deallocate @@ -1715,7 +1778,7 @@ OSErr HeadTruncateFile ( FTOC(fcb)->c_blocks -= headblks; fcb->ff_blocks = blkcnt; - FTOC(fcb)->c_flag |= C_FORCEUPDATE; + FTOC(fcb)->c_flag |= C_MODIFIED; FTOC(fcb)->c_touch_chgtime = TRUE; (void) FlushExtentFile(vcb); @@ -1851,7 +1914,7 @@ static OSErr SearchExtentRecord( // (other) (some other internal I/O error) //������������������������������������������������������������������������������� -static OSErr SearchExtentFile( +OSErr SearchExtentFile( ExtendedVCB *vcb, const FCB *fcb, int64_t filePosition, diff --git a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c index 79547be7f..612171809 100644 --- a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c +++ b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c @@ -38,11 +38,12 @@ /* Public routines: - BlockAllocate + BlockAllocate / hfs_block_alloc Allocate space on a volume. Can allocate space contiguously. If not contiguous, then allocation may be less than what was asked for. Returns the starting block number, and number of - blocks. (Will only do a single extent???) + blocks. It will only return a single extent. + BlockDeallocate Deallocate a contiguous run of allocation blocks. @@ -92,20 +93,20 @@ Internal routines: block number of the first block in the range is returned. This is only called by the bitmap scanning logic as the red-black tree should be able to do this internally by searching its tree. - BlockAllocateAny + BlockFindAny Find and allocate a contiguous range of blocks up to a given size. The first range of contiguous free blocks found are allocated, even if there are fewer blocks than requested (and even if a contiguous range of blocks of the given size exists elsewhere). 
- BlockAllocateAnyBitmap + BlockFindAnyBitmap Finds a range of blocks per the above requirements without using the Allocation RB Tree. This relies on the bitmap-scanning logic in order to find any valid range of free space needed. - BlockAllocateContig - Find and allocate a contiguous range of blocks of a given size. If - a contiguous range of free blocks of the given size isn't found, then - the allocation fails (i.e. it is "all or nothing"). - BlockAllocateKnown + BlockFindContig + Find a contiguous range of blocks of a given size. + If the minimum cannot be satisfied, nothing is + returned. + BlockFindKnown Try to allocate space from known free space in the volume's free extent cache. ReadBitmapBlock @@ -155,23 +156,22 @@ Optimization Routines */ + #include #include - #if !HFS_ALLOC_TEST #include "../../hfs_macos_defs.h" #include #include #include - /* For VM Page size */ #include +#include #include "../../hfs.h" #include "../../hfs_endian.h" #include "../headers/FileMgrInternal.h" -#include #endif // !HFS_ALLOC_TEST @@ -183,6 +183,8 @@ Optimization Routines #include "../../hfs_dbg.h" #include "../../hfs_format.h" #include "../../hfs_kdebug.h" +#include "../../rangelist.h" +#include "../../hfs_extents.h" /* Headers for unmap-on-mount support */ #include @@ -243,38 +245,44 @@ static OSErr ReadBitmapBlock( ExtendedVCB *vcb, u_int32_t bit, u_int32_t **buffer, - uintptr_t *blockRef); + uintptr_t *blockRef, + hfs_block_alloc_flags_t flags); static OSErr ReleaseBitmapBlock( ExtendedVCB *vcb, uintptr_t blockRef, Boolean dirty); -static OSErr BlockAllocateAny( +static OSErr hfs_block_alloc_int(hfsmount_t *hfsmp, + HFSPlusExtentDescriptor *extent, + hfs_block_alloc_flags_t flags, + hfs_alloc_extra_args_t *ap); + +static OSErr BlockFindAny( ExtendedVCB *vcb, u_int32_t startingBlock, u_int32_t endingBlock, u_int32_t maxBlocks, - u_int32_t flags, + hfs_block_alloc_flags_t flags, Boolean trustSummary, u_int32_t *actualStartBlock, u_int32_t *actualNumBlocks); -static OSErr 
BlockAllocateAnyBitmap( +static OSErr BlockFindAnyBitmap( ExtendedVCB *vcb, u_int32_t startingBlock, u_int32_t endingBlock, u_int32_t maxBlocks, - u_int32_t flags, + hfs_block_alloc_flags_t flags, u_int32_t *actualStartBlock, u_int32_t *actualNumBlocks); -static OSErr BlockAllocateContig( +static OSErr BlockFindContig( ExtendedVCB *vcb, u_int32_t startingBlock, u_int32_t minBlocks, u_int32_t maxBlocks, - u_int32_t flags, + hfs_block_alloc_flags_t flags, u_int32_t *actualStartBlock, u_int32_t *actualNumBlocks); @@ -287,18 +295,25 @@ static OSErr BlockFindContiguous( Boolean useMetaZone, Boolean trustSummary, u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks); + u_int32_t *actualNumBlocks, + hfs_block_alloc_flags_t flags); -static OSErr BlockAllocateKnown( +static OSErr BlockFindKnown( ExtendedVCB *vcb, u_int32_t maxBlocks, u_int32_t *actualStartBlock, u_int32_t *actualNumBlocks); +static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp, + HFSPlusExtentDescriptor *extent, + uint32_t max_blocks, + hfs_block_alloc_flags_t flags); + static OSErr BlockMarkAllocatedInternal ( ExtendedVCB *vcb, u_int32_t startingBlock, - register u_int32_t numBlocks); + u_int32_t numBlocks, + hfs_block_alloc_flags_t flags); static OSErr BlockMarkFreeInternal( ExtendedVCB *vcb, @@ -362,6 +377,8 @@ static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount); static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated); +static void hfs_release_reserved(hfsmount_t *hfsmp, struct rl_entry *range, int list); + /* Functions for getting free exents */ typedef struct bitmap_context { @@ -525,7 +542,7 @@ static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start, u_int64_t length; int error = 0; - if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) { + if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL) && 
list->allocated_count && list->extents != NULL) { int extent_no = list->extent_count; offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset; length = (u_int64_t) numBlocks * hfsmp->blockSize; @@ -564,7 +581,7 @@ static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0); } - if (list->extent_count > 0) { + if (list->extent_count > 0 && list->extents != NULL) { bzero(&unmap, sizeof(unmap)); unmap.extents = list->extents; unmap.extentsCount = list->extent_count; @@ -619,7 +636,7 @@ static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlo { u_int64_t offset; u_int64_t length; - int err; + int err = 0; if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0); @@ -813,6 +830,7 @@ u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp) dk_extent_t *extents; }; */ + bzero (&trimlist, sizeof(trimlist)); /* * The scanning itself here is not tied to the presence of CONFIG_HFS_TRIM @@ -835,7 +853,6 @@ u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp) if (extents == NULL) { return ENOMEM; } - bzero (&trimlist, sizeof(trimlist)); trimlist.extents = (dk_extent_t*)extents; trimlist.allocated_count = alloc_count; trimlist.extent_count = 0; @@ -881,10 +898,129 @@ u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp) return error; } +static void add_to_reserved_list(hfsmount_t *hfsmp, uint32_t start, + uint32_t count, int list, + struct rl_entry **reservation) +{ + struct rl_entry *range, *next_range; + + if (list == HFS_TENTATIVE_BLOCKS) { + int nranges = 0; + // Don't allow more than 4 tentative reservations + TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], + rl_link, next_range) { + if (++nranges > 3) + hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS); + } + } + + MALLOC(range, struct rl_entry *, 
sizeof(*range), M_TEMP, M_WAITOK); + range->rl_start = start; + range->rl_end = start + count - 1; + TAILQ_INSERT_HEAD(&hfsmp->hfs_reserved_ranges[list], range, rl_link); + *reservation = range; +} + +static void hfs_release_reserved(hfsmount_t *hfsmp, + struct rl_entry *range, + int list) +{ + if (range->rl_start == -1) + return; + + TAILQ_REMOVE(&hfsmp->hfs_reserved_ranges[list], range, rl_link); + + if (rl_len(range) > 0) { + if (list == HFS_TENTATIVE_BLOCKS) + hfsmp->tentativeBlocks -= rl_len(range); + else { + /* + * We don't need to unmap tentative blocks because we won't have + * written to them, but we might have written to reserved blocks. + * Nothing can refer to those blocks so this doesn't have to be + * via the journal. If this proves to be too expensive, we could + * consider not sending down the unmap or we could require this + * to always be called within a transaction and then we can use + * the journal. + */ + dk_extent_t extent = { + .offset = (hfs_blk_to_bytes(range->rl_start, hfsmp->blockSize) + + hfsmp->hfsPlusIOPosOffset), + .length = hfs_blk_to_bytes(rl_len(range), hfsmp->blockSize) + }; + dk_unmap_t unmap = { + .extents = &extent, + .extentsCount = 1, + }; + VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, + 0, vfs_context_kernel()); + assert(hfsmp->lockedBlocks >= rl_len(range)); + hfsmp->lockedBlocks -= rl_len(range); + } + hfs_release_summary(hfsmp, range->rl_start, rl_len(range)); + add_free_extent_cache(hfsmp, range->rl_start, rl_len(range)); + } + + range->rl_start = -1; + range->rl_end = -2; +} + +static void hfs_free_locked_internal(hfsmount_t *hfsmp, + struct rl_entry **reservation, + int list) +{ + if (*reservation) { + hfs_release_reserved(hfsmp, *reservation, list); + FREE(*reservation, M_TEMP); + *reservation = NULL; + } +} + +void hfs_free_tentative(hfsmount_t *hfsmp, struct rl_entry **reservation) +{ + hfs_free_locked_internal(hfsmp, reservation, HFS_TENTATIVE_BLOCKS); +} + +void hfs_free_locked(hfsmount_t *hfsmp, 
struct rl_entry **reservation) +{ + hfs_free_locked_internal(hfsmp, reservation, HFS_LOCKED_BLOCKS); +} + +OSErr BlockAllocate ( + hfsmount_t *hfsmp, /* which volume to allocate space on */ + u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */ + u_int32_t minBlocks, /* desired number of blocks to allocate */ + u_int32_t maxBlocks, /* maximum number of blocks to allocate */ + hfs_block_alloc_flags_t flags, /* option flags */ + u_int32_t *actualStartBlock, /* actual first block of allocation */ + u_int32_t *actualNumBlocks) +{ + hfs_alloc_extra_args_t extra_args = { + .max_blocks = maxBlocks + }; + + HFSPlusExtentDescriptor extent = { startingBlock, minBlocks }; + + OSErr err = hfs_block_alloc_int(hfsmp, &extent, flags, &extra_args); + + *actualStartBlock = extent.startBlock; + *actualNumBlocks = extent.blockCount; + + return err; +} + +errno_t hfs_block_alloc(hfsmount_t *hfsmp, + HFSPlusExtentDescriptor *extent, + hfs_block_alloc_flags_t flags, + hfs_alloc_extra_args_t *ap) +{ + return MacToVFSError(hfs_block_alloc_int(hfsmp, extent, flags, ap)); +} + /* ;________________________________________________________________________________ ; - ; Routine: BlockAllocate + ; Routine: hfs_block_alloc_int ; ; Function: Allocate space on a volume. If contiguous allocation is requested, ; at least the requested number of bytes will be allocated or an @@ -899,57 +1035,125 @@ u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp) ; point. ; ; Input Arguments: - ; vcb - Pointer to ExtendedVCB for the volume to allocate space on - ; fcb - Pointer to FCB for the file for which storage is being allocated - ; startingBlock - Preferred starting allocation block, 0 = no preference - ; minBlocks - Number of blocks requested. If the allocation is non-contiguous, - ; less than this may actually be allocated - ; maxBlocks - The maximum number of blocks to allocate. 
If there is additional free - ; space after bytesRequested, then up to maxBlocks bytes should really - ; be allocated. (Used by ExtendFileC to round up allocations to a multiple - ; of the file's clump size.) - ; flags - Flags to specify options like contiguous, use metadata zone, - ; skip free block check, etc. + ; hfsmp - Pointer to the HFS mount structure. + ; extent - startBlock indicates the block to start + ; searching from and blockCount is the number of + ; blocks required. Depending on the flags used, + ; more or fewer blocks may be returned. The + ; allocated extent is returned via this + ; parameter. + ; flags - Flags to specify options like contiguous, use + ; metadata zone, skip free block check, etc. + ; ap - Additional arguments used depending on flags. + ; See hfs_alloc_extra_args_t and below. ; ; Output: - ; (result) - Error code, zero for successful allocation - ; *startBlock - Actual starting allocation block - ; *actualBlccks - Actual number of allocation blocks allocated + ; (result) - Error code, zero for successful allocation + ; extent - If successful, the allocated extent. ; ; Side effects: ; The volume bitmap is read and updated; the volume bitmap cache may be changed. + ; + ; HFS_ALLOC_TENTATIVE + ; Blocks will be reserved but not marked allocated. They can be + ; stolen if free space is limited. Tentative blocks can be used by + ; passing HFS_ALLOC_USE_TENTATIVE and passing in the reservation. + ; @ap->reservation_out is used to store the reservation. + ; + ; HFS_ALLOC_USE_TENTATIVE + ; Use blocks previously returned with HFS_ALLOC_TENTATIVE. + ; @ap->reservation_in should be set to whatever @ap->reservation_out + ; was set to when HFS_ALLOC_TENTATIVE was used. If the tentative + ; reservation was stolen, a normal allocation will take place. + ; + ; HFS_ALLOC_LOCKED + ; Blocks will be reserved but not marked allocated. Unlike tentative + ; reservations they cannot be stolen. It is safe to write to these + ; blocks.
@ap->reservation_out is used to store the reservation. + ; + ; HFS_ALLOC_COMMIT + ; This will take blocks previously returned with HFS_ALLOC_LOCKED and + ; mark them allocated on disk. @ap->reservation_in is used. + ; + ; HFS_ALLOC_ROLL_BACK + ; Take blocks that were just recently deallocated and mark them + ; allocated. This is for roll back situations. Blocks got + ; deallocated and then something went wrong and we need to roll back + ; by marking the blocks allocated. + ; + ; HFS_ALLOC_FORCECONTIG + ; It will not return fewer than @min_blocks. + ; + ; HFS_ALLOC_TRY_HARD + ; We will perform an exhaustive search to try and find @max_blocks. + ; It will not return fewer than @min_blocks. + ; ;________________________________________________________________________________ */ -OSErr BlockAllocate ( - ExtendedVCB *vcb, /* which volume to allocate space on */ - u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */ - u_int32_t minBlocks, /* desired number of blocks to allocate */ - u_int32_t maxBlocks, /* maximum number of blocks to allocate */ - u_int32_t flags, /* option flags */ - u_int32_t *actualStartBlock, /* actual first block of allocation */ - u_int32_t *actualNumBlocks) -/* - * actualNumBlocks is the number of blocks actually allocated; - * if forceContiguous was zero, then this may represent fewer than minBlocks - */ +OSErr hfs_block_alloc_int(hfsmount_t *hfsmp, + HFSPlusExtentDescriptor *extent, + hfs_block_alloc_flags_t flags, + hfs_alloc_extra_args_t *ap) { u_int32_t freeBlocks; - OSErr err; + OSErr err = 0; Boolean updateAllocPtr = false; // true if nextAllocation needs to be updated - struct hfsmount *hfsmp; Boolean useMetaZone; - Boolean forceContiguous; + Boolean forceContiguous = false; Boolean forceFlush; + uint32_t startingBlock = extent->startBlock; + uint32_t minBlocks = extent->blockCount; + uint32_t maxBlocks = (ap && ap->max_blocks) ? 
ap->max_blocks : minBlocks; + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0); - if (flags & HFS_ALLOC_FORCECONTIG) { - forceContiguous = true; - } else { - forceContiguous = false; + if (ISSET(flags, HFS_ALLOC_COMMIT)) { + extent->startBlock = (*ap->reservation_in)->rl_start; + extent->blockCount = rl_len(*ap->reservation_in); + goto mark_allocated; + } + + if (ISSET(flags, HFS_ALLOC_ROLL_BACK)) + goto mark_allocated; + + freeBlocks = hfs_freeblks(hfsmp, 0); + + if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) { + struct rl_entry *range = *ap->reservation_in; + + if (range && range->rl_start != -1) { + /* + * It's possible that we have a tentative reservation + * but there aren't enough free blocks due to loaned blocks + * or insufficient space in the backing store. + */ + uint32_t count = min(min(maxBlocks, rl_len(range)), freeBlocks); + + if (count >= minBlocks) { + extent->startBlock = range->rl_start; + extent->blockCount = count; + + // Should we go straight to commit? + if (!ISSET(flags, HFS_ALLOC_LOCKED)) + SET(flags, HFS_ALLOC_COMMIT); + + goto mark_allocated; + } + } + + /* + * We can't use the tentative reservation so free it and allocate + * normally. 
+ */ + hfs_free_tentative(hfsmp, ap->reservation_in); + CLR(flags, HFS_ALLOC_USE_TENTATIVE); } + if (ISSET(flags, HFS_ALLOC_FORCECONTIG | HFS_ALLOC_TRY_HARD)) + forceContiguous = true; + if (flags & HFS_ALLOC_METAZONE) { useMetaZone = true; } else { @@ -963,15 +1167,11 @@ OSErr BlockAllocate ( forceFlush = false; } + assert(hfsmp->freeBlocks >= hfsmp->tentativeBlocks); - // - // Initialize outputs in case we get an error - // - *actualStartBlock = 0; - *actualNumBlocks = 0; - hfsmp = VCBTOHFS (vcb); - freeBlocks = hfs_freeblks(hfsmp, 0); - + // See if we have to steal tentative blocks + if (freeBlocks < hfsmp->tentativeBlocks + minBlocks) + SET(flags, HFS_ALLOC_IGNORE_TENTATIVE); /* Skip free block check if blocks are being allocated for relocating * data during truncating a volume. @@ -989,11 +1189,11 @@ OSErr BlockAllocate ( // If the disk is already full, don't bother. if (freeBlocks == 0) { err = dskFulErr; - goto Exit; + goto exit; } if (forceContiguous && freeBlocks < minBlocks) { err = dskFulErr; - goto Exit; + goto exit; } /* @@ -1007,6 +1207,14 @@ OSErr BlockAllocate ( } } + if (ISSET(flags, HFS_ALLOC_TRY_HARD)) { + err = hfs_alloc_try_hard(hfsmp, extent, maxBlocks, flags); + if (err) + goto exit; + + goto mark_allocated; + } + // // If caller didn't specify a starting block number, then use the volume's // next block to allocate from. 
@@ -1015,18 +1223,18 @@ OSErr BlockAllocate ( hfs_lock_mount (hfsmp); /* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */ - if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) { - startingBlock = vcb->sparseAllocation; + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + startingBlock = hfsmp->sparseAllocation; } else { - startingBlock = vcb->nextAllocation; + startingBlock = hfsmp->nextAllocation; } hfs_unlock_mount(hfsmp); updateAllocPtr = true; } - if (startingBlock >= vcb->allocLimit) { + if (startingBlock >= hfsmp->allocLimit) { startingBlock = 0; /* overflow so start at beginning */ } @@ -1035,8 +1243,8 @@ OSErr BlockAllocate ( // that is long enough. Otherwise, find the first free block. // if (forceContiguous) { - err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks, - flags, actualStartBlock, actualNumBlocks); + err = BlockFindContig(hfsmp, startingBlock, minBlocks, maxBlocks, + flags, &extent->startBlock, &extent->blockCount); /* * If we allocated from a new position then also update the roving allocator. * This will keep the roving allocation pointer up-to-date even @@ -1045,9 +1253,9 @@ OSErr BlockAllocate ( * the block to vend out. */ if ((err == noErr) && - (*actualStartBlock > startingBlock) && - ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) || - (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) { + (extent->startBlock > startingBlock) && + ((extent->startBlock < hfsmp->hfs_metazone_start) || + (extent->startBlock > hfsmp->hfs_metazone_end))) { updateAllocPtr = true; } } else { @@ -1069,12 +1277,13 @@ OSErr BlockAllocate ( } /* - * BlockAllocateKnown only examines the free extent cache; anything in there will + * BlockFindKnown only examines the free extent cache; anything in there will * have been committed to stable storage already. 
*/ - err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks); + err = BlockFindKnown(hfsmp, maxBlocks, &extent->startBlock, + &extent->blockCount); - /* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */ + /* dskFulErr out of BlockFindKnown indicates an empty Free Extent Cache */ if (err == dskFulErr) { /* @@ -1082,9 +1291,9 @@ OSErr BlockAllocate ( * allocation limit. We 'trust' the summary bitmap in this call, if it tells us * that it could not find any free space. */ - err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit, + err = BlockFindAny(hfsmp, startingBlock, hfsmp->allocLimit, maxBlocks, flags, true, - actualStartBlock, actualNumBlocks); + &extent->startBlock, &extent->blockCount); } if (err == dskFulErr) { /* @@ -1094,14 +1303,14 @@ OSErr BlockAllocate ( * If it is off, then we trust the above and go up until the startingBlock. */ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { - err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks, + err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks, flags, false, - actualStartBlock, actualNumBlocks); + &extent->startBlock, &extent->blockCount); } else { - err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks, + err = BlockFindAny(hfsmp, 1, startingBlock, maxBlocks, flags, false, - actualStartBlock, actualNumBlocks); + &extent->startBlock, &extent->blockCount); } /* @@ -1109,60 +1318,82 @@ OSErr BlockAllocate ( */ if (err == dskFulErr && forceFlush) { flags |= HFS_ALLOC_FLUSHTXN; - err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks, + err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks, flags, false, - actualStartBlock, actualNumBlocks); + &extent->startBlock, &extent->blockCount); } } } -Exit: - if ((hfsmp->hfs_flags & HFS_CS) && *actualNumBlocks != 0) { - errno_t ec; - _dk_cs_map_t cm; - uint64_t mapped_blocks; - - cm.cm_extent.offset = (uint64_t)*actualStartBlock * hfsmp->blockSize + hfsmp->hfsPlusIOPosOffset; - cm.cm_extent.length = 
(uint64_t)*actualNumBlocks * hfsmp->blockSize; - cm.cm_bytes_mapped = 0; - ec = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP, (caddr_t)&cm, 0, vfs_context_current()); - if (ec != 0 && ec != ENOSPC) { - printf ("VNOP_IOCTL(_DKIOCCSMAP) returned an unexpected error code=%d\n", ec); - err = ec; - goto Exit_CS; - } - mapped_blocks = cm.cm_bytes_mapped / hfsmp->blockSize; - /* CoreStorage returned more blocks than requested */ - if (mapped_blocks > *actualNumBlocks) { - printf ("VNOP_IOCTL(_DKIOCCSMAP) mapped too many blocks, mapped=%lld, actual=%d\n", - mapped_blocks, *actualNumBlocks); - } - if (*actualNumBlocks > mapped_blocks) { - if (forceContiguous && mapped_blocks < minBlocks) { - mapped_blocks = 0; - } - } - uint64_t numBlocksToFree = *actualNumBlocks - mapped_blocks; - uint64_t firstBlockToFree = *actualStartBlock + mapped_blocks; - if (numBlocksToFree > 0) { - err = BlockDeallocate(vcb, firstBlockToFree, numBlocksToFree, flags); - if (err != noErr) { - printf ("BlockDeallocate failed (err=%d)\n", err); - goto Exit_CS; + if (err) + goto exit; + +mark_allocated: + + // Handle alignment + if (ap && ap->alignment && extent->blockCount < ap->max_blocks) { + /* + * See the comment in FileMgrInternal.h for alignment + * semantics. 
+ */ + uint32_t rounding = ((extent->blockCount + ap->alignment_offset) + % ap->alignment); + + // @minBlocks is still the minimum + if (extent->blockCount >= minBlocks + rounding) + extent->blockCount -= rounding; + } + + err = BlockMarkAllocatedInternal(hfsmp, extent->startBlock, + extent->blockCount, flags); + + if (err) + goto exit; + + if (ISSET(hfsmp->hfs_flags, HFS_CS) && extent->blockCount != 0 + && !ISSET(flags, HFS_ALLOC_TENTATIVE)) { + if (ISSET(flags, HFS_ALLOC_FAST_DEV)) { +#if !HFS_ALLOC_TEST /* need this guard because this file is compiled outside of the kernel */ + hfs_pin_block_range(hfsmp, HFS_PIN_IT, + extent->startBlock, extent->blockCount, + vfs_context_kernel()); +#endif + } else { + _dk_cs_map_t cm = { + .cm_extent = { + (hfs_blk_to_bytes(extent->startBlock, hfsmp->blockSize) + + hfsmp->hfsPlusIOPosOffset), + hfs_blk_to_bytes(extent->blockCount, hfsmp->blockSize) + } + }; + + errno_t err2 = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP, + (caddr_t)&cm, 0, vfs_context_current()); + + /* + * Ignore errors for now; we are fully provisioned so in + * theory CoreStorage should be able to handle this + * allocation. Should we want to change this in future, then + * we should think carefully how we handle errors. Allowing + * CoreStorage to truncate our allocation is problematic + * because we might have minimum and alignment requirements + * and backing out changes we have already made is + * non-trivial. + */ + + if (err2 || cm.cm_bytes_mapped < cm.cm_extent.length) { + printf("hfs: _DKIOCCSMAP error: %d, bytes_mapped: %llu\n", + err2, cm.cm_bytes_mapped); } } - *actualNumBlocks = mapped_blocks; - if (*actualNumBlocks == 0 && err == noErr) { - err = dskFulErr; - } } -Exit_CS: + // if we actually allocated something then go update the // various bits of state that we maintain regardless of // whether there was an error (i.e. partial allocations // still need to update things like the free block count). 
// - if (*actualNumBlocks != 0) { + if (extent->blockCount != 0) { // // If we used the volume's roving allocation pointer, then we need to update it. // Adding in the length of the current allocation might reduce the next allocate @@ -1173,24 +1404,39 @@ OSErr BlockAllocate ( // hfs_lock_mount (hfsmp); - lck_spin_lock(&hfsmp->vcbFreeExtLock); - if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) { - vcb->sparseAllocation = *actualStartBlock; - } - lck_spin_unlock(&hfsmp->vcbFreeExtLock); - if (*actualNumBlocks < vcb->hfs_freed_block_count) { - vcb->hfs_freed_block_count -= *actualNumBlocks; - } else { - vcb->hfs_freed_block_count = 0; - } + if (!ISSET(flags, HFS_ALLOC_USE_TENTATIVE | HFS_ALLOC_COMMIT)) { + lck_spin_lock(&hfsmp->vcbFreeExtLock); + if (hfsmp->vcbFreeExtCnt == 0 && hfsmp->hfs_freed_block_count == 0) { + hfsmp->sparseAllocation = extent->startBlock; + } + lck_spin_unlock(&hfsmp->vcbFreeExtLock); + if (extent->blockCount < hfsmp->hfs_freed_block_count) { + hfsmp->hfs_freed_block_count -= extent->blockCount; + } else { + hfsmp->hfs_freed_block_count = 0; + } + + if (updateAllocPtr && + ((extent->startBlock < hfsmp->hfs_metazone_start) || + (extent->startBlock > hfsmp->hfs_metazone_end))) { + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, extent->startBlock); + } - if (updateAllocPtr && - ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) || - (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) { - HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock); + (void) remove_free_extent_cache(hfsmp, extent->startBlock, extent->blockCount); } - (void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks); + if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) { + (*ap->reservation_in)->rl_start += extent->blockCount; + hfsmp->tentativeBlocks -= extent->blockCount; + if (rl_len(*ap->reservation_in) <= 0) + hfs_free_tentative(hfsmp, ap->reservation_in); + } else if (ISSET(flags, HFS_ALLOC_COMMIT)) { + // Handle committing locked extents + 
assert(hfsmp->lockedBlocks >= extent->blockCount); + (*ap->reservation_in)->rl_start += extent->blockCount; + hfsmp->lockedBlocks -= extent->blockCount; + hfs_free_locked(hfsmp, ap->reservation_in); + } /* * Update the number of free blocks on the volume @@ -1198,36 +1444,122 @@ OSErr BlockAllocate ( * Skip updating the free blocks count if the block are * being allocated to relocate data as part of hfs_truncatefs() */ - if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) { - vcb->freeBlocks -= *actualNumBlocks; + + if (ISSET(flags, HFS_ALLOC_TENTATIVE)) { + hfsmp->tentativeBlocks += extent->blockCount; + } else if (ISSET(flags, HFS_ALLOC_LOCKED)) { + hfsmp->lockedBlocks += extent->blockCount; + } else if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) { + hfsmp->freeBlocks -= extent->blockCount; } - MarkVCBDirty(vcb); + MarkVCBDirty(hfsmp); hfs_unlock_mount(hfsmp); - hfs_generate_volume_notifications(VCBTOHFS(vcb)); + hfs_generate_volume_notifications(hfsmp); + + if (ISSET(flags, HFS_ALLOC_TENTATIVE)) { + add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount, + 0, ap->reservation_out); + } else if (ISSET(flags, HFS_ALLOC_LOCKED)) { + add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount, + 1, ap->reservation_out); + } + + if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) { + /* + * See if we used tentative blocks. Note that we cannot + * free the reservations here because we don't have access + * to the external pointers. All we can do is update the + * reservations and they'll be cleaned up when whatever is + * holding the pointers calls us back. + * + * We use the rangelist code to detect overlaps and + * constrain the tentative block allocation. Note that + * @end is inclusive so that our rangelist code will + * resolve the various cases for us. 
As a result, we need + * to ensure that we account for it properly when removing + * the blocks from the tentative count in the mount point + * and re-inserting the remainder (either head or tail) + */ + struct rl_entry *range, *next_range; + struct rl_head *ranges = &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS]; + const uint32_t start = extent->startBlock; + const uint32_t end = start + extent->blockCount - 1; + TAILQ_FOREACH_SAFE(range, ranges, rl_link, next_range) { + switch (rl_overlap(range, start, end)) { + case RL_OVERLAPCONTAINSRANGE: + // Keep the bigger part + if (start - range->rl_start > range->rl_end - end) { + // Discard the tail + hfsmp->tentativeBlocks -= range->rl_end + 1 - start; + hfs_release_summary(hfsmp, end + 1, range->rl_end - end); + const uint32_t old_end = range->rl_end; + range->rl_end = start - 1; + add_free_extent_cache(hfsmp, end + 1, old_end - end); + } else { + // Discard the head + hfsmp->tentativeBlocks -= end + 1 - range->rl_start; + hfs_release_summary(hfsmp, range->rl_start, + start - range->rl_start); + const uint32_t old_start = range->rl_start; + range->rl_start = end + 1; + add_free_extent_cache(hfsmp, old_start, + start - old_start); + } + assert(range->rl_end >= range->rl_start); + break; + case RL_MATCHINGOVERLAP: + case RL_OVERLAPISCONTAINED: + hfsmp->tentativeBlocks -= rl_len(range); + range->rl_end = range->rl_start - 1; + hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS); + break; + case RL_OVERLAPSTARTSBEFORE: + hfsmp->tentativeBlocks -= range->rl_end + 1 - start; + range->rl_end = start - 1; + assert(range->rl_end >= range->rl_start); + break; + case RL_OVERLAPENDSAFTER: + hfsmp->tentativeBlocks -= end + 1 - range->rl_start; + range->rl_start = end + 1; + assert(range->rl_end >= range->rl_start); + break; + case RL_NOOVERLAP: + break; + } + } + } } +exit: + if (ALLOC_DEBUG) { if (err == noErr) { - if (*actualStartBlock >= hfsmp->totalBlocks) { + if (extent->startBlock >= hfsmp->totalBlocks) { panic 
("BlockAllocate: vending invalid blocks!"); } - if (*actualStartBlock >= hfsmp->allocLimit) { + if (extent->startBlock >= hfsmp->allocLimit) { panic ("BlockAllocate: vending block past allocLimit!"); } - if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->totalBlocks) { + if ((extent->startBlock + extent->blockCount) >= hfsmp->totalBlocks) { panic ("BlockAllocate: vending too many invalid blocks!"); } - if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->allocLimit) { + if ((extent->startBlock + extent->blockCount) >= hfsmp->allocLimit) { panic ("BlockAllocate: vending too many invalid blocks past allocLimit!"); } } } + if (err) { + // Just to be safe... + extent->startBlock = 0; + extent->blockCount = 0; + } + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) - KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); + KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, extent->startBlock, extent->blockCount, 0, 0); return err; } @@ -1251,6 +1583,7 @@ OSErr BlockAllocate ( ; Side effects: ; The volume bitmap is read and updated; the volume bitmap cache may be changed. ; The Allocator's red-black trees may also be modified as a result. 
+; ;________________________________________________________________________________ */ @@ -1258,8 +1591,11 @@ OSErr BlockDeallocate ( ExtendedVCB *vcb, // Which volume to deallocate space on u_int32_t firstBlock, // First block in range to deallocate u_int32_t numBlocks, // Number of contiguous blocks to deallocate - u_int32_t flags) + hfs_block_alloc_flags_t flags) { + if (ISSET(flags, HFS_ALLOC_TENTATIVE | HFS_ALLOC_LOCKED)) + return 0; + OSErr err; struct hfsmount *hfsmp; hfsmp = VCBTOHFS(vcb); @@ -1390,7 +1726,8 @@ MetaZoneFreeBlocks(ExtendedVCB *vcb) (void) ReleaseBitmapBlock(vcb, blockRef, false); blockRef = 0; } - if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) { + if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef, + HFS_ALLOC_IGNORE_TENTATIVE) != 0) { return (0); } buffer = (u_int8_t *)currCache; @@ -1432,6 +1769,104 @@ static u_int32_t NextBitmapBlock( } +// Assumes @bitmap is aligned to 8 bytes and multiple of 8 bytes. +static void bits_set(void *bitmap, int start, int end) +{ + const int start_bit = start & 63; + const int end_bit = end & 63; + +#define LEFT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull << (64 - bit)) +#define RIGHT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull >> bit) + + uint64_t *p = (uint64_t *)bitmap + start / 64; + + if ((start & ~63) == (end & ~63)) { + // Start and end in same 64 bits + *p |= RIGHT_MASK(start_bit) & LEFT_MASK(end_bit); + } else { + *p++ |= RIGHT_MASK(start_bit); + + int nquads = (end - end_bit - start - 1) / 64; + + while (nquads--) + *p++ = 0xffffffffffffffffull; + + if (end_bit) + *p |= LEFT_MASK(end_bit); + } +} + +// Modifies the buffer and applies any reservations that we might have +static buf_t process_reservations(hfsmount_t *hfsmp, buf_t bp, off_t offset, + hfs_block_alloc_flags_t flags, + bool always_copy) +{ + bool taken_copy = false; + void *buffer = (void *)buf_dataptr(bp); + const uint32_t nbytes = buf_count(bp); + const off_t end = offset + nbytes * 8 - 1; + + for (int i 
= (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE) + ? HFS_LOCKED_BLOCKS : HFS_TENTATIVE_BLOCKS); i < 2; ++i) { + struct rl_entry *entry; + TAILQ_FOREACH(entry, &hfsmp->hfs_reserved_ranges[i], rl_link) { + uint32_t a, b; + + enum rl_overlaptype overlap_type = rl_overlap(entry, offset, end); + + if (overlap_type == RL_NOOVERLAP) + continue; + + /* + * If always_copy is false, we only take a copy if B_LOCKED is + * set because ReleaseScanBitmapRange doesn't invalidate the + * buffer in that case. + */ + if (!taken_copy && (always_copy || ISSET(buf_flags(bp), B_LOCKED))) { + buf_t new_bp = buf_create_shadow(bp, true, 0, NULL, NULL); + buf_brelse(bp); + bp = new_bp; + buf_setflags(bp, B_NOCACHE); + buffer = (void *)buf_dataptr(bp); + taken_copy = true; + } + + switch (overlap_type) { + case RL_OVERLAPCONTAINSRANGE: + case RL_MATCHINGOVERLAP: + memset(buffer, 0xff, nbytes); + return bp; + case RL_OVERLAPISCONTAINED: + a = entry->rl_start; + b = entry->rl_end; + break; + case RL_OVERLAPSTARTSBEFORE: + a = offset; + b = entry->rl_end; + break; + case RL_OVERLAPENDSAFTER: + a = entry->rl_start; + b = end; + break; + case RL_NOOVERLAP: + __builtin_unreachable(); + } + + a -= offset; + b -= offset; + + assert(a < buf_count(bp) * 8); + assert(b < buf_count(bp) * 8); + assert(b >= a); + + // b is inclusive + bits_set(buffer, a, b + 1); + } + } // for (;;) + + return bp; +} + /* ;_______________________________________________________________________ ; @@ -1449,11 +1884,11 @@ static u_int32_t NextBitmapBlock( ; blockRef ;_______________________________________________________________________ */ -static OSErr ReadBitmapBlock( - ExtendedVCB *vcb, - u_int32_t bit, - u_int32_t **buffer, - uintptr_t *blockRef) +static OSErr ReadBitmapBlock(ExtendedVCB *vcb, + u_int32_t bit, + u_int32_t **buffer, + uintptr_t *blockRef, + hfs_block_alloc_flags_t flags) { OSErr err; struct buf *bp = NULL; @@ -1492,6 +1927,13 @@ static OSErr ReadBitmapBlock( *blockRef = 0; *buffer = NULL; } else { + if 
(!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED)) { + bp = process_reservations(vcb, bp, block * blockSize * 8, + flags, /* always_copy: */ true); + } + + buf_setfsprivate(bp, (void *)(uintptr_t)flags); + *blockRef = (uintptr_t)bp; *buffer = (u_int32_t *)buf_dataptr(bp); } @@ -1572,6 +2014,9 @@ static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset, *blockRef = 0; *buffer = NULL; } else { + bp = process_reservations(hfsmp, bp, (offset * 8), 0, + /* always_copy: */ false); + *blockRef = bp; *buffer = (u_int32_t *)buf_dataptr(bp); } @@ -1616,7 +2061,11 @@ static OSErr ReleaseBitmapBlock( if (bp) { if (dirty) { - // XXXdbg + hfs_block_alloc_flags_t flags = (uintptr_t)buf_fsprivate(bp); + + if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED)) + panic("Modified read-only bitmap buffer!"); + struct hfsmount *hfsmp = VCBTOHFS(vcb); if (hfsmp->jnl) { @@ -1670,15 +2119,65 @@ static OSErr ReleaseScanBitmapRange(struct buf *bp ) { return (0); } +/* + * @extent.startBlock, on input, contains a preferred block for the + * allocation. @extent.blockCount, on input, contains the minimum + * number of blocks acceptable. Upon success, the result is conveyed + * in @extent. + */ +static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp, + HFSPlusExtentDescriptor *extent, + uint32_t max_blocks, + hfs_block_alloc_flags_t flags) +{ + OSErr err = dskFulErr; + + const uint32_t min_blocks = extent->blockCount; + + // It's > rather than >= because the last block is always reserved + if (extent->startBlock > 0 && extent->startBlock < hfsmp->allocLimit + && hfsmp->allocLimit - extent->startBlock > max_blocks) { + /* + * This is just checking to see if there's an extent starting + * at extent->startBlock that will suit. We only check for + * @max_blocks here; @min_blocks is ignored. 
+ */ + + err = BlockFindContiguous(hfsmp, extent->startBlock, extent->startBlock + max_blocks, + max_blocks, max_blocks, true, true, + &extent->startBlock, &extent->blockCount, flags); + + if (err != dskFulErr) + return err; + } + + err = BlockFindKnown(hfsmp, max_blocks, &extent->startBlock, + &extent->blockCount); + + if (!err) { + if (extent->blockCount >= max_blocks) + return 0; + } else if (err != dskFulErr) + return err; + + // Try a more exhaustive search + return BlockFindContiguous(hfsmp, 1, hfsmp->allocLimit, + min_blocks, max_blocks, + /* useMetaZone: */ true, + /* trustSummary: */ true, + &extent->startBlock, &extent->blockCount, flags); +} + /* _______________________________________________________________________ -Routine: BlockAllocateContig +Routine: BlockFindContig -Function: Allocate a contiguous group of allocation blocks. The - allocation is all-or-nothing. The caller guarantees that - there are enough free blocks (though they may not be - contiguous, in which case this call will fail). +Function: Find a contiguous group of allocation blocks. If the + minimum cannot be satisfied, nothing is returned. The + caller guarantees that there are enough free blocks + (though they may not be contiguous, in which case this + call will fail). Inputs: vcb Pointer to volume where space is to be allocated @@ -1692,12 +2191,12 @@ Function: Allocate a contiguous group of allocation blocks. 
The actualNumBlocks Number of blocks allocated, or 0 if error _______________________________________________________________________ */ -static OSErr BlockAllocateContig( +static OSErr BlockFindContig( ExtendedVCB *vcb, u_int32_t startingBlock, u_int32_t minBlocks, u_int32_t maxBlocks, - u_int32_t flags, + hfs_block_alloc_flags_t flags, u_int32_t *actualStartBlock, u_int32_t *actualNumBlocks) { @@ -1718,40 +2217,39 @@ static OSErr BlockAllocateContig( struct hfsmount *hfsmp = VCBTOHFS(vcb); if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) - KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0); + KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0); while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) { /* Try and find something that works. */ - do { + + /* + * NOTE: If the only contiguous free extent of at least minBlocks + * crosses startingBlock (i.e. starts before, ends after), then we + * won't find it. Earlier versions *did* find this case by letting + * the second search look past startingBlock by minBlocks. But + * with the free extent cache, this can lead to duplicate entries + * in the cache, causing the same blocks to be allocated twice. + */ + retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks, + maxBlocks, useMetaZone, true, &foundStart, &foundCount, flags); + + if (retval == dskFulErr && currentStart != 0) { /* - * NOTE: If the only contiguous free extent of at least minBlocks - * crosses startingBlock (i.e. starts before, ends after), then we - * won't find it. Earlier versions *did* find this case by letting - * the second search look past startingBlock by minBlocks. But - * with the free extent cache, this can lead to duplicate entries - * in the cache, causing the same blocks to be allocated twice. 
+ * We constrain the endingBlock so we don't bother looking for ranges + * that would overlap those found in the previous call, if the summary bitmap + * is not on for this volume. If it is, then we assume that it was not trust + * -worthy and do a full scan. */ - retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks, - maxBlocks, useMetaZone, true, &foundStart, &foundCount); - - if (retval == dskFulErr && currentStart != 0) { - /* - * We constrain the endingBlock so we don't bother looking for ranges - * that would overlap those found in the previous call, if the summary bitmap - * is not on for this volume. If it is, then we assume that it was not trust - * -worthy and do a full scan. - */ - if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { - retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks, - maxBlocks, useMetaZone, false, &foundStart, &foundCount); - } - else { - retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks, - maxBlocks, useMetaZone, false, &foundStart, &foundCount); - } - } - } while (0); + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks, + maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags); + } + else { + retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks, + maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags); + } + } if (retval != noErr) { goto bailout; @@ -1819,17 +2317,15 @@ static OSErr BlockAllocateContig( } // end while loop. 
bailout: - /* mark the blocks as in-use */ + if (retval == noErr) { *actualStartBlock = foundStart; *actualNumBlocks = foundCount; - err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks); - - if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) { - KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, *actualStartBlock, *actualNumBlocks, 0, 0, 0); - } } + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_END, foundStart, foundCount, retval, 0, 0); + return retval; } @@ -1838,12 +2334,11 @@ static OSErr BlockAllocateContig( /* _______________________________________________________________________ -Routine: BlockAllocateAny +Routine: BlockFindAny -Function: Allocate one or more allocation blocks. If there are fewer - free blocks than requested, all free blocks will be - allocated. The caller guarantees that there is at least - one free block. +Function: Find one or more allocation blocks and may return fewer than + requested. The caller guarantees that there is at least one + free block. Inputs: vcb Pointer to volume where space is to be allocated @@ -1858,12 +2353,12 @@ Function: Allocate one or more allocation blocks. 
If there are fewer _______________________________________________________________________ */ -static OSErr BlockAllocateAny( +static OSErr BlockFindAny( ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t endingBlock, u_int32_t maxBlocks, - u_int32_t flags, + hfs_block_alloc_flags_t flags, Boolean trustSummary, u_int32_t *actualStartBlock, u_int32_t *actualNumBlocks) @@ -1913,7 +2408,7 @@ static OSErr BlockAllocateAny( } } - err = BlockAllocateAnyBitmap(vcb, start_blk, end_blk, maxBlocks, + err = BlockFindAnyBitmap(vcb, start_blk, end_blk, maxBlocks, flags, actualStartBlock, actualNumBlocks); return err; @@ -1921,33 +2416,32 @@ static OSErr BlockAllocateAny( /* - * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out - * where the free allocation blocks are. Inputs and outputs are the same as for - * BlockAllocateAny and BlockAllocateAnyRBTree + * BlockFindAnyBitmap finds free ranges by scanning the bitmap to + * figure out where the free allocation blocks are. Inputs and + * outputs are the same as for BlockFindAny. 
*/ -static OSErr BlockAllocateAnyBitmap( +static OSErr BlockFindAnyBitmap( ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t endingBlock, u_int32_t maxBlocks, - u_int32_t flags, + hfs_block_alloc_flags_t flags, u_int32_t *actualStartBlock, u_int32_t *actualNumBlocks) { OSErr err; - register u_int32_t block; // current block number + register u_int32_t block = 0; // current block number register u_int32_t currentWord; // Pointer to current word within bitmap block register u_int32_t bitMask; // Word with given bits already set (ready to OR in) register u_int32_t wordsLeft; // Number of words left in this bitmap block u_int32_t *buffer = NULL; u_int32_t *currCache = NULL; - uintptr_t blockRef; + uintptr_t blockRef = 0; u_int32_t bitsPerBlock; u_int32_t wordsPerBlock; Boolean dirty = false; struct hfsmount *hfsmp = VCBTOHFS(vcb); - uint32_t summary_block_scan = 0; Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE); Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN); @@ -1955,6 +2449,7 @@ static OSErr BlockAllocateAnyBitmap( KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0); restartSearchAny: + /* * When we're skipping the metadata zone and the start/end * range overlaps with the metadata zone then adjust the @@ -1981,7 +2476,7 @@ static OSErr BlockAllocateAnyBitmap( // // Pre-read the first bitmap block // - err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef); + err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef, flags); if (err != noErr) goto Exit; buffer = currCache; @@ -2005,6 +2500,8 @@ static OSErr BlockAllocateAnyBitmap( * While loop 1: * Find the first unallocated block starting at 'block' */ + uint32_t summary_block_scan = 0; + block=startingBlock; while (block < endingBlock) { if ((currentWord & bitMask) == 0) @@ -2053,7 +2550,7 @@ static OSErr BlockAllocateAnyBitmap( goto Exit; } - err = ReadBitmapBlock(vcb, block, &currCache, &blockRef); + err = 
ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags); if (err != noErr) goto Exit; buffer = currCache; summary_block_scan = block; @@ -2156,7 +2653,7 @@ static OSErr BlockAllocateAnyBitmap( goto Exit; } - err = ReadBitmapBlock(vcb, block, &currCache, &blockRef); + err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags); if (err != noErr) { goto Exit; } @@ -2179,11 +2676,8 @@ static OSErr BlockAllocateAnyBitmap( // sanity check if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) { - panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN); + panic("hfs: BlockFindAnyBitmap: allocation overflow on \"%s\"", vcb->vcbVN); } - - /* Mark the bits found as in-use */ - err = BlockMarkAllocatedInternal (vcb, *actualStartBlock, *actualNumBlocks); } else { *actualStartBlock = 0; @@ -2200,10 +2694,11 @@ static OSErr BlockAllocateAnyBitmap( /* _______________________________________________________________________ -Routine: BlockAllocateKnown +Routine: BlockFindKnown -Function: Try to allocate space from known free space in the free - extent cache. +Function: Return a potential extent from the free extent cache. The + returned extent *must* be marked allocated and removed + from the cache by the *caller*. 
Inputs: vcb Pointer to volume where space is to be allocated @@ -2218,7 +2713,7 @@ Function: Try to allocate space from known free space in the free _______________________________________________________________________ */ -static OSErr BlockAllocateKnown( +static OSErr BlockFindKnown( ExtendedVCB *vcb, u_int32_t maxBlocks, u_int32_t *actualStartBlock, @@ -2229,7 +2724,7 @@ static OSErr BlockAllocateKnown( struct hfsmount *hfsmp = VCBTOHFS(vcb); if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) - KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0); + KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0); hfs_lock_mount (hfsmp); lck_spin_lock(&vcb->vcbFreeExtLock); @@ -2238,7 +2733,7 @@ static OSErr BlockAllocateKnown( lck_spin_unlock(&vcb->vcbFreeExtLock); hfs_unlock_mount(hfsmp); if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) - KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0); + KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0); return dskFulErr; } lck_spin_unlock(&vcb->vcbFreeExtLock); @@ -2255,29 +2750,17 @@ static OSErr BlockAllocateKnown( lck_spin_unlock(&vcb->vcbFreeExtLock); - remove_free_extent_cache(vcb, *actualStartBlock, *actualNumBlocks); - // sanity check if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) { printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN); hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED); - *actualStartBlock = 0; - *actualNumBlocks = 0; err = EIO; - } - else - { - // - // Now mark the found extent in the bitmap - // - err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks); - } - - sanity_check_free_ext(vcb, 0); + } else + err = 0; if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) - KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, 
*actualStartBlock, *actualNumBlocks, 0, 0); + KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); return err; } @@ -2300,12 +2783,11 @@ OSErr BlockMarkAllocated( hfsmp = VCBTOHFS(vcb); - return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks); + return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks, 0); } - /* _______________________________________________________________________ @@ -2327,9 +2809,10 @@ _______________________________________________________________________ */ static OSErr BlockMarkAllocatedInternal ( - ExtendedVCB *vcb, - u_int32_t startingBlock, - register u_int32_t numBlocks) + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t numBlocks, + hfs_block_alloc_flags_t flags) { OSErr err; register u_int32_t *currentWord; // Pointer to current word within bitmap block @@ -2338,14 +2821,24 @@ OSErr BlockMarkAllocatedInternal ( u_int32_t firstBit; // Bit index within word of first bit to allocate u_int32_t numBits; // Number of bits in word to allocate u_int32_t *buffer = NULL; - uintptr_t blockRef; + uintptr_t blockRef = 0; u_int32_t bitsPerBlock; u_int32_t wordsPerBlock; // XXXdbg struct hfsmount *hfsmp = VCBTOHFS(vcb); if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) - KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0); + KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, flags, 0, 0); + +#if DEBUG + + struct rl_entry *range; + TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) { + assert(rl_overlap(range, startingBlock, + startingBlock + numBlocks - 1) == RL_NOOVERLAP); + } + +#endif int force_flush = 0; /* @@ -2368,11 +2861,24 @@ OSErr BlockMarkAllocatedInternal ( hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks); + /* + * Don't make changes to the disk if we're just reserving. 
Note that + * we could do better in the tentative case because we could, in theory, + * avoid the journal flush above. However, that would mean that we would + * need to catch the callback to stop it incorrectly adding the extent + * to our free cache. + */ + if (ISSET(flags, HFS_ALLOC_LOCKED | HFS_ALLOC_TENTATIVE)) { + err = 0; + goto Exit; + } + // // Pre-read the bitmap block containing the first word of allocation // - err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_RESERVED); if (err != noErr) goto Exit; // // Initialize currentWord, and wordsLeft. @@ -2407,7 +2913,7 @@ OSErr BlockMarkAllocatedInternal ( numBits = numBlocks; // entire allocation is inside this one word bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last } -#if DEBUG_BUILD +#if DEBUG if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!"); } @@ -2433,7 +2939,8 @@ OSErr BlockMarkAllocatedInternal ( err = ReleaseBitmapBlock(vcb, blockRef, true); if (err != noErr) goto Exit; - err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_RESERVED); if (err != noErr) goto Exit; // XXXdbg @@ -2445,7 +2952,7 @@ OSErr BlockMarkAllocatedInternal ( currentWord = buffer; wordsLeft = wordsPerBlock; } -#if DEBUG_BUILD +#if DEBUG if (*currentWord != 0) { panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!"); } @@ -2471,7 +2978,8 @@ OSErr BlockMarkAllocatedInternal ( err = ReleaseBitmapBlock(vcb, blockRef, true); if (err != noErr) goto Exit; - err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_RESERVED); if (err != noErr) goto Exit; // XXXdbg @@ -2483,7 +2991,7 @@ OSErr BlockMarkAllocatedInternal ( currentWord = buffer; wordsLeft
= wordsPerBlock; } -#if DEBUG_BUILD +#if DEBUG if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!"); } @@ -2652,7 +3160,7 @@ OSErr BlockMarkFreeInternal( u_int32_t currentBit; // Bit index within word of current bit to allocate u_int32_t numBits; // Number of bits in word to allocate u_int32_t *buffer = NULL; - uintptr_t blockRef; + uintptr_t blockRef = 0; u_int32_t bitsPerBlock; u_int32_t wordsPerBlock; // XXXdbg @@ -2667,27 +3175,46 @@ OSErr BlockMarkFreeInternal( */ if ((do_validate == true) && (startingBlock + numBlocks > vcb->totalBlocks)) { - if (ALLOC_DEBUG) { - panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN); - } - +#if ALLOC_DEBUG || DEBUG + panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN); + __builtin_unreachable(); +#else printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN); hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED); err = EIO; goto Exit; +#endif } // // Pre-read the bitmap block containing the first word of allocation // - err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_RESERVED); if (err != noErr) goto Exit; // XXXdbg if (hfsmp->jnl) { journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); } + uint32_t min_unmap = 0, max_unmap = UINT32_MAX; + + // Work out the bounds of any unmap we can send down + struct rl_entry *range; + for (int i = 0; i < 2; ++i) { + TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) { + if (range->rl_start < startingBlock + && range->rl_end >= min_unmap) { + min_unmap = range->rl_end + 1; + } + if (range->rl_end >= startingBlock + numBlocks + && range->rl_start < max_unmap) { + 
max_unmap = range->rl_start; + } + } + } + // // Figure out how many bits and words per bitmap block. // @@ -2703,7 +3230,7 @@ OSErr BlockMarkFreeInternal( currentWord = buffer + wordIndexInBlock; currentBit = startingBlock % kBitsPerWord; bitMask = kHighBitInWordMask >> currentBit; - while (true) { + while (unmapStart > min_unmap) { // Move currentWord/bitMask back by one bit bitMask <<= 1; if (bitMask == 0) { @@ -2758,7 +3285,8 @@ OSErr BlockMarkFreeInternal( err = ReleaseBitmapBlock(vcb, blockRef, true); if (err != noErr) goto Exit; - err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_RESERVED); if (err != noErr) goto Exit; // XXXdbg @@ -2795,7 +3323,8 @@ OSErr BlockMarkFreeInternal( err = ReleaseBitmapBlock(vcb, blockRef, true); if (err != noErr) goto Exit; - err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_RESERVED); if (err != noErr) goto Exit; // XXXdbg @@ -2825,7 +3354,7 @@ OSErr BlockMarkFreeInternal( currentWord = buffer + wordIndexInBlock; currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord; bitMask = kHighBitInWordMask >> currentBit; - while (true) { + while (unmapStart + unmapCount < max_unmap) { // Move currentWord/bitMask/wordsLeft forward one bit bitMask >>= 1; if (bitMask == 0) { @@ -2855,10 +3384,12 @@ OSErr BlockMarkFreeInternal( return err; Corruption: -#if DEBUG_BUILD +#if DEBUG panic("hfs: BlockMarkFreeInternal: blocks not allocated!"); + __builtin_unreachable(); #else - printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s\n", vcb->vcbVN); + printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s <%u, %u>\n", + vcb->vcbVN, startingBlock_in, numBlocks_in); hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED); err = EIO; goto Exit; @@ -2905,7 +3436,8 @@ static OSErr 
BlockFindContiguous( Boolean useMetaZone, Boolean trustSummary, u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks) + u_int32_t *actualNumBlocks, + hfs_block_alloc_flags_t flags) { OSErr err; register u_int32_t currentBlock; // Block we're currently looking at. @@ -2917,10 +3449,11 @@ static OSErr BlockFindContiguous( register u_int32_t bitMask; register u_int32_t wordsLeft; register u_int32_t tempWord; - uintptr_t blockRef; + uintptr_t blockRef = 0; u_int32_t wordsPerBlock; u_int32_t updated_free_extent = 0; struct hfsmount *hfsmp = (struct hfsmount*) vcb; + HFSPlusExtentDescriptor best = { 0, 0 }; if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0); @@ -2966,16 +3499,19 @@ static OSErr BlockFindContiguous( */ if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) { uint32_t suggestion; - if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) { - currentBlock = suggestion; - } + err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion); + if (err && err != ENOSPC) + goto ErrorExit; + if (err == ENOSPC || suggestion >= stopBlock) + goto DiskFull; + currentBlock = suggestion; } // // Pre-read the first bitmap block. // - err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef); + err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags); if ( err != noErr ) goto ErrorExit; // @@ -2987,6 +3523,10 @@ static OSErr BlockFindContiguous( currentWord = buffer + wordsLeft; wordsLeft = wordsPerBlock - wordsLeft; + uint32_t remaining = (hfsmp->freeBlocks - hfsmp->lockedBlocks + - (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE) + ? 0 : hfsmp->tentativeBlocks)); + /* * This outer do-while loop is the main body of this function. 
Its job is * to search through the blocks (until we hit 'stopBlock'), and iterate @@ -2996,6 +3536,13 @@ static OSErr BlockFindContiguous( do { foundBlocks = 0; + /* + * We will try and update the summary table as we search + * below. Note that we will never update the summary table + * for the first and last blocks that the summary table + * covers. Ideally, we should, but the benefits probably + * aren't that significant so we leave things alone for now. + */ uint32_t summary_block_scan = 0; /* * Inner while loop 1: @@ -3066,14 +3613,15 @@ static OSErr BlockFindContiguous( /* Skip over fully allocated bitmap blocks if we can */ if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) { uint32_t suggestion; - if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) { - if (suggestion < stopBlock) { - currentBlock = suggestion; - } - } + err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion); + if (err && err != ENOSPC) + goto ErrorExit; + if (err == ENOSPC || suggestion >= stopBlock) + goto LoopExit; + currentBlock = suggestion; } - err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef); + err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags); if ( err != noErr ) goto ErrorExit; /* @@ -3172,7 +3720,7 @@ static OSErr BlockFindContiguous( } } - err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef); + err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags); if ( err != noErr ) goto ErrorExit; currentWord = buffer; @@ -3214,37 +3762,82 @@ static OSErr BlockFindContiguous( foundBlocks = currentBlock - firstBlock; if (foundBlocks > maxBlocks) foundBlocks = maxBlocks; - if (foundBlocks >= minBlocks) + + if (remaining) { + if (foundBlocks > remaining) { +#if DEBUG || DEVELOPMENT + printf("hfs: found more blocks than are indicated free!\n"); +#endif + remaining = UINT32_MAX; + } else + remaining -= foundBlocks; + } + + if (ISSET(flags, HFS_ALLOC_TRY_HARD)) { + if (foundBlocks > best.blockCount) { + 
best.startBlock = firstBlock; + best.blockCount = foundBlocks; + } + + if (foundBlocks >= maxBlocks || best.blockCount >= remaining) + break; + + /* + * Note that we will go ahead and add this free extent to our + * cache below but that's OK because we'll remove it again if we + * decide to use this extent. + */ + } else if (foundBlocks >= minBlocks) break; // Found what we needed! /* - * We did not find the total blocks were were looking for, but + * We did not find the total blocks we were looking for, but * add this free block run to our free extent cache list, if possible. */ - if (hfsmp->jnl == NULL) { - /* If there is no journal, go ahead and add to the free ext cache. */ - updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks); + + // If we're ignoring tentative ranges, we need to account for them here + if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) { + struct rl_entry free_extent = rl_make(firstBlock, firstBlock + foundBlocks - 1); + struct rl_entry *range;; + TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], rl_link) { + rl_subtract(&free_extent, range); + if (rl_len(range) == 0) + break; + } + firstBlock = free_extent.rl_start; + foundBlocks = rl_len(&free_extent); } - else { - /* - * If journaled, only add to the free extent cache if this block is not - * waiting for a TRIM to complete; that implies that the transaction that freed it - * has not yet been committed to stable storage. - */ - int recently_deleted = 0; - uint32_t nextblock; - err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock, - (uint64_t)foundBlocks, &recently_deleted, &nextblock); - if ((err) || (recently_deleted == 0)) { - /* if we hit an error, or the blocks not recently freed, go ahead and insert it */ + + if (foundBlocks) { + if (hfsmp->jnl == NULL) { + /* If there is no journal, go ahead and add to the free ext cache. 
*/ updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks); } - err = 0; + else { + /* + * If journaled, only add to the free extent cache if this block is not + * waiting for a TRIM to complete; that implies that the transaction that freed it + * has not yet been committed to stable storage. + */ + int recently_deleted = 0; + uint32_t nextblock; + err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock, + (uint64_t)foundBlocks, &recently_deleted, &nextblock); + if ((err) || (recently_deleted == 0)) { + /* if we hit an error, or the blocks not recently freed, go ahead and insert it */ + updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks); + } + err = 0; + } } - } while (currentBlock < stopBlock); LoopExit: + if (ISSET(flags, HFS_ALLOC_TRY_HARD)) { + firstBlock = best.startBlock; + foundBlocks = best.blockCount; + } + // Return the outputs. if (foundBlocks < minBlocks) { @@ -3365,7 +3958,8 @@ hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, /* * Pre-read the bitmap block containing the first word of allocation */ - error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef); + error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_TENTATIVE); if (error) goto JustReturn; @@ -3418,7 +4012,8 @@ hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, error = ReleaseBitmapBlock(hfsmp, blockRef, false); if (error) goto Exit; - error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef); + error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_TENTATIVE); if (error) goto Exit; /* Readjust currentWord and wordsLeft. 
*/ @@ -3450,7 +4045,8 @@ hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, error = ReleaseBitmapBlock(hfsmp, blockRef, false); if (error) goto Exit; - error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef); + error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_TENTATIVE); if (error) goto Exit; currentWord = buffer; @@ -3750,7 +4346,7 @@ int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *ne * Compute how much of hfs_summary_size is useable for the given number * of allocation blocks eligible on this FS. */ - err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit, &summary_cap); + err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit - 1, &summary_cap); if (err) { goto summary_exit; } @@ -3810,7 +4406,7 @@ int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *ne if (maybe_has_blocks == 0) { err = ENOSPC; } - } + } /* If the summary table is not active for this mount, we'll just return ENOSPC */ summary_exit: @@ -4607,15 +5203,7 @@ static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint * have to complete the I/O on VBMIOSize boundaries, but we can only read * up until the end of the bitmap file. */ - bitmap_len = hfsmp->totalBlocks / kBitsPerByte; - if (bitmap_len % (hfsmp->blockSize)) { - bitmap_len = (bitmap_len / hfsmp->blockSize); - /* round up to the end of the next alloc block */ - bitmap_len++; - - /* Convert the # of alloc blocks back to bytes. 
*/ - bitmap_len = bitmap_len * hfsmp->blockSize; - } + bitmap_len = roundup(hfsmp->totalBlocks, hfsmp->blockSize * 8) / 8; remaining_bitmap = bitmap_len - bitmap_off; @@ -4647,7 +5235,7 @@ int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int3 u_int32_t firstBit; // Bit index within word of first bit to allocate u_int32_t numBits; // Number of bits in word to allocate u_int32_t bitsPerBlock; - uintptr_t blockRef; + uintptr_t blockRef = 0; u_int32_t wordsPerBlock; u_int32_t numBlocks = 1; u_int32_t *buffer = NULL; @@ -4664,7 +5252,8 @@ int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int3 /* * Pre-read the bitmap block containing the first word of allocation */ - error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef); + error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_TENTATIVE); if (error) return (error); } @@ -4784,11 +5373,17 @@ u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) { /* Force a rebuild of the summary table. */ (void) hfs_rebuild_summary (hfsmp); - return 0; + // Delete any tentative ranges that are in the area we're shrinking + struct rl_entry *range, *next_range; + TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], + rl_link, next_range) { + if (rl_overlap(range, new_end_block, RL_INFINITY) != RL_NOOVERLAP) + hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS); + } + return 0; } - /* * Remove an extent from the list of free extents. 
* @@ -5046,6 +5641,16 @@ static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0); +#if DEBUG + for (i = 0; i < 2; ++i) { + struct rl_entry *range; + TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) { + assert(rl_overlap(range, startBlock, + startBlock + blockCount - 1) == RL_NOOVERLAP); + } + } +#endif + /* No need to add extent that is beyond current allocLimit */ if (startBlock >= hfsmp->allocLimit) { goto out_not_locked; @@ -5233,7 +5838,7 @@ static errno_t get_more_bits(bitmap_context_t *bitmap_ctx) hfs_journal_lock(hfsmp); /* Flush the journal and wait for all I/Os to finish up */ - error = hfs_journal_flush(hfsmp, TRUE); + error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); if (error) { hfs_journal_unlock(hfsmp); return error; @@ -5290,6 +5895,8 @@ static errno_t get_more_bits(bitmap_context_t *bitmap_ctx) if (error) return error; + assert(iosize != 0); + /* hfs_scan_range_size should have verified startbit. Convert it to bytes */ byte_offset = start_bit / kBitsPerByte; @@ -5569,7 +6176,7 @@ errno_t hfs_find_free_extents(struct hfsmount *hfsmp, hfs_journal_lock(hfsmp); /* Flush the journal and wait for all I/Os to finish up */ - error = hfs_journal_flush(hfsmp, TRUE); + error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); if (error) { hfs_journal_unlock(hfsmp); return error; diff --git a/bsd/hfs/hfscommon/headers/BTreesInternal.h b/bsd/hfs/hfscommon/headers/BTreesInternal.h index b95d8c992..f3c4e37d2 100644 --- a/bsd/hfs/hfscommon/headers/BTreesInternal.h +++ b/bsd/hfs/hfscommon/headers/BTreesInternal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/bsd/hfs/hfscommon/headers/BTreesPrivate.h b/bsd/hfs/hfscommon/headers/BTreesPrivate.h index 3b8dd7ac1..07f06afb8 100644 --- a/bsd/hfs/hfscommon/headers/BTreesPrivate.h +++ b/bsd/hfs/hfscommon/headers/BTreesPrivate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -179,7 +179,6 @@ typedef enum { #define M_SetBitNum(integer,bitNumber) ((integer) |= (1<<(bitNumber))) #define M_IsOdd(integer) (((integer) & 1) != 0) #define M_IsEven(integer) (((integer) & 1) == 0) -#define M_BTreeHeaderDirty(btreePtr) btreePtr->flags |= kBTHeaderDirty #define M_MapRecordSize(nodeSize) (nodeSize - sizeof (BTNodeDescriptor) - 6) #define M_HeaderMapRecordSize(nodeSize) (nodeSize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec) - 128 - 8) @@ -229,8 +228,11 @@ typedef struct BTreeControlBlock { // fields specific to BTree CBs u_int32_t numValidHints; // Hint used to find correct record. u_int32_t reservedNodes; BTreeIterator iterator; // useable when holding exclusive b-tree lock -} BTreeControlBlock, *BTreeControlBlockPtr; +#if DEBUG + void *madeDirtyBy[2]; +#endif +} BTreeControlBlock, *BTreeControlBlockPtr; u_int32_t CalcKeySize(const BTreeControlBlock *btcb, const BTreeKey *key); #define CalcKeySize(btcb, key) ( ((btcb)->attributes & kBTBigKeysMask) ? ((key)->length16 + 2) : ((key)->length8 + 1) ) @@ -244,6 +246,13 @@ typedef enum { kBTHeaderDirty = 0x00000001 } BTreeFlags; +static inline void M_BTreeHeaderDirty(BTreeControlBlock *bt) { +#if DEBUG + bt->madeDirtyBy[0] = __builtin_return_address(0); + bt->madeDirtyBy[1] = __builtin_return_address(1); +#endif + bt->flags |= kBTHeaderDirty; +} typedef int8_t *NodeBuffer; typedef BlockDescriptor NodeRec, *NodePtr; //�� remove this someday... 
diff --git a/bsd/hfs/hfscommon/headers/FileMgrInternal.h b/bsd/hfs/hfscommon/headers/FileMgrInternal.h index 30eb8a84e..20d38dd93 100644 --- a/bsd/hfs/hfscommon/headers/FileMgrInternal.h +++ b/bsd/hfs/hfscommon/headers/FileMgrInternal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -212,27 +212,84 @@ ReplaceBTreeRecord (FileReference refNum, /* Prototypes for exported routines in VolumeAllocation.c*/ /* - * Flags for BlockAllocate() and BlockDeallocate() + * Flags for BlockAllocate(), BlockDeallocate() and hfs_block_alloc. + * Some of these are for internal use only. See the comment at the + * top of hfs_alloc_int for more details on the semantics of these + * flags. */ -#define HFS_ALLOC_FORCECONTIG 0x1 //force contiguous block allocation; minblocks must be allocated -#define HFS_ALLOC_METAZONE 0x2 //can use metazone blocks -#define HFS_ALLOC_SKIPFREEBLKS 0x4 //skip checking/updating freeblocks during alloc/dealloc -#define HFS_ALLOC_FLUSHTXN 0x8 //pick best fit for allocation, even if a jnl flush is req'd - +#define HFS_ALLOC_FORCECONTIG 0x001 //force contiguous block allocation; minblocks must be allocated +#define HFS_ALLOC_METAZONE 0x002 //can use metazone blocks +#define HFS_ALLOC_SKIPFREEBLKS 0x004 //skip checking/updating freeblocks during alloc/dealloc +#define HFS_ALLOC_FLUSHTXN 0x008 //pick best fit for allocation, even if a jnl flush is req'd +#define HFS_ALLOC_TENTATIVE 0x010 //reserved allocation that can be claimed back +#define HFS_ALLOC_LOCKED 0x020 //reserved allocation that can't be claimed back +#define HFS_ALLOC_IGNORE_TENTATIVE 0x040 //Steal tentative blocks if necessary +#define HFS_ALLOC_IGNORE_RESERVED 0x080 //Ignore tentative/committed blocks +#define HFS_ALLOC_USE_TENTATIVE 0x100 //Use the supplied tentative range (if possible) +#define HFS_ALLOC_COMMIT 0x200 //Commit the supplied extent to disk 
+#define HFS_ALLOC_TRY_HARD 0x400 //Search hard to try and get maxBlocks; implies HFS_ALLOC_FLUSHTXN +#define HFS_ALLOC_ROLL_BACK 0x800 //Reallocate blocks that were just deallocated +#define HFS_ALLOC_FAST_DEV 0x1000 //Prefer fast device for allocation + +typedef uint32_t hfs_block_alloc_flags_t; + +struct rl_entry; EXTERN_API_C( OSErr ) -BlockAllocate (ExtendedVCB * vcb, - u_int32_t startingBlock, - u_int32_t minBlocks, - u_int32_t maxBlocks, - u_int32_t flags, - u_int32_t * startBlock, - u_int32_t * actualBlocks); +BlockAllocate (ExtendedVCB * vcb, + u_int32_t startingBlock, + u_int32_t minBlocks, + u_int32_t maxBlocks, + hfs_block_alloc_flags_t flags, + u_int32_t * startBlock, + u_int32_t * actualBlocks); + +typedef struct hfs_alloc_extra_args { + // Used with HFS_ALLOC_TRY_HARD and HFS_ALLOC_FORCECONTIG + uint32_t max_blocks; + + // Used with HFS_ALLOC_USE_TENTATIVE & HFS_ALLOC_COMMIT + struct rl_entry **reservation_in; + + // Used with HFS_ALLOC_TENTATIVE & HFS_ALLOC_LOCKED + struct rl_entry **reservation_out; + + /* + * If the maximum cannot be returned, the allocation will be + * trimmed to the specified alignment after taking + * @alignment_offset into account. @alignment and + * @alignment_offset are both in terms of blocks, *not* bytes. + * The result will be such that: + * + * (block_count + @alignment_offset) % @alignment == 0 + * + * Alignment is *not* guaranteed. + * + * One example where alignment might be useful is in the case + * where the page size is greater than the allocation block size + * and I/O is being performed in multiples of the page size. + */ + int alignment; + int alignment_offset; +} hfs_alloc_extra_args_t; + +/* + * Same as BlockAllocate but slightly different API. + * @extent.startBlock is a hint for where to start searching and + * @extent.blockCount is the minimum number of blocks acceptable. + * Additional arguments can be passed in @extra_args and use will + * depend on @flags.
See comment at top of hfs_block_alloc_int for + * more information. + */ +errno_t hfs_block_alloc(hfsmount_t *hfsmp, + HFSPlusExtentDescriptor *extent, + hfs_block_alloc_flags_t flags, + hfs_alloc_extra_args_t *extra_args); EXTERN_API_C( OSErr ) -BlockDeallocate (ExtendedVCB * vcb, - u_int32_t firstBlock, - u_int32_t numBlocks, - u_int32_t flags); +BlockDeallocate (ExtendedVCB * vcb, + u_int32_t firstBlock, + u_int32_t numBlocks, + hfs_block_alloc_flags_t flags); EXTERN_API_C ( void ) ResetVCBFreeExtCache(struct hfsmount *hfsmp); @@ -261,6 +318,9 @@ hfs_init_summary (struct hfsmount *hfsmp); errno_t hfs_find_free_extents(struct hfsmount *hfsmp, void (*callback)(void *data, off_t), void *callback_arg); +void hfs_free_tentative(hfsmount_t *hfsmp, struct rl_entry **reservation); +void hfs_free_locked(hfsmount_t *hfsmp, struct rl_entry **reservation); + /* File Extent Mapping routines*/ EXTERN_API_C( OSErr ) FlushExtentFile (ExtendedVCB * vcb); @@ -275,6 +335,15 @@ EXTERN_API_C( int32_t ) CompareExtentKeysPlus (const HFSPlusExtentKey *searchKey, const HFSPlusExtentKey *trialKey); +OSErr SearchExtentFile(ExtendedVCB *vcb, + const FCB *fcb, + int64_t filePosition, + HFSPlusExtentKey *foundExtentKey, + HFSPlusExtentRecord foundExtentData, + u_int32_t *foundExtentDataIndex, + u_int32_t *extentBTreeHint, + u_int32_t *endingFABNPlusOne ); + EXTERN_API_C( OSErr ) TruncateFileC (ExtendedVCB *vcb, FCB *fcb, int64_t peof, int deleted, int rsrc, uint32_t fileid, Boolean truncateToExtent); @@ -307,8 +376,6 @@ NodesAreContiguous (ExtendedVCB * vcb, u_int32_t nodeSize); #endif - - /* Get the current time in UTC (GMT)*/ EXTERN_API_C( u_int32_t ) GetTimeUTC (void); diff --git a/bsd/hfs/rangelist.c b/bsd/hfs/rangelist.c index 0a1b412b6..81b384c48 100644 --- a/bsd/hfs/rangelist.c +++ b/bsd/hfs/rangelist.c @@ -33,6 +33,10 @@ #include #include +#if !RANGELIST_TEST +#include +#endif + #include "rangelist.h" static enum rl_overlaptype rl_scan_from(struct rl_head *rangelist, off_t start, 
off_t end, struct rl_entry **overlap, struct rl_entry *range); @@ -67,8 +71,6 @@ rl_init(struct rl_head *rangelist) TAILQ_INIT(rangelist); } - - /* * Add a range to the list */ @@ -177,7 +179,7 @@ rl_remove(off_t start, off_t end, struct rl_head *rangelist) if (TAILQ_EMPTY(rangelist)) { return; }; - + range = TAILQ_FIRST(rangelist); while ((ovcase = rl_scan_from(rangelist, start, end, &overlap, range))) { switch (ovcase) { @@ -257,16 +259,53 @@ rl_scan(struct rl_head *rangelist, off_t start, off_t end, struct rl_entry **overlap) { - - if (TAILQ_EMPTY(rangelist)) { - *overlap = NULL; - return RL_NOOVERLAP; - }; - + return rl_scan_from(rangelist, start, end, overlap, TAILQ_FIRST(rangelist)); } +enum rl_overlaptype +rl_overlap(const struct rl_entry *range, off_t start, off_t end) +{ + /* + * OK, check for overlap + * + * Six cases: + * 0) no overlap (RL_NOOVERLAP) + * 1) overlap == range (RL_MATCHINGOVERLAP) + * 2) overlap contains range (RL_OVERLAPCONTAINSRANGE) + * 3) range contains overlap (RL_OVERLAPISCONTAINED) + * 4) overlap starts before range (RL_OVERLAPSTARTSBEFORE) + * 5) overlap ends after range (RL_OVERLAPENDSAFTER) + */ + if (start > range->rl_end || range->rl_start > end) { + /* Case 0 (RL_NOOVERLAP) */ + return RL_NOOVERLAP; + } + + if (range->rl_start == start && range->rl_end == end) { + /* Case 1 (RL_MATCHINGOVERLAP) */ + return RL_MATCHINGOVERLAP; + } + + if (range->rl_start <= start && range->rl_end >= end) { + /* Case 2 (RL_OVERLAPCONTAINSRANGE) */ + return RL_OVERLAPCONTAINSRANGE; + } + if (start <= range->rl_start && end >= range->rl_end) { + /* Case 3 (RL_OVERLAPISCONTAINED) */ + return RL_OVERLAPISCONTAINED; + } + + if (range->rl_start < start && range->rl_end < end) { + /* Case 4 (RL_OVERLAPSTARTSBEFORE) */ + return RL_OVERLAPSTARTSBEFORE; + } + + /* Case 5 (RL_OVERLAPENDSAFTER) */ + // range->rl_start > start && range->rl_end > end + return RL_OVERLAPENDSAFTER; +} /* * Walk the list of ranges for an entry to @@ -276,88 +315,29 @@ 
rl_scan(struct rl_head *rangelist, * There may be more than one. */ static enum rl_overlaptype -rl_scan_from(struct rl_head *rangelist, +rl_scan_from(struct rl_head *rangelist __unused, off_t start, off_t end, struct rl_entry **overlap, - struct rl_entry *range) + struct rl_entry *range) { - if (TAILQ_EMPTY(rangelist)) { - *overlap = NULL; - return RL_NOOVERLAP; - }; - #ifdef RL_DIAGNOSTIC - rl_verify(rangelist); + rl_verify(rangelist); #endif - *overlap = range; - - while (1) { - /* - * OK, check for overlap - * - * Six cases: - * 0) no overlap (RL_NOOVERLAP) - * 1) overlap == range (RL_MATCHINGOVERLAP) - * 2) overlap contains range (RL_OVERLAPCONTAINSRANGE) - * 3) range contains overlap (RL_OVERLAPISCONTAINED) - * 4) overlap starts before range (RL_OVERLAPSTARTSBEFORE) - * 5) overlap ends after range (RL_OVERLAPENDSAFTER) - */ - if (((range->rl_end != RL_INFINITY) && (start > range->rl_end)) || - ((end != RL_INFINITY) && (range->rl_start > end))) { - /* Case 0 (RL_NOOVERLAP), at least with the current entry: */ - if ((end != RL_INFINITY) && (range->rl_start > end)) { - return RL_NOOVERLAP; - }; - - /* Check the other entries in the list: */ - range = TAILQ_NEXT(range, rl_link); + while (range) { + enum rl_overlaptype ot = rl_overlap(range, start, end); + + if (ot != RL_NOOVERLAP || range->rl_start > end) { *overlap = range; - if (range == NULL) - return RL_NOOVERLAP; - - continue; - } - - if ((range->rl_start == start) && (range->rl_end == end)) { - /* Case 1 (RL_MATCHINGOVERLAP) */ - return RL_MATCHINGOVERLAP; - } - - if ((range->rl_start <= start) && - (end != RL_INFINITY) && - ((range->rl_end >= end) || (range->rl_end == RL_INFINITY))) { - /* Case 2 (RL_OVERLAPCONTAINSRANGE) */ - return RL_OVERLAPCONTAINSRANGE; - } - - if ((start <= range->rl_start) && - ((end == RL_INFINITY) || - ((range->rl_end != RL_INFINITY) && (end >= range->rl_end)))) { - /* Case 3 (RL_OVERLAPISCONTAINED) */ - return RL_OVERLAPISCONTAINED; - } - - if ((range->rl_start < start) && - 
((range->rl_end >= start) || (range->rl_end == RL_INFINITY))) { - /* Case 4 (RL_OVERLAPSTARTSBEFORE) */ - return RL_OVERLAPSTARTSBEFORE; - } - - if ((range->rl_start > start) && - (end != RL_INFINITY) && - ((range->rl_end > end) || (range->rl_end == RL_INFINITY))) { - /* Case 5 (RL_OVERLAPENDSAFTER) */ - return RL_OVERLAPENDSAFTER; + return ot; } - /* Control should never reach here... */ -#ifdef RL_DIAGNOSTIC - panic("hfs: rl_scan_from: unhandled overlap condition?!"); -#endif + range = TAILQ_NEXT(range, rl_link); } + + *overlap = NULL; + return RL_NOOVERLAP; } @@ -421,6 +401,38 @@ void rl_remove_all(struct rl_head *rangelist) TAILQ_INIT(rangelist); } +/* + * In the case where b is contained by a, we return the the largest part + * remaining. The result is stored in a. + */ +void rl_subtract(struct rl_entry *a, const struct rl_entry *b) +{ + switch (rl_overlap(b, a->rl_start, a->rl_end)) { + case RL_MATCHINGOVERLAP: + case RL_OVERLAPCONTAINSRANGE: + a->rl_end = a->rl_start - 1; + break; + case RL_OVERLAPISCONTAINED: + // Keep the bigger part + if (b->rl_start - a->rl_start >= a->rl_end - b->rl_end) { + // Keep left + a->rl_end = b->rl_start - 1; + } else { + // Keep right + a->rl_start = b->rl_end + 1; + } + break; + case RL_OVERLAPSTARTSBEFORE: + a->rl_start = b->rl_end + 1; + break; + case RL_OVERLAPENDSAFTER: + a->rl_end = b->rl_start - 1; + break; + case RL_NOOVERLAP: + break; + } +} + #else /* not HFS - temp workaround until 4277828 is fixed */ /* stubs for exported routines that aren't present when we build kernel without HFS */ diff --git a/bsd/hfs/rangelist.h b/bsd/hfs/rangelist.h index 0f66d34c9..41708be5d 100644 --- a/bsd/hfs/rangelist.h +++ b/bsd/hfs/rangelist.h @@ -44,7 +44,7 @@ enum rl_overlaptype { RL_OVERLAPENDSAFTER /* 5 */ }; -#define RL_INFINITY ((off_t)-1) +#define RL_INFINITY INT64_MAX TAILQ_HEAD(rl_head, rl_entry); @@ -63,6 +63,22 @@ enum rl_overlaptype rl_scan(struct rl_head *rangelist, off_t start, off_t end, struct rl_entry **overlap); 
+enum rl_overlaptype rl_overlap(const struct rl_entry *range, + off_t start, off_t end); + +static __attribute__((pure)) inline +off_t rl_len(const struct rl_entry *range) +{ + return range->rl_end - range->rl_start + 1; +} + +void rl_subtract(struct rl_entry *a, const struct rl_entry *b); + +static inline struct rl_entry rl_make(off_t start, off_t end) +{ + return (struct rl_entry){ .rl_start = start, .rl_end = end }; +} + __END_DECLS #endif /* __APPLE_API_PRIVATE */ diff --git a/bsd/i386/Makefile b/bsd/i386/Makefile index 7433ece89..6c5370018 100644 --- a/bsd/i386/Makefile +++ b/bsd/i386/Makefile @@ -13,6 +13,9 @@ DATAFILES = \ types.h vmparam.h _types.h _param.h \ _mcontext.h +PRIVATE_DATAFILES = \ + disklabel.h + KERNELFILES = \ endian.h param.h \ profile.h signal.h limits.h _limits.h \ @@ -21,7 +24,7 @@ KERNELFILES = \ INSTALL_MD_LIST = ${DATAFILES} -INSTALL_MD_LCL_LIST = ${DATAFILES} disklabel.h +INSTALL_MD_LCL_LIST = ${PRIVATE_DATAFILES} INSTALL_MD_DIR = i386 diff --git a/bsd/kern/ast.h b/bsd/kern/ast.h index a8dc21932..94a326584 100644 --- a/bsd/kern/ast.h +++ b/bsd/kern/ast.h @@ -36,7 +36,6 @@ #include -extern void astbsd_on(void); extern void act_set_astbsd(thread_t); extern void bsd_ast(thread_t); diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c index b344c7a9d..d9b90aff2 100644 --- a/bsd/kern/bsd_init.c +++ b/bsd/kern/bsd_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -157,13 +157,15 @@ #include /* flow_divert_init() */ #include /* for cfil_init() */ #include /* for necp_init() */ +#include /* for netagent_init() */ #include /* for pkt_mnglr_init() */ #include /* for utun_register_control() */ -#include /* for ipsec_register_control() */ +#include /* for ipsec_register_control() */ #include /* for net_str_id_init() */ #include /* for netsrc_init() */ #include /* for nstat_init() */ #include /* for tcp_cc_init() */ +#include /* for mptcp_control_register() */ #include /* for assert() */ #include /* for init_system_override() */ @@ -193,6 +195,7 @@ #include #include + void * get_user_regs(thread_t); /* XXX kludge for */ void IOKitInitializeTime(void); /* XXX */ void IOSleep(unsigned int); /* XXX */ @@ -243,6 +246,11 @@ struct kmemstats kmemstats[M_LAST]; struct vnode *rootvp; int boothowto = RB_DEBUG; +int minimalboot = 0; + +#if PROC_REF_DEBUG +__private_extern__ int proc_ref_tracking_disabled = 0; /* disable panics on leaked proc refs across syscall boundary */ +#endif extern kern_return_t IOFindBSDRoot(char *, unsigned int, dev_t *, u_int32_t *); extern void IOSecureBSDRoot(const char * rootName); @@ -271,6 +279,10 @@ void bsd_exec_setup(int); __private_extern__ int bootarg_vnode_cache_defeat = 0; +#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG) +__private_extern__ int bootarg_no_vnode_jetsam = 0; +#endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */ + /* * Prevent kernel-based ASLR from being used, for testing. 
*/ @@ -288,12 +300,11 @@ void bsd_utaskbootstrap(void); static void parse_bsd_args(void); extern task_t bsd_init_task; extern boolean_t init_task_died; -extern char init_task_failure_data[]; #if CONFIG_DEV_KMEM extern void dev_kmem_init(void); #endif extern void time_zone_slock_init(void); -extern void select_wait_queue_init(void); +extern void select_waitq_init(void); static void process_name(const char *, proc_t); static void setconf(void); @@ -340,11 +351,8 @@ extern int check_policy_init(int); static void process_name(const char *s, proc_t p) { - size_t length = strlen(s); - - bcopy(s, p->p_comm, - length >= sizeof(p->p_comm) ? sizeof(p->p_comm) : - length + 1); + strlcpy(p->p_comm, s, sizeof(p->p_comm)); + strlcpy(p->p_name, s, sizeof(p->p_name)); } /* To allow these values to be patched, they're globals here */ @@ -500,7 +508,7 @@ bsd_init(void) /* Initialize System Override call */ init_system_override(); - + /* * Create process 0. */ @@ -534,10 +542,6 @@ bsd_init(void) LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash); proc_list_unlock(); -#if CONFIG_LCTX - kernproc->p_lctx = NULL; -#endif - kernproc->task = kernel_task; kernproc->p_stat = SRUN; @@ -644,7 +648,7 @@ bsd_init(void) &minimum, (vm_size_t)bsd_pageable_map_size, TRUE, - VM_FLAGS_ANYWHERE, + VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_BSD), &bsd_pageable_map); if (ret != KERN_SUCCESS) panic("bsd_init: Failed to allocate bsd pageable map"); @@ -750,8 +754,8 @@ bsd_init(void) psem_cache_init(); bsd_init_kprintf("calling time_zone_slock_init\n"); time_zone_slock_init(); - bsd_init_kprintf("calling select_wait_queue_init\n"); - select_wait_queue_init(); + bsd_init_kprintf("calling select_waitq_init\n"); + select_waitq_init(); /* * Initialize protocols. 
Block reception of incoming packets @@ -857,7 +861,9 @@ bsd_init(void) /* Initialize Network Extension Control Policies */ necp_init(); #endif - + + netagent_init(); + /* register user tunnel kernel control handler */ utun_register_control(); #if IPSEC @@ -866,6 +872,9 @@ bsd_init(void) netsrc_init(); nstat_init(); tcp_cc_init(); +#if MPTCP + mptcp_control_register(); +#endif /* MPTCP */ #endif /* NETWORKING */ bsd_init_kprintf("calling vnode_pager_bootstrap\n"); @@ -966,7 +975,7 @@ bsd_init(void) devfs_kernel_mount(mounthere); } #endif /* DEVFS */ - + /* Initialize signal state for process 0. */ bsd_init_kprintf("calling siginit\n"); siginit(kernproc); @@ -990,6 +999,7 @@ bsd_init(void) consider_zone_gc(FALSE); #endif + bsd_init_kprintf("done\n"); } @@ -1015,7 +1025,6 @@ bsdinit_task(void) bsd_init_task = get_threadtask(thread); init_task_died = FALSE; - init_task_failure_data[0] = 0; #if CONFIG_MACF mac_cred_label_associate_user(p->p_ucred); @@ -1103,7 +1112,7 @@ bsd_utaskbootstrap(void) ut = (struct uthread *)get_bsdthread_info(thread); ut->uu_sigmask = 0; act_set_astbsd(thread); - (void) thread_resume(thread); + proc_clear_return_wait(initproc, thread); } static void @@ -1121,6 +1130,15 @@ parse_bsd_args(void) if (PE_parse_boot_argn("-x", namep, sizeof (namep))) /* safe boot */ boothowto |= RB_SAFEBOOT; + if (PE_parse_boot_argn("-minimalboot", namep, sizeof(namep))) { + /* + * -minimalboot indicates that we want userspace to be bootstrapped to a + * minimal environment. What constitutes minimal is up to the bootstrap + * process. 
+ */ + minimalboot = 1; + } + /* disable vnode_cache_is_authorized() by setting vnode_cache_defeat */ if (PE_parse_boot_argn("-vnode_cache_defeat", namep, sizeof (namep))) @@ -1150,6 +1168,21 @@ parse_bsd_args(void) if (PE_parse_boot_argn("-novfscache", namep, sizeof(namep))) { nc_disabled = 1; } + +#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG) + if (PE_parse_boot_argn("-no_vnode_jetsam", namep, sizeof(namep))) + bootarg_no_vnode_jetsam = 1; +#endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */ + + + +#if PROC_REF_DEBUG + if (PE_parse_boot_argn("-disable_procref_tracking", namep, sizeof(namep))) { + proc_ref_tracking_disabled = 1; + } +#endif + + PE_parse_boot_argn("sigrestrict", &sigrestrict_arg, sizeof(sigrestrict_arg)); } void diff --git a/bsd/kern/bsd_stubs.c b/bsd/kern/bsd_stubs.c index 648d6e305..f941c0128 100644 --- a/bsd/kern/bsd_stubs.c +++ b/bsd/kern/bsd_stubs.c @@ -2,7 +2,7 @@ * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include @@ -37,7 +37,7 @@ #include #include #include -#include /* for SET */ +#include /* for SET */ #include #include #include @@ -48,29 +48,27 @@ extern int chrtoblk_set(int, int); extern vm_offset_t kmem_mb_alloc(vm_map_t, int, int); /* XXX most of these just exist to export; there's no good header for them*/ -void pcb_synch(void); +void pcb_synch(void); -TAILQ_HEAD(,devsw_lock) devsw_locks; +TAILQ_HEAD(, devsw_lock) devsw_locks; lck_mtx_t devsw_lock_list_mtx; -lck_grp_t *devsw_lock_grp; +lck_grp_t * devsw_lock_grp; /* Just to satisfy pstat command */ -int dmmin, dmmax, dmtext; +int dmmin, dmmax, dmtext; vm_offset_t -kmem_mb_alloc(vm_map_t mbmap, int size, int physContig) +kmem_mb_alloc(vm_map_t mbmap, int size, int physContig) { - vm_offset_t addr = 0; + vm_offset_t addr = 0; kern_return_t kr = KERN_SUCCESS; - if(!physContig) - kr = kernel_memory_allocate(mbmap, &addr, size, - 0, KMA_NOPAGEWAIT|KMA_KOBJECT|KMA_LOMEM); + if (!physContig) + kr = kernel_memory_allocate(mbmap, &addr, size, 0, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_LOMEM, VM_KERN_MEMORY_MBUF); else - kr = kmem_alloc_contig(mbmap, &addr, size, PAGE_MASK, - 0xfffff, 0, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_LOMEM); + kr = kmem_alloc_contig(mbmap, &addr, size, PAGE_MASK, 0xfffff, 0, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_LOMEM, VM_KERN_MEMORY_MBUF); - if( kr != KERN_SUCCESS) + if (kr != KERN_SUCCESS) addr = 0; return addr; @@ -89,17 +87,17 @@ current_proc(void) { /* Never returns a NULL */ struct uthread * ut; - struct proc *p; + struct proc * p; thread_t thread = current_thread(); - ut = (struct uthread *)get_bsdthread_info(thread); - if (ut && (ut->uu_flag & UT_VFORK) && ut->uu_proc) { + ut = (struct uthread *)get_bsdthread_info(thread); + if (ut && (ut->uu_flag & UT_VFORK) && ut->uu_proc) { p = ut->uu_proc; - if ((p->p_lflag & P_LINVFORK) == 0) + if ((p->p_lflag & P_LINVFORK) == 0) panic("returning child proc not under vfork"); - if (p->p_vforkact != (void 
*)thread) + if (p->p_vforkact != (void *)thread) panic("returning child proc which is not cur_act"); - return(p); + return (p); } p = (struct proc *)get_bsdtask_info(current_task()); @@ -114,7 +112,7 @@ current_proc(void) struct bdevsw nobdev = NO_BDEVICE; struct cdevsw nocdev = NO_CDEVICE; -/* +/* * if index is -1, return a free slot if avaliable * else see whether the index is free * return the major number that is free else -1 @@ -126,32 +124,31 @@ struct cdevsw nocdev = NO_CDEVICE; int bdevsw_isfree(int index) { - struct bdevsw *devsw; + struct bdevsw * devsw; if (index < 0) { - if (index == -1) - index = 1; /* start at 1 to avoid collision with volfs (Radar 2842228) */ - else - index = -index; /* start at least this far up in the table */ - devsw = &bdevsw[index]; - for(; index < nblkdev; index++, devsw++) { - if(memcmp((char *)devsw, - (char *)&nobdev, - sizeof(struct bdevsw)) == 0) - break; - } + if (index == -1) + index = 1; /* start at 1 to avoid collision with volfs (Radar 2842228) */ + else + index = -index; /* start at least this far up in the table */ + devsw = &bdevsw[index]; + for (; index < nblkdev; index++, devsw++) { + if (memcmp((char *)devsw, (char *)&nobdev, sizeof(struct bdevsw)) == 0) + break; + } } + + if (index < 0 || index >= nblkdev) + return (-1); + devsw = &bdevsw[index]; - if ((index < 0) || (index >= nblkdev) || - (memcmp((char *)devsw, - (char *)&nobdev, - sizeof(struct bdevsw)) != 0)) { - return(-1); + if ((memcmp((char *)devsw, (char *)&nobdev, sizeof(struct bdevsw)) != 0)) { + return (-1); } - return(index); + return (index); } -/* +/* * if index is -1, find a free slot to add * else see whether the slot is free * return the major number that is used else -1 @@ -161,36 +158,36 @@ bdevsw_isfree(int index) * instead of starting at 0 */ int -bdevsw_add(int index, struct bdevsw * bsw) +bdevsw_add(int index, struct bdevsw * bsw) { index = bdevsw_isfree(index); if (index < 0) { - return(-1); + return (-1); } bdevsw[index] = *bsw; - 
return(index); + return (index); } /* * if the slot has the same bsw, then remove * else -1 */ int -bdevsw_remove(int index, struct bdevsw * bsw) +bdevsw_remove(int index, struct bdevsw * bsw) { - struct bdevsw *devsw; + struct bdevsw * devsw; + + if (index < 0 || index >= nblkdev) + return (-1); devsw = &bdevsw[index]; - if ((index < 0) || (index >= nblkdev) || - (memcmp((char *)devsw, - (char *)bsw, - sizeof(struct bdevsw)) != 0)) { - return(-1); + if ((memcmp((char *)devsw, (char *)bsw, sizeof(struct bdevsw)) != 0)) { + return (-1); } bdevsw[index] = nobdev; - return(index); + return (index); } -/* +/* * if index is -1, return a free slot if avaliable * else see whether the index is free * return the major number that is free else -1 @@ -202,32 +199,31 @@ bdevsw_remove(int index, struct bdevsw * bsw) int cdevsw_isfree(int index) { - struct cdevsw *devsw; + struct cdevsw * devsw; if (index < 0) { - if (index == -1) - index = 0; - else - index = -index; /* start at least this far up in the table */ - devsw = &cdevsw[index]; - for(; index < nchrdev; index++, devsw++) { - if(memcmp((char *)devsw, - (char *)&nocdev, - sizeof(struct cdevsw)) == 0) - break; - } + if (index == -1) + index = 0; + else + index = -index; /* start at least this far up in the table */ + devsw = &cdevsw[index]; + for (; index < nchrdev; index++, devsw++) { + if (memcmp((char *)devsw, (char *)&nocdev, sizeof(struct cdevsw)) == 0) + break; + } } + + if (index < 0 || index >= nchrdev) + return (-1); + devsw = &cdevsw[index]; - if ((index < 0) || (index >= nchrdev) || - (memcmp((char *)devsw, - (char *)&nocdev, - sizeof(struct cdevsw)) != 0)) { - return(-1); + if ((memcmp((char *)devsw, (char *)&nocdev, sizeof(struct cdevsw)) != 0)) { + return (-1); } - return(index); + return (index); } -/* +/* * if index is -1, find a free slot to add * else see whether the slot is free * return the major number that is used else -1 @@ -242,34 +238,34 @@ cdevsw_isfree(int index) * before them. 
-24 is currently a safe starting point. */ int -cdevsw_add(int index, struct cdevsw * csw) +cdevsw_add(int index, struct cdevsw * csw) { index = cdevsw_isfree(index); if (index < 0) { - return(-1); + return (-1); } cdevsw[index] = *csw; - return(index); + return (index); } /* * if the slot has the same csw, then remove * else -1 */ int -cdevsw_remove(int index, struct cdevsw * csw) +cdevsw_remove(int index, struct cdevsw * csw) { - struct cdevsw *devsw; + struct cdevsw * devsw; + + if (index < 0 || index >= nchrdev) + return (-1); devsw = &cdevsw[index]; - if ((index < 0) || (index >= nchrdev) || - (memcmp((char *)devsw, - (char *)csw, - sizeof(struct cdevsw)) != 0)) { - return(-1); + if ((memcmp((char *)devsw, (char *)csw, sizeof(struct cdevsw)) != 0)) { + return (-1); } cdevsw[index] = nocdev; cdevsw_flags[index] = 0; - return(index); + return (index); } static int @@ -278,7 +274,7 @@ cdev_set_bdev(int cdev, int bdev) return (chrtoblk_set(cdev, bdev)); } -int +int cdevsw_add_with_bdev(int index, struct cdevsw * csw, int bdev) { index = cdevsw_add(index, csw); @@ -293,17 +289,17 @@ cdevsw_add_with_bdev(int index, struct cdevsw * csw, int bdev) } int -cdevsw_setkqueueok(int index, struct cdevsw *csw, int use_offset) +cdevsw_setkqueueok(int index, struct cdevsw * csw, int use_offset) { - struct cdevsw *devsw; + struct cdevsw * devsw; uint64_t flags = CDEVSW_SELECT_KQUEUE; + if (index < 0 || index >= nchrdev) + return (-1); + devsw = &cdevsw[index]; - if ((index < 0) || (index >= nchrdev) || - (memcmp((char *)devsw, - (char *)csw, - sizeof(struct cdevsw)) != 0)) { - return(-1); + if ((memcmp((char *)devsw, (char *)csw, sizeof(struct cdevsw)) != 0)) { + return (-1); } if (use_offset) { @@ -314,19 +310,19 @@ cdevsw_setkqueueok(int index, struct cdevsw *csw, int use_offset) return 0; } -#include /* for PE_parse_boot_arg */ +#include /* for PE_parse_boot_arg */ /* * Copy the "hostname" variable into a caller-provided buffer * Returns: 0 for success, ENAMETOOLONG for 
insufficient buffer space. - * On success, "len" will be set to the number of characters preceding + * On success, "len" will be set to the number of characters preceding * the NULL character in the hostname. */ int -bsd_hostname(char *buf, int bufsize, int *len) +bsd_hostname(char * buf, int bufsize, int * len) { /* - * "hostname" is null-terminated, and "hostnamelen" is equivalent to strlen(hostname). + * "hostname" is null-terminated, and "hostnamelen" is equivalent to strlen(hostname). */ if (hostnamelen < bufsize) { strlcpy(buf, hostname, bufsize); @@ -334,7 +330,7 @@ bsd_hostname(char *buf, int bufsize, int *len) return 0; } else { return ENAMETOOLONG; - } + } } void @@ -343,19 +339,20 @@ devsw_lock(dev_t dev, int mode) devsw_lock_t newlock, tmplock; int res; - assert(0 <= major(dev) && major(dev) < nchrdev); + assert(0 <= major(dev) && major(dev) < nchrdev); assert(mode == S_IFCHR || mode == S_IFBLK); MALLOC(newlock, devsw_lock_t, sizeof(struct devsw_lock), M_TEMP, M_WAITOK | M_ZERO); newlock->dl_dev = dev; newlock->dl_thread = current_thread(); newlock->dl_mode = mode; - + lck_mtx_lock_spin(&devsw_lock_list_mtx); retry: - TAILQ_FOREACH(tmplock, &devsw_locks, dl_list) { + TAILQ_FOREACH(tmplock, &devsw_locks, dl_list) + { if (tmplock->dl_dev == dev && tmplock->dl_mode == mode) { - res = msleep(tmplock, &devsw_lock_list_mtx, PVFS, "devsw_lock", NULL); + res = msleep(tmplock, &devsw_lock_list_mtx, PVFS, "devsw_lock", NULL); assert(res == 0); goto retry; } @@ -363,19 +360,19 @@ devsw_lock(dev_t dev, int mode) TAILQ_INSERT_TAIL(&devsw_locks, newlock, dl_list); lck_mtx_unlock(&devsw_lock_list_mtx); - } void devsw_unlock(dev_t dev, int mode) { devsw_lock_t tmplock; - assert(0 <= major(dev) && major(dev) < nchrdev); + assert(0 <= major(dev) && major(dev) < nchrdev); lck_mtx_lock_spin(&devsw_lock_list_mtx); - TAILQ_FOREACH(tmplock, &devsw_locks, dl_list) { - if (tmplock->dl_dev == dev && tmplock->dl_mode == mode) { + TAILQ_FOREACH(tmplock, &devsw_locks, dl_list) + { 
+ if (tmplock->dl_dev == dev && tmplock->dl_mode == mode) { break; } } @@ -390,9 +387,9 @@ devsw_unlock(dev_t dev, int mode) wakeup(tmplock); TAILQ_REMOVE(&devsw_locks, tmplock, dl_list); - + lck_mtx_unlock(&devsw_lock_list_mtx); - + FREE(tmplock, M_TEMP); } diff --git a/bsd/kern/decmpfs.c b/bsd/kern/decmpfs.c index ce43a4785..5c71793ad 100644 --- a/bsd/kern/decmpfs.c +++ b/bsd/kern/decmpfs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -64,18 +64,33 @@ baseName(const char *path) return ret; } +static char* +vnpath(vnode_t vp, char *path, int len) +{ + int origlen = len; + path[0] = 0; + vn_getpath(vp, path, &len); + path[origlen - 1] = 0; + return path; +} + #define ErrorLog(x, args...) printf("%s:%d:%s: " x, baseName(__FILE__), __LINE__, __FUNCTION__, ## args) +#define ErrorLogWithPath(x, args...) do { char *path; MALLOC(path, char *, PATH_MAX, M_TEMP, M_WAITOK); printf("%s:%d:%s: %s: " x, baseName(__FILE__), __LINE__, __FUNCTION__, vnpath(vp, path, PATH_MAX), ## args); FREE(path, M_TEMP); } while(0) #if COMPRESSION_DEBUG #define DebugLog ErrorLog +#define DebugLogWithPath ErrorLogWithPath #else #define DebugLog(x...) do { } while(0) +#define DebugLogWithPath(x...) do { } while(0) #endif #if COMPRESSION_DEBUG_VERBOSE #define VerboseLog ErrorLog +#define VerboseLogWithPath ErrorLogWithPath #else #define VerboseLog(x...) do { } while(0) +#define VerboseLogWithPath(x...) 
do { } while(0) #endif #if MALLOC_DEBUG @@ -197,7 +212,7 @@ extern boolean_t IOServiceWaitForMatchingResource( const char * property, uint64 extern boolean_t IOCatalogueMatchingDriversPresent( const char * property ); static void * -_decmp_get_func(uint32_t type, uintptr_t offset) +_decmp_get_func(vnode_t vp, uint32_t type, uintptr_t offset) { /* this function should be called while holding a shared lock to decompressorsLock, @@ -220,7 +235,7 @@ _decmp_get_func(uint32_t type, uintptr_t offset) char resourceName[80]; uint64_t delay = 10000000ULL; // 10 milliseconds. snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", type); - printf("waiting for %s\n", resourceName); + ErrorLogWithPath("waiting for %s\n", resourceName); while(decompressors[type] == NULL) { lck_rw_unlock_shared(decompressorsLock); // we have to unlock to allow the kext to register if (IOServiceWaitForMatchingResource(resourceName, delay)) { @@ -229,17 +244,17 @@ _decmp_get_func(uint32_t type, uintptr_t offset) } if (!IOCatalogueMatchingDriversPresent(providesName)) { // - printf("the kext with %s is no longer present\n", providesName); + ErrorLogWithPath("the kext with %s is no longer present\n", providesName); lck_rw_lock_shared(decompressorsLock); break; } - printf("still waiting for %s\n", resourceName); + ErrorLogWithPath("still waiting for %s\n", resourceName); delay *= 2; lck_rw_lock_shared(decompressorsLock); } // IOKit says the kext is loaded, so it should be registered too! 
if (decompressors[type] == NULL) { - ErrorLog("we found %s, but the type still isn't registered\n", providesName); + ErrorLogWithPath("we found %s, but the type still isn't registered\n", providesName); return NULL; } // it's now registered, so let's return the function @@ -247,25 +262,15 @@ _decmp_get_func(uint32_t type, uintptr_t offset) } // the compressor hasn't registered, so it never will unless someone manually kextloads it - ErrorLog("tried to access a compressed file of unregistered type %d\n", type); + ErrorLogWithPath("tried to access a compressed file of unregistered type %d\n", type); return NULL; } -#define decmp_get_func(type, func) ((typeof(((decmpfs_registration*)NULL)->func))_decmp_get_func(type, offsetof_func(func))) +#define decmp_get_func(vp, type, func) ((typeof(((decmpfs_registration*)NULL)->func))_decmp_get_func(vp, type, offsetof_func(func))) #pragma mark --- utilities --- #if COMPRESSION_DEBUG -static char* -vnpath(vnode_t vp, char *path, int len) -{ - int origlen = len; - path[0] = 0; - vn_getpath(vp, path, &len); - path[origlen - 1] = 0; - return path; -} - static int vnsize(vnode_t vp, uint64_t *size) { @@ -274,7 +279,7 @@ vnsize(vnode_t vp, uint64_t *size) VATTR_WANTED(&va, va_data_size); int error = vnode_getattr(vp, &va, decmpfs_ctx); if (error != 0) { - ErrorLog("vnode_getattr err %d\n", error); + ErrorLogWithPath("vnode_getattr err %d\n", error); return error; } *size = va.va_data_size; @@ -499,7 +504,7 @@ decmpfs_fetch_compressed_header(vnode_t vp, decmpfs_cnode *cp, decmpfs_header ** } if (hdr->compression_magic != DECMPFS_MAGIC) { - ErrorLog("invalid compression_magic 0x%08x, should be 0x%08x\n", hdr->compression_magic, DECMPFS_MAGIC); + ErrorLogWithPath("invalid compression_magic 0x%08x, should be 0x%08x\n", hdr->compression_magic, DECMPFS_MAGIC); err = EINVAL; goto out; } @@ -509,7 +514,7 @@ decmpfs_fetch_compressed_header(vnode_t vp, decmpfs_cnode *cp, decmpfs_header ** /* return the header even though the type is out of 
range */ err = ERANGE; } else { - ErrorLog("compression_type %d out of range\n", hdr->compression_type); + ErrorLogWithPath("compression_type %d out of range\n", hdr->compression_type); err = EINVAL; } goto out; @@ -517,7 +522,7 @@ decmpfs_fetch_compressed_header(vnode_t vp, decmpfs_cnode *cp, decmpfs_header ** out: if (err && (err != ERANGE)) { - DebugLog("err %d\n", err); + DebugLogWithPath("err %d\n", err); if (data) FREE(data, M_TEMP); *hdrOut = NULL; } else { @@ -597,11 +602,11 @@ decmpfs_validate_compressed_file(vnode_t vp, decmpfs_cnode *cp) } lck_rw_lock_shared(decompressorsLock); - decmpfs_validate_compressed_file_func validate = decmp_get_func(hdr->compression_type, validate); + decmpfs_validate_compressed_file_func validate = decmp_get_func(vp, hdr->compression_type, validate); if (validate) { /* make sure this validation function is valid */ /* is the data okay? */ err = validate(vp, decmpfs_ctx, hdr); - } else if (decmp_get_func(hdr->compression_type, fetch) == NULL) { + } else if (decmp_get_func(vp, hdr->compression_type, fetch) == NULL) { /* the type isn't registered */ err = EIO; } else { @@ -613,7 +618,7 @@ decmpfs_validate_compressed_file(vnode_t vp, decmpfs_cnode *cp) if (hdr) FREE(hdr, M_TEMP); #if COMPRESSION_DEBUG if (err) { - DebugLog("decmpfs_validate_compressed_file ret %d, vp->v_flag %d\n", err, vp->v_flag); + DebugLogWithPath("decmpfs_validate_compressed_file ret %d, vp->v_flag %d\n", err, vp->v_flag); } #endif return err; @@ -664,7 +669,7 @@ decmpfs_file_is_compressed(vnode_t vp, decmpfs_cnode *cp) break; default: /* unknown state, assume file is not compressed */ - ErrorLog("unknown cmp_state %d\n", cmp_state); + ErrorLogWithPath("unknown cmp_state %d\n", cmp_state); return 0; } @@ -748,7 +753,7 @@ decmpfs_file_is_compressed(vnode_t vp, decmpfs_cnode *cp) /* update the decompression flags in the decmpfs cnode */ lck_rw_lock_shared(decompressorsLock); - decmpfs_get_decompression_flags_func get_flags = 
decmp_get_func(hdr->compression_type, get_flags); + decmpfs_get_decompression_flags_func get_flags = decmp_get_func(vp, hdr->compression_type, get_flags); if (get_flags) { decompression_flags = get_flags(vp, decmpfs_ctx, hdr); } @@ -772,7 +777,7 @@ decmpfs_file_is_compressed(vnode_t vp, decmpfs_cnode *cp) return 1; default: /* unknown state, assume file is not compressed */ - ErrorLog("unknown ret %d\n", ret); + ErrorLogWithPath("unknown ret %d\n", ret); return 0; } } @@ -887,12 +892,12 @@ decmpfs_hides_xattr(vfs_context_t ctx, decmpfs_cnode *cp, const char *xattr) if (ctx == decmpfs_ctx) return 0; - if (strncmp(xattr, XATTR_RESOURCEFORK_NAME, 22) == 0) + if (strncmp(xattr, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME) - 1) == 0) return decmpfs_hides_rsrc(ctx, cp); if (!decmpfs_fast_file_is_compressed(cp)) /* file is not compressed, so don't hide this xattr */ return 0; - if (strncmp(xattr, DECMPFS_XATTR_NAME, 11) == 0) + if (strncmp(xattr, DECMPFS_XATTR_NAME, sizeof(DECMPFS_XATTR_NAME) - 1) == 0) /* it's our xattr, so hide it */ return 1; /* don't hide this xattr */ @@ -965,14 +970,14 @@ unregister_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration } static int -compression_type_valid(decmpfs_header *hdr) +compression_type_valid(vnode_t vp, decmpfs_header *hdr) { /* fast pre-check to determine if the given compressor has checked in */ int ret = 0; /* every compressor must have at least a fetch function */ lck_rw_lock_shared(decompressorsLock); - if (decmp_get_func(hdr->compression_type, fetch) != NULL) { + if (decmp_get_func(vp, hdr->compression_type, fetch) != NULL) { ret = 1; } lck_rw_unlock_shared(decompressorsLock); @@ -1012,7 +1017,7 @@ decmpfs_fetch_uncompressed_data(vnode_t vp, decmpfs_cnode *cp, decmpfs_header *h } lck_rw_lock_shared(decompressorsLock); - decmpfs_fetch_uncompressed_data_func fetch = decmp_get_func(hdr->compression_type, fetch); + decmpfs_fetch_uncompressed_data_func fetch = decmp_get_func(vp, 
hdr->compression_type, fetch); if (fetch) { err = fetch(vp, decmpfs_ctx, hdr, offset, size, nvec, vec, bytes_read); lck_rw_unlock_shared(decompressorsLock); @@ -1050,7 +1055,7 @@ commit_upl(upl_t upl, upl_offset_t pl_offset, size_t uplSize, int flags, int abo VerboseLog("aborting upl, flags 0x%08x\n", flags); kr = ubc_upl_abort_range(upl, pl_offset, uplSize, flags); if (kr != KERN_SUCCESS) - ErrorLog("ubc_upl_commit_range error %d\n", (int)kr); + ErrorLog("ubc_upl_abort_range error %d\n", (int)kr); } else { VerboseLog("committing upl, flags 0x%08x\n", flags | UPL_COMMIT_CLEAR_DIRTY); kr = ubc_upl_commit_range(upl, pl_offset, uplSize, flags | UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_WRITTEN_BY_KERNEL); @@ -1067,7 +1072,7 @@ decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmp /* handles a page-in request from vfs for a compressed file */ int err = 0; - struct vnode *vp = ap->a_vp; + vnode_t vp = ap->a_vp; upl_t pl = ap->a_pl; upl_offset_t pl_offset = ap->a_pl_offset; off_t f_offset = ap->a_f_offset; @@ -1088,7 +1093,7 @@ decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmp if (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD)) { - DebugLog("pagein: unknown flags 0x%08x\n", (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD))); + DebugLogWithPath("pagein: unknown flags 0x%08x\n", (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD))); } err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0); @@ -1098,7 +1103,7 @@ decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmp cachedSize = hdr->uncompressed_size; - if (!compression_type_valid(hdr)) { + if (!compression_type_valid(vp, hdr)) { /* compressor not registered */ err = ENOTSUP; goto out; @@ -1138,7 +1143,7 @@ decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmp uint64_t did_read = 0; if (decmpfs_fast_get_state(cp) == FILE_IS_CONVERTING) { - ErrorLog("unexpected pagein during decompress\n"); + 
ErrorLogWithPath("unexpected pagein during decompress\n"); /* if the file is converting, this must be a recursive call to pagein from underneath a call to decmpfs_decompress_file; pretend that it succeeded but don't do anything since we're just going to write over the pages anyway @@ -1149,19 +1154,19 @@ decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmp err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, uplPos, uplSize, 1, &vec, &did_read); } if (err) { - DebugLog("decmpfs_fetch_uncompressed_data err %d\n", err); + DebugLogWithPath("decmpfs_fetch_uncompressed_data err %d\n", err); int cmp_state = decmpfs_fast_get_state(cp); if (cmp_state == FILE_IS_CONVERTING) { - DebugLog("cmp_state == FILE_IS_CONVERTING\n"); + DebugLogWithPath("cmp_state == FILE_IS_CONVERTING\n"); cmp_state = wait_for_decompress(cp); if (cmp_state == FILE_IS_COMPRESSED) { - DebugLog("cmp_state == FILE_IS_COMPRESSED\n"); + DebugLogWithPath("cmp_state == FILE_IS_COMPRESSED\n"); /* a decompress was attempted but it failed, let's try calling fetch again */ goto decompress; } } if (cmp_state == FILE_IS_NOT_COMPRESSED) { - DebugLog("cmp_state == FILE_IS_NOT_COMPRESSED\n"); + DebugLogWithPath("cmp_state == FILE_IS_NOT_COMPRESSED\n"); /* the file was decompressed after we started reading it */ abort_pagein = 1; /* we're not going to commit our data */ *is_compressed = 0; /* instruct caller to fall back to its normal path */ @@ -1180,7 +1185,7 @@ decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmp kr = ubc_upl_unmap(pl); data = NULL; /* make sure to set data to NULL so we don't try to unmap again below */ if (kr != KERN_SUCCESS) - ErrorLog("ubc_upl_unmap error %d\n", (int)kr); + ErrorLogWithPath("ubc_upl_unmap error %d\n", (int)kr); else { if (!abort_pagein) { /* commit our pages */ @@ -1192,9 +1197,16 @@ decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmp if (data) ubc_upl_unmap(pl); if (hdr) FREE(hdr, M_TEMP); if 
(cmpdata_locked) decmpfs_unlock_compressed_data(cp, 0); - if (err) - ErrorLog("err %d\n", err); - + if (err) { +#if DEVELOPMENT || DEBUG + char *path; + MALLOC(path, char *, PATH_MAX, M_TEMP, M_WAITOK); + panic("%s: decmpfs_pagein_compressed: err %d", vnpath(vp, path, PATH_MAX), err); + FREE(path, M_TEMP); +#else + ErrorLogWithPath("err %d\n", err); +#endif + } return err; } @@ -1228,7 +1240,7 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c uplPos = uio_offset(uio); uplSize = uio_resid(uio); - VerboseLog("uplPos %lld uplSize %lld\n", uplPos, uplSize); + VerboseLogWithPath("uplPos %lld uplSize %lld\n", uplPos, uplSize); cachedSize = decmpfs_cnode_get_vnode_cached_size(cp); @@ -1260,7 +1272,7 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c if (err != 0) { goto out; } - if (!compression_type_valid(hdr)) { + if (!compression_type_valid(vp, hdr)) { err = ENOTSUP; goto out; } @@ -1268,16 +1280,15 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c uplPos = uioPos; uplSize = uioRemaining; #if COMPRESSION_DEBUG - char path[PATH_MAX]; - DebugLog("%s: uplPos %lld uplSize %lld\n", vnpath(vp, path, sizeof(path)), (uint64_t)uplPos, (uint64_t)uplSize); + DebugLogWithPath("uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize); #endif lck_rw_lock_shared(decompressorsLock); - decmpfs_adjust_fetch_region_func adjust_fetch = decmp_get_func(hdr->compression_type, adjust_fetch); + decmpfs_adjust_fetch_region_func adjust_fetch = decmp_get_func(vp, hdr->compression_type, adjust_fetch); if (adjust_fetch) { /* give the compressor a chance to adjust the portion of the file that we read */ adjust_fetch(vp, decmpfs_ctx, hdr, &uplPos, &uplSize); - VerboseLog("adjusted uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize); + VerboseLogWithPath("adjusted uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize); } lck_rw_unlock_shared(decompressorsLock); @@ 
-1305,7 +1316,7 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c /* round size up to page multiple */ uplSize = (uplSize + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); - VerboseLog("new uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize); + VerboseLogWithPath("new uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize); uplRemaining = uplSize; curUplPos = uplPos; @@ -1324,11 +1335,11 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c /* create the upl */ kr = ubc_create_upl(vp, curUplPos, curUplSize, &upl, &pli, UPL_SET_LITE); if (kr != KERN_SUCCESS) { - ErrorLog("ubc_create_upl error %d\n", (int)kr); + ErrorLogWithPath("ubc_create_upl error %d\n", (int)kr); err = EINVAL; goto out; } - VerboseLog("curUplPos %lld curUplSize %lld\n", (uint64_t)curUplPos, (uint64_t)curUplSize); + VerboseLogWithPath("curUplPos %lld curUplSize %lld\n", (uint64_t)curUplPos, (uint64_t)curUplSize); #if CONFIG_IOSCHED /* Mark the UPL as the requesting UPL for decompression */ @@ -1340,8 +1351,14 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c if (kr != KERN_SUCCESS) { commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1); - - ErrorLog("ubc_upl_map error %d\n", (int)kr); +#if DEVELOPMENT || DEBUG + char *path; + MALLOC(path, char *, PATH_MAX, M_TEMP, M_WAITOK); + panic("%s: decmpfs_read_compressed: ubc_upl_map error %d", vnpath(vp, path, PATH_MAX), (int)kr); + FREE(path, M_TEMP); +#else + ErrorLogWithPath("ubc_upl_map error %d\n", (int)kr); +#endif err = EINVAL; goto out; } @@ -1351,7 +1368,7 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1); - ErrorLog("ubc_upl_map mapped null\n"); + ErrorLogWithPath("ubc_upl_map mapped null\n"); err = EINVAL; goto out; } @@ -1362,21 +1379,21 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c vec = 
(decmpfs_vector){ .buf = data, .size = curUplSize }; err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, curUplPos, curUplSize, 1, &vec, &did_read); if (err) { - ErrorLog("decmpfs_fetch_uncompressed_data err %d\n", err); + ErrorLogWithPath("decmpfs_fetch_uncompressed_data err %d\n", err); /* maybe the file is converting to decompressed */ int cmp_state = decmpfs_fast_get_state(cp); if (cmp_state == FILE_IS_CONVERTING) { - ErrorLog("cmp_state == FILE_IS_CONVERTING\n"); + ErrorLogWithPath("cmp_state == FILE_IS_CONVERTING\n"); cmp_state = wait_for_decompress(cp); if (cmp_state == FILE_IS_COMPRESSED) { - ErrorLog("cmp_state == FILE_IS_COMPRESSED\n"); + ErrorLogWithPath("cmp_state == FILE_IS_COMPRESSED\n"); /* a decompress was attempted but it failed, let's try fetching again */ goto decompress; } } if (cmp_state == FILE_IS_NOT_COMPRESSED) { - ErrorLog("cmp_state == FILE_IS_NOT_COMPRESSED\n"); + ErrorLogWithPath("cmp_state == FILE_IS_NOT_COMPRESSED\n"); /* the file was decompressed after we started reading it */ abort_read = 1; /* we're not going to commit our data */ *is_compressed = 0; /* instruct caller to fall back to its normal path */ @@ -1391,11 +1408,11 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c if (abort_read) { kr = commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1); } else { - VerboseLog("uioPos %lld uioRemaining %lld\n", (uint64_t)uioPos, (uint64_t)uioRemaining); + VerboseLogWithPath("uioPos %lld uioRemaining %lld\n", (uint64_t)uioPos, (uint64_t)uioRemaining); if (uioRemaining) { off_t uplOff = uioPos - curUplPos; if (uplOff < 0) { - ErrorLog("uplOff %lld should never be negative\n", (int64_t)uplOff); + ErrorLogWithPath("uplOff %lld should never be negative\n", (int64_t)uplOff); err = EINVAL; } else { off_t count = curUplPos + curUplSize - uioPos; @@ -1407,9 +1424,9 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c int io_resid = count; err = cluster_copy_upl_data(uio, upl, 
uplOff, &io_resid); int copied = count - io_resid; - VerboseLog("uplOff %lld count %lld copied %lld\n", (uint64_t)uplOff, (uint64_t)count, (uint64_t)copied); + VerboseLogWithPath("uplOff %lld count %lld copied %lld\n", (uint64_t)uplOff, (uint64_t)count, (uint64_t)copied); if (err) { - ErrorLog("cluster_copy_upl_data err %d\n", err); + ErrorLogWithPath("cluster_copy_upl_data err %d\n", err); } uioPos += copied; uioRemaining -= copied; @@ -1422,7 +1439,7 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c } } } else { - ErrorLog("ubc_upl_unmap error %d\n", (int)kr); + ErrorLogWithPath("ubc_upl_unmap error %d\n", (int)kr); } uplRemaining -= curUplSize; @@ -1433,14 +1450,14 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c if (hdr) FREE(hdr, M_TEMP); if (cmpdata_locked) decmpfs_unlock_compressed_data(cp, 0); if (err) {/* something went wrong */ - ErrorLog("err %d\n", err); + ErrorLogWithPath("err %d\n", err); return err; } #if COMPRESSION_DEBUG uplSize = uio_resid(uio); if (uplSize) - VerboseLog("still %lld bytes to copy\n", uplSize); + VerboseLogWithPath("still %lld bytes to copy\n", uplSize); #endif return 0; } @@ -1456,10 +1473,10 @@ decmpfs_free_compressed_data(vnode_t vp, decmpfs_cnode *cp) decmpfs_header *hdr = NULL; int err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0); if (err) { - ErrorLog("decmpfs_fetch_compressed_header err %d\n", err); + ErrorLogWithPath("decmpfs_fetch_compressed_header err %d\n", err); } else { lck_rw_lock_shared(decompressorsLock); - decmpfs_free_compressed_data_func free_data = decmp_get_func(hdr->compression_type, free_data); + decmpfs_free_compressed_data_func free_data = decmp_get_func(vp, hdr->compression_type, free_data); if (free_data) { err = free_data(vp, decmpfs_ctx, hdr); } else { @@ -1469,7 +1486,7 @@ decmpfs_free_compressed_data(vnode_t vp, decmpfs_cnode *cp) lck_rw_unlock_shared(decompressorsLock); if (err != 0) { - ErrorLog("decompressor err %d\n", err); 
+ ErrorLogWithPath("decompressor err %d\n", err); } } @@ -1498,7 +1515,7 @@ unset_compressed_flag(vnode_t vp) err = vnode_getattr(vp, &va, decmpfs_ctx); if (err != 0) { - ErrorLog("vnode_getattr err %d\n", err); + ErrorLogWithPath("vnode_getattr err %d\n", err); } else { new_bsdflags = va.va_flags & ~UF_COMPRESSED; @@ -1506,7 +1523,7 @@ unset_compressed_flag(vnode_t vp) VATTR_SET(&va, va_flags, new_bsdflags); err = vnode_setattr(vp, &va, decmpfs_ctx); if (err != 0) { - ErrorLog("vnode_setattr err %d\n", err); + ErrorLogWithPath("vnode_setattr err %d\n", err); } } return err; @@ -1618,7 +1635,7 @@ decmpfs_decompress_file(vnode_t vp, decmpfs_cnode *cp, off_t toSize, int truncat decmpfs_vector vec = { .buf = data, .size = MIN(allocSize, remaining) }; err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, offset, vec.size, 1, &vec, &bytes_read); if (err != 0) { - ErrorLog("decmpfs_fetch_uncompressed_data err %d\n", err); + ErrorLogWithPath("decmpfs_fetch_uncompressed_data err %d\n", err); goto out; } @@ -1630,7 +1647,7 @@ decmpfs_decompress_file(vnode_t vp, decmpfs_cnode *cp, off_t toSize, int truncat uio_reset(uio_w, offset, UIO_SYSSPACE, UIO_WRITE); err = uio_addiov(uio_w, CAST_USER_ADDR_T(data), bytes_read); if (err != 0) { - ErrorLog("uio_addiov err %d\n", err); + ErrorLogWithPath("uio_addiov err %d\n", err); err = ENOMEM; goto out; } @@ -1638,7 +1655,7 @@ decmpfs_decompress_file(vnode_t vp, decmpfs_cnode *cp, off_t toSize, int truncat err = VNOP_WRITE(vp, uio_w, 0, decmpfs_ctx); if (err != 0) { /* if the write failed, truncate the file to zero bytes */ - ErrorLog("VNOP_WRITE err %d\n", err); + ErrorLogWithPath("VNOP_WRITE err %d\n", err); break; } offset += bytes_read; @@ -1647,7 +1664,7 @@ decmpfs_decompress_file(vnode_t vp, decmpfs_cnode *cp, off_t toSize, int truncat if (err == 0) { if (offset != toSize) { - ErrorLog("file decompressed to %lld instead of %lld\n", offset, toSize); + ErrorLogWithPath("file decompressed to %lld instead of %lld\n", offset, toSize); err 
= EINVAL; goto out; } @@ -1657,18 +1674,18 @@ decmpfs_decompress_file(vnode_t vp, decmpfs_cnode *cp, off_t toSize, int truncat /* sync the data and metadata */ err = VNOP_FSYNC(vp, MNT_WAIT, decmpfs_ctx); if (err != 0) { - ErrorLog("VNOP_FSYNC err %d\n", err); + ErrorLogWithPath("VNOP_FSYNC err %d\n", err); goto out; } } if (err != 0) { /* write, setattr, or fsync failed */ - ErrorLog("aborting decompress, err %d\n", err); + ErrorLogWithPath("aborting decompress, err %d\n", err); if (truncate_okay) { /* truncate anything we might have written */ int error = vnode_setsize(vp, 0, 0, decmpfs_ctx); - ErrorLog("vnode_setsize err %d\n", error); + ErrorLogWithPath("vnode_setsize err %d\n", error); } goto out; } @@ -1682,7 +1699,7 @@ decmpfs_decompress_file(vnode_t vp, decmpfs_cnode *cp, off_t toSize, int truncat /* free the compressed data associated with this file */ err = decmpfs_free_compressed_data(vp, cp); if (err != 0) { - ErrorLog("decmpfs_free_compressed_data err %d\n", err); + ErrorLogWithPath("decmpfs_free_compressed_data err %d\n", err); } /* @@ -1699,7 +1716,7 @@ decmpfs_decompress_file(vnode_t vp, decmpfs_cnode *cp, off_t toSize, int truncat { uint64_t filesize = 0; vnsize(vp, &filesize); - DebugLog("new file size %lld\n", filesize); + DebugLogWithPath("new file size %lld\n", filesize); } #endif @@ -1763,8 +1780,7 @@ decmpfs_fetch_uncompressed_data_Type1(__unused vnode_t vp, __unused vfs_context_ #if COMPRESSION_DEBUG static int dummy = 0; // prevent syslog from coalescing printfs - char path[PATH_MAX]; - DebugLog("%s: %d memcpy %lld at %lld\n", vnpath(vp, path, sizeof(path)), dummy++, size, (uint64_t)offset); + DebugLogWithPath("%d memcpy %lld at %lld\n", dummy++, size, (uint64_t)offset); #endif remaining = size; diff --git a/bsd/kern/kdebug.c b/bsd/kern/kdebug.c index 65c98080d..27ad69aa1 100644 --- a/bsd/kern/kdebug.c +++ b/bsd/kern/kdebug.c @@ -33,6 +33,7 @@ #include #include #include +#include #define HZ 100 #include @@ -55,6 +56,7 @@ #include #include 
#include +#include #include #include @@ -124,12 +126,13 @@ int cpu_number(void); /* XXX include path broken */ void commpage_update_kdebug_enable(void); /* XXX sign */ /* XXX should probably be static, but it's debugging code... */ -int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t); +int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t, uint32_t); void kdbg_control_chud(int, void *); int kdbg_control(int *, u_int, user_addr_t, size_t *); int kdbg_readcpumap(user_addr_t, size_t *); int kdbg_readcurcpumap(user_addr_t, size_t *); int kdbg_readthrmap(user_addr_t, size_t *, vnode_t, vfs_context_t); +int kdbg_readthrmap_v3(user_addr_t, size_t *, int); int kdbg_readcurthrmap(user_addr_t, size_t *); int kdbg_getreg(kd_regtype *); int kdbg_setreg(kd_regtype *); @@ -140,11 +143,30 @@ void kdbg_thrmap_init(void); int kdbg_reinit(boolean_t); int kdbg_bootstrap(boolean_t); -int kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap, uint32_t* cpumap_size); -kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsize, unsigned int *mapcount); +int kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, + uint8_t** cpumap, uint32_t* cpumap_size); + +kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, + unsigned int *mapsize, + unsigned int *mapcount); + +static boolean_t kdebug_current_proc_enabled(uint32_t debugid); +static boolean_t kdebug_debugid_enabled(uint32_t debugid); +static errno_t kdebug_check_trace_string(uint32_t debugid, uint64_t str_id); + +int kdbg_write_v3_header(user_addr_t, size_t *, int); +int kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag, + uint32_t sub_tag, uint64_t length, + vnode_t vp, vfs_context_t ctx); + +user_addr_t kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag, + uint64_t length, vnode_t vp, + vfs_context_t ctx); static int kdbg_enable_typefilter(void); static int kdbg_disable_typefilter(void); +static int kdbg_allocate_typefilter(void); 
+static int kdbg_deallocate_typefilter(void); static int create_buffers(boolean_t); static void delete_buffers(void); @@ -162,7 +184,6 @@ static boolean_t kd_early_overflow = FALSE; #define SLOW_NOLOG 0x01 #define SLOW_CHECKS 0x02 -#define SLOW_ENTROPY 0x04 /* Obsolescent */ #define SLOW_CHUD 0x08 #define EVENTS_PER_STORAGE_UNIT 2048 @@ -214,6 +235,11 @@ struct kd_bufinfo { uint32_t num_bufs; } __attribute__(( aligned(MAX_CPU_CACHE_LINE_SIZE) )); + +/* + * In principle, this control block can be shared in DRAM with other + * coprocessors and runtimes, for configuring what tracing is enabled. + */ struct kd_ctrl_page_t { union kds_ptr kds_free_list; uint32_t enabled :1; @@ -238,6 +264,10 @@ struct kd_bufinfo *kdbip = NULL; #define KDCOPYBUF_COUNT 8192 #define KDCOPYBUF_SIZE (KDCOPYBUF_COUNT * sizeof(kd_buf)) + +#define PAGE_4KB 4096 +#define PAGE_16KB 16384 + kd_buf *kdcopybuf = NULL; boolean_t kdlog_bg_trace = FALSE; @@ -259,25 +289,19 @@ static lck_grp_t * kd_trace_mtx_sysctl_grp; static lck_attr_t * kd_trace_mtx_sysctl_attr; static lck_grp_attr_t *kd_trace_mtx_sysctl_grp_attr; -static lck_grp_t *stackshot_subsys_lck_grp; -static lck_grp_attr_t *stackshot_subsys_lck_grp_attr; -static lck_attr_t *stackshot_subsys_lck_attr; -static lck_mtx_t stackshot_subsys_mutex; +extern kern_return_t stack_snapshot2(int pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval); + +#if CONFIG_TELEMETRY +extern kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval); +#endif /* CONFIG_TELEMETRY */ -void *stackshot_snapbuf = NULL; +extern kern_return_t kern_stack_snapshot_with_reason(char* reason); -int -stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset, int32_t *retval); +extern kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user); 
-int -stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytesTraced); -extern void -kdp_snapshot_preflight(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset); +extern kern_return_t stack_snapshot_from_kernel_internal(int pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytes_traced); -extern int -kdp_stack_snapshot_geterror(void); -extern unsigned int -kdp_stack_snapshot_bytes_traced(void); +int stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytes_traced); kd_threadmap *kd_mapptr = 0; unsigned int kd_mapsize = 0; @@ -290,25 +314,27 @@ int RAW_file_written = 0; pid_t global_state_pid = -1; /* Used to control exclusive use of kd_buffer */ -#define DBG_FUNC_MASK 0xfffffffc +/* + * A globally increasing counter for identifying strings in trace. Starts at + * 1 because 0 is a reserved return value. + */ +__attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE))) +static uint64_t g_curr_str_id = 1; -/* TODO: move to kdebug.h */ -#define CLASS_MASK 0xff000000 -#define CLASS_OFFSET 24 -#define SUBCLASS_MASK 0x00ff0000 -#define SUBCLASS_OFFSET 16 -#define CSC_MASK 0xffff0000 /* class and subclass mask */ -#define CSC_OFFSET SUBCLASS_OFFSET +#define STR_ID_SIG_OFFSET (48) +#define STR_ID_MASK ((1ULL << STR_ID_SIG_OFFSET) - 1) +#define STR_ID_SIG_MASK (~STR_ID_MASK) -#define EXTRACT_CLASS(debugid) ( (uint8_t) ( ((debugid) & CLASS_MASK ) >> CLASS_OFFSET ) ) -#define EXTRACT_SUBCLASS(debugid) ( (uint8_t) ( ((debugid) & SUBCLASS_MASK) >> SUBCLASS_OFFSET ) ) -#define EXTRACT_CSC(debugid) ( (uint16_t)( ((debugid) & CSC_MASK ) >> CSC_OFFSET ) ) +/* + * A bit pattern for identifying string IDs generated by + * kdebug_trace_string(2). 
+ */ +static uint64_t g_str_id_signature = (0x70acULL << STR_ID_SIG_OFFSET); #define INTERRUPT 0x01050000 #define MACH_vmfault 0x01300008 #define BSC_SysCall 0x040c0000 #define MACH_SysCall 0x010c0000 -#define DBG_SCALL_MASK 0xffff0000 /* task to string structure */ struct tts @@ -337,8 +363,6 @@ typedef void (*kd_chudhook_fn) (uint32_t debugid, uintptr_t arg1, volatile kd_chudhook_fn kdebug_chudhook = 0; /* pointer to CHUD toolkit function */ -__private_extern__ void stackshot_lock_init( void ); - static uint8_t *type_filter_bitmap; /* @@ -505,7 +529,7 @@ create_buffers(boolean_t early_trace) kd_ctrl_page.kdebug_cpus = kd_ctrl_page.kdebug_iops ? kd_ctrl_page.kdebug_iops->cpu_id + 1 : kdbg_cpu_count(early_trace); - if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus) != KERN_SUCCESS) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) { error = ENOSPC; goto out; } @@ -529,19 +553,19 @@ create_buffers(boolean_t early_trace) kd_bufs = NULL; if (kdcopybuf == 0) { - if (kmem_alloc(kernel_map, (vm_offset_t *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE) != KERN_SUCCESS) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) { error = ENOSPC; goto out; } } - if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers))) != KERN_SUCCESS) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)), VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) { error = ENOSPC; goto out; } bzero(kd_bufs, n_storage_buffers * sizeof(struct kd_storage_buffers)); for (i = 0; i < f_buffers; i++) { - if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)f_buffer_size) != KERN_SUCCESS) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, 
(vm_size_t)f_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) { error = ENOSPC; goto out; } @@ -550,7 +574,7 @@ create_buffers(boolean_t early_trace) kd_bufs[i].kdsb_size = f_buffer_size; } if (p_buffer_size) { - if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size) != KERN_SUCCESS) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) { error = ENOSPC; goto out; } @@ -784,7 +808,7 @@ int kernel_debug_register_callback(kd_callback_t callback) { kd_iop_t* iop; - if (kmem_alloc(kernel_map, (vm_offset_t *)&iop, sizeof(kd_iop_t)) == KERN_SUCCESS) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&iop, sizeof(kd_iop_t), VM_KERN_MEMORY_DIAG) == KERN_SUCCESS) { memcpy(&iop->callback, &callback, sizeof(kd_callback_t)); /* @@ -859,8 +883,18 @@ kernel_debug_enter( goto out1; if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) { - if (isset(type_filter_bitmap, EXTRACT_CSC(debugid))) - goto record_event; + /* + * Recheck if TYPEFILTER is being used, and if so, + * dereference bitmap. If the trace facility is being + * disabled, we have ~100ms of preemption-free CPU + * usage to access the bitmap. 
+ */ + disable_preemption(); + if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) { + if (isset(type_filter_bitmap, KDBG_EXTRACT_CSC(debugid))) + goto record_event_preempt_disabled; + } + enable_preemption(); goto out1; } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) { @@ -869,10 +903,10 @@ kernel_debug_enter( goto out1; } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) { - if ((debugid & DBG_FUNC_MASK) != kdlog_value1 && - (debugid & DBG_FUNC_MASK) != kdlog_value2 && - (debugid & DBG_FUNC_MASK) != kdlog_value3 && - (debugid & DBG_FUNC_MASK) != kdlog_value4) + if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 && + (debugid & KDBG_EVENTID_MASK) != kdlog_value2 && + (debugid & KDBG_EVENTID_MASK) != kdlog_value3 && + (debugid & KDBG_EVENTID_MASK) != kdlog_value4) goto out1; } } @@ -881,6 +915,7 @@ kernel_debug_enter( disable_preemption(); +record_event_preempt_disabled: if (kd_ctrl_page.enabled == 0) goto out; @@ -1034,16 +1069,26 @@ kernel_debug_internal( if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) { /* Always record trace system info */ - if (EXTRACT_CLASS(debugid) == DBG_TRACE) + if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) goto record_event; - if (isset(type_filter_bitmap, EXTRACT_CSC(debugid))) - goto record_event; + /* + * Recheck if TYPEFILTER is being used, and if so, + * dereference bitmap. If the trace facility is being + * disabled, we have ~100ms of preemption-free CPU + * usage to access the bitmap. 
+ */ + disable_preemption(); + if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) { + if (isset(type_filter_bitmap, KDBG_EXTRACT_CSC(debugid))) + goto record_event_preempt_disabled; + } + enable_preemption(); goto out1; } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) { /* Always record trace system info */ - if (EXTRACT_CLASS(debugid) == DBG_TRACE) + if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) goto record_event; if (debugid < kdlog_beg || debugid > kdlog_end) @@ -1051,19 +1096,20 @@ kernel_debug_internal( } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) { /* Always record trace system info */ - if (EXTRACT_CLASS(debugid) == DBG_TRACE) + if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) goto record_event; - if ((debugid & DBG_FUNC_MASK) != kdlog_value1 && - (debugid & DBG_FUNC_MASK) != kdlog_value2 && - (debugid & DBG_FUNC_MASK) != kdlog_value3 && - (debugid & DBG_FUNC_MASK) != kdlog_value4) + if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 && + (debugid & KDBG_EVENTID_MASK) != kdlog_value2 && + (debugid & KDBG_EVENTID_MASK) != kdlog_value3 && + (debugid & KDBG_EVENTID_MASK) != kdlog_value4) goto out1; } } record_event: disable_preemption(); +record_event_preempt_disabled: if (kd_ctrl_page.enabled == 0) goto out; @@ -1120,8 +1166,8 @@ kernel_debug_internal( uint32_t etype; uint32_t stype; - etype = debugid & DBG_FUNC_MASK; - stype = debugid & DBG_SCALL_MASK; + etype = debugid & KDBG_EVENTID_MASK; + stype = debugid & KDBG_CSC_MASK; if (etype == INTERRUPT || etype == MACH_vmfault || stype == BSC_SysCall || stype == MACH_SysCall) { @@ -1181,7 +1227,7 @@ kernel_debug1( } void -kernel_debug_string(const char *message) +kernel_debug_string_simple(const char *message) { uintptr_t arg[4] = {0, 0, 0, 0}; @@ -1268,7 +1314,24 @@ kernel_debug_early_end(void) TRACE_LOST_EVENTS, 0, 0, 0, 0, 0); /* This trace marks the start of kernel tracing */ - kernel_debug_string("early trace done"); + kernel_debug_string_simple("early trace done"); +} + +/* + * Returns 
non-zero if debugid is in a reserved class. + */ +static int +kdebug_validate_debugid(uint32_t debugid) +{ + uint8_t debugid_class; + + debugid_class = KDBG_EXTRACT_CLASS(debugid); + switch (debugid_class) { + case DBG_TRACE: + return EPERM; + } + + return 0; } /* @@ -1293,17 +1356,10 @@ kdebug_trace(struct proc *p, struct kdebug_trace_args *uap, int32_t *retval) */ int kdebug_trace64(__unused struct proc *p, struct kdebug_trace64_args *uap, __unused int32_t *retval) { - uint8_t code_class; - - /* - * Not all class are supported for injection from userspace, especially ones used by the core - * kernel tracing infrastructure. - */ - code_class = EXTRACT_CLASS(uap->code); + int err; - switch (code_class) { - case DBG_TRACE: - return EPERM; + if ((err = kdebug_validate_debugid(uap->code)) != 0) { + return err; } if ( __probable(kdebug_enable == 0) ) @@ -1314,6 +1370,307 @@ int kdebug_trace64(__unused struct proc *p, struct kdebug_trace64_args *uap, __u return(0); } +/* + * Adding enough padding to contain a full tracepoint for the last + * portion of the string greatly simplifies the logic of splitting the + * string between tracepoints. Full tracepoints can be generated using + * the buffer itself, without having to manually add zeros to pad the + * arguments. + */ + +/* 2 string args in first tracepoint and 9 string data tracepoints */ +#define STR_BUF_ARGS (2 + (9 * 4)) +/* times the size of each arg on K64 */ +#define MAX_STR_LEN (STR_BUF_ARGS * sizeof(uint64_t)) +/* on K32, ending straddles a tracepoint, so reserve blanks */ +#define STR_BUF_SIZE (MAX_STR_LEN + (2 * sizeof(uint32_t))) + +/* + * This function does no error checking and assumes that it is called with + * the correct arguments, including that the buffer pointed to by str is at + * least STR_BUF_SIZE bytes. However, str must be aligned to word-size and + * be NUL-terminated. 
In cases where a string can fit evenly into a final + * tracepoint without its NUL-terminator, this function will not end those + * strings with a NUL in trace. It's up to clients to look at the function + * qualifier for DBG_FUNC_END in this case, to end the string. + */ +static uint64_t +kernel_debug_string_internal(uint32_t debugid, uint64_t str_id, void *vstr, + size_t str_len) +{ + /* str must be word-aligned */ + uintptr_t *str = vstr; + size_t written = 0; + uintptr_t thread_id; + int i; + uint32_t trace_debugid = TRACEDBG_CODE(DBG_TRACE_STRING, + TRACE_STRING_GLOBAL); + + thread_id = (uintptr_t)thread_tid(current_thread()); + + /* if the ID is being invalidated, just emit that */ + if (str_id != 0 && str_len == 0) { + kernel_debug_internal(trace_debugid | DBG_FUNC_START | DBG_FUNC_END, + (uintptr_t)debugid, (uintptr_t)str_id, 0, 0, + thread_id); + return str_id; + } + + /* generate an ID, if necessary */ + if (str_id == 0) { + str_id = OSIncrementAtomic64((SInt64 *)&g_curr_str_id); + str_id = (str_id & STR_ID_MASK) | g_str_id_signature; + } + + trace_debugid |= DBG_FUNC_START; + /* string can fit in a single tracepoint */ + if (str_len <= (2 * sizeof(uintptr_t))) { + trace_debugid |= DBG_FUNC_END; + } + + kernel_debug_internal(trace_debugid, (uintptr_t)debugid, + (uintptr_t)str_id, str[0], + str[1], thread_id); + + trace_debugid &= KDBG_EVENTID_MASK; + i = 2; + written += 2 * sizeof(uintptr_t); + + for (; written < str_len; i += 4, written += 4 * sizeof(uintptr_t)) { + if ((written + (4 * sizeof(uintptr_t))) >= str_len) { + trace_debugid |= DBG_FUNC_END; + } + kernel_debug_internal(trace_debugid, str[i], + str[i + 1], + str[i + 2], + str[i + 3], thread_id); + } + + return str_id; +} + +/* + * Returns true if the current process can emit events, and false otherwise. + * Trace system and scheduling events circumvent this check, as do events + * emitted in interrupt context. 
+ */ +static boolean_t +kdebug_current_proc_enabled(uint32_t debugid) +{ + /* can't determine current process in interrupt context */ + if (ml_at_interrupt_context()) { + return TRUE; + } + + /* always emit trace system and scheduling events */ + if ((KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE || + (debugid & KDBG_CSC_MASK) == MACHDBG_CODE(DBG_MACH_SCHED, 0))) + { + return TRUE; + } + + if (kd_ctrl_page.kdebug_flags & KDBG_PIDCHECK) { + proc_t cur_proc = current_proc(); + + /* only the process with the kdebug bit set is allowed */ + if (cur_proc && !(cur_proc->p_kdebug)) { + return FALSE; + } + } else if (kd_ctrl_page.kdebug_flags & KDBG_PIDEXCLUDE) { + proc_t cur_proc = current_proc(); + + /* every process except the one with the kdebug bit set is allowed */ + if (cur_proc && cur_proc->p_kdebug) { + return FALSE; + } + } + + return TRUE; +} + +/* + * Returns true if the debugid is disabled by filters, and false if the + * debugid is allowed to be traced. A debugid may not be traced if the + * typefilter disables its class and subclass, it's outside a range + * check, or if it's not an allowed debugid in a value check. Trace + * system events bypass this check. + */ +static boolean_t +kdebug_debugid_enabled(uint32_t debugid) +{ + boolean_t is_enabled = TRUE; + + /* if no filtering is enabled */ + if (!kd_ctrl_page.kdebug_slowcheck) { + return TRUE; + } + + if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) { + return TRUE; + } + + if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) { + disable_preemption(); + + /* + * Recheck if typefilter is still being used. If tracing is being + * disabled, there's a 100ms sleep on the other end to keep the + * bitmap around for this check. 
+ */ + if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) { + if (!(isset(type_filter_bitmap, KDBG_EXTRACT_CSC(debugid)))) { + is_enabled = FALSE; + } + } + + enable_preemption(); + } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) { + if (debugid < kdlog_beg || debugid > kdlog_end) { + is_enabled = FALSE; + } + } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) { + if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 && + (debugid & KDBG_EVENTID_MASK) != kdlog_value2 && + (debugid & KDBG_EVENTID_MASK) != kdlog_value3 && + (debugid & KDBG_EVENTID_MASK) != kdlog_value4) + { + is_enabled = FALSE; + } + } + + return is_enabled; +} + +/* + * Returns 0 if a string can be traced with these arguments. Returns errno + * value if error occurred. + */ +static errno_t +kdebug_check_trace_string(uint32_t debugid, uint64_t str_id) +{ + /* if there are function qualifiers on the debugid */ + if (debugid & ~KDBG_EVENTID_MASK) { + return EINVAL; + } + + if (kdebug_validate_debugid(debugid)) { + return EPERM; + } + + if (str_id != 0 && (str_id & STR_ID_SIG_MASK) != g_str_id_signature) { + return EINVAL; + } + + return 0; +} + +/* + * Implementation of KPI kernel_debug_string. 
+ */ +int +kernel_debug_string(uint32_t debugid, uint64_t *str_id, const char *str) +{ + /* arguments to tracepoints must be word-aligned */ + __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE]; + assert_static(sizeof(str_buf) > MAX_STR_LEN); + vm_size_t len_copied; + int err; + + assert(str_id); + + if (__probable(kdebug_enable == 0)) { + return 0; + } + + if (!kdebug_current_proc_enabled(debugid)) { + return 0; + } + + if (!kdebug_debugid_enabled(debugid)) { + return 0; + } + + if ((err = kdebug_check_trace_string(debugid, *str_id)) != 0) { + return err; + } + + if (str == NULL) { + if (*str_id == 0) { /* check the ID value; the pointer itself was asserted non-NULL above */ + return EINVAL; + } + + *str_id = kernel_debug_string_internal(debugid, *str_id, NULL, 0); + return 0; + } + + memset(str_buf, 0, sizeof(str_buf)); + (void)strlcpy(str_buf, str, MAX_STR_LEN + 1); /* truncation is fine; the return value is strlen(str), not the bytes stored */ + len_copied = strlen(str_buf); /* length actually held in str_buf */ + *str_id = kernel_debug_string_internal(debugid, *str_id, str_buf, len_copied); + return 0; +} + +/* + * Support syscall kdebug_trace_string. + */ +int +kdebug_trace_string(__unused struct proc *p, + struct kdebug_trace_string_args *uap, + uint64_t *retval) +{ + __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE]; + assert_static(sizeof(str_buf) > MAX_STR_LEN); + size_t len_copied; + int err; + + if (__probable(kdebug_enable == 0)) { + return 0; + } + + if (!kdebug_current_proc_enabled(uap->debugid)) { + return 0; + } + + if (!kdebug_debugid_enabled(uap->debugid)) { + return 0; + } + + if ((err = kdebug_check_trace_string(uap->debugid, uap->str_id)) != 0) { + return err; + } + + if (uap->str == USER_ADDR_NULL) { + if (uap->str_id == 0) { + return EINVAL; + } + + *retval = kernel_debug_string_internal(uap->debugid, uap->str_id, + NULL, 0); + return 0; + } + + memset(str_buf, 0, sizeof(str_buf)); + err = copyinstr(uap->str, str_buf, MAX_STR_LEN + 1, &len_copied); + + /* it's alright to truncate the string, so allow ENAMETOOLONG */ + if (err == ENAMETOOLONG) { + str_buf[MAX_STR_LEN] = '\0'; + } else if (err) { + return err; + } 
+ + if (len_copied <= 1) { + return EINVAL; + } + + /* convert back to a length */ + len_copied--; + + *retval = kernel_debug_string_internal(uap->debugid, uap->str_id, str_buf, + len_copied); + return 0; +} + static void kdbg_lock_init(void) { @@ -1376,7 +1733,7 @@ kdbg_reinit(boolean_t early_trace) kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize); kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT; kd_mapsize = 0; - kd_mapptr = (kd_threadmap *) 0; + kd_mapptr = NULL; kd_mapcount = 0; } ret = kdbg_bootstrap(early_trace); @@ -1496,7 +1853,7 @@ kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap, *cpumap_size = bytes_needed; if (*cpumap == NULL) { - if (kmem_alloc(kernel_map, (vm_offset_t*)cpumap, (vm_size_t)*cpumap_size) != KERN_SUCCESS) { + if (kmem_alloc(kernel_map, (vm_offset_t*)cpumap, (vm_size_t)*cpumap_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) { return ENOMEM; } } else if (bytes_available < bytes_needed) { @@ -1585,7 +1942,7 @@ kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsiz if (count && count < *mapcount) return (0); - if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)*mapsize) == KERN_SUCCESS)) { + if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)*mapsize, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS)) { bzero((void *)kaddr, *mapsize); mapptr = (kd_threadmap *)kaddr; } else @@ -1593,7 +1950,7 @@ kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsiz tts_mapsize = tts_count * sizeof(struct tts); - if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)tts_mapsize) == KERN_SUCCESS)) { + if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)tts_mapsize, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS)) { bzero((void *)kaddr, tts_mapsize); tts_mapptr = (struct tts *)kaddr; } else { @@ -1650,12 +2007,13 @@ kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsiz static void kdbg_clear(void) { - /* + /* * Clean up the trace buffer * First make sure we're not in * the middle of cutting a 
trace */ kdbg_set_tracing_enabled(FALSE, KDEBUG_ENABLE_TRACE); + kdbg_disable_typefilter(); /* * make sure the SLOW_NOLOG is seen @@ -1664,13 +2022,12 @@ kdbg_clear(void) */ IOSleep(100); - global_state_pid = -1; + global_state_pid = -1; kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES; kd_ctrl_page.kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK); kd_ctrl_page.kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE); - kdbg_disable_typefilter(); - + kdbg_deallocate_typefilter(); delete_buffers(); nkdbufs = 0; @@ -1793,16 +2150,13 @@ kdbg_setrtcdec(kd_regtype *kdr) int kdbg_enable_typefilter(void) { - if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) { - /* free the old filter */ - kdbg_disable_typefilter(); - } - - if (kmem_alloc(kernel_map, (vm_offset_t *)&type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE) != KERN_SUCCESS) { - return ENOSPC; + int ret; + + /* Allocate memory for bitmap if not already allocated */ + ret = kdbg_allocate_typefilter(); + if (ret) { + return ret; } - - bzero(type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE); /* Turn off range and value checks */ kd_ctrl_page.kdebug_flags &= ~(KDBG_RANGECHECK | KDBG_VALCHECK); @@ -1818,20 +2172,55 @@ kdbg_disable_typefilter(void) { /* Disable filter checking */ kd_ctrl_page.kdebug_flags &= ~KDBG_TYPEFILTER_CHECK; - + /* Turn off slow checks unless pid checks are using them */ if ( (kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) ) kdbg_set_flags(SLOW_CHECKS, 0, TRUE); else kdbg_set_flags(SLOW_CHECKS, 0, FALSE); - - if(type_filter_bitmap == NULL) - return 0; - vm_offset_t old_bitmap = (vm_offset_t)type_filter_bitmap; - type_filter_bitmap = NULL; + /* typefilter bitmap will be deallocated later */ + + return 0; +} + +static int +kdbg_allocate_typefilter(void) +{ + if (type_filter_bitmap == NULL) { + vm_offset_t bitmap = 0; + + if (kmem_alloc(kernel_map, &bitmap, KDBG_TYPEFILTER_BITMAP_SIZE, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) { + return ENOSPC; + } + + bzero((void 
*)bitmap, KDBG_TYPEFILTER_BITMAP_SIZE); + + if (!OSCompareAndSwapPtr(NULL, (void *)bitmap, &type_filter_bitmap)) { + kmem_free(kernel_map, bitmap, KDBG_TYPEFILTER_BITMAP_SIZE); + return 0; /* someone assigned a buffer */ + } + } else { + bzero(type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE); + } + + return 0; +} + +static int +kdbg_deallocate_typefilter(void) +{ + if(type_filter_bitmap) { + vm_offset_t bitmap = (vm_offset_t)type_filter_bitmap; + + if (OSCompareAndSwapPtr((void *)bitmap, NULL, &type_filter_bitmap)) { + kmem_free(kernel_map, bitmap, KDBG_TYPEFILTER_BITMAP_SIZE); + return 0; + } else { + /* already swapped */ + } + } - kmem_free(kernel_map, old_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE); return 0; } @@ -1945,50 +2334,317 @@ kdbg_getreg(__unused kd_regtype * kdr) return(EINVAL); } +static int +kdbg_write_to_vnode(caddr_t buffer, size_t size, vnode_t vp, vfs_context_t ctx, off_t file_offset) +{ + return vn_rdwr(UIO_WRITE, vp, buffer, size, file_offset, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, + vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); +} + int -kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size) +kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag, uint32_t sub_tag, uint64_t length, vnode_t vp, vfs_context_t ctx) { - uint8_t* cpumap = NULL; - uint32_t cpumap_size = 0; int ret = KERN_SUCCESS; - - if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) { - if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) { - if (user_cpumap) { - size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? 
cpumap_size : *user_cpumap_size; - if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) { - ret = EFAULT; - } + kd_chunk_header_v3 header; + + header.tag = tag; + header.sub_tag = sub_tag; + header.length = length; + + // Check that only one of them is valid + assert(!buffer ^ !vp); + assert((vp == NULL) || (ctx != NULL)); + + // Write the chunk header to the vnode or to the user buffer + if (buffer || vp) { + if (vp) { + ret = kdbg_write_to_vnode((caddr_t)&header, sizeof(kd_chunk_header_v3), vp, ctx, RAW_file_offset); + if (ret) { + goto write_error; } - *user_cpumap_size = cpumap_size; - kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size); - } else - ret = EINVAL; - } else - ret = EINVAL; - - return (ret); + RAW_file_offset += (sizeof(kd_chunk_header_v3)); + } + else { + ret = copyout(&header, buffer, sizeof(kd_chunk_header_v3)); + if (ret) { + goto write_error; + } + } + } +write_error: + return ret; } int -kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize) +kdbg_write_v3_chunk_header_to_buffer(void * buffer, uint32_t tag, uint32_t sub_tag, uint64_t length) { - kd_threadmap *mapptr; - unsigned int mapsize; - unsigned int mapcount; - unsigned int count = 0; - int ret = 0; + kd_chunk_header_v3 header; - count = *bufsize/sizeof(kd_threadmap); - *bufsize = 0; - - if ( (mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount)) ) { - if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap))) - ret = EFAULT; - else - *bufsize = (mapcount * sizeof(kd_threadmap)); + header.tag = tag; + header.sub_tag = sub_tag; + header.length = length; - kmem_free(kernel_map, (vm_offset_t)mapptr, mapsize); + if (!buffer) { + return 0; + } + + memcpy(buffer, &header, sizeof(kd_chunk_header_v3)); + + return (sizeof(kd_chunk_header_v3)); +} + +int +kdbg_write_v3_chunk_to_fd(uint32_t tag, uint32_t sub_tag, uint64_t length, void *payload, uint64_t payload_size, int fd) +{ + proc_t p; + struct vfs_context context; + struct fileproc *fp; + vnode_t vp; + p = 
current_proc(); + + proc_fdlock(p); + if ( (fp_lookup(p, fd, &fp, 1)) ) { + proc_fdunlock(p); + return EFAULT; + } + + context.vc_thread = current_thread(); + context.vc_ucred = fp->f_fglob->fg_cred; + + if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) { + fp_drop(p, fd, fp, 1); + proc_fdunlock(p); + return EBADF; + } + vp = (struct vnode *) fp->f_fglob->fg_data; + proc_fdunlock(p); + + if ( (vnode_getwithref(vp)) == 0 ) { + RAW_file_offset = fp->f_fglob->fg_offset; + + kd_chunk_header_v3 chunk_header = { .tag = tag, .sub_tag = sub_tag, .length = length }; + + int ret = kdbg_write_to_vnode((caddr_t) &chunk_header, sizeof(kd_chunk_header_v3), vp, &context, RAW_file_offset); + if (!ret) { + RAW_file_offset += sizeof(kd_chunk_header_v3); + } + + ret = kdbg_write_to_vnode((caddr_t) payload, (size_t) payload_size, vp, &context, RAW_file_offset); + if (!ret) { + RAW_file_offset += payload_size; + } + + fp->f_fglob->fg_offset = RAW_file_offset; + vnode_put(vp); + } + + fp_drop(p, fd, fp, 0); + return KERN_SUCCESS; +} + +user_addr_t +kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag, uint64_t length, vnode_t vp, vfs_context_t ctx) +{ + uint64_t future_chunk_timestamp = 0; + length += sizeof(uint64_t); + + if (kdbg_write_v3_chunk_header(buffer, tag, V3_EVENT_DATA_VERSION, length, vp, ctx)) { + return 0; + } + if (buffer) { + buffer += sizeof(kd_chunk_header_v3); + } + + // Check that only one of them is valid + assert(!buffer ^ !vp); + assert((vp == NULL) || (ctx != NULL)); + + // Write the 8-byte future_chunk_timestamp field in the payload + if (buffer || vp) { + if (vp) { + int ret = kdbg_write_to_vnode((caddr_t)&future_chunk_timestamp, sizeof(uint64_t), vp, ctx, RAW_file_offset); + if (!ret) { + RAW_file_offset += (sizeof(uint64_t)); + } + } + else { + if (copyout(&future_chunk_timestamp, buffer, sizeof(uint64_t))) { + return 0; + } + } + } + + return (buffer + sizeof(uint64_t)); +} + +int +kdbg_write_v3_header(user_addr_t user_header, size_t 
*user_header_size, int fd) +{ + int ret = KERN_SUCCESS; + kd_header_v3 header; + + uint8_t* cpumap = 0; + uint32_t cpumap_size = 0; + uint32_t thrmap_size = 0; + + size_t bytes_needed = 0; + + // Check that only one of them is valid + assert(!user_header ^ !fd); + assert(user_header_size); + + if ( !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ) { + ret = EINVAL; + goto bail; + } + + if ( !(user_header || fd) ) { + ret = EINVAL; + goto bail; + } + + // Initialize the cpu map + ret = kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size); + if (ret != KERN_SUCCESS) { + goto bail; + } + + // Check if a thread map is initialized + if ( !kd_mapptr ) { + ret = EINVAL; + goto bail; + } + thrmap_size = kd_mapcount * sizeof(kd_threadmap); + + // Setup the header. + // See v3 header description in sys/kdebug.h for more information. + + header.tag = RAW_VERSION3; + header.sub_tag = V3_HEADER_VERSION; + header.length = ( sizeof(kd_header_v3) + cpumap_size - sizeof(kd_cpumap_header)); + + mach_timebase_info_data_t timebase = {0, 0}; + clock_timebase_info(&timebase); + header.timebase_numer = timebase.numer; + header.timebase_denom = timebase.denom; + header.timestamp = 0; + header.walltime_secs = 0; + header.walltime_usecs = 0; + header.timezone_minuteswest = 0; + header.timezone_dst = 0; + +#if defined __LP64__ + header.flags = 1; +#else + header.flags = 0; +#endif + + // If it's a buffer, check if we have enough space to copy the header and the maps. 
+ if (user_header) { + bytes_needed = header.length + thrmap_size + (2 * sizeof(kd_chunk_header_v3)); + if ( !user_header_size ) { + ret = EINVAL; + goto bail; + } + if (*user_header_size < bytes_needed) { + ret = EINVAL; + goto bail; + } + } + + // Start writing the header + if (fd) { + void *hdr_ptr = (void *)(((uintptr_t) &header) + sizeof(kd_chunk_header_v3)); + size_t payload_size = (sizeof(kd_header_v3) - sizeof(kd_chunk_header_v3)); + + ret = kdbg_write_v3_chunk_to_fd(RAW_VERSION3, V3_HEADER_VERSION, header.length, hdr_ptr, payload_size, fd); + if (ret) { + goto bail; + } + } + else { + if (copyout(&header, user_header, sizeof(kd_header_v3))) { + ret = EFAULT; + goto bail; + } + // Update the user pointer + user_header += sizeof(kd_header_v3); + } + + // Write a cpu map. This is a sub chunk of the header + cpumap = (uint8_t*)((uintptr_t) cpumap + sizeof(kd_cpumap_header)); + size_t payload_size = (size_t)(cpumap_size - sizeof(kd_cpumap_header)); + if (fd) { + ret = kdbg_write_v3_chunk_to_fd(V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, (void *)cpumap, payload_size, fd); + if (ret) { + goto bail; + } + } + else { + ret = kdbg_write_v3_chunk_header(user_header, V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, NULL, NULL); + if (ret) { + goto bail; + } + user_header += sizeof(kd_chunk_header_v3); + if (copyout(cpumap, user_header, payload_size)) { + ret = EFAULT; + goto bail; + } + // Update the user pointer + user_header += payload_size; + } + + // Write a thread map + if (fd) { + ret = kdbg_write_v3_chunk_to_fd(V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, (void *)kd_mapptr, thrmap_size, fd); + if (ret) { + goto bail; + } + } + else { + ret = kdbg_write_v3_chunk_header(user_header, V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, NULL, NULL); + if (ret) { + goto bail; + } + user_header += sizeof(kd_chunk_header_v3); + if (copyout(kd_mapptr, user_header, thrmap_size)) { + ret = EFAULT; + goto bail; + } + user_header += thrmap_size; + } + + if (fd) { + 
RAW_file_written += bytes_needed; + } + + *user_header_size = bytes_needed; +bail: + if (cpumap) { + kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size); + } + return (ret); +} + +int +kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size) +{ + uint8_t* cpumap = NULL; + uint32_t cpumap_size = 0; + int ret = KERN_SUCCESS; + + if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) { + if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) { + if (user_cpumap) { + size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? cpumap_size : *user_cpumap_size; + if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) { + ret = EFAULT; + } + } + *user_cpumap_size = cpumap_size; + kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size); + } else + ret = EINVAL; } else ret = EINVAL; @@ -1996,113 +2652,181 @@ kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize) } int -kdbg_readthrmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx) +kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize) { - int avail = *number; - int ret = 0; - uint32_t count = 0; + kd_threadmap *mapptr; unsigned int mapsize; + unsigned int mapcount; + unsigned int count = 0; + int ret = 0; - count = avail/sizeof (kd_threadmap); + count = *bufsize/sizeof(kd_threadmap); + *bufsize = 0; - mapsize = kd_mapcount * sizeof(kd_threadmap); + if ( (mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount)) ) { + if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap))) + ret = EFAULT; + else + *bufsize = (mapcount * sizeof(kd_threadmap)); - if (count && (count <= kd_mapcount)) - { - if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) - { - if (*number < mapsize) - ret = EINVAL; - else - { - if (vp) - { - RAW_header header; - clock_sec_t secs; - clock_usec_t usecs; - char *pad_buf; - uint32_t pad_size; - uint32_t extra_thread_count = 0; - uint32_t cpumap_size; - - /* - * To write a 
RAW_VERSION1+ file, we - * must embed a cpumap in the "padding" - * used to page align the events folloing - * the threadmap. If the threadmap happens - * to not require enough padding, we - * artificially increase its footprint - * until it needs enough padding. - */ + kmem_free(kernel_map, (vm_offset_t)mapptr, mapsize); + } else + ret = EINVAL; - pad_size = PAGE_SIZE - ((sizeof(RAW_header) + (count * sizeof(kd_threadmap))) & PAGE_MASK_64); - cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap); + return (ret); +} - if (cpumap_size > pad_size) { - /* Force an overflow onto the next page, we get a full page of padding */ - extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1; - } +static int +kdbg_write_v1_plus_header(uint32_t count, vnode_t vp, vfs_context_t ctx) +{ + int ret = 0; + RAW_header header; + clock_sec_t secs; + clock_usec_t usecs; + char *pad_buf; + uint32_t pad_size; + uint32_t extra_thread_count = 0; + uint32_t cpumap_size; + unsigned int mapsize = kd_mapcount * sizeof(kd_threadmap); - header.version_no = RAW_VERSION1; - header.thread_count = count + extra_thread_count; + /* + * To write a RAW_VERSION1+ file, we + * must embed a cpumap in the "padding" + * used to page align the events following + * the threadmap. If the threadmap happens + * to not require enough padding, we + * artificially increase its footprint + * until it needs enough padding. 
+ */ - clock_get_calendar_microtime(&secs, &usecs); - header.TOD_secs = secs; - header.TOD_usecs = usecs; + assert(vp); + assert(ctx); - ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, sizeof(RAW_header), RAW_file_offset, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); - if (ret) - goto write_error; - RAW_file_offset += sizeof(RAW_header); + pad_size = PAGE_16KB - ((sizeof(RAW_header) + (count * sizeof(kd_threadmap))) & PAGE_MASK_64); + cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap); - ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, mapsize, RAW_file_offset, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); - if (ret) - goto write_error; - RAW_file_offset += mapsize; + if (cpumap_size > pad_size) { + /* If the cpu map doesn't fit in the current available pad_size, + * we increase the pad_size by 16K. We do this so that the event + * data is always available on a page aligned boundary for both + * 4k and 16k systems. We enforce this alignment for the event + * data so that we can take advantage of optimized file/disk writes.*/ + pad_size += PAGE_16KB; + } - if (extra_thread_count) { - pad_size = extra_thread_count * sizeof(kd_threadmap); - pad_buf = (char *)kalloc(pad_size); - memset(pad_buf, 0, pad_size); + /* The way we are silently embedding a cpumap in the "padding" is by artificially + * increasing the number of thread entries. However, we'll also need to ensure that + * the cpumap is embedded in the last 4K page before when the event data is expected. 
+ * This way the tools can read the data starting the next page boundary on both + * 4K and 16K systems preserving compatibility with older versions of the tools + */ + if (pad_size > PAGE_4KB) { + pad_size -= PAGE_4KB; + extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1; + } - ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); - kfree(pad_buf, pad_size); + header.version_no = RAW_VERSION1; + header.thread_count = count + extra_thread_count; + + clock_get_calendar_microtime(&secs, &usecs); + header.TOD_secs = secs; + header.TOD_usecs = usecs; + + ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, sizeof(RAW_header), RAW_file_offset, + UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); + if (ret) + goto write_error; + RAW_file_offset += sizeof(RAW_header); + + ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, mapsize, RAW_file_offset, + UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); + if (ret) + goto write_error; + RAW_file_offset += mapsize; + + if (extra_thread_count) { + pad_size = extra_thread_count * sizeof(kd_threadmap); + pad_buf = (char *)kalloc(pad_size); + if (!pad_buf) { + ret = ENOMEM; + goto write_error; + } + memset(pad_buf, 0, pad_size); - if (ret) - goto write_error; - RAW_file_offset += pad_size; + ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset, + UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); + kfree(pad_buf, pad_size); - } - - pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64); - if (pad_size) { - pad_buf = (char *)kalloc(pad_size); - memset(pad_buf, 0, pad_size); - - /* - * embed a cpumap in the padding bytes. - * older code will skip this. - * newer code will know how to read it. 
- */ - uint32_t temp = pad_size; - if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) { - memset(pad_buf, 0, pad_size); - } - - ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); - kfree(pad_buf, pad_size); - - if (ret) - goto write_error; - RAW_file_offset += pad_size; - } - RAW_file_written += sizeof(RAW_header) + mapsize + pad_size; + if (ret) + goto write_error; + RAW_file_offset += pad_size; - } else { + } + + pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64); + if (pad_size) { + pad_buf = (char *)kalloc(pad_size); + if (!pad_buf) { + ret = ENOMEM; + goto write_error; + } + memset(pad_buf, 0, pad_size); + + /* + * embed a cpumap in the padding bytes. + * older code will skip this. + * newer code will know how to read it. + */ + uint32_t temp = pad_size; + if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) { + memset(pad_buf, 0, pad_size); + } + + ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset, + UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); + kfree(pad_buf, pad_size); + + if (ret) + goto write_error; + RAW_file_offset += pad_size; + } + RAW_file_written += sizeof(RAW_header) + mapsize + pad_size; + +write_error: + return ret; +} + +int +kdbg_readthrmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx) +{ + + int avail = 0; + int ret = 0; + uint32_t count = 0; + unsigned int mapsize; + + if ((!vp && !buffer) || (vp && buffer)) { + return EINVAL; + } + + assert(number); + assert((vp == NULL) || (ctx != NULL)); + + avail = *number; + count = avail/sizeof (kd_threadmap); + mapsize = kd_mapcount * sizeof(kd_threadmap); + + if (count && (count <= kd_mapcount)) { + if ((kd_ctrl_page.kdebug_flags & 
KDBG_MAPINIT) && kd_mapsize && kd_mapptr) { + if (*number < mapsize) + ret = EINVAL; + else { + if (vp) { + ret = kdbg_write_v1_plus_header(count, vp, ctx); + if (ret) + goto write_error; + } + else { if (copyout(kd_mapptr, buffer, mapsize)) ret = EINVAL; } @@ -2118,10 +2842,11 @@ kdbg_readthrmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ct { count = 0; - vn_rdwr(UIO_WRITE, vp, (caddr_t)&count, sizeof(uint32_t), RAW_file_offset, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); - RAW_file_offset += sizeof(uint32_t); - RAW_file_written += sizeof(uint32_t); + ret = kdbg_write_to_vnode((caddr_t)&count, sizeof(uint32_t), vp, ctx, RAW_file_offset); + if (!ret) { + RAW_file_offset += sizeof(uint32_t); + RAW_file_written += sizeof(uint32_t); + } } write_error: if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) @@ -2135,6 +2860,54 @@ kdbg_readthrmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ct return(ret); } +int +kdbg_readthrmap_v3(user_addr_t buffer, size_t *number, int fd) +{ + int avail = 0; + int ret = 0; + uint32_t count = 0; + unsigned int mapsize; + + if ((!fd && !buffer) || (fd && buffer)) { + return EINVAL; + } + + assert(number); + + avail = *number; + count = avail/sizeof (kd_threadmap); + mapsize = kd_mapcount * sizeof(kd_threadmap); + + if (count && (count <= kd_mapcount)) { + if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) { + if (*number < mapsize) { + ret = EINVAL; + } + else { + ret = kdbg_write_v3_header(buffer, number, fd); + if (ret) { + goto write_error; + } + } + } + else { + ret = EINVAL; + } + } + else { + ret = EINVAL; + } +write_error: + if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) { + kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize); + kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT; + kd_mapsize = 0; + kd_mapptr = (kd_threadmap *) 0; + kd_mapcount = 0; + } + return(ret); +} 
+ static int kdbg_set_nkdbufs(unsigned int value) @@ -2164,6 +2937,7 @@ kdbg_enable_bg_trace(void) kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE); kdlog_bg_trace_running = TRUE; } + wakeup(&kdlog_bg_trace); } return ret; } @@ -2219,7 +2993,9 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) if (name[0] == KERN_KDGETENTROPY || name[0] == KERN_KDWRITETR || + name[0] == KERN_KDWRITETR_V3 || name[0] == KERN_KDWRITEMAP || + name[0] == KERN_KDWRITEMAP_V3 || name[0] == KERN_KDEFLAGS || name[0] == KERN_KDDFLAGS || name[0] == KERN_KDENABLE || @@ -2281,7 +3057,6 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) ret = EINVAL; } goto out; - case KERN_KDGETENTROPY: { /* Obsolescent - just fake with a random buffer */ char *buffer = (char *) kalloc(size); @@ -2301,6 +3076,43 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) kdlog_bg_trace = FALSE; kdbg_disable_bg_trace(); goto out; + + case KERN_KDWAIT_BG_TRACE_RESET: + if (!kdlog_bg_trace){ + ret = EINVAL; + goto out; + } + wait_result_t wait_result = assert_wait(&kdlog_bg_trace, THREAD_ABORTSAFE); + lck_mtx_unlock(kd_trace_mtx_sysctl); + if (wait_result == THREAD_WAITING) + wait_result = thread_block(THREAD_CONTINUE_NULL); + if (wait_result == THREAD_INTERRUPTED) + ret = EINTR; + lck_mtx_lock(kd_trace_mtx_sysctl); + goto out; + + case KERN_KDSET_BG_TYPEFILTER: + if (!kdlog_bg_trace || !kdlog_bg_trace_running){ + ret = EINVAL; + goto out; + } + + if (size != KDBG_TYPEFILTER_BITMAP_SIZE) { + ret = EINVAL; + goto out; + } + + if ((kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) == 0){ + if ((ret = kdbg_enable_typefilter())) + goto out; + } + + if (copyin(where, type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE)) { + ret = EINVAL; + goto out; + } + kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_TYPEFILTER_CHANGED, type_filter_bitmap); + goto out; } if ((curproc = current_proc()) != NULL) @@ -2406,10 +3218,12 @@ kdbg_control(int *name, 
u_int namelen, user_addr_t where, size_t *sizep) break; case KERN_KDREADTR: - ret = kdbg_read(where, sizep, NULL, NULL); + ret = kdbg_read(where, sizep, NULL, NULL, RAW_VERSION1); break; case KERN_KDWRITETR: + case KERN_KDWRITETR_V3: case KERN_KDWRITEMAP: + case KERN_KDWRITEMAP_V3: { struct vfs_context context; struct fileproc *fp; @@ -2417,9 +3231,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) vnode_t vp; int fd; - kdbg_disable_bg_trace(); - - if (name[0] == KERN_KDWRITETR) { + if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) { int s; int wait_result = THREAD_AWAKENED; u_int64_t abstime; @@ -2472,17 +3284,23 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) if ((ret = vnode_getwithref(vp)) == 0) { RAW_file_offset = fp->f_fglob->fg_offset; - if (name[0] == KERN_KDWRITETR) { + if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) { number = nkdbufs * sizeof(kd_buf); KERNEL_DEBUG_CONSTANT(TRACE_WRITING_EVENTS | DBG_FUNC_START, 0, 0, 0, 0, 0); - ret = kdbg_read(0, &number, vp, &context); + if (name[0] == KERN_KDWRITETR_V3) + ret = kdbg_read(0, &number, vp, &context, RAW_VERSION3); + else + ret = kdbg_read(0, &number, vp, &context, RAW_VERSION1); KERNEL_DEBUG_CONSTANT(TRACE_WRITING_EVENTS | DBG_FUNC_END, number, 0, 0, 0, 0); *sizep = number; } else { number = kd_mapcount * sizeof(kd_threadmap); - kdbg_readthrmap(0, &number, vp, &context); + if (name[0] == KERN_KDWRITEMAP_V3) + kdbg_readthrmap_v3(0, &number, fd); + else + kdbg_readthrmap(0, &number, vp, &context); } fp->f_fglob->fg_offset = RAW_file_offset; vnode_put(vp); @@ -2597,16 +3415,16 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) case KERN_KDSET_TYPEFILTER: kdbg_disable_bg_trace(); - if ((kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) == 0){ - if ((ret = kdbg_enable_typefilter())) - break; - } - if (size != KDBG_TYPEFILTER_BITMAP_SIZE) { ret = EINVAL; break; } + if ((kd_ctrl_page.kdebug_flags & 
KDBG_TYPEFILTER_CHECK) == 0){ + if ((ret = kdbg_enable_typefilter())) + break; + } + if (copyin(where, type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE)) { ret = EINVAL; break; @@ -2630,7 +3448,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) * move through the lists w/o use of any locks */ int -kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx) +kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx, uint32_t file_version) { unsigned int count; unsigned int cpu, min_cpu; @@ -2650,6 +3468,7 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx) boolean_t lostevents = FALSE; boolean_t out_of_events = FALSE; + assert(number); count = *number/sizeof(kd_buf); *number = 0; @@ -2805,12 +3624,23 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx) break; } if (tempbuf_number) { + if (file_version == RAW_VERSION3) { + if ( !(kdbg_write_v3_event_chunk_header(buffer, V3_RAW_EVENTS, (tempbuf_number * sizeof(kd_buf)), vp, ctx))) { + error = EFAULT; + goto check_error; + } + if (buffer) + buffer += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t)); + assert(count >= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t))); + count -= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t)); + *number += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t)); + } if (vp) { - error = vn_rdwr(UIO_WRITE, vp, (caddr_t)kdcopybuf, tempbuf_number * sizeof(kd_buf), RAW_file_offset, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); - - RAW_file_offset += (tempbuf_number * sizeof(kd_buf)); + size_t write_size = tempbuf_number * sizeof(kd_buf); + error = kdbg_write_to_vnode((caddr_t)kdcopybuf, write_size, vp, ctx, RAW_file_offset); + if (!error) + RAW_file_offset += write_size; if (RAW_file_written >= RAW_FLUSH_SIZE) { cluster_push(vp, 0); @@ -2821,6 +3651,7 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx) error 
= copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf)); buffer += (tempbuf_number * sizeof(kd_buf)); } +check_error: if (error) { *number = 0; error = EINVAL; @@ -2852,31 +3683,33 @@ unsigned char *getProcName(struct proc *proc) { } -#define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex) -#define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex) -#if defined(__i386__) || defined (__x86_64__) -#define TRAP_DEBUGGER __asm__ volatile("int3"); -#else -#error No TRAP_DEBUGGER definition for this architecture -#endif - -#define SANE_TRACEBUF_SIZE (8 * 1024 * 1024) -#define SANE_BOOTPROFILE_TRACEBUF_SIZE (64 * 1024 * 1024) - -/* Initialize the mutex governing access to the stack snapshot subsystem */ -__private_extern__ void -stackshot_lock_init( void ) +static int +stackshot_kern_return_to_bsd_error(kern_return_t kr) { - stackshot_subsys_lck_grp_attr = lck_grp_attr_alloc_init(); - - stackshot_subsys_lck_grp = lck_grp_alloc_init("stackshot_subsys_lock", stackshot_subsys_lck_grp_attr); - - stackshot_subsys_lck_attr = lck_attr_alloc_init(); - - lck_mtx_init(&stackshot_subsys_mutex, stackshot_subsys_lck_grp, stackshot_subsys_lck_attr); + switch (kr) { + case KERN_SUCCESS: + return 0; + case KERN_RESOURCE_SHORTAGE: + return ENOMEM; + case KERN_NO_SPACE: + return ENOSPC; + case KERN_NO_ACCESS: + return EPERM; + case KERN_MEMORY_PRESENT: + return EEXIST; + case KERN_NOT_SUPPORTED: + return ENOTSUP; + case KERN_NOT_IN_SET: + return ENOENT; + default: + return EINVAL; + } } + /* + * DEPRECATION WARNING: THIS SYSCALL IS BEING REPLACED WITH SYS_stack_snapshot_with_config and SYS_microstackshot. + * * stack_snapshot: Obtains a coherent set of stack traces for all threads * on the system, tracing both kernel and user stacks * where available. 
Uses machine specific trace routines @@ -2901,208 +3734,147 @@ stackshot_lock_init( void ) int stack_snapshot(struct proc *p, register struct stack_snapshot_args *uap, int32_t *retval) { int error = 0; + kern_return_t kr; if ((error = suser(kauth_cred_get(), &p->p_acflag))) return(error); - return stack_snapshot2(uap->pid, uap->tracebuf, uap->tracebuf_size, - uap->flags, uap->dispatch_offset, retval); + kr = stack_snapshot2(uap->pid, uap->tracebuf, uap->tracebuf_size, uap->flags, retval); + return stackshot_kern_return_to_bsd_error(kr); } -int -stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytesTraced) -{ - int error = 0; - boolean_t istate; - - if ((buf == NULL) || (size <= 0) || (bytesTraced == NULL)) { - return -1; - } - - /* cap in individual stackshot to SANE_TRACEBUF_SIZE */ - if (size > SANE_TRACEBUF_SIZE) { - size = SANE_TRACEBUF_SIZE; - } - -/* Serialize tracing */ - STACKSHOT_SUBSYS_LOCK(); - istate = ml_set_interrupts_enabled(FALSE); - - -/* Preload trace parameters*/ - kdp_snapshot_preflight(pid, buf, size, flags, 0); - -/* Trap to the debugger to obtain a coherent stack snapshot; this populates - * the trace buffer +/* + * stack_snapshot_with_config: Obtains a coherent set of stack traces for specified threads on the system, + * tracing both kernel and user stacks where available. Allocates a buffer from the + * kernel and maps the buffer into the calling task's address space.
+ * + * Inputs: uap->stackshot_config_version - version of the stackshot config that is being passed + * uap->stackshot_config - pointer to the stackshot config + * uap->stackshot_config_size- size of the stackshot config being passed + * Outputs: EINVAL if there is a problem with the arguments + * EFAULT if we failed to copy in the arguments successfully + * EPERM if the caller is not privileged + * ENOTSUP if the caller is passing a version of arguments that is not supported by the kernel + * (indicates libsyscall:kernel mismatch) or if the caller is requesting unsupported flags + * ENOENT if the caller is requesting an existing buffer that doesn't exist or if the + * requested PID isn't found + * ENOMEM if the kernel is unable to allocate enough memory to serve the request + * ENOSPC if there isn't enough space in the caller's address space to remap the buffer + * ESRCH if the target PID isn't found + * returns KERN_SUCCESS on success */ - TRAP_DEBUGGER; - - ml_set_interrupts_enabled(istate); - - *bytesTraced = kdp_stack_snapshot_bytes_traced(); - - error = kdp_stack_snapshot_geterror(); - - STACKSHOT_SUBSYS_UNLOCK(); - - return error; - -} - int -stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset, int32_t *retval) +stack_snapshot_with_config(struct proc *p, struct stack_snapshot_with_config_args *uap, __unused int *retval) { - boolean_t istate; int error = 0; - unsigned bytesTraced = 0; + kern_return_t kr; -#if CONFIG_TELEMETRY - if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) { - telemetry_global_ctl(1); - *retval = 0; - return (0); - } else if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) { - telemetry_global_ctl(0); - *retval = 0; - return (0); - } - - if (flags & STACKSHOT_WINDOWED_MICROSTACKSHOTS_ENABLE) { - error = telemetry_enable_window(); - - if (error != KERN_SUCCESS) { - /* We are probably out of memory */ - *retval = -1; - return ENOMEM; - } + if ((error = suser(kauth_cred_get(),
&p->p_acflag))) + return(error); - *retval = 0; - return (0); - } else if (flags & STACKSHOT_WINDOWED_MICROSTACKSHOTS_DISABLE) { - telemetry_disable_window(); - *retval = 0; - return (0); + if((void*)uap->stackshot_config == NULL) { + return EINVAL; } -#endif - *retval = -1; -/* Serialize tracing */ - STACKSHOT_SUBSYS_LOCK(); - - if (tracebuf_size <= 0) { - error = EINVAL; - goto error_exit; + switch (uap->stackshot_config_version) { + case STACKSHOT_CONFIG_TYPE: + if (uap->stackshot_config_size != sizeof(stackshot_config_t)) { + return EINVAL; + } + stackshot_config_t config; + error = copyin(uap->stackshot_config, &config, sizeof(stackshot_config_t)); + if (error != KERN_SUCCESS) + { + return EFAULT; + } + kr = kern_stack_snapshot_internal(uap->stackshot_config_version, &config, sizeof(stackshot_config_t), TRUE); + return stackshot_kern_return_to_bsd_error(kr); + default: + return ENOTSUP; } +} #if CONFIG_TELEMETRY - if (flags & STACKSHOT_GET_MICROSTACKSHOT) { - - if (tracebuf_size > SANE_TRACEBUF_SIZE) { - error = EINVAL; - goto error_exit; - } - - bytesTraced = tracebuf_size; - error = telemetry_gather(tracebuf, &bytesTraced, - (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? 
TRUE : FALSE); - if (error == KERN_NO_SPACE) { - error = ENOSPC; - } - - *retval = (int)bytesTraced; - goto error_exit; - } - - if (flags & STACKSHOT_GET_WINDOWED_MICROSTACKSHOTS) { - - if (tracebuf_size > SANE_TRACEBUF_SIZE) { - error = EINVAL; - goto error_exit; - } - - bytesTraced = tracebuf_size; - error = telemetry_gather_windowed(tracebuf, &bytesTraced); - if (error == KERN_NO_SPACE) { - error = ENOSPC; - } - - *retval = (int)bytesTraced; - goto error_exit; - } - - if (flags & STACKSHOT_GET_BOOT_PROFILE) { - - if (tracebuf_size > SANE_BOOTPROFILE_TRACEBUF_SIZE) { - error = EINVAL; - goto error_exit; - } - - bytesTraced = tracebuf_size; - error = bootprofile_gather(tracebuf, &bytesTraced); - if (error == KERN_NO_SPACE) { - error = ENOSPC; - } - - *retval = (int)bytesTraced; - goto error_exit; - } -#endif - - if (tracebuf_size > SANE_TRACEBUF_SIZE) { - error = EINVAL; - goto error_exit; - } - - assert(stackshot_snapbuf == NULL); - if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&stackshot_snapbuf, tracebuf_size) != KERN_SUCCESS) { - error = ENOMEM; - goto error_exit; - } +/* + * microstackshot: Catch all system call for microstackshot related operations, including + * enabling/disabling both global and windowed microstackshots as well + * as retrieving windowed or global stackshots and the boot profile. + * Inputs: uap->tracebuf - address of the user space destination + * buffer + * uap->tracebuf_size - size of the user space trace buffer + * uap->flags - various flags + * Outputs: EPERM if the caller is not privileged + * EINVAL if the supplied mss_args is NULL, mss_args.tracebuf is NULL or mss_args.tracebuf_size is not sane + * ENOMEM if we don't have enough memory to satisfy the request + * *retval contains the number of bytes traced, if successful + * and -1 otherwise. 
+ */ +int +microstackshot(struct proc *p, struct microstackshot_args *uap, int32_t *retval) +{ + int error = 0; + kern_return_t kr; - if (panic_active()) { - error = ENOMEM; - goto error_exit; - } + if ((error = suser(kauth_cred_get(), &p->p_acflag))) + return(error); - istate = ml_set_interrupts_enabled(FALSE); -/* Preload trace parameters*/ - kdp_snapshot_preflight(pid, stackshot_snapbuf, tracebuf_size, flags, dispatch_offset); + kr = stack_microstackshot(uap->tracebuf, uap->tracebuf_size, uap->flags, retval); + return stackshot_kern_return_to_bsd_error(kr); +} +#endif /* CONFIG_TELEMETRY */ -/* Trap to the debugger to obtain a coherent stack snapshot; this populates - * the trace buffer +/* + * kern_stack_snapshot_with_reason: Obtains a coherent set of stack traces for specified threads on the system, + * tracing both kernel and user stacks where available. Allocates a buffer from the + * kernel and stores the address of this buffer. + * + * Inputs: reason - the reason for triggering a stackshot (unused at the moment, but in the + * future will be saved in the stackshot) + * Outputs: EINVAL/ENOTSUP if there is a problem with the arguments + * EPERM if the caller doesn't pass at least one KERNEL stackshot flag + * ENOMEM if the kernel is unable to allocate enough memory to serve the request + * ESRCH if the target PID isn't found + * returns KERN_SUCCESS on success */ +int +kern_stack_snapshot_with_reason(__unused char *reason) +{ + stackshot_config_t config; + kern_return_t kr; + + config.sc_pid = -1; + config.sc_flags = (STACKSHOT_SAVE_LOADINFO | STACKSHOT_GET_GLOBAL_MEM_STATS | STACKSHOT_SAVE_IN_KERNEL_BUFFER | + STACKSHOT_KCDATA_FORMAT); + config.sc_since_timestamp = 0; + config.sc_out_buffer_addr = 0; + config.sc_out_size_addr = 0; + + kr = kern_stack_snapshot_internal(STACKSHOT_CONFIG_TYPE, &config, sizeof(stackshot_config_t), FALSE); + return stackshot_kern_return_to_bsd_error(kr); +} - TRAP_DEBUGGER; - - ml_set_interrupts_enabled(istate); - - bytesTraced =
kdp_stack_snapshot_bytes_traced(); - - if (bytesTraced > 0) { - if ((error = copyout(stackshot_snapbuf, tracebuf, - ((bytesTraced < tracebuf_size) ? - bytesTraced : tracebuf_size)))) - goto error_exit; - *retval = bytesTraced; - } - else { - error = ENOENT; - goto error_exit; - } +/* + * stack_snapshot_from_kernel: Stackshot function for kernel consumers who have their own buffer. + * + * Inputs: pid - the PID to be traced or -1 for the whole system + * buf - a pointer to the buffer where the stackshot should be written + * size - the size of the buffer + * flags - flags to be passed to the stackshot + * *bytes_traced - a pointer to be filled with the length of the stackshot + * Outputs: -1 if there is a problem with the arguments + * the error returned by the stackshot code otherwise + */ +int +stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytes_traced) +{ + kern_return_t kr; - error = kdp_stack_snapshot_geterror(); - if (error == -1) { - error = ENOSPC; - *retval = -1; - goto error_exit; + kr = stack_snapshot_from_kernel_internal(pid, buf, size, flags, bytes_traced); + if (kr == KERN_FAILURE) { + return -1; } -error_exit: - if (stackshot_snapbuf != NULL) - kmem_free(kernel_map, (vm_offset_t) stackshot_snapbuf, tracebuf_size); - stackshot_snapbuf = NULL; - STACKSHOT_SUBSYS_UNLOCK(); - return error; + return kr; } void @@ -3114,7 +3886,7 @@ start_kern_tracing(unsigned int new_nkdbufs, boolean_t need_map) nkdbufs = kdbg_set_nkdbufs(new_nkdbufs); kdbg_lock_init(); - kernel_debug_string("start_kern_tracing"); + kernel_debug_string_simple("start_kern_tracing"); if (0 == kdbg_reinit(TRUE)) { @@ -3169,7 +3941,8 @@ start_kern_tracing_with_typefilter(unsigned int new_nkdbufs, /* setup the typefiltering */ if (0 == kdbg_enable_typefilter()) - setbit(type_filter_bitmap, typefilter & (CSC_MASK >> CSC_OFFSET)); + setbit(type_filter_bitmap, + typefilter & (KDBG_CSC_MASK >> KDBG_CSC_OFFSET)); } void @@ -3214,7 +3987,7 @@ 
kdbg_dump_trace_to_file(const char *filename) kdbg_readthrmap(0, &number, vp, ctx); number = nkdbufs*sizeof(kd_buf); - kdbg_read(0, &number, vp, ctx); + kdbg_read(0, &number, vp, ctx, RAW_VERSION1); vnode_close(vp, FWRITE, ctx); @@ -3309,7 +4082,7 @@ kdebug_serial_print( uint64_t delta = timestamp - kd_last_timstamp; uint64_t delta_us = delta / NSEC_PER_USEC; uint64_t delta_us_tenth = (delta % NSEC_PER_USEC) / 100; - uint32_t event_id = debugid & DBG_FUNC_MASK; + uint32_t event_id = debugid & KDBG_EVENTID_MASK; const char *command; const char *bra; const char *ket; @@ -3371,7 +4144,7 @@ kdebug_serial_print( /* threadid, cpu and command name */ if (threadid == (uintptr_t)thread_tid(current_thread()) && current_proc() && - current_proc()->p_comm) + current_proc()->p_comm[0]) command = current_proc()->p_comm; else command = "-"; diff --git a/bsd/kern/kern_aio.c b/bsd/kern/kern_aio.c index 2513122e6..44c956e9b 100644 --- a/bsd/kern/kern_aio.c +++ b/bsd/kern/kern_aio.c @@ -63,6 +63,7 @@ #include #include +#include #include #include #include @@ -123,7 +124,7 @@ typedef struct aio_workq { TAILQ_HEAD(, aio_workq_entry) aioq_entries; int aioq_count; lck_mtx_t aioq_mtx; - wait_queue_t aioq_waitq; + struct waitq aioq_waitq; } *aio_workq_t; #define AIO_NUM_WORK_QUEUES 1 @@ -303,7 +304,7 @@ aio_workq_init(aio_workq_t wq) TAILQ_INIT(&wq->aioq_entries); wq->aioq_count = 0; lck_mtx_init(&wq->aioq_mtx, aio_queue_lock_grp, aio_lock_attr); - wq->aioq_waitq = wait_queue_alloc(SYNC_POLICY_FIFO); + waitq_init(&wq->aioq_waitq, SYNC_POLICY_FIFO|SYNC_POLICY_DISABLE_IRQ); } @@ -1393,7 +1394,8 @@ aio_enqueue_work( proc_t procp, aio_workq_entry *entryp, int proc_locked) /* And work queue */ aio_workq_lock_spin(queue); aio_workq_add_entry_locked(queue, entryp); - wait_queue_wakeup_one(queue->aioq_waitq, queue, THREAD_AWAKENED, -1); + waitq_wakeup64_one(&queue->aioq_waitq, CAST_EVENT64_T(queue), + THREAD_AWAKENED, WAITQ_ALL_PRIORITIES); aio_workq_unlock(queue); if (proc_locked == 0) { @@ 
-1824,7 +1826,7 @@ aio_get_some_work( void ) nowork: /* We will wake up when someone enqueues something */ - wait_queue_assert_wait(queue->aioq_waitq, queue, THREAD_UNINT, 0); + waitq_assert_wait64(&queue->aioq_waitq, CAST_EVENT64_T(queue), THREAD_UNINT, 0); aio_workq_unlock(queue); thread_block( (thread_continue_t)aio_work_thread ); diff --git a/bsd/kern/kern_control.c b/bsd/kern/kern_control.c index 8e5b0150b..ebda4203d 100644 --- a/bsd/kern/kern_control.c +++ b/bsd/kern/kern_control.c @@ -50,12 +50,50 @@ #include #include #include +#include #include #include #include +struct kctl { + TAILQ_ENTRY(kctl) next; /* controller chain */ + kern_ctl_ref kctlref; + + /* controller information provided when registering */ + char name[MAX_KCTL_NAME]; /* unique identifier */ + u_int32_t id; + u_int32_t reg_unit; + + /* misc communication information */ + u_int32_t flags; /* support flags */ + u_int32_t recvbufsize; /* request more than the default buffer size */ + u_int32_t sendbufsize; /* request more than the default buffer size */ + + /* Dispatch functions */ + ctl_connect_func connect; /* Make contact */ + ctl_disconnect_func disconnect; /* Break contact */ + ctl_send_func send; /* Send data to nke */ + ctl_send_list_func send_list; /* Send list of packets */ + ctl_setopt_func setopt; /* set kctl configuration */ + ctl_getopt_func getopt; /* get kctl configuration */ + ctl_rcvd_func rcvd; /* Notify nke when client reads data */ + + TAILQ_HEAD(, ctl_cb) kcb_head; + u_int32_t lastunit; +}; + +struct ctl_cb { + TAILQ_ENTRY(ctl_cb) next; /* controller chain */ + lck_mtx_t *mtx; + struct socket *so; /* controlling socket */ + struct kctl *kctl; /* back pointer to controller */ + void *userdata; + u_int32_t unit; + u_int32_t usecount; +}; + #ifndef ROUNDUP64 #define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t)) #endif @@ -84,7 +122,6 @@ static lck_mtx_t *ctl_mtx; /* all the controllers are chained */ TAILQ_HEAD(kctl_list, kctl) ctl_head; - static int ctl_attach(struct socket 
*, int, struct proc *); static int ctl_detach(struct socket *); static int ctl_sofreelastref(struct socket *so); @@ -103,7 +140,8 @@ static int ctl_usr_rcvd(struct socket *so, int flags); static struct kctl *ctl_find_by_name(const char *); static struct kctl *ctl_find_by_id_unit(u_int32_t id, u_int32_t unit); -static struct socket *kcb_find_socket(struct kctl *, u_int32_t unit); +static struct socket *kcb_find_socket(kern_ctl_ref kctlref, u_int32_t unit, + u_int32_t *); static struct ctl_cb *kcb_find(struct kctl *, u_int32_t unit); static void ctl_post_msg(u_int32_t event_code, u_int32_t id); @@ -154,7 +192,6 @@ __private_extern__ int kctl_reg_list SYSCTL_HANDLER_ARGS; __private_extern__ int kctl_pcblist SYSCTL_HANDLER_ARGS; __private_extern__ int kctl_getstat SYSCTL_HANDLER_ARGS; -static int kctl_proto_count = (sizeof (kctlsw) / sizeof (struct protosw)); SYSCTL_NODE(_net_systm, OID_AUTO, kctl, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Kernel control family"); @@ -184,6 +221,19 @@ u_int32_t ctl_debug = 0; SYSCTL_INT(_net_systm_kctl, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED, &ctl_debug, 0, ""); +#define KCTL_TBL_INC 16 + +static uintptr_t kctl_tbl_size = 0; +static u_int32_t kctl_tbl_growing = 0; +static uintptr_t kctl_tbl_count = 0; +static struct kctl **kctl_table = NULL; +static uintptr_t kctl_ref_gencnt = 0; + +static void kctl_tbl_grow(void); +static kern_ctl_ref kctl_make_ref(struct kctl *kctl); +static void kctl_delete_ref(kern_ctl_ref); +static struct kctl *kctl_from_ref(kern_ctl_ref); + /* * Install the protosw's for the Kernel Control manager. 
*/ @@ -192,6 +242,7 @@ kern_control_init(struct domain *dp) { struct protosw *pr; int i; + int kctl_proto_count = (sizeof (kctlsw) / sizeof (struct protosw)); VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); VERIFY(dp == systemdomain); @@ -307,7 +358,6 @@ ctl_detach(struct socket *so) return (0); } - static int ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { @@ -419,7 +469,7 @@ ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p) soisconnecting(so); socket_unlock(so, 0); - error = (*kctl->connect)(kctl, &sa, &kcb->userdata); + error = (*kctl->connect)(kctl->kctlref, &sa, &kcb->userdata); socket_lock(so, 0); if (error) goto end; @@ -429,7 +479,7 @@ ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p) end: if (error && kctl->disconnect) { socket_unlock(so, 0); - (*kctl->disconnect)(kctl, kcb->unit, kcb->userdata); + (*kctl->disconnect)(kctl->kctlref, kcb->unit, kcb->userdata); socket_lock(so, 0); } done: @@ -457,7 +507,8 @@ ctl_disconnect(struct socket *so) if (kctl && kctl->disconnect) { socket_unlock(so, 0); - (*kctl->disconnect)(kctl, kcb->unit, kcb->userdata); + (*kctl->disconnect)(kctl->kctlref, kcb->unit, + kcb->userdata); socket_lock(so, 0); } @@ -547,7 +598,7 @@ ctl_usr_rcvd(struct socket *so, int flags) if (kctl->rcvd) { socket_unlock(so, 0); - (*kctl->rcvd)(kctl, kcb->unit, kcb->userdata, flags); + (*kctl->rcvd)(kctl->kctlref, kcb->unit, kcb->userdata, flags); socket_lock(so, 0); } @@ -578,7 +629,8 @@ ctl_send(struct socket *so, int flags, struct mbuf *m, if (error == 0 && kctl->send) { so_tc_update_stats(m, so, m_get_service_class(m)); socket_unlock(so, 0); - error = (*kctl->send)(kctl, kcb->unit, kcb->userdata, m, flags); + error = (*kctl->send)(kctl->kctlref, kcb->unit, kcb->userdata, + m, flags); socket_lock(so, 0); } else { m_freem(m); @@ -615,8 +667,8 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m, so_tc_update_stats(nxt, so, m_get_service_class(nxt)); socket_unlock(so, 0); - error = 
(*kctl->send_list)(kctl, kcb->unit, kcb->userdata, m, - flags); + error = (*kctl->send_list)(kctl->kctlref, kcb->unit, + kcb->userdata, m, flags); socket_lock(so, 0); } else if (error == 0 && kctl->send) { while (m != NULL && error == 0) { @@ -625,8 +677,8 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m, m->m_nextpkt = NULL; so_tc_update_stats(m, so, m_get_service_class(m)); socket_unlock(so, 0); - error = (*kctl->send)(kctl, kcb->unit, kcb->userdata, m, - flags); + error = (*kctl->send)(kctl->kctlref, kcb->unit, + kcb->userdata, m, flags); socket_lock(so, 0); m = nextpkt; } @@ -643,27 +695,27 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m, } static errno_t -ctl_rcvbspace(struct kctl *kctl, struct socket *so, u_int32_t datasize, - u_int32_t flags) +ctl_rcvbspace(struct socket *so, u_int32_t datasize, + u_int32_t kctlflags, u_int32_t flags) { struct sockbuf *sb = &so->so_rcv; u_int32_t space = sbspace(sb); errno_t error; - if ((kctl->flags & CTL_FLAG_REG_CRIT) == 0) { + if ((kctlflags & CTL_FLAG_REG_CRIT) == 0) { if ((u_int32_t) space >= datasize) error = 0; else error = ENOBUFS; } else if ((flags & CTL_DATA_CRIT) == 0) { - /* - * Reserve 25% for critical messages - */ - if (space < (sb->sb_hiwat >> 2) || - space < datasize) - error = ENOBUFS; - else - error = 0; + /* + * Reserve 25% for critical messages + */ + if (space < (sb->sb_hiwat >> 2) || + space < datasize) + error = ENOBUFS; + else + error = 0; } else { u_int32_t autorcvbuf_max; @@ -688,10 +740,18 @@ ctl_rcvbspace(struct kctl *kctl, struct socket *so, u_int32_t datasize, if (sb->sb_hiwat > ctl_autorcvbuf_high) ctl_autorcvbuf_high = sb->sb_hiwat; + /* + * A final check + */ + if ((u_int32_t) sbspace(sb) >= datasize) { + error = 0; + } else { + error = ENOBUFS; + } + if (ctl_debug) - printf("%s - grown to %d\n", - __func__, sb->sb_hiwat); - error = 0; + printf("%s - grown to %d error %d\n", + __func__, sb->sb_hiwat, error); } else { error = ENOBUFS; } @@ -703,22 +763,20 @@ 
ctl_rcvbspace(struct kctl *kctl, struct socket *so, u_int32_t datasize, } errno_t -ctl_enqueuembuf(void *kctlref, u_int32_t unit, struct mbuf *m, u_int32_t flags) +ctl_enqueuembuf(kern_ctl_ref kctlref, u_int32_t unit, struct mbuf *m, + u_int32_t flags) { struct socket *so; errno_t error = 0; - struct kctl *kctl = (struct kctl *)kctlref; int len = m->m_pkthdr.len; + u_int32_t kctlflags; - if (kctl == NULL) - return (EINVAL); - - so = kcb_find_socket(kctl, unit); - - if (so == NULL) + so = kcb_find_socket(kctlref, unit, &kctlflags); + if (so == NULL) { return (EINVAL); + } - if (ctl_rcvbspace(kctl, so, len, flags) != 0) { + if (ctl_rcvbspace(so, len, kctlflags, flags) != 0) { error = ENOBUFS; OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock); goto bye; @@ -768,21 +826,26 @@ ctl_enqueuembuf_list(void *kctlref, u_int32_t unit, struct mbuf *m_list, { struct socket *so = NULL; errno_t error = 0; - struct kctl *kctl = (struct kctl *)kctlref; struct mbuf *m, *nextpkt; int needwakeup = 0; int len; + u_int32_t kctlflags; /* * Need to point the beginning of the list in case of early exit */ m = m_list; - if (kctl == NULL) { + /* + * kcb_find_socket takes the socket lock with a reference + */ + so = kcb_find_socket(kctlref, unit, &kctlflags); + if (so == NULL) { error = EINVAL; goto done; } - if (kctl->flags & CTL_FLAG_REG_SOCK_STREAM) { + + if (kctlflags & CTL_FLAG_REG_SOCK_STREAM) { error = EOPNOTSUPP; goto done; } @@ -790,14 +853,6 @@ ctl_enqueuembuf_list(void *kctlref, u_int32_t unit, struct mbuf *m_list, error = EINVAL; goto done; } - /* - * kcb_find_socket takes the socket lock with a reference - */ - so = kcb_find_socket(kctl, unit); - if (so == NULL) { - error = EINVAL; - goto done; - } for (m = m_list; m != NULL; m = nextpkt) { nextpkt = m->m_nextpkt; @@ -811,7 +866,7 @@ ctl_enqueuembuf_list(void *kctlref, u_int32_t unit, struct mbuf *m_list, * so it's not reliable from a data standpoint */ len = m_space(m); - if (ctl_rcvbspace(kctl, so, len, flags) != 0) 
{ + if (ctl_rcvbspace(so, len, kctlflags, flags) != 0) { error = ENOBUFS; OSIncrementAtomic64( (SInt64 *)&kctlstat.kcs_enqueue_fullsock); @@ -879,19 +934,17 @@ ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len, struct socket *so; struct mbuf *m; errno_t error = 0; - struct kctl *kctl = (struct kctl *)kctlref; unsigned int num_needed; struct mbuf *n; size_t curlen = 0; + u_int32_t kctlflags; - if (kctlref == NULL) - return (EINVAL); - - so = kcb_find_socket(kctl, unit); - if (so == NULL) + so = kcb_find_socket(kctlref, unit, &kctlflags); + if (so == NULL) { return (EINVAL); + } - if (ctl_rcvbspace(kctl, so, len, flags) != 0) { + if (ctl_rcvbspace(so, len, kctlflags, flags) != 0) { error = ENOBUFS; OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock); goto bye; @@ -940,20 +993,50 @@ ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len, return (error); } +errno_t +ctl_getenqueuepacketcount(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *pcnt) +{ + struct socket *so; + u_int32_t cnt; + struct mbuf *m1; + + if (pcnt == NULL) + return (EINVAL); + + so = kcb_find_socket(kctlref, unit, NULL); + if (so == NULL) { + return (EINVAL); + } + + cnt = 0; + m1 = so->so_rcv.sb_mb; + while (m1 != NULL) { + if (m1->m_type == MT_DATA || + m1->m_type == MT_HEADER || + m1->m_type == MT_OOBDATA) + cnt += 1; + m1 = m1->m_nextpkt; + } + *pcnt = cnt; + + socket_unlock(so, 1); + + return (0); +} errno_t ctl_getenqueuespace(kern_ctl_ref kctlref, u_int32_t unit, size_t *space) { - struct kctl *kctl = (struct kctl *)kctlref; struct socket *so; long avail; - if (kctlref == NULL || space == NULL) + if (space == NULL) return (EINVAL); - so = kcb_find_socket(kctl, unit); - if (so == NULL) + so = kcb_find_socket(kctlref, unit, NULL); + if (so == NULL) { return (EINVAL); + } avail = sbspace(&so->so_rcv); *space = (avail < 0) ? 
0 : avail; @@ -966,15 +1049,15 @@ errno_t ctl_getenqueuereadable(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *difference) { - struct kctl *kctl = (struct kctl *)kctlref; struct socket *so; - if (kctlref == NULL || difference == NULL) + if (difference == NULL) return (EINVAL); - so = kcb_find_socket(kctl, unit); - if (so == NULL) + so = kcb_find_socket(kctlref, unit, NULL); + if (so == NULL) { return (EINVAL); + } if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat) { *difference = 0; @@ -1017,16 +1100,13 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt) if (data == NULL) return (ENOMEM); error = sooptcopyin(sopt, data, - sopt->sopt_valsize, - sopt->sopt_valsize); + sopt->sopt_valsize, sopt->sopt_valsize); } if (error == 0) { socket_unlock(so, 0); - error = (*kctl->setopt)(kcb->kctl, kcb->unit, - kcb->userdata, - sopt->sopt_name, - data, - sopt->sopt_valsize); + error = (*kctl->setopt)(kctl->kctlref, + kcb->unit, kcb->userdata, sopt->sopt_name, + data, sopt->sopt_valsize); socket_lock(so, 0); } FREE(data, M_TEMP); @@ -1050,7 +1130,7 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt) } len = sopt->sopt_valsize; socket_unlock(so, 0); - error = (*kctl->getopt)(kcb->kctl, kcb->unit, + error = (*kctl->getopt)(kctl->kctlref, kcb->unit, kcb->userdata, sopt->sopt_name, data, &len); if (data != NULL && len > sopt->sopt_valsize) @@ -1126,6 +1206,148 @@ ctl_ioctl(struct socket *so, u_long cmd, caddr_t data, return (error); } +static void +kctl_tbl_grow() +{ + struct kctl **new_table; + uintptr_t new_size; + + lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED); + + while (kctl_tbl_growing) { + /* Another thread is allocating */ + (void) msleep((caddr_t) &kctl_tbl_growing, ctl_mtx, + PSOCK | PCATCH, "kctl_tbl_growing", 0); + } + /* Another thread grew the table */ + if (kctl_table != NULL && kctl_tbl_count < kctl_tbl_size) + return; + + /* Verify we have a sane size */ + if (kctl_tbl_size + KCTL_TBL_INC >= UINT16_MAX) { + printf("%s kctl_tbl_size %lu too big\n", + 
__func__, kctl_tbl_size); + return; + } + kctl_tbl_growing = 1; + + new_size = kctl_tbl_size + KCTL_TBL_INC; + + lck_mtx_unlock(ctl_mtx); + new_table = _MALLOC(sizeof(struct kctl *) * new_size, + M_TEMP, M_WAIT | M_ZERO); + lck_mtx_lock(ctl_mtx); + + if (new_table != NULL) { + if (kctl_table != NULL) { + bcopy(kctl_table, new_table, + kctl_tbl_size * sizeof(struct kctl *)); + + _FREE(kctl_table, M_TEMP); + } + kctl_table = new_table; + kctl_tbl_size = new_size; + } + kctl_tbl_growing = 0; + wakeup((caddr_t)&kctl_tbl_growing); /* wake threads sleeping on the grow flag */ +} + +#define KCTLREF_INDEX_MASK 0x0000FFFF +#define KCTLREF_GENCNT_MASK 0xFFFF0000 +#define KCTLREF_GENCNT_SHIFT 16 + +static kern_ctl_ref +kctl_make_ref(struct kctl *kctl) +{ + uintptr_t i; + + lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED); + + if (kctl_tbl_count >= kctl_tbl_size) + kctl_tbl_grow(); + + kctl->kctlref = NULL; + for (i = 0; i < kctl_tbl_size; i++) { + if (kctl_table[i] == NULL) { + uintptr_t ref; + + /* + * Reference is index plus one + */ + kctl_ref_gencnt += 1; + + /* + * Add generation count as salt to reference to prevent + * use after deregister + */ + ref = ((kctl_ref_gencnt << KCTLREF_GENCNT_SHIFT) & + KCTLREF_GENCNT_MASK) + + ((i + 1) & KCTLREF_INDEX_MASK); + + kctl->kctlref = (void *)(ref); + kctl_table[i] = kctl; + kctl_tbl_count++; + break; + } + } + + if (kctl->kctlref == NULL) + panic("%s no space in table", __func__); + + if (ctl_debug > 0) + printf("%s %p for %p\n", + __func__, kctl->kctlref, kctl); + + return (kctl->kctlref); +} + +static void +kctl_delete_ref(kern_ctl_ref kctlref) +{ + /* + * Reference is index plus one; an empty (NULL) slot means a stale reference + */ + uintptr_t i = (((uintptr_t)kctlref) & KCTLREF_INDEX_MASK) - 1; + + lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED); + + if (i < kctl_tbl_size) { + struct kctl *kctl = kctl_table[i]; + + if (kctl != NULL && kctl->kctlref == kctlref) { + kctl_table[i] = NULL; + kctl_tbl_count--; + } else { + kctlstat.kcs_bad_kctlref++; + } + } else { + kctlstat.kcs_bad_kctlref++; + } +} + +static struct kctl * +kctl_from_ref(kern_ctl_ref kctlref) +{ + /* + *
Reference is index plus one; an empty (NULL) slot means a stale reference + */ + uintptr_t i = (((uintptr_t)kctlref) & KCTLREF_INDEX_MASK) - 1; + struct kctl *kctl = NULL; + + lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED); + + if (i >= kctl_tbl_size) { + kctlstat.kcs_bad_kctlref++; + return (NULL); + } + kctl = kctl_table[i]; + if (kctl == NULL || kctl->kctlref != kctlref) { + kctlstat.kcs_bad_kctlref++; + return (NULL); + } + return (kctl); +} + /* * Register/unregister a NKE */ @@ -1153,6 +1375,12 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) lck_mtx_lock(ctl_mtx); + if (kctl_make_ref(kctl) == NULL) { + lck_mtx_unlock(ctl_mtx); + FREE(kctl, M_TEMP); + return (ENOMEM); + } + /* * Kernel Control IDs * @@ -1169,6 +1397,7 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) /* Verify the same name isn't already registered */ if (ctl_find_by_name(userkctl->ctl_name) != NULL) { + kctl_delete_ref(kctl->kctlref); lck_mtx_unlock(ctl_mtx); FREE(kctl, M_TEMP); return (EEXIST); @@ -1212,6 +1441,7 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) } if (ctl_find_by_id_unit(userkctl->ctl_id, userkctl->ctl_unit)) { + kctl_delete_ref(kctl->kctlref); lck_mtx_unlock(ctl_mtx); FREE(kctl, M_TEMP); return (EEXIST); @@ -1263,7 +1493,7 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) lck_mtx_unlock(ctl_mtx); - *kctlref = kctl; + *kctlref = kctl->kctlref; ctl_post_msg(KEV_CTL_REGISTERED, kctl->id); return (0); @@ -1274,18 +1504,16 @@ ctl_deregister(void *kctlref) { struct kctl *kctl; - if (kctlref == NULL) /* sanity check */ - return (EINVAL); - lck_mtx_lock(ctl_mtx); - TAILQ_FOREACH(kctl, &ctl_head, next) { - if (kctl == (struct kctl *)kctlref) - break; - } - if (kctl != (struct kctl *)kctlref) { + if ((kctl = kctl_from_ref(kctlref)) == NULL) { + kctlstat.kcs_bad_kctlref++; lck_mtx_unlock(ctl_mtx); + if (ctl_debug != 0) + printf("%s invalid kctlref %p\n", + __func__, kctlref); return (EINVAL); } + if (!TAILQ_EMPTY(&kctl->kcb_head)) { lck_mtx_unlock(ctl_mtx); return
(EBUSY); @@ -1296,6 +1524,7 @@ ctl_deregister(void *kctlref) kctlstat.kcs_reg_count--; kctlstat.kcs_gencnt++; + kctl_delete_ref(kctl->kctlref); lck_mtx_unlock(ctl_mtx); ctl_post_msg(KEV_CTL_DEREGISTERED, kctl->id); @@ -1347,7 +1576,7 @@ ctl_name_by_id(u_int32_t id, char *out_name, size_t maxsize) break; } - if (kctl && kctl->name) { + if (kctl) { if (maxsize > MAX_KCTL_NAME) maxsize = MAX_KCTL_NAME; strlcpy(out_name, kctl->name, maxsize); @@ -1396,48 +1625,66 @@ kcb_find(struct kctl *kctl, u_int32_t unit) } static struct socket * -kcb_find_socket(struct kctl *kctl, u_int32_t unit) +kcb_find_socket(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *kctlflags) { struct socket *so = NULL; struct ctl_cb *kcb; void *lr_saved; + struct kctl *kctl; + int i; lr_saved = __builtin_return_address(0); lck_mtx_lock(ctl_mtx); - kcb = kcb_find(kctl, unit); - if (kcb && kcb->kctl == kctl) { - so = kcb->so; - if (so) { - kcb->usecount++; - } + /* + * First validate the kctlref + */ + if ((kctl = kctl_from_ref(kctlref)) == NULL) { + kctlstat.kcs_bad_kctlref++; + lck_mtx_unlock(ctl_mtx); + if (ctl_debug != 0) + printf("%s invalid kctlref %p\n", + __func__, kctlref); + return (NULL); } - lck_mtx_unlock(ctl_mtx); - if (so == NULL) { + kcb = kcb_find(kctl, unit); + if (kcb == NULL || kcb->kctl != kctl || (so = kcb->so) == NULL) { + lck_mtx_unlock(ctl_mtx); return (NULL); } + /* + * This prevents the socket from being closed + */ + kcb->usecount++; + /* + * Respect lock ordering: socket before ctl_mtx + */ + lck_mtx_unlock(ctl_mtx); socket_lock(so, 1); + /* + * The socket lock history is more useful if we store + * the address of the caller. 
+ */ + i = (so->next_lock_lr + SO_LCKDBG_MAX - 1) % SO_LCKDBG_MAX; + so->lock_lr[i] = lr_saved; lck_mtx_lock(ctl_mtx); - if (kcb->kctl == NULL) { + + if ((kctl = kctl_from_ref(kctlref)) == NULL || kcb->kctl == NULL) { lck_mtx_unlock(ctl_mtx); socket_unlock(so, 1); so = NULL; lck_mtx_lock(ctl_mtx); - } else { - /* - * The socket lock history is more useful if we store - * the address of the caller. - */ - int i = (so->next_lock_lr + SO_LCKDBG_MAX - 1) % SO_LCKDBG_MAX; - - so->lock_lr[i] = lr_saved; + } else if (kctlflags != NULL) { + *kctlflags = kctl->flags; } + kcb->usecount--; if (kcb->usecount == 0) wakeup((event_t)&kcb->usecount); + lck_mtx_unlock(ctl_mtx); return (so); @@ -1626,7 +1873,7 @@ kctl_reg_list SYSCTL_HANDLER_ARGS xkr->xkr_id = kctl->id; xkr->xkr_reg_unit = kctl->reg_unit; xkr->xkr_flags = kctl->flags; - xkr->xkr_kctlref = (uint64_t)VM_KERNEL_ADDRPERM(kctl); + xkr->xkr_kctlref = (uint64_t)(kctl->kctlref); xkr->xkr_recvbufsize = kctl->recvbufsize; xkr->xkr_sendbufsize = kctl->sendbufsize; xkr->xkr_lastunit = kctl->lastunit; @@ -1808,3 +2055,25 @@ kctl_getstat SYSCTL_HANDLER_ARGS lck_mtx_unlock(ctl_mtx); return (error); } + +void +kctl_fill_socketinfo(struct socket *so, struct socket_info *si) +{ + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + struct kern_ctl_info *kcsi = + &si->soi_proto.pri_kern_ctl; + struct kctl *kctl = kcb->kctl; + + si->soi_kind = SOCKINFO_KERN_CTL; + + if (kctl == 0) + return; + + kcsi->kcsi_id = kctl->id; + kcsi->kcsi_reg_unit = kctl->reg_unit; + kcsi->kcsi_flags = kctl->flags; + kcsi->kcsi_recvbufsize = kctl->recvbufsize; + kcsi->kcsi_sendbufsize = kctl->sendbufsize; + kcsi->kcsi_unit = kcb->unit; + strlcpy(kcsi->kcsi_name, kctl->name, MAX_KCTL_NAME); +} diff --git a/bsd/kern/kern_core.c b/bsd/kern/kern_core.c index 2bd9de059..9477378ef 100644 --- a/bsd/kern/kern_core.c +++ b/bsd/kern/kern_core.c @@ -99,7 +99,6 @@ extern int freespace_mb(vnode_t vp); kern_return_t thread_getstatus(register thread_t act, int flavor, 
thread_state_t tstate, mach_msg_type_number_t *count); void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *); -extern kern_return_t task_suspend_internal(task_t); static cpu_type_t process_cpu_type(proc_t proc); static cpu_type_t process_cpu_subtype(proc_t proc); @@ -192,7 +191,7 @@ collectth_state(thread_t th_act, void *tirp) * Parameters: core_proc Process to dump core [*] * reserve_mb If non-zero, leave filesystem with * at least this much free space. - * ignore_ulimit If set, ignore the process's core file ulimit. + * coredump_flags Extra options (ignore rlimit, run fsync) * * Returns: 0 Success * EFAULT Failed @@ -203,7 +202,7 @@ collectth_state(thread_t th_act, void *tirp) */ #define MAX_TSTATE_FLAVORS 10 int -coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit) +coredump(proc_t core_proc, uint32_t reserve_mb, int coredump_flags) { /* Begin assumptions that limit us to only the current process */ vfs_context_t ctx = vfs_context_current(); @@ -265,8 +264,10 @@ coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit) mapsize = get_vmmap_size(map); - if ((mapsize >= core_proc->p_rlimit[RLIMIT_CORE].rlim_cur) && (ignore_ulimit == 0)) + if (((coredump_flags & COREDUMP_IGNORE_ULIMIT) == 0) && + (mapsize >= core_proc->p_rlimit[RLIMIT_CORE].rlim_cur)) return (EFAULT); + (void) task_suspend_internal(task); MALLOC(alloced_name, char *, MAXPATHLEN, M_TEMP, M_NOWAIT | M_ZERO); @@ -325,7 +326,7 @@ coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit) header_size = command_size + mach_header_sz; - if (kmem_alloc(kernel_map, &header, (vm_size_t)header_size) != KERN_SUCCESS) { + if (kmem_alloc(kernel_map, &header, (vm_size_t)header_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) { error = ENOMEM; goto out; } @@ -416,6 +417,7 @@ coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit) sc64->maxprot = maxprot; sc64->initprot = prot; sc64->nsects = 0; + sc64->flags = 0; } else { sc = (struct segment_command *) 
(header + hoffset); sc->cmd = LC_SEGMENT; @@ -429,6 +431,7 @@ coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit) sc->maxprot = maxprot; sc->initprot = prot; sc->nsects = 0; + sc->flags = 0; } /* @@ -488,6 +491,9 @@ coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit) error = vn_rdwr(UIO_WRITE, vp, (caddr_t)header, header_size, (off_t)0, UIO_SYSSPACE, IO_NOCACHE|IO_NODELOCKED|IO_UNIT, cred, (int *) 0, core_proc); kmem_free(kernel_map, header, header_size); + + if ((coredump_flags & COREDUMP_FULLFSYNC) && error == 0) + error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx); out: error1 = vnode_close(vp, FWRITE, ctx); out2: diff --git a/bsd/kern/kern_credential.c b/bsd/kern/kern_credential.c index b3c0d357a..fbbfb752a 100644 --- a/bsd/kern/kern_credential.c +++ b/bsd/kern/kern_credential.c @@ -264,9 +264,7 @@ static void kauth_groups_trimcache(int newsize); #endif /* CONFIG_EXT_RESOLVER */ -static const int kauth_cred_primes[KAUTH_CRED_PRIMES_COUNT] = KAUTH_CRED_PRIMES; -static int kauth_cred_primes_index = 0; -static int kauth_cred_table_size = 0; +#define KAUTH_CRED_TABLE_SIZE 97 TAILQ_HEAD(kauth_cred_entry_head, ucred); static struct kauth_cred_entry_head * kauth_cred_table_anchor = NULL; @@ -3364,15 +3362,14 @@ kauth_cred_init(void) int i; kauth_cred_hash_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0/*LCK_ATTR_NULL*/); - kauth_cred_table_size = kauth_cred_primes[kauth_cred_primes_index]; /*allocate credential hash table */ MALLOC(kauth_cred_table_anchor, struct kauth_cred_entry_head *, - (sizeof(struct kauth_cred_entry_head) * kauth_cred_table_size), + (sizeof(struct kauth_cred_entry_head) * KAUTH_CRED_TABLE_SIZE), M_KAUTH, M_WAITOK | M_ZERO); if (kauth_cred_table_anchor == NULL) panic("startup: kauth_cred_init"); - for (i = 0; i < kauth_cred_table_size; i++) { + for (i = 0; i < KAUTH_CRED_TABLE_SIZE; i++) { TAILQ_INIT(&kauth_cred_table_anchor[i]); } } @@ -5095,7 +5092,7 @@ kauth_cred_add(kauth_cred_t new_cred) 
KAUTH_CRED_HASH_LOCK_ASSERT(); hash_key = kauth_cred_get_hashkey(new_cred); - hash_key %= kauth_cred_table_size; + hash_key %= KAUTH_CRED_TABLE_SIZE; /* race fix - there is a window where another matching credential * could have been inserted between the time this one was created and we @@ -5140,7 +5137,7 @@ kauth_cred_remove(kauth_cred_t cred) kauth_cred_t found_cred; hash_key = kauth_cred_get_hashkey(cred); - hash_key %= kauth_cred_table_size; + hash_key %= KAUTH_CRED_TABLE_SIZE; /* Avoid race */ if (cred->cr_ref < 1) @@ -5200,7 +5197,7 @@ kauth_cred_find(kauth_cred_t cred) #endif hash_key = kauth_cred_get_hashkey(cred); - hash_key %= kauth_cred_table_size; + hash_key %= KAUTH_CRED_TABLE_SIZE; /* Find cred in the credential hash table */ TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[hash_key], cr_link) { @@ -5325,7 +5322,7 @@ kauth_cred_hash_print(void) printf("\n\t kauth credential hash table statistics - current cred count %d \n", kauth_cred_count); /* count slot hits, misses, collisions, and max depth */ - for (i = 0; i < kauth_cred_table_size; i++) { + for (i = 0; i < KAUTH_CRED_TABLE_SIZE; i++) { printf("[%02d] ", i); j = 0; TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[i], cr_link) { @@ -5510,7 +5507,7 @@ sysctl_dump_creds( __unused struct sysctl_oid *oidp, __unused void *arg1, __unus return (EPERM); /* calculate space needed */ - for (i = 0; i < kauth_cred_table_size; i++) { + for (i = 0; i < KAUTH_CRED_TABLE_SIZE; i++) { TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[i], cr_link) { counter++; } @@ -5531,7 +5528,7 @@ sysctl_dump_creds( __unused struct sysctl_oid *oidp, __unused void *arg1, __unus /* fill in creds to send back */ nextp = cred_listp; space = 0; - for (i = 0; i < kauth_cred_table_size; i++) { + for (i = 0; i < KAUTH_CRED_TABLE_SIZE; i++) { TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[i], cr_link) { nextp->credp = found_cred; nextp->cr_ref = found_cred->cr_ref; diff --git a/bsd/kern/kern_cs.c b/bsd/kern/kern_cs.c index 
66af46613..1459a472f 100644 --- a/bsd/kern/kern_cs.c +++ b/bsd/kern/kern_cs.c @@ -78,27 +78,27 @@ int cs_force_kill = 0; int cs_force_hard = 0; int cs_debug = 0; #if SECURE_KERNEL -const int cs_enforcement_enable=1; -const int cs_library_val_enable=1; -#else +const int cs_enforcement_enable = 1; +const int cs_library_val_enable = 1; +#else /* !SECURE_KERNEL */ +int cs_enforcement_panic=0; + #if CONFIG_ENFORCE_SIGNED_CODE -int cs_enforcement_enable=1; +int cs_enforcement_enable = 1; #else -int cs_enforcement_enable=0; -#endif /* CONFIG_ENFORCE_SIGNED_CODE */ +int cs_enforcement_enable = 0; +#endif #if CONFIG_ENFORCE_LIBRARY_VALIDATION int cs_library_val_enable = 1; #else int cs_library_val_enable = 0; -#endif /* CONFIG_ENFORCE_LIBRARY_VALIDATION */ +#endif -int cs_enforcement_panic=0; -#endif /* SECURE_KERNEL */ +#endif /* !SECURE_KERNEL */ int cs_all_vnodes = 0; static lck_grp_t *cs_lockgrp; -static lck_rw_t * SigPUPLock; SYSCTL_INT(_vm, OID_AUTO, cs_force_kill, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_kill, 0, ""); SYSCTL_INT(_vm, OID_AUTO, cs_force_hard, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_hard, 0, ""); @@ -109,7 +109,11 @@ SYSCTL_INT(_vm, OID_AUTO, cs_all_vnodes, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_all_vn #if !SECURE_KERNEL SYSCTL_INT(_vm, OID_AUTO, cs_enforcement, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_enforcement_enable, 0, ""); SYSCTL_INT(_vm, OID_AUTO, cs_enforcement_panic, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_enforcement_panic, 0, ""); + +#if !CONFIG_ENFORCE_LIBRARY_VALIDATION +SYSCTL_INT(_vm, OID_AUTO, cs_library_validation, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_library_val_enable, 0, ""); #endif +#endif /* !SECURE_KERNEL */ int panic_on_cs_killed = 0; void @@ -133,10 +137,15 @@ cs_init(void) } PE_parse_boot_argn("cs_debug", &cs_debug, sizeof (cs_debug)); + +#if !CONFIG_ENFORCE_LIBRARY_VALIDATION + PE_parse_boot_argn("cs_library_val_enable", &cs_library_val_enable, + sizeof (cs_library_val_enable)); #endif +#endif /* !SECURE_KERNEL */ + lck_grp_attr_t *attr = 
lck_grp_attr_alloc_init(); cs_lockgrp = lck_grp_alloc_init("KERNCS", attr); - SigPUPLock = lck_rw_alloc_init(cs_lockgrp, NULL); } int @@ -258,232 +267,6 @@ cs_enforcement(struct proc *p) return 0; } -static struct { - struct cscsr_functions *funcs; - vm_map_offset_t csr_map_base; - vm_map_size_t csr_map_size; - int inuse; - int disabled; -} csr_state; - -SYSCTL_INT(_vm, OID_AUTO, sigpup_disable, CTLFLAG_RW | CTLFLAG_LOCKED, &csr_state.disabled, 0, ""); - -static int -vnsize(vfs_context_t vfs, vnode_t vp, uint64_t *size) -{ - struct vnode_attr va; - int error; - - VATTR_INIT(&va); - VATTR_WANTED(&va, va_data_size); - - error = vnode_getattr(vp, &va, vfs); - if (error) - return error; - *size = va.va_data_size; - return 0; -} - -int -sigpup_install(user_addr_t argsp) -{ - struct sigpup_install_table args; - memory_object_control_t control; - kern_return_t result; - vfs_context_t vfs = NULL; - struct vnode_attr va; - vnode_t vp = NULL; - char *buf = NULL; - uint64_t size; - size_t len = 0; - int error = 0; - - if (!cs_enforcement_enable || csr_state.funcs == NULL) - return ENOTSUP; - - lck_rw_lock_exclusive(SigPUPLock); - - if (kauth_cred_issuser(kauth_cred_get()) == 0) { - error = EPERM; - goto cleanup; - } - - if (cs_debug > 10) - printf("sigpup install\n"); - - if (csr_state.csr_map_base != 0 || csr_state.inuse) { - error = EPERM; - goto cleanup; - } - - if (USER_ADDR_NULL == argsp) { - error = EINVAL; - goto cleanup; - } - if ((error = copyin(argsp, &args, sizeof(args))) != 0) - goto cleanup; - - if (cs_debug > 10) - printf("sigpup install with args\n"); - - MALLOC(buf, char *, MAXPATHLEN, M_TEMP, M_WAITOK); - if (buf == NULL) { - error = ENOMEM; - goto cleanup; - } - if ((error = copyinstr((user_addr_t)args.path, buf, MAXPATHLEN, &len)) != 0) - goto cleanup; - - if ((vfs = vfs_context_create(NULL)) == NULL) { - error = ENOMEM; - goto cleanup; - } - - if ((error = vnode_lookup(buf, VNODE_LOOKUP_NOFOLLOW, &vp, vfs)) != 0) - goto cleanup; - - if (cs_debug > 10) - 
printf("sigpup found file: %s\n", buf); - - /* make sure vnode is on the process's root volume */ - if (rootvnode->v_mount != vp->v_mount) { - if (cs_debug) printf("sigpup csr no on root volume\n"); - error = EPERM; - goto cleanup; - } - - /* make sure vnode is owned by "root" */ - VATTR_INIT(&va); - VATTR_WANTED(&va, va_uid); - error = vnode_getattr(vp, &va, vfs); - if (error) - goto cleanup; - - if (va.va_uid != 0) { - if (cs_debug) printf("sigpup: csr file not owned by root\n"); - error = EPERM; - goto cleanup; - } - - error = vnsize(vfs, vp, &size); - if (error) - goto cleanup; - - control = ubc_getobject(vp, 0); - if (control == MEMORY_OBJECT_CONTROL_NULL) { - error = EINVAL; - goto cleanup; - } - - csr_state.csr_map_size = mach_vm_round_page(size); - - if (cs_debug > 10) - printf("mmap!\n"); - - result = vm_map_enter_mem_object_control(kernel_map, - &csr_state.csr_map_base, - csr_state.csr_map_size, - 0, VM_FLAGS_ANYWHERE, - control, 0 /* file offset */, - 0 /* cow */, - VM_PROT_READ, - VM_PROT_READ, - VM_INHERIT_DEFAULT); - if (result != KERN_SUCCESS) { - error = EINVAL; - goto cleanup; - } - - error = csr_state.funcs->csr_validate_header((const uint8_t *)csr_state.csr_map_base, - csr_state.csr_map_size); - if (error) { - if (cs_debug > 10) - printf("sigpup header invalid, dropping mapping"); - sigpup_drop(); - goto cleanup; - } - - if (cs_debug > 10) - printf("table loaded %ld bytes\n", (long)csr_state.csr_map_size); - -cleanup: - lck_rw_unlock_exclusive(SigPUPLock); - - if (buf) - FREE(buf, M_TEMP); - if (vp) - (void)vnode_put(vp); - if (vfs) - (void)vfs_context_rele(vfs); - - if (error) - printf("sigpup: load failed with error: %d\n", error); - - - return error; -} - -int -sigpup_drop(void) -{ - - if (kauth_cred_issuser(kauth_cred_get()) == 0) - return EPERM; - - lck_rw_lock_exclusive(SigPUPLock); - - if (csr_state.csr_map_base == 0 || csr_state.inuse) { - printf("failed to unload the sigpup database\n"); - lck_rw_unlock_exclusive(SigPUPLock); - return 
EINVAL; - } - - if (cs_debug > 10) - printf("sigpup: unloading\n"); - - (void)mach_vm_deallocate(kernel_map, - csr_state.csr_map_base, csr_state.csr_map_size); - - csr_state.csr_map_base = 0; - csr_state.csr_map_size = 0; - - lck_rw_unlock_exclusive(SigPUPLock); - - return 0; -} - -void sigpup_attach_vnode(vnode_t); /* XXX */ - -void -sigpup_attach_vnode(vnode_t vp) -{ - const void *csblob; - size_t cslen; - - if (!cs_enforcement_enable || csr_state.funcs == NULL || csr_state.csr_map_base == 0 || csr_state.disabled) - return; - - /* if the file is not on the root volumes or already been check, skip */ - if (vp->v_mount != rootvnode->v_mount || (vp->v_flag & VNOCS)) - return; - - csblob = csr_state.funcs->csr_find_file_codedirectory(vp, (const uint8_t *)csr_state.csr_map_base, - (size_t)csr_state.csr_map_size, &cslen); - if (csblob) { - ubc_cs_sigpup_add(vp, (vm_address_t)csblob, (vm_size_t)cslen); - csr_state.inuse = 1; - } - vp->v_flag |= VNOCS; -} - -void -cs_register_cscsr(struct cscsr_functions *funcs) -{ - if (csr_state.funcs || funcs->csr_version < CSCSR_VERSION) - return; - csr_state.funcs = funcs; -} - /* * Library validation functions */ @@ -504,36 +287,32 @@ cs_require_lv(struct proc *p) } /* - * Function: csblob_get_teamid + * Function: csblob_get_platform_binary * - * Description: This function returns a pointer to the team id - stored within the codedirectory of the csblob. - If the codedirectory predates team-ids, it returns - NULL. - This does not copy the name but returns a pointer to - it within the CD. Subsequently, the CD must be - available when this is used. + * Description: This function returns true if the binary is + * in the trust cache. 
*/ -const char * -csblob_get_teamid(struct cs_blob *csblob) -{ - const CS_CodeDirectory *cd; - if ((cd = (const CS_CodeDirectory *)cs_find_blob( - csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL) - return NULL; - - if (ntohl(cd->version) < CS_SUPPORTSTEAMID) - return NULL; +int +csblob_get_platform_binary(struct cs_blob *blob) +{ + if (blob && blob->csb_platform_binary) + return 1; + return 0; +} - if (ntohl(cd->teamOffset) == 0) - return NULL; - - const char *name = ((const char *)cd) + ntohl(cd->teamOffset); - if (cs_debug > 1) - printf("found team-id %s in cdblob\n", name); +/* + * Function: csblob_get_flags + * + * Description: This function returns the flags for a given blob +*/ - return name; +unsigned int +csblob_get_flags(struct cs_blob *blob) +{ + if (blob) + return blob->csb_flags; + return 0; } /* @@ -542,7 +321,7 @@ csblob_get_teamid(struct cs_blob *csblob) * Description: This function returns the cs_blob * for the process p */ -static struct cs_blob * +struct cs_blob * csproc_get_blob(struct proc *p) { if (NULL == p) @@ -554,6 +333,63 @@ csproc_get_blob(struct proc *p) return ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff); } +/* + * Function: csproc_get_blob + * + * Description: This function returns the cs_blob + * for the vnode vp + */ +struct cs_blob * +csvnode_get_blob(struct vnode *vp, off_t offset) +{ + return ubc_cs_blob_get(vp, -1, offset); +} + +/* + * Function: csblob_get_teamid + * + * Description: This function returns a pointer to the + * team id of csblob +*/ +const char * +csblob_get_teamid(struct cs_blob *csblob) +{ + return csblob->csb_teamid; +} + +/* + * Function: csblob_get_identity + * + * Description: This function returns a pointer to the + * identity string + */ +const char * +csblob_get_identity(struct cs_blob *csblob) +{ + const CS_CodeDirectory *cd; + + cd = (const CS_CodeDirectory *)csblob_find_blob(csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY); + if (cd == NULL) + return NULL; + + if (cd->identOffset == 
0) + return NULL; + + return ((const char *)cd) + ntohl(cd->identOffset); +} + +/* + * Function: csblob_get_cdhash + * + * Description: This function returns a pointer to the + * cdhash of csblob (20 byte array) + */ +const uint8_t * +csblob_get_cdhash(struct cs_blob *csblob) +{ + return csblob->csb_cdhash; +} + /* * Function: csproc_get_teamid * @@ -566,8 +402,10 @@ csproc_get_teamid(struct proc *p) struct cs_blob *csblob; csblob = csproc_get_blob(p); + if (csblob == NULL) + return NULL; - return (csblob == NULL) ? NULL : csblob->csb_teamid; + return csblob_get_teamid(csblob); } /* @@ -585,8 +423,10 @@ csvnode_get_teamid(struct vnode *vp, off_t offset) return NULL; csblob = ubc_cs_blob_get(vp, -1, offset); + if (csblob == NULL) + return NULL; - return (csblob == NULL) ? NULL : csblob->csb_teamid; + return csblob_get_teamid(csblob); } /* @@ -607,6 +447,14 @@ csproc_get_platform_binary(struct proc *p) return (csblob == NULL) ? 0 : csblob->csb_platform_binary; } +int +csproc_get_platform_path(struct proc *p) +{ + struct cs_blob *csblob = csproc_get_blob(p); + + return (csblob == NULL) ? 0 : csblob->csb_platform_path; +} + /* * Function: csfg_get_platform_binary * @@ -648,6 +496,28 @@ csfg_get_platform_binary(struct fileglob *fg) return platform_binary; } +uint8_t * +csfg_get_cdhash(struct fileglob *fg, uint64_t offset, size_t *cdhash_size) +{ + vnode_t vp; + + if (FILEGLOB_DTYPE(fg) != DTYPE_VNODE) + return NULL; + + vp = (struct vnode *)fg->fg_data; + if (vp == NULL) + return NULL; + + struct cs_blob *csblob = NULL; + if ((csblob = ubc_cs_blob_get(vp, -1, offset)) == NULL) + return NULL; + + if (cdhash_size) + *cdhash_size = CS_CDHASH_LEN; + + return csblob->csb_cdhash; +} + /* * Function: csfg_get_teamid * @@ -694,6 +564,12 @@ cs_entitlement_flags(struct proc *p) return (p->p_csflags & CS_ENTITLEMENT_FLAGS); } +int +cs_restricted(struct proc *p) +{ + return (p->p_csflags & CS_RESTRICT) ? 
1 : 0; +} + /* * Function: csfg_get_path * @@ -717,3 +593,100 @@ csfg_get_path(struct fileglob *fg, char *path, int *len) or an error code */ return vn_getpath(vp, path, len); } + +/* Retrieve the entitlements blob for a process. + * Returns: + * EINVAL no text vnode associated with the process + * EBADEXEC invalid code signing data + * 0 no error occurred + * + * On success, out_start and out_length will point to the + * entitlements blob if found; or will be set to NULL/zero + * if there were no entitlements. + */ + +int +cs_entitlements_blob_get(proc_t p, void **out_start, size_t *out_length) +{ + struct cs_blob *csblob; + + *out_start = NULL; + *out_length = 0; + + if (NULL == p->p_textvp) + return EINVAL; + + if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL) + return 0; + + return csblob_get_entitlements(csblob, out_start, out_length); +} + +/* Retrieve the codesign identity for a process. + * Returns: + * NULL an error occured + * string the cs_identity + */ + +const char * +cs_identity_get(proc_t p) +{ + struct cs_blob *csblob; + + if (NULL == p->p_textvp) + return NULL; + + if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL) + return NULL; + + return csblob_get_identity(csblob); +} + + +/* Retrieve the codesign blob for a process. + * Returns: + * EINVAL no text vnode associated with the process + * 0 no error occurred + * + * On success, out_start and out_length will point to the + * cms blob if found; or will be set to NULL/zero + * if there were no blob. 
+ */ + +int +cs_blob_get(proc_t p, void **out_start, size_t *out_length) +{ + struct cs_blob *csblob; + + *out_start = NULL; + *out_length = 0; + + if (NULL == p->p_textvp) + return EINVAL; + + if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL) + return 0; + + *out_start = (void *)csblob->csb_mem_kaddr; + *out_length = csblob->csb_mem_size; + + return 0; +} + +/* + * return cshash of a process, cdhash is of size CS_CDHASH_LEN + */ + +uint8_t * +cs_get_cdhash(struct proc *p) +{ + struct cs_blob *csblob; + + if (NULL == p->p_textvp) + return NULL; + + if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL) + return NULL; + + return csblob->csb_cdhash; +} diff --git a/bsd/kern/kern_csr.c b/bsd/kern/kern_csr.c index 7badfcc58..4b5c26815 100644 --- a/bsd/kern/kern_csr.c +++ b/bsd/kern/kern_csr.c @@ -33,37 +33,8 @@ #include #include -/* allow everything by default? */ -/* XXX: set this to 0 later: */ -static int csr_allow_all = 1; - -/* allow everything if CSR_ALLOW_APPLE_INTERNAL is set */ -static int csr_allow_internal = 1; - -/* Current boot-arg policy: - * rootless=0 - * csr_allow_all = 1 - * rootless=1 - * csr_allow_all = 0 - * csr_allow_internal = 0 - * - * After : - * rootless=0 - * no effect - * rootless=1 - * csr_allow_internal = 0 - * - * Enforcement policy: - * =============================== - * | csr_allow_internal - * | 0 1 - * =============================== - * csr_ 0 | always customer - * allow_ | - * all 1 | never never - * =============================== - * NB: "customer" means enforce when - * CSR_ALLOW_APPLE_INTERNAL not set */ +/* enable enforcement by default */ +static int csr_allow_all = 0; void csr_init(void) @@ -73,62 +44,6 @@ csr_init(void) /* special booter; allow everything */ csr_allow_all = 1; } - - int rootless_boot_arg; - if (PE_parse_boot_argn("rootless", &rootless_boot_arg, sizeof(rootless_boot_arg))) { - /* XXX: set csr_allow_all to boot arg value for now - * (to be removed by ) */ - csr_allow_all = 
!rootless_boot_arg; - /* if rootless=1, do not allow everything when CSR_ALLOW_APPLE_INTERNAL is set */ - csr_allow_internal &= !rootless_boot_arg; - } -} - -int -csrctl(__unused proc_t p, struct csrctl_args *uap, __unused int32_t *retval) -{ - int error = 0; - - if (uap->useraddr == 0) - return EINVAL; - if (uap->usersize != sizeof(csr_config_t)) - return EINVAL; - - switch (uap->op) { - case CSR_OP_CHECK: - { - csr_config_t mask; - error = copyin(uap->useraddr, &mask, sizeof(csr_config_t)); - - if (error) - return error; - - error = csr_check(mask); - break; - } - - case CSR_OP_GET_ACTIVE_CONFIG: - case CSR_OP_GET_PENDING_CONFIG: /* fall through */ - { - csr_config_t config = 0; - if (uap->op == CSR_OP_GET_ACTIVE_CONFIG) - error = csr_get_active_config(&config); - else - error = csr_get_pending_config(&config); - - if (error) - return error; - - error = copyout(&config, uap->useraddr, sizeof(csr_config_t)); - break; - } - - default: - error = EINVAL; - break; - } - - return error; } int @@ -138,28 +53,19 @@ csr_get_active_config(csr_config_t *config) if (args->flags & kBootArgsFlagCSRActiveConfig) { *config = args->csrActiveConfig & CSR_VALID_FLAGS; } else { - /* XXX: change to 0 when is in the build */ - *config = CSR_ALLOW_APPLE_INTERNAL; + *config = 0; } return 0; } int -csr_get_pending_config(csr_config_t *config) +csr_check(csr_config_t mask) { boot_args *args = (boot_args *)PE_state.bootArgs; - if (args->flags & kBootArgsFlagCSRPendingConfig) { - *config = args->csrPendingConfig & CSR_VALID_FLAGS; - return 0; - } else { - return ENOENT; - } -} + if ((mask & CSR_ALLOW_DEVICE_CONFIGURATION) && !(args->flags & kBootArgsFlagCSRConfigMode)) + return EPERM; -int -csr_check(csr_config_t mask) -{ if (csr_allow_all) { return 0; } @@ -170,10 +76,6 @@ csr_check(csr_config_t mask) return error; } - if (csr_allow_internal && (config & CSR_ALLOW_APPLE_INTERNAL)) { - return 0; - } - if (mask == 0) { /* pass 0 to check if Rootless enforcement is active */ return -1; @@ 
-188,3 +90,60 @@ csr_set_allow_all(int value) { csr_allow_all = !!value; // force value to 0 or 1 } + +/* + * Syscall stubs + */ + +int syscall_csr_check(struct csrctl_args *args); +int syscall_csr_get_active_config(struct csrctl_args *args); + + +int +syscall_csr_check(struct csrctl_args *args) +{ + csr_config_t mask = 0; + int error = 0; + + if (args->useraddr == 0 || args->usersize != sizeof(mask)) + return EINVAL; + + error = copyin(args->useraddr, &mask, sizeof(mask)); + if (error) + return error; + + return csr_check(mask); +} + +int +syscall_csr_get_active_config(struct csrctl_args *args) +{ + csr_config_t config = 0; + int error = 0; + + if (args->useraddr == 0 || args->usersize != sizeof(config)) + return EINVAL; + + error = csr_get_active_config(&config); + if (error) + return error; + + return copyout(&config, args->useraddr, sizeof(config)); +} + +/* + * Syscall entrypoint + */ + +int +csrctl(__unused proc_t p, struct csrctl_args *args, __unused int32_t *retval) +{ + switch (args->op) { + case CSR_SYSCALL_CHECK: + return syscall_csr_check(args); + case CSR_SYSCALL_GET_ACTIVE_CONFIG: + return syscall_csr_get_active_config(args); + default: + return ENOSYS; + } +} diff --git a/bsd/kern/kern_descrip.c b/bsd/kern/kern_descrip.c index 54faaeb13..df33970ca 100644 --- a/bsd/kern/kern_descrip.c +++ b/bsd/kern/kern_descrip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -107,6 +107,7 @@ #include #include #include +#include #include #include @@ -117,9 +118,6 @@ #include #include -#if CONFIG_PROTECT -#include -#endif #include kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t, @@ -154,7 +152,7 @@ extern void file_lock_init(void); extern kauth_scope_t kauth_scope_fileop; /* Conflict wait queue for when selects collide (opaque type) */ -extern struct wait_queue select_conflict_queue; +extern struct waitq select_conflict_queue; #define f_flag f_fglob->fg_flag #define f_type f_fglob->fg_ops->fo_type @@ -546,6 +544,11 @@ dup(proc_t p, struct dup_args *uap, int32_t *retval) fp_drop(p, old, fp, 1); proc_fdunlock(p); + if (ENTR_SHOULDTRACE && fp->f_type == DTYPE_SOCKET) { + KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START, + new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data)); + } + return (error); } @@ -754,7 +757,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) struct fileproc *fp; char *pop; struct vnode *vp = NULLVP; /* for AUDIT_ARG() at end */ - int i, tmp, error, error2, flg = F_POSIX; + int i, tmp, error, error2, flg = 0; struct flock fl; struct flocktimeout fltimeout; struct timespec *timeout = NULL; @@ -942,12 +945,51 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) } goto out; + case F_SETCONFINED: + /* + * If this is the only reference to this fglob in the process + * and it's already marked as close-on-fork then mark it as + * (immutably) "confined" i.e. any fd that points to it will + * forever be close-on-fork, and attempts to use an IPC + * mechanism to move the descriptor elsewhere will fail. + */ + if (CAST_DOWN_EXPLICIT(int, uap->arg)) { + struct fileglob *fg = fp->f_fglob; + + lck_mtx_lock_spin(&fg->fg_lock); + if (fg->fg_lflags & FG_CONFINED) + error = 0; + else if (1 != fg->fg_count) + error = EAGAIN; /* go close the dup .. 
*/ + else if (UF_FORKCLOSE == (*pop & UF_FORKCLOSE)) { + fg->fg_lflags |= FG_CONFINED; + error = 0; + } else + error = EBADF; /* open without O_CLOFORK? */ + lck_mtx_unlock(&fg->fg_lock); + } else { + /* + * Other subsystems may have built on the immutability + * of FG_CONFINED; clearing it may be tricky. + */ + error = EPERM; /* immutable */ + } + goto out; + + case F_GETCONFINED: + *retval = (fp->f_fglob->fg_lflags & FG_CONFINED) ? 1 : 0; + error = 0; + goto out; + case F_SETLKWTIMEOUT: case F_SETLKW: + case F_OFD_SETLKWTIMEOUT: + case F_OFD_SETLKW: flg |= F_WAIT; /* Fall into F_SETLK */ case F_SETLK: + case F_OFD_SETLK: if (fp->f_type != DTYPE_VNODE) { error = EBADF; goto out; @@ -959,7 +1001,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) proc_fdunlock(p); /* Copy in the lock structure */ - if (uap->cmd == F_SETLKWTIMEOUT) { + if (F_SETLKWTIMEOUT == uap->cmd || + F_OFD_SETLKWTIMEOUT == uap->cmd) { error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout)); if (error) { goto outdrop; @@ -994,45 +1037,90 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) goto outdrop; } #endif - switch (fl.l_type) { - - case F_RDLCK: - if ((fflag & FREAD) == 0) { - (void)vnode_put(vp); - error = EBADF; - goto outdrop; + switch (uap->cmd) { + case F_OFD_SETLK: + case F_OFD_SETLKW: + case F_OFD_SETLKWTIMEOUT: + flg |= F_OFD_LOCK; + switch (fl.l_type) { + case F_RDLCK: + if ((fflag & FREAD) == 0) { + error = EBADF; + break; + } + error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, + F_SETLK, &fl, flg, &context, timeout); + break; + case F_WRLCK: + if ((fflag & FWRITE) == 0) { + error = EBADF; + break; + } + error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, + F_SETLK, &fl, flg, &context, timeout); + break; + case F_UNLCK: + error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, + F_UNLCK, &fl, F_OFD_LOCK, &context, + timeout); + break; + default: + error = EINVAL; + break; } - // XXX UInt32 unsafe for LP64 kernel - 
OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag); - error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context, timeout); - (void)vnode_put(vp); - goto outdrop; + if (0 == error && + (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) { + struct fileglob *fg = fp->f_fglob; - case F_WRLCK: - if ((fflag & FWRITE) == 0) { - (void)vnode_put(vp); - error = EBADF; - goto outdrop; + /* + * arrange F_UNLCK on last close (once + * set, FG_HAS_OFDLOCK is immutable) + */ + if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) { + lck_mtx_lock_spin(&fg->fg_lock); + fg->fg_lflags |= FG_HAS_OFDLOCK; + lck_mtx_unlock(&fg->fg_lock); + } } - // XXX UInt32 unsafe for LP64 kernel - OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag); - error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context, timeout); - (void)vnode_put(vp); - goto outdrop; - - case F_UNLCK: - error = VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl, - F_POSIX, &context, timeout); - (void)vnode_put(vp); - goto outdrop; - + break; default: - (void)vnode_put(vp); - error = EINVAL; - goto outdrop; + flg |= F_POSIX; + switch (fl.l_type) { + case F_RDLCK: + if ((fflag & FREAD) == 0) { + error = EBADF; + break; + } + // XXX UInt32 unsafe for LP64 kernel + OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag); + error = VNOP_ADVLOCK(vp, (caddr_t)p, + F_SETLK, &fl, flg, &context, timeout); + break; + case F_WRLCK: + if ((fflag & FWRITE) == 0) { + error = EBADF; + break; + } + // XXX UInt32 unsafe for LP64 kernel + OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag); + error = VNOP_ADVLOCK(vp, (caddr_t)p, + F_SETLK, &fl, flg, &context, timeout); + break; + case F_UNLCK: + error = VNOP_ADVLOCK(vp, (caddr_t)p, + F_UNLCK, &fl, F_POSIX, &context, timeout); + break; + default: + error = EINVAL; + break; + } + break; } + (void) vnode_put(vp); + goto outdrop; case F_GETLK: + case F_OFD_GETLK: if (fp->f_type != DTYPE_VNODE) { error = EBADF; goto out; @@ -1088,7 +1176,20 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) uap->cmd, &fl); if (error == 0) 
#endif - error = VNOP_ADVLOCK(vp, (caddr_t)p, uap->cmd, &fl, F_POSIX, &context, NULL); + switch (uap->cmd) { + case F_OFD_GETLK: + error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, + F_GETLK, &fl, F_OFD_LOCK, &context, NULL); + break; + case F_OFD_GETLKPID: + error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, + F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL); + break; + default: + error = VNOP_ADVLOCK(vp, (caddr_t)p, + uap->cmd, &fl, F_POSIX, &context, NULL); + break; + } (void)vnode_put(vp); @@ -1390,23 +1491,13 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) } devBlockSize = vfs_devblocksize(vnode_mount(vp)); if (uap->cmd == F_LOG2PHYS_EXT) { -#if defined(__LP64__) - a_size = l2p_struct.l2p_contigbytes; -#else - if ((l2p_struct.l2p_contigbytes > SIZE_MAX) || (l2p_struct.l2p_contigbytes < 0)) { - /* size_t is 32-bit on a 32-bit kernel, therefore - * assigning l2p_contigbytes to a_size may have - * caused integer overflow. We, therefore, return - * an error here instead of calculating incorrect - * value. 
- */ - printf ("fcntl: F_LOG2PHYS_EXT: l2p_contigbytes=%lld will overflow, returning error\n", l2p_struct.l2p_contigbytes); - error = EFBIG; + if (l2p_struct.l2p_contigbytes < 0) { + vnode_put(vp); + error = EINVAL; goto outdrop; - } else { - a_size = l2p_struct.l2p_contigbytes; } -#endif + + a_size = MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX); } else { a_size = devBlockSize; } @@ -1494,7 +1585,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) } case F_CHKCLEAN: // used by regression tests to see if all dirty pages got cleaned by fsync() - case F_FULLFSYNC: // fsync + flush the journal + DKIOCSYNCHRONIZECACHE + case F_FULLFSYNC: // fsync + flush the journal + DKIOCSYNCHRONIZE + case F_BARRIERFSYNC: // fsync + barrier case F_FREEZE_FS: // freeze all other fs operations for the fs of this fd case F_THAW_FS: { // thaw all frozen fs operations for the fs of this fd if (fp->f_type != DTYPE_VNODE) { @@ -1620,7 +1712,9 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) case F_ADDSIGS: case F_ADDFILESIGS: case F_ADDFILESIGS_FOR_DYLD_SIM: + case F_ADDFILESIGS_RETURN: { + struct cs_blob *blob = NULL; struct user_fsignatures fs; kern_return_t kr; vm_offset_t kernel_blob_addr; @@ -1663,123 +1757,109 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) goto outdrop; } - struct cs_blob * existing_blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start); - if (existing_blob != NULL) + /* + * First check if we have something loaded a this offset + */ + blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start); + if (blob != NULL) { /* If this is for dyld_sim revalidate the blob */ if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) { - error = ubc_cs_blob_revalidate(vp, existing_blob, blob_add_flags); + error = ubc_cs_blob_revalidate(vp, blob, blob_add_flags); } - vnode_put(vp); - goto outdrop; - } -/* - * An arbitrary limit, to prevent someone from mapping in a 20GB blob. 
This should cover - * our use cases for the immediate future, but note that at the time of this commit, some - * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB). - * - * We should consider how we can manage this more effectively; the above means that some - * platforms are using megabytes of memory for signing data; it merely hasn't crossed the - * threshold considered ridiculous at the time of this change. - */ -#define CS_MAX_BLOB_SIZE (10ULL * 1024ULL * 1024ULL) - if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) { - error = E2BIG; - vnode_put(vp); - goto outdrop; - } - kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size); - kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size); - if (kr != KERN_SUCCESS) { - error = ENOMEM; - vnode_put(vp); - goto outdrop; - } + } else { + /* + * An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover + * our use cases for the immediate future, but note that at the time of this commit, some + * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB). + * + * We should consider how we can manage this more effectively; the above means that some + * platforms are using megabytes of memory for signing data; it merely hasn't crossed the + * threshold considered ridiculous at the time of this change. 
+ */ +#define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL) + if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) { + error = E2BIG; + vnode_put(vp); + goto outdrop; + } - if(uap->cmd == F_ADDSIGS) { - error = copyin(fs.fs_blob_start, - (void *) kernel_blob_addr, - kernel_blob_size); - } else /* F_ADDFILESIGS */ { - int resid; - - error = vn_rdwr(UIO_READ, - vp, - (caddr_t) kernel_blob_addr, - kernel_blob_size, - fs.fs_file_start + fs.fs_blob_start, - UIO_SYSSPACE, - 0, - kauth_cred_get(), - &resid, - p); - if ((error == 0) && resid) { - /* kernel_blob_size rounded to a page size, but signature may be at end of file */ - memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid); + kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size); + kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size); + if (kr != KERN_SUCCESS) { + error = ENOMEM; + vnode_put(vp); + goto outdrop; } - } - - if (error) { - ubc_cs_blob_deallocate(kernel_blob_addr, - kernel_blob_size); - vnode_put(vp); - goto outdrop; - } - error = ubc_cs_blob_add( - vp, - CPU_TYPE_ANY, /* not for a specific architecture */ - fs.fs_file_start, - kernel_blob_addr, - kernel_blob_size, - blob_add_flags); - if (error) { - ubc_cs_blob_deallocate(kernel_blob_addr, + if(uap->cmd == F_ADDSIGS) { + error = copyin(fs.fs_blob_start, + (void *) kernel_blob_addr, kernel_blob_size); - } else { - /* ubc_blob_add() has consumed "kernel_blob_addr" */ + } else /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM */ { + int resid; + + error = vn_rdwr(UIO_READ, + vp, + (caddr_t) kernel_blob_addr, + kernel_blob_size, + fs.fs_file_start + fs.fs_blob_start, + UIO_SYSSPACE, + 0, + kauth_cred_get(), + &resid, + p); + if ((error == 0) && resid) { + /* kernel_blob_size rounded to a page size, but signature may be at end of file */ + memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid); + } + } + + if (error) { + ubc_cs_blob_deallocate(kernel_blob_addr, + kernel_blob_size); + 
vnode_put(vp); + goto outdrop; + } + + blob = NULL; + error = ubc_cs_blob_add(vp, + CPU_TYPE_ANY, /* not for a specific architecture */ + fs.fs_file_start, + kernel_blob_addr, + kernel_blob_size, + blob_add_flags, + &blob); + if (error) { + ubc_cs_blob_deallocate(kernel_blob_addr, + kernel_blob_size); + } else { + /* ubc_blob_add() has consumed "kernel_blob_addr" */ #if CHECK_CS_VALIDATION_BITMAP - ubc_cs_validation_bitmap_allocate( vp ); + ubc_cs_validation_bitmap_allocate( vp ); #endif + } + } + + if (uap->cmd == F_ADDFILESIGS_RETURN || uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) { + /* + * The first element of the structure is a + * off_t that happen to have the same size for + * all archs. Lets overwrite that. + */ + off_t end_offset = 0; + if (blob) + end_offset = blob->csb_end_offset; + error = copyout(&end_offset, argp, sizeof (end_offset)); } (void) vnode_put(vp); break; } case F_FINDSIGS: { -#ifdef SECURE_KERNEL error = ENOTSUP; -#else /* !SECURE_KERNEL */ - off_t offsetMacho; - - if (fp->f_type != DTYPE_VNODE) { - error = EBADF; - goto out; - } - vp = (struct vnode *)fp->f_data; - proc_fdunlock(p); - error = vnode_getwithref(vp); - if (error) - goto outdrop; - - error = copyin(argp, &offsetMacho, sizeof(offsetMacho)); - if (error) { - (void)vnode_put(vp); - goto outdrop; - } - -#if CONFIG_MACF - error = mac_vnode_find_sigs(p, vp, offsetMacho); -#else - error = EPERM; -#endif - if (error) { - (void)vnode_put(vp); - goto outdrop; - } -#endif /* SECURE_KERNEL */ - break; + goto out; } #if CONFIG_PROTECT case F_GETPROTECTIONCLASS: { @@ -2294,8 +2374,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) const CS_SuperBlob *super_blob = (void *)t_blob->csb_mem_kaddr; const CS_CodeDirectory *cd = findCodeDirectory(super_blob, - (char *) super_blob, - (char *) super_blob + t_blob->csb_mem_size); + (const char *) super_blob, + (const char *) super_blob + t_blob->csb_mem_size); if (cd == NULL) { error = ENOENT; goto outdrop; @@ -2312,13 
+2392,13 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) if (error) goto outdrop; - if (sizeof(t_blob->csb_sha1) > args.f_hash_size || + if (sizeof(t_blob->csb_cdhash) > args.f_hash_size || buffer_size > args.f_cd_size) { error = ERANGE; goto outdrop; } - error = copyout(t_blob->csb_sha1, args.f_cd_hash, sizeof(t_blob->csb_sha1)); + error = copyout(t_blob->csb_cdhash, args.f_cd_hash, sizeof(t_blob->csb_cdhash)); if (error) goto outdrop; error = copyout(cd, args.f_cd_buffer, buffer_size); @@ -2424,6 +2504,19 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) goto outdrop; } +#if DEBUG || DEVELOPMENT + case F_RECYCLE: + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } + vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); + + vnode_recycle(vp); + break; +#endif + default: /* * This is an fcntl() that we d not recognize at this level; @@ -2778,6 +2871,10 @@ close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags) procfdtbl_reservefd(p, fd); } + if (ENTR_SHOULDTRACE && fp->f_type == DTYPE_SOCKET) + KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END, + fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data)); + error = closef_locked(fp, fp->f_fglob, p); if ((fp->f_flags & FP_WAITCLOSE) == FP_WAITCLOSE) wakeup(&fp->f_flags); @@ -3841,7 +3938,7 @@ fp_tryswap(proc_t p, int fd, struct fileproc *nfp) (fp->f_flags & ~FP_TYPEMASK); nfp->f_iocount = fp->f_iocount; nfp->f_fglob = fp->f_fglob; - nfp->f_waddr = fp->f_waddr; + nfp->f_wset = fp->f_wset; p->p_fd->fd_ofiles[fd] = nfp; (void) fp_drop(p, fd, nfp, 1); @@ -4743,7 +4840,7 @@ fdcopy(proc_t p, vnode_t uth_cdir) * allowing the table to shrink. 
*/ i = newfdp->fd_nfiles; - while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) + while (i > 1 + 2 * NDEXTENT && i > 1 + newfdp->fd_lastfile * 2) i /= 2; } proc_fdunlock(p); @@ -4803,6 +4900,7 @@ fdcopy(proc_t p, vnode_t uth_cdir) for (i = newfdp->fd_lastfile + 1; --i >= 0; fpp++, flags++) if ((ofp = *fpp) != NULL && + 0 == (ofp->f_fglob->fg_lflags & FG_CONFINED) && 0 == (*flags & (UF_FORKCLOSE|UF_RESERVED))) { #if DEBUG if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE) @@ -5045,11 +5143,13 @@ fileproc_drain(proc_t p, struct fileproc * fp) (*fp->f_fglob->fg_ops->fo_drain)(fp, &context); } if ((fp->f_flags & FP_INSELECT) == FP_INSELECT) { - if (wait_queue_wakeup_all((wait_queue_t)fp->f_waddr, NULL, THREAD_INTERRUPTED) == KERN_INVALID_ARGUMENT) - panic("bad wait queue for wait_queue_wakeup_all %p", fp->f_waddr); - } + if (waitq_wakeup64_all((struct waitq *)fp->f_wset, NO_EVENT64, + THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) + panic("bad wait queue for waitq_wakeup64_all %p (fp:%p)", fp->f_wset, fp); + } if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) { - if (wait_queue_wakeup_all(&select_conflict_queue, NULL, THREAD_INTERRUPTED) == KERN_INVALID_ARGUMENT) + if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64, + THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) panic("bad select_conflict_queue"); } p->p_fpdrainwait = 1; @@ -5204,27 +5304,28 @@ fileport_makeport(proc_t p, struct fileport_makeport_args *uap, ipc_port_t fileport; mach_port_name_t name = MACH_PORT_NULL; - err = fp_lookup(p, fd, &fp, 0); + proc_fdlock(p); + err = fp_lookup(p, fd, &fp, 1); if (err != 0) { - goto out; + goto out_unlock; } - if (!filetype_issendable(fp->f_type)) { + if (!file_issendable(p, fp)) { err = EINVAL; - goto out; + goto out_unlock; } if (FP_ISGUARDED(fp, GUARD_FILEPORT)) { - proc_fdlock(p); err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT); - proc_fdunlock(p); - goto out; + goto out_unlock; } /* Dropped when port is deallocated 
*/ fg = fp->f_fglob; fg_ref(fp); + proc_fdunlock(p); + /* Allocate and initialize a port */ fileport = fileport_alloc(fg); if (fileport == IPC_PORT_NULL) { @@ -5254,6 +5355,8 @@ fileport_makeport(proc_t p, struct fileport_makeport_args *uap, return 0; +out_unlock: + proc_fdunlock(p); out: if (MACH_PORT_VALID(name)) { /* Don't care if another thread races us to deallocate the entry */ @@ -5421,10 +5524,8 @@ dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error) switch (error) { case ENODEV: if (FP_ISGUARDED(wfp, GUARD_DUP)) { - int err = fp_guard_exception(p, - dfd, wfp, kGUARD_EXC_DUP); proc_fdunlock(p); - return (err); + return (EPERM); } /* @@ -5516,23 +5617,23 @@ fg_drop(struct fileproc * fp) #if SOCKETS /* - * fg_insertuipc + * fg_insertuipc_mark * - * Description: Insert fileglob onto message queue + * Description: Mark fileglob for insertion onto message queue if needed + * Also takes fileglob reference * - * Parameters: fg Fileglob pointer to insert + * Parameters: fg Fileglob pointer to insert * - * Returns: void + * Returns: true, if the fileglob needs to be inserted onto msg queue * * Locks: Takes and drops fg_lock, potentially many times */ -void -fg_insertuipc(struct fileglob * fg) +boolean_t +fg_insertuipc_mark(struct fileglob * fg) { - int insertque = 0; + boolean_t insert = FALSE; lck_mtx_lock_spin(&fg->fg_lock); - while (fg->fg_lflags & FG_RMMSGQ) { lck_mtx_convert_spin(&fg->fg_lock); @@ -5544,11 +5645,30 @@ fg_insertuipc(struct fileglob * fg) fg->fg_msgcount++; if (fg->fg_msgcount == 1) { fg->fg_lflags |= FG_INSMSGQ; - insertque=1; + insert = TRUE; } lck_mtx_unlock(&fg->fg_lock); + return (insert); +} - if (insertque) { +/* + * fg_insertuipc + * + * Description: Insert marked fileglob onto message queue + * + * Parameters: fg Fileglob pointer to insert + * + * Returns: void + * + * Locks: Takes and drops fg_lock & uipc_lock + * DO NOT call this function with proc_fdlock held as unp_gc() + * can potentially try to acquire 
proc_fdlock, which can result + * in a deadlock if this function is in unp_gc_wait(). + */ +void +fg_insertuipc(struct fileglob * fg) +{ + if (fg->fg_lflags & FG_INSMSGQ) { lck_mtx_lock_spin(uipc_lock); unp_gc_wait(); LIST_INSERT_HEAD(&fmsghead, fg, f_msglist); @@ -5561,25 +5681,24 @@ fg_insertuipc(struct fileglob * fg) } lck_mtx_unlock(&fg->fg_lock); } - } - /* - * fg_removeuipc + * fg_removeuipc_mark * - * Description: Remove fileglob from message queue + * Description: Mark the fileglob for removal from message queue if needed + * Also releases fileglob message queue reference * - * Parameters: fg Fileglob pointer to remove + * Parameters: fg Fileglob pointer to remove * - * Returns: void + * Returns: true, if the fileglob needs to be removed from msg queue * * Locks: Takes and drops fg_lock, potentially many times */ -void -fg_removeuipc(struct fileglob * fg) +boolean_t +fg_removeuipc_mark(struct fileglob * fg) { - int removeque = 0; + boolean_t remove = FALSE; lck_mtx_lock_spin(&fg->fg_lock); while (fg->fg_lflags & FG_INSMSGQ) { @@ -5591,11 +5710,30 @@ fg_removeuipc(struct fileglob * fg) fg->fg_msgcount--; if (fg->fg_msgcount == 0) { fg->fg_lflags |= FG_RMMSGQ; - removeque=1; + remove = TRUE; } lck_mtx_unlock(&fg->fg_lock); + return (remove); +} - if (removeque) { +/* + * fg_removeuipc + * + * Description: Remove marked fileglob from message queue + * + * Parameters: fg Fileglob pointer to remove + * + * Returns: void + * + * Locks: Takes and drops fg_lock & uipc_lock + * DO NOT call this function with proc_fdlock held as unp_gc() + * can potentially try to acquire proc_fdlock, which can result + * in a deadlock if this function is in unp_gc_wait(). + */ +void +fg_removeuipc(struct fileglob * fg) +{ + if (fg->fg_lflags & FG_RMMSGQ) { lck_mtx_lock_spin(uipc_lock); unp_gc_wait(); LIST_REMOVE(fg, f_msglist); @@ -5752,17 +5890,19 @@ fo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx) * process is opt-in by file type. 
*/ boolean_t -filetype_issendable(file_type_t fdtype) +file_issendable(proc_t p, struct fileproc *fp) { - switch (fdtype) { - case DTYPE_VNODE: - case DTYPE_SOCKET: - case DTYPE_PIPE: - case DTYPE_PSXSHM: - return TRUE; - default: - /* DTYPE_KQUEUE, DTYPE_FSEVENTS, DTYPE_PSXSEM */ - return FALSE; + proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED); + + switch (fp->f_type) { + case DTYPE_VNODE: + case DTYPE_SOCKET: + case DTYPE_PIPE: + case DTYPE_PSXSHM: + return (0 == (fp->f_fglob->fg_lflags & FG_CONFINED)); + default: + /* DTYPE_KQUEUE, DTYPE_FSEVENTS, DTYPE_PSXSEM */ + return FALSE; } } diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c index 708aef474..d80579a35 100644 --- a/bsd/kern/kern_event.c +++ b/bsd/kern/kern_event.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -82,13 +82,15 @@ #include #include #include +#include #include #include #include #include -#include +#include #include +#include #include #include @@ -106,7 +108,7 @@ MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); -#define KQ_EVENT NULL +#define KQ_EVENT NO_EVENT64 static inline void kqlock(struct kqueue *kq); static inline void kqunlock(struct kqueue *kq); @@ -123,7 +125,7 @@ static int kqueue_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx); static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx); -static int kqueue_select(struct fileproc *fp, int which, void *wql, +static int kqueue_select(struct fileproc *fp, int which, void *wq_link_id, vfs_context_t ctx); static int kqueue_close(struct fileglob *fg, vfs_context_t ctx); static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn, @@ -141,28 +143,32 @@ static const struct fileops kqueueops = { .fo_drain = kqueue_drain, }; -static int kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, - int nchanges, 
user_addr_t eventlist, int nevents, int fd, - user_addr_t utimeout, unsigned int flags, int32_t *retval); -static int kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, - struct proc *p, int iskev64); -static int kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, - struct proc *p, int iskev64); -char * kevent_description(struct kevent64_s *kevp, char *s, size_t n); - -static int kevent_callback(struct kqueue *kq, struct kevent64_s *kevp, - void *data); +static int kevent_internal(struct proc *p, int fd, + user_addr_t changelist, int nchanges, + user_addr_t eventlist, int nevents, + user_addr_t data_out, user_size_t *data_available, + unsigned int flags, user_addr_t utimeout, + kqueue_continue_t continuation, + int32_t *retval); +static int kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp, + struct proc *p, unsigned int flags); +static int kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp, + struct proc *p, unsigned int flags); +char * kevent_description(struct kevent_internal_s *kevp, char *s, size_t n); + +static int kevent_callback(struct kqueue *kq, struct kevent_internal_s *kevp, + void *data); static void kevent_continue(struct kqueue *kq, void *data, int error); static void kqueue_scan_continue(void *contp, wait_result_t wait_result); static int kqueue_process(struct kqueue *kq, kevent_callback_t callback, - void *data, int *countp, struct proc *p); + void *data, int *countp, struct proc *p); static int kqueue_begin_processing(struct kqueue *kq); static void kqueue_end_processing(struct kqueue *kq); static int knote_process(struct knote *kn, kevent_callback_t callback, - void *data, struct kqtailq *inprocessp, struct proc *p); + void *data, struct kqtailq *inprocessp, struct proc *p); static void knote_put(struct knote *kn); static int knote_fdpattach(struct knote *kn, struct filedesc *fdp, - struct proc *p); + struct proc *p); static void knote_drop(struct knote *kn, struct proc *p); static void 
knote_activate(struct knote *kn, int); static void knote_deactivate(struct knote *kn); @@ -223,7 +229,7 @@ extern struct filterops sig_filtops; static int filt_timerattach(struct knote *kn); static void filt_timerdetach(struct knote *kn); static int filt_timer(struct knote *kn, long hint); -static void filt_timertouch(struct knote *kn, struct kevent64_s *kev, +static void filt_timertouch(struct knote *kn, struct kevent_internal_s *kev, long type); static struct filterops timer_filtops = { .f_attach = filt_timerattach, @@ -260,7 +266,7 @@ extern struct filterops machport_filtops; static int filt_userattach(struct knote *kn); static void filt_userdetach(struct knote *kn); static int filt_user(struct knote *kn, long hint); -static void filt_usertouch(struct knote *kn, struct kevent64_s *kev, +static void filt_usertouch(struct knote *kn, struct kevent_internal_s *kev, long type); static struct filterops user_filtops = { .f_attach = filt_userattach, @@ -358,8 +364,9 @@ kqlock2knoteusewait(struct kqueue *kq, struct knote *kn) { if ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) != 0) { kn->kn_status |= KN_USEWAIT; - wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, - &kn->kn_status, THREAD_UNINT, 0); + waitq_assert_wait64((struct waitq *)kq->kq_wqs, + CAST_EVENT64_T(&kn->kn_status), + THREAD_UNINT, TIMEOUT_WAIT_FOREVER); kqunlock(kq); thread_block(THREAD_CONTINUE_NULL); return (0); @@ -389,8 +396,10 @@ knoteuse2kqlock(struct kqueue *kq, struct knote *kn) } if ((kn->kn_status & KN_USEWAIT) != 0) { kn->kn_status &= ~KN_USEWAIT; - wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, - &kn->kn_status, THREAD_AWAKENED); + waitq_wakeup64_all((struct waitq *)kq->kq_wqs, + CAST_EVENT64_T(&kn->kn_status), + THREAD_AWAKENED, + WAITQ_ALL_PRIORITIES); } } return ((kn->kn_status & KN_DROPPING) == 0); @@ -425,8 +434,9 @@ kqlock2knotedrop(struct kqueue *kq, struct knote *kn) } } kn->kn_status |= KN_USEWAIT; - wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kn->kn_status, - THREAD_UNINT, 
0); + waitq_assert_wait64((struct waitq *)kq->kq_wqs, + CAST_EVENT64_T(&kn->kn_status), + THREAD_UNINT, TIMEOUT_WAIT_FOREVER); kqunlock(kq); thread_block(THREAD_CONTINUE_NULL); return (oktodrop); @@ -444,8 +454,10 @@ knote_put(struct knote *kn) if (--kn->kn_inuse == 0) { if ((kn->kn_status & KN_USEWAIT) != 0) { kn->kn_status &= ~KN_USEWAIT; - wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, - &kn->kn_status, THREAD_AWAKENED); + waitq_wakeup64_all((struct waitq *)kq->kq_wqs, + CAST_EVENT64_T(&kn->kn_status), + THREAD_AWAKENED, + WAITQ_ALL_PRIORITIES); } } kqunlock(kq); @@ -838,8 +850,10 @@ filt_timerexpire(void *knx, __unused void *spare) /* if someone is waiting for timer to pop */ if (kn->kn_hookid & TIMER_CANCELWAIT) { struct kqueue *kq = kn->kn_kq; - wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_hook, - THREAD_AWAKENED); + waitq_wakeup64_all((struct waitq *)kq->kq_wqs, + CAST_EVENT64_T(&kn->kn_hook), + THREAD_AWAKENED, + WAITQ_ALL_PRIORITIES); } filt_timerunlock(); @@ -864,8 +878,9 @@ filt_timercancel(struct knote *kn) } else { /* we have to wait for the expire routine. */ kn->kn_hookid |= TIMER_CANCELWAIT; - wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, - &kn->kn_hook, THREAD_UNINT, 0); + waitq_assert_wait64((struct waitq *)kq->kq_wqs, + CAST_EVENT64_T(&kn->kn_hook), + THREAD_UNINT, TIMEOUT_WAIT_FOREVER); filt_timerunlock(); thread_block(THREAD_CONTINUE_NULL); filt_timerlock(); @@ -1006,7 +1021,7 @@ filt_timer(struct knote *kn, long hint) * pops have gone off (in kn_data). 
*/ static void -filt_timertouch(struct knote *kn, struct kevent64_s *kev, long type) +filt_timertouch(struct knote *kn, struct kevent_internal_s *kev, long type) { int error; filt_timerlock(); @@ -1110,7 +1125,7 @@ filt_user(struct knote *kn, __unused long hint) } static void -filt_usertouch(struct knote *kn, struct kevent64_s *kev, long type) +filt_usertouch(struct knote *kn, struct kevent_internal_s *kev, long type) { uint32_t ffctrl; switch (type) { @@ -1170,10 +1185,9 @@ kqueue_alloc(struct proc *p) MALLOC_ZONE(kq, struct kqueue *, sizeof (struct kqueue), M_KQUEUE, M_WAITOK); if (kq != NULL) { - wait_queue_set_t wqs; + struct waitq_set *wqs; - wqs = wait_queue_set_alloc(SYNC_POLICY_FIFO | - SYNC_POLICY_PREPOST); + wqs = waitq_set_alloc(SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST | SYNC_POLICY_DISABLE_IRQ); if (wqs != NULL) { bzero(kq, sizeof (struct kqueue)); lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr); @@ -1213,11 +1227,17 @@ kqueue_alloc(struct proc *p) void kqueue_dealloc(struct kqueue *kq) { - struct proc *p = kq->kq_p; - struct filedesc *fdp = p->p_fd; + struct proc *p; + struct filedesc *fdp; struct knote *kn; int i; + if (kq == NULL) + return; + + p = kq->kq_p; + fdp = p->p_fd; + proc_fdlock(p); for (i = 0; i < fdp->fd_knlistsize; i++) { kn = SLIST_FIRST(&fdp->fd_knlist[i]); @@ -1262,11 +1282,11 @@ kqueue_dealloc(struct kqueue *kq) proc_fdunlock(p); /* - * before freeing the wait queue set for this kqueue, - * make sure it is unlinked from all its containing (select) sets. + * waitq_set_free() clears all preposts and also remove the KQ's + * waitq set from any select sets to which it may belong. 
*/ - wait_queue_unlink_all((wait_queue_t)kq->kq_wqs); - wait_queue_set_free(kq->kq_wqs); + waitq_set_free(kq->kq_wqs); + kq->kq_wqs = NULL; lck_spin_destroy(&kq->kq_lock, kq_lck_grp); FREE_ZONE(kq, sizeof (struct kqueue), M_KQUEUE); } @@ -1311,43 +1331,77 @@ kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval) } static int -kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p, - int iskev64) +kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp, struct proc *p, + unsigned int flags) { int advance; int error; - if (iskev64) { - advance = sizeof (struct kevent64_s); - error = copyin(*addrp, (caddr_t)kevp, advance); - } else if (IS_64BIT_PROCESS(p)) { - struct user64_kevent kev64; - bzero(kevp, sizeof (struct kevent64_s)); + if (flags & KEVENT_FLAG_LEGACY32) { + bzero(kevp, sizeof (*kevp)); - advance = sizeof (kev64); + if (IS_64BIT_PROCESS(p)) { + struct user64_kevent kev64; + + advance = sizeof (kev64); + error = copyin(*addrp, (caddr_t)&kev64, advance); + if (error) + return (error); + kevp->ident = kev64.ident; + kevp->filter = kev64.filter; + kevp->flags = kev64.flags; + kevp->udata = kev64.udata; + kevp->fflags = kev64.fflags; + kevp->data = kev64.data; + } else { + struct user32_kevent kev32; + + advance = sizeof (kev32); + error = copyin(*addrp, (caddr_t)&kev32, advance); + if (error) + return (error); + kevp->ident = (uintptr_t)kev32.ident; + kevp->filter = kev32.filter; + kevp->flags = kev32.flags; + kevp->udata = CAST_USER_ADDR_T(kev32.udata); + kevp->fflags = kev32.fflags; + kevp->data = (intptr_t)kev32.data; + } + } else if (flags & KEVENT_FLAG_LEGACY64) { + struct kevent64_s kev64; + + bzero(kevp, sizeof (*kevp)); + + advance = sizeof (struct kevent64_s); error = copyin(*addrp, (caddr_t)&kev64, advance); if (error) - return (error); + return(error); kevp->ident = kev64.ident; kevp->filter = kev64.filter; kevp->flags = kev64.flags; + kevp->udata = kev64.udata; kevp->fflags = kev64.fflags; kevp->data = 
kev64.data; - kevp->udata = kev64.udata; + kevp->ext[0] = kev64.ext[0]; + kevp->ext[1] = kev64.ext[1]; + } else { - struct user32_kevent kev32; - bzero(kevp, sizeof (struct kevent64_s)); + struct kevent_qos_s kevqos; - advance = sizeof (kev32); - error = copyin(*addrp, (caddr_t)&kev32, advance); + bzero(kevp, sizeof (*kevp)); + + advance = sizeof (struct kevent_qos_s); + error = copyin(*addrp, (caddr_t)&kevqos, advance); if (error) - return (error); - kevp->ident = (uintptr_t)kev32.ident; - kevp->filter = kev32.filter; - kevp->flags = kev32.flags; - kevp->fflags = kev32.fflags; - kevp->data = (intptr_t)kev32.data; - kevp->udata = CAST_USER_ADDR_T(kev32.udata); + return error; + kevp->ident = kevqos.ident; + kevp->filter = kevqos.filter; + kevp->flags = kevqos.flags; + kevp->udata = kevqos.udata; + kevp->fflags = kevqos.fflags; + kevp->data = kevqos.data; + kevp->ext[0] = kevqos.ext[0]; + kevp->ext[1] = kevqos.ext[1]; } if (!error) *addrp += advance; @@ -1355,46 +1409,85 @@ kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p, } static int -kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p, - int iskev64) +kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp, struct proc *p, + unsigned int flags) { + user_addr_t addr = *addrp; int advance; int error; - if (iskev64) { - advance = sizeof (struct kevent64_s); - error = copyout((caddr_t)kevp, *addrp, advance); - } else if (IS_64BIT_PROCESS(p)) { - struct user64_kevent kev64; + if (flags & KEVENT_FLAG_LEGACY32) { + assert((flags & KEVENT_FLAG_STACK_EVENTS) == 0); - /* - * deal with the special case of a user-supplied - * value of (uintptr_t)-1. - */ - kev64.ident = (kevp->ident == (uintptr_t)-1) ? - (uint64_t)-1LL : (uint64_t)kevp->ident; + if (IS_64BIT_PROCESS(p)) { + struct user64_kevent kev64; + + /* + * deal with the special case of a user-supplied + * value of (uintptr_t)-1. + */ + kev64.ident = (kevp->ident == (uintptr_t)-1) ? 
+ (uint64_t)-1LL : (uint64_t)kevp->ident; + + kev64.filter = kevp->filter; + kev64.flags = kevp->flags; + kev64.fflags = kevp->fflags; + kev64.data = (int64_t) kevp->data; + kev64.udata = kevp->udata; + advance = sizeof (kev64); + error = copyout((caddr_t)&kev64, addr, advance); + } else { + struct user32_kevent kev32; + + kev32.ident = (uint32_t)kevp->ident; + kev32.filter = kevp->filter; + kev32.flags = kevp->flags; + kev32.fflags = kevp->fflags; + kev32.data = (int32_t)kevp->data; + kev32.udata = kevp->udata; + advance = sizeof (kev32); + error = copyout((caddr_t)&kev32, addr, advance); + } + } else if (flags & KEVENT_FLAG_LEGACY64) { + struct kevent64_s kev64; + advance = sizeof (struct kevent64_s); + if (flags & KEVENT_FLAG_STACK_EVENTS) { + addr -= advance; + } + kev64.ident = kevp->ident; kev64.filter = kevp->filter; kev64.flags = kevp->flags; kev64.fflags = kevp->fflags; kev64.data = (int64_t) kevp->data; kev64.udata = kevp->udata; - advance = sizeof (kev64); - error = copyout((caddr_t)&kev64, *addrp, advance); + kev64.ext[0] = kevp->ext[0]; + kev64.ext[1] = kevp->ext[1]; + error = copyout((caddr_t)&kev64, addr, advance); } else { - struct user32_kevent kev32; - - kev32.ident = (uint32_t)kevp->ident; - kev32.filter = kevp->filter; - kev32.flags = kevp->flags; - kev32.fflags = kevp->fflags; - kev32.data = (int32_t)kevp->data; - kev32.udata = kevp->udata; - advance = sizeof (kev32); - error = copyout((caddr_t)&kev32, *addrp, advance); + struct kevent_qos_s kevqos; + + bzero(&kevqos, sizeof (struct kevent_qos_s)); + advance = sizeof (struct kevent_qos_s); + if (flags & KEVENT_FLAG_STACK_EVENTS) { + addr -= advance; + } + kevqos.ident = kevp->ident; + kevqos.filter = kevp->filter; + kevqos.flags = kevp->flags; + kevqos.fflags = kevp->fflags; + kevqos.data = (int64_t) kevp->data; + kevqos.udata = kevp->udata; + kevqos.ext[0] = kevp->ext[0]; + kevqos.ext[1] = kevp->ext[1]; + error = copyout((caddr_t)&kevqos, addr, advance); + } + if (!error) { + if (flags & 
KEVENT_FLAG_STACK_EVENTS) + *addrp = addr; + else + *addrp = addr + advance; } - if (!error) - *addrp += advance; return (error); } @@ -1420,7 +1513,8 @@ kevent_continue(__unused struct kqueue *kq, void *data, int error) fd = cont_args->fd; fp = cont_args->fp; - fp_drop(p, fd, fp, 0); + if (fp != NULL) + fp_drop(p, fd, fp, 0); /* don't restart after signals... */ if (error == ERESTART) @@ -1439,49 +1533,147 @@ kevent_continue(__unused struct kqueue *kq, void *data, int error) int kevent(struct proc *p, struct kevent_args *uap, int32_t *retval) { - return (kevent_internal(p, - 0, - uap->changelist, - uap->nchanges, - uap->eventlist, - uap->nevents, - uap->fd, - uap->timeout, - 0, /* no flags from old kevent() call */ - retval)); + unsigned int flags = KEVENT_FLAG_LEGACY32; + + return kevent_internal(p, + uap->fd, + uap->changelist, uap->nchanges, + uap->eventlist, uap->nevents, + 0ULL, 0ULL, + flags, + uap->timeout, + kevent_continue, + retval); } int kevent64(struct proc *p, struct kevent64_args *uap, int32_t *retval) { - return (kevent_internal(p, - 1, - uap->changelist, - uap->nchanges, - uap->eventlist, - uap->nevents, - uap->fd, - uap->timeout, - uap->flags, - retval)); + unsigned int flags; + + /* restrict to user flags and set legacy64 */ + flags = uap->flags & KEVENT_FLAG_USER; + flags |= KEVENT_FLAG_LEGACY64; + + return kevent_internal(p, + uap->fd, + uap->changelist, uap->nchanges, + uap->eventlist, uap->nevents, + 0ULL, 0ULL, + flags, + uap->timeout, + kevent_continue, + retval); } +int +kevent_qos(struct proc *p, struct kevent_qos_args *uap, int32_t *retval) +{ + user_size_t usize = 0; + user_size_t ssize; + int error; + + /* restrict to user flags */ + uap->flags &= KEVENT_FLAG_USER; + + if (uap->data_available) { + if (!IS_64BIT_PROCESS(p)) { + uint32_t csize; + + error = copyin(uap->data_available, (caddr_t)&csize, sizeof(csize)); + if (error) + return error; + usize = csize; + } else { + uint64_t csize; + error = copyin(uap->data_available, 
(caddr_t)&csize, sizeof(csize)); + if (error) + return error; + usize = csize; + } + } + ssize = usize; + + error = kevent_internal(p, + uap->fd, + uap->changelist, uap->nchanges, + uap->eventlist, uap->nevents, + uap->data_out, &usize, + uap->flags, + 0ULL, + kevent_continue, + retval); + + if (error == 0 && uap->data_available && usize != ssize) { + if (!IS_64BIT_PROCESS(p)) { + uint32_t csize = (uint32_t)usize; + + error = copyout((caddr_t)&csize, uap->data_available, sizeof(csize)); + } else { + error = copyout((caddr_t)&usize, uap->data_available, sizeof(usize)); + } + } + return error; +} + +int +kevent_qos_internal(struct proc *p, int fd, + user_addr_t changelist, int nchanges, + user_addr_t eventlist, int nevents, + user_addr_t data_out, user_size_t *data_available, + unsigned int flags, + int32_t *retval) +{ + return kevent_internal(p, + fd, + changelist, nchanges, + eventlist, nevents, + data_out, data_available, + flags, + 0ULL, + NULL, + retval); +} + static int -kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, - int nchanges, user_addr_t ueventlist, int nevents, int fd, - user_addr_t utimeout, __unused unsigned int flags, - int32_t *retval) +kevent_internal(struct proc *p, + int fd, + user_addr_t changelist, int nchanges, + user_addr_t ueventlist, int nevents, + user_addr_t data_out, user_size_t *data_available, + unsigned int flags, + user_addr_t utimeout, + kqueue_continue_t continuation, + int32_t *retval) { struct _kevent *cont_args; uthread_t ut; struct kqueue *kq; - struct fileproc *fp; - struct kevent64_s kev; + struct fileproc *fp = NULL; + struct kevent_internal_s kev; int error, noutputs; struct timeval atv; - /* convert timeout to absolute - if we have one */ - if (utimeout != USER_ADDR_NULL) { +#if 1 + /* temporarily ignore these fields */ + (void)data_out; + (void)data_available; +#endif + + /* prepare to deal with stack-wise allocation of out events */ + if (flags & KEVENT_FLAG_STACK_EVENTS) { + int scale = ((flags & 
KEVENT_FLAG_LEGACY32) ? + (IS_64BIT_PROCESS(p) ? sizeof(struct user64_kevent) : + sizeof(struct user32_kevent)) : + ((flags & KEVENT_FLAG_LEGACY64) ? sizeof(struct kevent64_s) : + sizeof(struct kevent_qos_s))); + ueventlist += nevents * scale; + } + + /* convert timeout to absolute - if we have one (and not immediate) */ + if (flags & KEVENT_FLAG_IMMEDIATE) { + getmicrouptime(&atv); + } else if (utimeout != USER_ADDR_NULL) { struct timeval rtv; if (IS_64BIT_PROCESS(p)) { struct user64_timespec ts; @@ -1502,32 +1694,72 @@ kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, getmicrouptime(&atv); timevaladd(&atv, &rtv); } else { + /* wait forever value */ atv.tv_sec = 0; atv.tv_usec = 0; } - /* get a usecount for the kq itself */ - if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0) - return (error); + if (flags & KEVENT_FLAG_WORKQ) { + /* + * use the private kq associated with the proc workq. + * Just being a thread within the process (and not + * being the exit/exec thread) is enough to hold a + * reference on this special kq. + */ + kq = p->p_wqkqueue; + if (kq == NULL) { + struct kqueue *alloc_kq = kqueue_alloc(p); + if (alloc_kq == NULL) + return ENOMEM; + + proc_fdlock(p); + if (p->p_wqkqueue == NULL) { + /* + * The kq is marked as special - + * with unique interactions with + * the workq for this process. 
+ */ + alloc_kq->kq_state |= KQ_WORKQ; + kq = p->p_wqkqueue = alloc_kq; + proc_fdunlock(p); + } else { + proc_fdunlock(p); + kq = p->p_wqkqueue; + kqueue_dealloc(alloc_kq); + } + } + } else { + /* get a usecount for the kq itself */ + if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0) + return (error); + } /* each kq should only be used for events of one type */ kqlock(kq); - if (kq->kq_state & (KQ_KEV32 | KQ_KEV64)) { - if (((iskev64 && (kq->kq_state & KQ_KEV32)) || - (!iskev64 && (kq->kq_state & KQ_KEV64)))) { + if (kq->kq_state & (KQ_KEV32 | KQ_KEV64 | KQ_KEV_QOS)) { + if (flags & KEVENT_FLAG_LEGACY32) { + if ((kq->kq_state & KQ_KEV32) == 0) { + error = EINVAL; + kqunlock(kq); + goto errorout; + } + } else if (kq->kq_state & KQ_KEV32) { error = EINVAL; kqunlock(kq); goto errorout; } + } else if (flags & KEVENT_FLAG_LEGACY32) { + kq->kq_state |= KQ_KEV32; } else { - kq->kq_state |= (iskev64 ? KQ_KEV64 : KQ_KEV32); + /* JMM - set KQ_KEVQOS when we are ready for exclusive */ + kq->kq_state |= KQ_KEV64; } kqunlock(kq); /* register all the change requests the user provided... 
*/ noutputs = 0; while (nchanges > 0 && error == 0) { - error = kevent_copyin(&changelist, &kev, p, iskev64); + error = kevent_copyin(&changelist, &kev, p, flags); if (error) break; @@ -1536,7 +1768,7 @@ kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, if ((error || (kev.flags & EV_RECEIPT)) && nevents > 0) { kev.flags = EV_ERROR; kev.data = error; - error = kevent_copyout(&kev, &ueventlist, p, iskev64); + error = kevent_copyout(&kev, &ueventlist, p, flags); if (error == 0) { nevents--; noutputs++; @@ -1545,25 +1777,40 @@ kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, nchanges--; } - /* store the continuation/completion data in the uthread */ - ut = (uthread_t)get_bsdthread_info(current_thread()); - cont_args = &ut->uu_kevent.ss_kevent; - cont_args->fp = fp; - cont_args->fd = fd; - cont_args->retval = retval; - cont_args->eventlist = ueventlist; - cont_args->eventcount = nevents; - cont_args->eventout = noutputs; - cont_args->eventsize = iskev64; + /* short-circuit the scan if we only want error events */ + if (flags & KEVENT_FLAG_ERROR_EVENTS) + nevents = 0; + + if (nevents > 0 && noutputs == 0 && error == 0) { + + /* store the continuation/completion data in the uthread */ + ut = (uthread_t)get_bsdthread_info(current_thread()); + cont_args = &ut->uu_kevent.ss_kevent; + cont_args->fp = fp; + cont_args->fd = fd; + cont_args->retval = retval; + cont_args->eventlist = ueventlist; + cont_args->eventcount = nevents; + cont_args->eventout = noutputs; + cont_args->eventflags = flags; - if (nevents > 0 && noutputs == 0 && error == 0) error = kqueue_scan(kq, kevent_callback, - kevent_continue, cont_args, - &atv, p); - kevent_continue(kq, cont_args, error); + continuation, cont_args, + &atv, p); + + noutputs = cont_args->eventout; + } + /* don't restart after signals... 
*/ + if (error == ERESTART) + error = EINTR; + else if (error == EWOULDBLOCK) + error = 0; + if (error == 0) + *retval = noutputs; errorout: - fp_drop(p, fd, fp, 0); + if (fp != NULL) + fp_drop(p, fd, fp, 0); return (error); } @@ -1575,23 +1822,20 @@ kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, * caller holds a reference on the kqueue */ static int -kevent_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, +kevent_callback(__unused struct kqueue *kq, struct kevent_internal_s *kevp, void *data) { struct _kevent *cont_args; int error; - int iskev64; cont_args = (struct _kevent *)data; assert(cont_args->eventout < cont_args->eventcount); - iskev64 = cont_args->eventsize; - /* * Copy out the appropriate amount of event data for this user. */ error = kevent_copyout(kevp, &cont_args->eventlist, current_proc(), - iskev64); + cont_args->eventflags); /* * If there isn't space for additional events, return @@ -1605,23 +1849,23 @@ kevent_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, /* * kevent_description - format a description of a kevent for diagnostic output * - * called with a 128-byte string buffer + * called with a 256-byte string buffer */ char * -kevent_description(struct kevent64_s *kevp, char *s, size_t n) +kevent_description(struct kevent_internal_s *kevp, char *s, size_t n) { snprintf(s, n, "kevent=" - "{.ident=%#llx, .filter=%d, .flags=%#x, .fflags=%#x, .data=%#llx, .udata=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}", + "{.ident=%#llx, .filter=%d, .flags=%#x, .udata=%#llx, .fflags=%#x, .data=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}", kevp->ident, kevp->filter, kevp->flags, + kevp->udata, kevp->fflags, kevp->data, - kevp->udata, kevp->ext[0], - kevp->ext[1]); + kevp->ext[1] ); return (s); } @@ -1641,7 +1885,7 @@ kevent_description(struct kevent64_s *kevp, char *s, size_t n) */ int -kevent_register(struct kqueue *kq, struct kevent64_s *kev, +kevent_register(struct kqueue *kq, struct kevent_internal_s *kev, __unused struct 
proc *ctxp) { struct proc *p = kq->kq_p; @@ -1649,6 +1893,7 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, struct filterops *fops; struct fileproc *fp = NULL; struct knote *kn = NULL; + struct klist *list; int error = 0; if (kev->filter < 0) { @@ -1656,43 +1901,48 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, return (EINVAL); fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ } else { - /* - * XXX - * filter attach routine is responsible for insuring that - * the identifier can be attached to it. - */ - printf("unknown filter: %d\n", kev->filter); return (EINVAL); } restart: /* this iocount needs to be dropped if it is not registered */ + list = NULL; proc_fdlock(p); - if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 1)) != 0) { - proc_fdunlock(p); - return (error); - } + /* + * determine where to look for the knote + */ if (fops->f_isfd) { + if ((error = fp_lookup(p, kev->ident, &fp, 1)) != 0) { + proc_fdunlock(p); + return (error); + } /* fd-based knotes are linked off the fd table */ if (kev->ident < (u_int)fdp->fd_knlistsize) { - SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link) - if (kq == kn->kn_kq && - kev->filter == kn->kn_filter) - break; + list = &fdp->fd_knlist[kev->ident]; } - } else { + } else if (fdp->fd_knhashmask != 0) { /* hash non-fd knotes here too */ - if (fdp->fd_knhashmask != 0) { - struct klist *list; - - list = &fdp->fd_knhash[ - KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)]; - SLIST_FOREACH(kn, list, kn_link) - if (kev->ident == kn->kn_id && - kq == kn->kn_kq && - kev->filter == kn->kn_filter) - break; + list = &fdp->fd_knhash[KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)]; + } + + /* + * scan the selected list looking for a match + */ + if (list != NULL) { + SLIST_FOREACH(kn, list, kn_link) { + if (kq == kn->kn_kq && + kev->ident == kn->kn_id && + kev->filter == kn->kn_filter) { + if (kev->flags & EV_UDATA_SPECIFIC) { + if ((kn->kn_flags & EV_UDATA_SPECIFIC) && + kev->udata == 
kn->kn_udata) { + break; /* matching udata-specific knote */ + } + } else if ((kn->kn_flags & EV_UDATA_SPECIFIC) == 0) { + break; /* matching non-udata-specific knote */ + } + } } } @@ -1775,11 +2025,23 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, proc_fdunlock(p); if (kev->flags & EV_DELETE) { - knote_dequeue(kn); - kn->kn_status |= KN_DISABLED; - if (kqlock2knotedrop(kq, kn)) { - kn->kn_fop->f_detach(kn); - knote_drop(kn, p); + if ((kev->flags & EV_ENABLE) == 0 && + (kev->flags & EV_DISPATCH2) == EV_DISPATCH2 && + (kn->kn_status & KN_DISABLED) == KN_DISABLED) { + /* mark for deferred drop */ + kn->kn_status |= KN_DEFERDROP; + kqunlock(kq); + error = EINPROGRESS; + } else { + knote_dequeue(kn); + kn->kn_status |= KN_DISABLED; + if (kqlock2knotedrop(kq, kn)) { + kn->kn_fop->f_detach(kn); + knote_drop(kn, p); + } else { + /* pretend we didn't find it */ + error = ENOENT; + } } goto done; } @@ -1788,10 +2050,24 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, if (kev->flags & EV_DISABLE) { knote_dequeue(kn); kn->kn_status |= KN_DISABLED; - } else if (kev->flags & EV_ENABLE) { + + } else if ((kev->flags & EV_ENABLE) && + (kn->kn_status & KN_DISABLED)) { kn->kn_status &= ~KN_DISABLED; - if (kn->kn_status & KN_ACTIVE) - knote_enqueue(kn); + + /* handle deferred drop */ + if (kn->kn_status & KN_DEFERDROP) { + kn->kn_status &= ~KN_DEFERDROP; + kn->kn_flags |= (EV_DELETE | EV_ONESHOT); + knote_activate(kn, 0); + kqunlock(kq); + goto done; + } + + if (kn->kn_status & KN_ACTIVE) { + /* force re-activate if previously active */ + knote_activate(kn, 1); + } } /* @@ -1828,13 +2104,21 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, /* still have use ref on knote */ /* - * If the knote is not marked to always stay enqueued, - * invoke the filter routine to see if it should be - * enqueued now. + * Invoke the filter routine to see if it should be enqueued now. 
+ */ +#if 0 + if (kn->kn_fop->f_event(kn, 0)) { +#else + /* + * JMM - temporary workaround until rdar://problem/19986199 + * This potentially results in extra wakeups for KN_STAYQUEUED event types, + * but waking up only truly active ones (yet trying below to determine + * active status, by invoking the filter routine, is having side-effects). */ - if ((kn->kn_status & KN_STAYQUEUED) == 0 && kn->kn_fop->f_event(kn, 0)) { + if ((kn->kn_status & KN_STAYQUEUED) || kn->kn_fop->f_event(kn, 0)) { +#endif if (knoteuse2kqlock(kq, kn)) - knote_activate(kn, 1); + knote_activate(kn, (kn->kn_status & KN_STAYQUEUED)); kqunlock(kq); } else { knote_put(kn); @@ -1868,7 +2152,7 @@ knote_process(struct knote *kn, struct proc *p) { struct kqueue *kq = kn->kn_kq; - struct kevent64_s kev; + struct kevent_internal_s kev; int touch; int result; int error; @@ -1916,6 +2200,9 @@ knote_process(struct knote *kn, kn->kn_fop->f_touch(kn, &kev, EVENT_PROCESS); } + if (result && (kn->kn_status & KN_TOUCH)) + kn->kn_fop->f_touch(kn, &kev, + EVENT_PROCESS); /* * convert back to a kqlock - bail if the knote @@ -1928,9 +2215,7 @@ knote_process(struct knote *kn, * if revalidated as alive, make sure * it's active */ - if (!(kn->kn_status & KN_ACTIVE)) { - knote_activate(kn, 0); - } + knote_activate(kn, 0); /* * capture all events that occurred @@ -1964,7 +2249,9 @@ knote_process(struct knote *kn, /* * Determine how to dispatch the knote for future event handling. * not-fired: just return (do not callout). - * One-shot: deactivate it. + * One-shot: If dispatch2, enter deferred-delete mode (unless this is + * is the deferred delete event delivery itself). Otherwise, + * deactivate and drop it. * Clear: deactivate and clear the state. * Dispatch: don't clear state, just deactivate it and mark it disabled. * All others: just leave where they are. 
@@ -1974,7 +2261,11 @@ knote_process(struct knote *kn, return (EJUSTRETURN); } else if ((kn->kn_flags & EV_ONESHOT) != 0) { knote_deactivate(kn); - if (kqlock2knotedrop(kq, kn)) { + if ((kn->kn_flags & (EV_DISPATCH2|EV_DELETE)) == EV_DISPATCH2) { + /* defer dropping non-delete oneshot dispatch2 events */ + kn->kn_status |= (KN_DISABLED | KN_DEFERDROP); + kqunlock(kq); + } else if (kqlock2knotedrop(kq, kn)) { kn->kn_fop->f_detach(kn); knote_drop(kn, p); } @@ -2027,8 +2318,9 @@ kqueue_begin_processing(struct kqueue *kq) /* if someone else is processing the queue, wait */ if (kq->kq_nprocess != 0) { - wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, - &kq->kq_nprocess, THREAD_UNINT, 0); + waitq_assert_wait64((struct waitq *)kq->kq_wqs, + CAST_EVENT64_T(&kq->kq_nprocess), + THREAD_UNINT, TIMEOUT_WAIT_FOREVER); kq->kq_state |= KQ_PROCWAIT; kqunlock(kq); thread_block(THREAD_CONTINUE_NULL); @@ -2049,8 +2341,10 @@ kqueue_end_processing(struct kqueue *kq) kq->kq_nprocess = 0; if (kq->kq_state & KQ_PROCWAIT) { kq->kq_state &= ~KQ_PROCWAIT; - wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, - &kq->kq_nprocess, THREAD_AWAKENED); + waitq_wakeup64_all((struct waitq *)kq->kq_wqs, + CAST_EVENT64_T(&kq->kq_nprocess), + THREAD_AWAKENED, + WAITQ_ALL_PRIORITIES); } } @@ -2093,7 +2387,7 @@ kqueue_process(struct kqueue *kq, * Clear any pre-posted status from previous runs, so we * only detect events that occur during this run. 
*/ - wait_queue_sub_clearrefs(kq->kq_wqs); + waitq_set_clear_preposts(kq->kq_wqs); /* * loop through the enqueued knotes, processing each one and @@ -2148,8 +2442,9 @@ kqueue_scan_continue(void *data, wait_result_t wait_result) error = kqueue_process(kq, cont_args->call, cont_args, &count, current_proc()); if (error == 0 && count == 0) { - wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, - KQ_EVENT, THREAD_ABORTSAFE, cont_args->deadline); + waitq_assert_wait64((struct waitq *)kq->kq_wqs, + KQ_EVENT, THREAD_ABORTSAFE, + cont_args->deadline); kq->kq_state |= KQ_SLEEP; kqunlock(kq); thread_block_parameter(kqueue_scan_continue, kq); @@ -2253,9 +2548,10 @@ kqueue_scan(struct kqueue *kq, } /* go ahead and wait */ - wait_queue_assert_wait_with_leeway((wait_queue_t)kq->kq_wqs, - KQ_EVENT, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, - deadline, 0); + waitq_assert_wait64_leeway((struct waitq *)kq->kq_wqs, + KQ_EVENT, THREAD_ABORTSAFE, + TIMEOUT_URGENCY_USER_NORMAL, + deadline, TIMEOUT_NO_LEEWAY); kq->kq_state |= KQ_SLEEP; kqunlock(kq); wait_result = thread_block_parameter(cont, kq); @@ -2315,7 +2611,7 @@ kqueue_ioctl(__unused struct fileproc *fp, /*ARGSUSED*/ static int -kqueue_select(struct fileproc *fp, int which, void *wql, +kqueue_select(struct fileproc *fp, int which, void *wq_link_id, __unused vfs_context_t ctx) { struct kqueue *kq = (struct kqueue *)fp->f_data; @@ -2337,13 +2633,28 @@ kqueue_select(struct fileproc *fp, int which, void *wql, * catch events from KN_STAYQUEUED sources. So we do the linkage manually. * (The select() call will unlink them when it ends). 
*/ - if (wql != NULL) { + if (wq_link_id != NULL) { thread_t cur_act = current_thread(); struct uthread * ut = get_bsdthread_info(cur_act); kq->kq_state |= KQ_SEL; - wait_queue_link_noalloc((wait_queue_t)kq->kq_wqs, ut->uu_wqset, - (wait_queue_link_t)wql); + waitq_link((struct waitq *)kq->kq_wqs, ut->uu_wqset, + WAITQ_SHOULD_LOCK, (uint64_t *)wq_link_id); + + /* always consume the reserved link object */ + waitq_link_release(*(uint64_t *)wq_link_id); + *(uint64_t *)wq_link_id = 0; + + /* + * selprocess() is expecting that we send it back the waitq + * that was just added to the thread's waitq set. In order + * to not change the selrecord() API (which is exported to + * kexts), we pass this value back through the + * void *wq_link_id pointer we were passed. We need to use + * memcpy here because the pointer may not be properly aligned + * on 32-bit systems. + */ + memcpy(wq_link_id, (void *)&(kq->kq_wqs), sizeof(void *)); } if (kqueue_begin_processing(kq) == -1) { @@ -2487,20 +2798,28 @@ kqueue_stat(struct kqueue *kq, void *ub, int isstat64, proc_t p) bzero((void *)sb64, sizeof(*sb64)); sb64->st_size = kq->kq_count; - if (kq->kq_state & KQ_KEV64) + if (kq->kq_state & KQ_KEV_QOS) + sb64->st_blksize = sizeof(struct kevent_qos_s); + else if (kq->kq_state & KQ_KEV64) sb64->st_blksize = sizeof(struct kevent64_s); + else if (IS_64BIT_PROCESS(p)) + sb64->st_blksize = sizeof(struct user64_kevent); else - sb64->st_blksize = IS_64BIT_PROCESS(p) ? 
sizeof(struct user64_kevent) : sizeof(struct user32_kevent); + sb64->st_blksize = sizeof(struct user32_kevent); sb64->st_mode = S_IFIFO; } else { struct stat *sb = (struct stat *)ub; bzero((void *)sb, sizeof(*sb)); sb->st_size = kq->kq_count; - if (kq->kq_state & KQ_KEV64) + if (kq->kq_state & KQ_KEV_QOS) + sb->st_blksize = sizeof(struct kevent_qos_s); + else if (kq->kq_state & KQ_KEV64) sb->st_blksize = sizeof(struct kevent64_s); + else if (IS_64BIT_PROCESS(p)) + sb->st_blksize = sizeof(struct user64_kevent); else - sb->st_blksize = IS_64BIT_PROCESS(p) ? sizeof(struct user64_kevent) : sizeof(struct user32_kevent); + sb->st_blksize = sizeof(struct user32_kevent); sb->st_mode = S_IFIFO; } kqunlock(kq); @@ -2513,10 +2832,31 @@ kqueue_stat(struct kqueue *kq, void *ub, int isstat64, proc_t p) static void kqueue_wakeup(struct kqueue *kq, int closed) { + wait_result_t res = THREAD_NOT_WAITING; + if ((kq->kq_state & (KQ_SLEEP | KQ_SEL)) != 0 || kq->kq_nprocess > 0) { kq->kq_state &= ~(KQ_SLEEP | KQ_SEL); - wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, KQ_EVENT, - (closed) ? THREAD_INTERRUPTED : THREAD_AWAKENED); + res = waitq_wakeup64_all((struct waitq *)kq->kq_wqs, KQ_EVENT, + (closed) ? THREAD_INTERRUPTED : THREAD_AWAKENED, + WAITQ_ALL_PRIORITIES); + } + + /* request additional workq threads if appropriate */ + if (res == THREAD_NOT_WAITING && (kq->kq_state & KQ_WORKQ) && + pthread_functions != NULL && pthread_functions->workq_reqthreads != NULL) { + /* + * The special workq kq should be accumulating the counts of + * queued sources on a pthread_priority_t basis and we should + * be providing that here. For now, just hard-code a single + * entry request at a fixed (default) QOS. 
+ */ + struct workq_reqthreads_req_s request = { + .priority = 0x020004ff, /* legacy event manager */ + .count = kq->kq_count }; + thread_t wqthread; + + wqthread = (*pthread_functions->workq_reqthreads)(kq->kq_p, 1, &request); + assert(wqthread == THREAD_NULL); } } @@ -2556,7 +2896,7 @@ knote(struct klist *list, long hint) /* if its not going away and triggered */ if (knoteuse2kqlock(kq, kn) && result) - knote_activate(kn, 1); + knote_activate(kn, 0); /* lock held again */ } kqunlock(kq); @@ -2597,12 +2937,12 @@ knote_detach(struct klist *list, struct knote *kn) * caller provides the wait queue link structure. */ int -knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t wql) +knote_link_waitq(struct knote *kn, struct waitq *wq, uint64_t *reserved_link) { struct kqueue *kq = kn->kn_kq; kern_return_t kr; - kr = wait_queue_link_noalloc(wq, kq->kq_wqs, wql); + kr = waitq_link(wq, kq->kq_wqs, WAITQ_SHOULD_LOCK, reserved_link); if (kr == KERN_SUCCESS) { knote_markstayqueued(kn); return (0); @@ -2621,12 +2961,12 @@ knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t * On success, caller is responsible for the link structure */ int -knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t *wqlp) +knote_unlink_waitq(struct knote *kn, struct waitq *wq) { struct kqueue *kq = kn->kn_kq; kern_return_t kr; - kr = wait_queue_unlink_nofree(wq, kq->kq_wqs, wqlp); + kr = waitq_unlink(wq, kq->kq_wqs); knote_clearstayqueued(kn); return ((kr != KERN_SUCCESS) ? 
EINVAL : 0); } @@ -2753,8 +3093,10 @@ knote_drop(struct knote *kn, __unused struct proc *ctxp) proc_fdunlock(p); if (needswakeup) - wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status, - THREAD_AWAKENED); + waitq_wakeup64_all((struct waitq *)kq->kq_wqs, + CAST_EVENT64_T(&kn->kn_status), + THREAD_AWAKENED, + WAITQ_ALL_PRIORITIES); if (kn->kn_fop->f_isfd) fp_drop(p, kn->kn_id, kn->kn_fp, 0); @@ -2764,17 +3106,19 @@ knote_drop(struct knote *kn, __unused struct proc *ctxp) /* called with kqueue lock held */ static void -knote_activate(struct knote *kn, int propagate) +knote_activate(struct knote *kn, int force) { struct kqueue *kq = kn->kn_kq; + if (!force && (kn->kn_status & KN_ACTIVE)) + return; + kn->kn_status |= KN_ACTIVE; knote_enqueue(kn); kqueue_wakeup(kq, 0); - /* this is a real event: wake up the parent kq, too */ - if (propagate) - KNOTE(&kq->kq_sel.si_note, 0); + /* wake up the parent kq, too */ + KNOTE(&kq->kq_sel.si_note, 0); } /* called with kqueue lock held */ @@ -3495,15 +3839,17 @@ fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo) st = &kinfo->kq_stat; st->vst_size = kq->kq_count; - if (kq->kq_state & KQ_KEV64) + if (kq->kq_state & KQ_KEV_QOS) + st->vst_blksize = sizeof(struct kevent_qos_s); + else if (kq->kq_state & KQ_KEV64) st->vst_blksize = sizeof(struct kevent64_s); else st->vst_blksize = sizeof(struct kevent); st->vst_mode = S_IFIFO; - if (kq->kq_state & KQ_SEL) - kinfo->kq_state |= PROC_KQUEUE_SELECT; - if (kq->kq_state & KQ_SLEEP) - kinfo->kq_state |= PROC_KQUEUE_SLEEP; + + /* flags exported to libproc as PROC_KQUEUE_* (sys/proc_info.h) */ +#define PROC_KQUEUE_MASK (KQ_SEL|KQ_SLEEP|KQ_KEV32|KQ_KEV64|KQ_KEV_QOS) + kinfo->kq_state = kq->kq_state & PROC_KQUEUE_MASK; return (0); } @@ -3526,3 +3872,95 @@ knote_clearstayqueued(struct knote *kn) knote_dequeue(kn); kqunlock(kn->kn_kq); } + +static unsigned long +kevent_extinfo_emit(struct kqueue *kq, struct knote *kn, struct kevent_extinfo *buf, + unsigned long buflen, unsigned 
long nknotes) +{ + struct kevent_qos_s kevqos; + struct kevent_internal_s *kevp; + for (; kn; kn = SLIST_NEXT(kn, kn_link)) { + if (kq == kn->kn_kq) { + if (nknotes < buflen) { + struct kevent_extinfo *info = &buf[nknotes]; + + kqlock(kq); + bzero(&kevqos, sizeof(kevqos)); + kevp = &(kn->kn_kevent); + + kevqos.ident = kevp->ident; + kevqos.filter = kevp->filter; + kevqos.flags = kevp->flags; + kevqos.fflags = kevp->fflags; + kevqos.data = (int64_t) kevp->data; + kevqos.udata = kevp->udata; + kevqos.ext[0] = kevp->ext[0]; + kevqos.ext[1] = kevp->ext[1]; + + memcpy(&info->kqext_kev, &kevqos, sizeof(info->kqext_kev)); + info->kqext_sdata = kn->kn_sdata; + + /* status flags exported to userspace/libproc */ +#define KQEXT_STATUS_MASK (KN_ACTIVE|KN_QUEUED|KN_DISABLED|KN_STAYQUEUED) + info->kqext_status = kn->kn_status & KQEXT_STATUS_MASK; + info->kqext_sfflags = kn->kn_sfflags; + + kqunlock(kq); + } + + /* we return total number of knotes, which may be more than requested */ + nknotes++; + } + } + + return nknotes; +} + +int +pid_kqueue_extinfo(proc_t p, struct kqueue *kq, user_addr_t ubuf, + uint32_t bufsize, int32_t *retval) +{ + struct knote *kn; + int i; + int err = 0; + struct filedesc *fdp = p->p_fd; + unsigned long nknotes = 0; + unsigned long buflen = bufsize / sizeof(struct kevent_extinfo); + struct kevent_extinfo *kqext = NULL; + + kqext = kalloc(buflen * sizeof(struct kevent_extinfo)); + if (kqext == NULL) { + err = ENOMEM; + goto out; + } + bzero(kqext, buflen * sizeof(struct kevent_extinfo)); + + proc_fdlock(p); + + for (i = 0; i < fdp->fd_knlistsize; i++) { + kn = SLIST_FIRST(&fdp->fd_knlist[i]); + nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes); + } + + if (fdp->fd_knhashmask != 0) { + for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) { + kn = SLIST_FIRST(&fdp->fd_knhash[i]); + nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes); + } + } + + proc_fdunlock(p); + + assert(bufsize >= sizeof(struct kevent_extinfo) * min(buflen, 
nknotes)); + err = copyout(kqext, ubuf, sizeof(struct kevent_extinfo) * min(buflen, nknotes)); + + out: + if (kqext) { + kfree(kqext, buflen * sizeof(struct kevent_extinfo)); + kqext = NULL; + } + + if (!err) + *retval = nknotes; + return err; +} diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c index fc270ae21..e2e7d1526 100644 --- a/bsd/kern/kern_exec.c +++ b/bsd/kern/kern_exec.c @@ -172,9 +172,8 @@ static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL; #endif /* support for child creation in exec after vfork */ -thread_t fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child_proc, int inherit_memory, int is64bit); +thread_t fork_create_child(task_t parent_task, coalition_t *parent_coalition, proc_t child_proc, int inherit_memory, int is64bit); void vfork_exit(proc_t p, int rv); -int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart); extern void proc_apply_task_networkbg_internal(proc_t, thread_t); /* @@ -228,9 +227,9 @@ __attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL_ extern vm_map_t bsd_pageable_map; extern const struct fileops vnops; -#define ROUND_PTR(type, addr) \ - (type *)( ( (uintptr_t)(addr) + 16 - 1) \ - & ~(16 - 1) ) +#define USER_ADDR_ALIGN(addr, val) \ + ( ( (user_addr_t)(addr) + (val) - 1) \ + & ~((val) - 1) ) struct image_params; /* Forward */ static int exec_activate_image(struct image_params *imgp); @@ -250,7 +249,7 @@ static void exec_resettextvp(proc_t, struct image_params *); static int check_for_signature(proc_t, struct image_params *); static void exec_prefault_data(proc_t, struct image_params *, load_result_t *); static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, boolean_t * portwatch_present, ipc_port_t * portwatch_ports); -static errno_t exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, +static errno_t exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t 
psa_qos_clamp, uint64_t psa_darwin_role, ipc_port_t * portwatch_ports, int portwatch_count); /* @@ -307,6 +306,12 @@ exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolea return error; } +/* + * dyld is now passed the executable path as a getenv-like variable + * in the same fashion as the stack_guard and malloc_entropy keys. + */ +#define EXECUTABLE_KEY "executable_path=" + /* * exec_save_path * @@ -342,22 +347,26 @@ exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolea * unacceptable for dyld. */ static int -exec_save_path(struct image_params *imgp, user_addr_t path, int seg) +exec_save_path(struct image_params *imgp, user_addr_t path, int seg, const char **excpath) { int error; - size_t len; + size_t len; char *kpath; + // imgp->ip_strings can come out of a cache, so we need to obliterate the + // old path. + memset(imgp->ip_strings, '\0', strlen(EXECUTABLE_KEY) + MAXPATHLEN); + len = MIN(MAXPATHLEN, imgp->ip_strspace); switch(seg) { case UIO_USERSPACE32: case UIO_USERSPACE64: /* Same for copyin()... 
*/ - error = copyinstr(path, imgp->ip_strings, len, &len); + error = copyinstr(path, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len); break; case UIO_SYSSPACE: kpath = CAST_DOWN(char *,path); /* SAFE */ - error = copystr(kpath, imgp->ip_strings, len, &len); + error = copystr(kpath, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len); break; default: error = EFAULT; @@ -365,8 +374,15 @@ exec_save_path(struct image_params *imgp, user_addr_t path, int seg) } if (!error) { + bcopy(EXECUTABLE_KEY, imgp->ip_strings, strlen(EXECUTABLE_KEY)); + len += strlen(EXECUTABLE_KEY); + imgp->ip_strendp += len; imgp->ip_strspace -= len; + + if (excpath) { + *excpath = imgp->ip_strings + strlen(EXECUTABLE_KEY); + } } return(error); @@ -517,7 +533,7 @@ exec_shell_imgact(struct image_params *imgp) exec_reset_save_path(imgp); exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer), - UIO_SYSSPACE); + UIO_SYSSPACE, NULL); /* Copy the entire interpreter + args for later processing into argv[] */ interp = imgp->ip_interp_buffer; @@ -718,6 +734,7 @@ exec_mach_imgact(struct image_params *imgp) struct _posix_spawnattr *psa = NULL; int spawn = (imgp->ip_flags & IMGPF_SPAWN); int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC); + int p_name_len; /* * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference @@ -815,7 +832,7 @@ exec_mach_imgact(struct image_params *imgp) */ if (vfexec || spawn) { if (vfexec) { - imgp->ip_new_thread = fork_create_child(task, COALITION_NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT)); + imgp->ip_new_thread = fork_create_child(task, NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT)); if (imgp->ip_new_thread == NULL) { error = ENOMEM; goto bad; @@ -880,7 +897,7 @@ exec_mach_imgact(struct image_params *imgp) if (load_result.csflags & CS_VALID) { imgp->ip_csflags |= load_result.csflags & (CS_VALID| - CS_HARD|CS_KILL|CS_ENFORCEMENT|CS_REQUIRE_LV|CS_DYLD_PLATFORM| + CS_HARD|CS_KILL|CS_RESTRICT|CS_ENFORCEMENT|CS_REQUIRE_LV|CS_DYLD_PLATFORM| 
CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT); } else { imgp->ip_csflags &= ~CS_VALID; @@ -1011,20 +1028,22 @@ exec_mach_imgact(struct image_params *imgp) * Remember file name for accounting. */ p->p_acflag &= ~AFORK; - /* If the translated name isn't NULL, then we want to use - * that translated name as the name we show as the "real" name. - * Otherwise, use the name passed into exec. + + /* + * Set p->p_comm and p->p_name to the name passed to exec */ - if (0 != imgp->ip_p_comm[0]) { - bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm, - sizeof(p->p_comm)); - } else { - if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN) - imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN; - bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm, - (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen); - p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0'; - } + p_name_len = sizeof(p->p_name) - 1; + if(imgp->ip_ndp->ni_cnd.cn_namelen > p_name_len) + imgp->ip_ndp->ni_cnd.cn_namelen = p_name_len; + bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_name, + (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen); + p->p_name[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0'; + + if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN) + imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN; + bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm, + (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen); + p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0'; pal_dbg_set_task_name( p->task ); @@ -1095,12 +1114,6 @@ exec_mach_imgact(struct image_params *imgp) } } - /* - * Ensure the 'translated' and 'affinity' flags are cleared, since we - * no longer run PowerPC binaries. - */ - OSBitAndAtomic(~((uint32_t)(P_TRANSLATED | P_AFFINITY)), &p->p_flag); - /* * If posix_spawned with the START_SUSPENDED flag, stop the * process before it runs. 
@@ -1111,7 +1124,7 @@ exec_mach_imgact(struct image_params *imgp) proc_lock(p); p->p_stat = SSTOP; proc_unlock(p); - (void) task_suspend(p->task); + (void) task_suspend_internal(p->task); } } @@ -1218,6 +1231,7 @@ static int exec_activate_image(struct image_params *imgp) { struct nameidata *ndp = NULL; + const char *excpath; int error; int resid; int once = 1; /* save SGUID-ness for interpreted files */ @@ -1229,13 +1243,13 @@ exec_activate_image(struct image_params *imgp) if (error) goto bad_notrans; - error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg); + error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath); if (error) { goto bad_notrans; } - /* Use imgp->ip_strings, which contains the copyin-ed exec path */ - DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings); + /* Use excpath, which contains the copyin-ed exec path */ + DTRACE_PROC1(exec, uintptr_t, excpath); MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO); if (ndp == NULL) { @@ -1244,7 +1258,7 @@ exec_activate_image(struct image_params *imgp) } NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, - UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context); + UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context); again: error = namei(ndp); @@ -1339,9 +1353,9 @@ exec_activate_image(struct image_params *imgp) imgp->ip_vp = NULL; /* already put */ imgp->ip_ndp = NULL; /* already nameidone */ - /* Use imgp->ip_strings, which exec_shell_imgact reset to the interpreter */ + /* Use excpath, which exec_shell_imgact reset to the interpreter */ NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, - UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context); + UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context); proc_transend(p, 0); goto again; @@ -1387,11 +1401,12 @@ exec_activate_image(struct image_params *imgp) * Returns: 0 Success */ static errno_t -exec_handle_spawnattr_policy(proc_t p, int psa_apptype, 
uint64_t psa_qos_clamp, +exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role, ipc_port_t * portwatch_ports, int portwatch_count) { int apptype = TASK_APPTYPE_NONE; int qos_clamp = THREAD_QOS_UNSPECIFIED; + int role = TASK_UNSPECIFIED; if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) { int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK; @@ -1440,8 +1455,14 @@ exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, } } - if (psa_apptype != TASK_APPTYPE_NONE || qos_clamp != THREAD_QOS_UNSPECIFIED) { - proc_set_task_spawnpolicy(p->task, apptype, qos_clamp, + if (psa_darwin_role != PRIO_DARWIN_ROLE_DEFAULT) { + proc_darwin_role_to_task_role(psa_darwin_role, &role); + } + + if (apptype != TASK_APPTYPE_NONE || + qos_clamp != THREAD_QOS_UNSPECIFIED || + role != TASK_UNSPECIFIED) { + proc_set_task_spawnpolicy(p->task, apptype, qos_clamp, role, portwatch_ports, portwatch_count); } @@ -1850,6 +1871,62 @@ spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx) } #endif /* CONFIG_MACF */ +#if CONFIG_COALITIONS +static inline void spawn_coalitions_release_all(coalition_t coal[COALITION_NUM_TYPES]) +{ + for (int c = 0; c < COALITION_NUM_TYPES; c++) { + if (coal[c]) { + coalition_remove_active(coal[c]); + coalition_release(coal[c]); + } + } +} +#endif + +void +proc_set_return_wait(proc_t p) +{ + proc_lock(p); + p->p_lflag |= P_LRETURNWAIT; + proc_unlock(p); +} + +void +proc_clear_return_wait(proc_t p, thread_t child_thread) +{ + proc_lock(p); + + p->p_lflag &= ~P_LRETURNWAIT; + if (p->p_lflag & P_LRETURNWAITER) { + wakeup(&p->p_lflag); + } + + proc_unlock(p); + + (void)thread_resume(child_thread); +} + +void +proc_wait_to_return() +{ + proc_t p; + + p = current_proc(); + proc_lock(p); + + if (p->p_lflag & P_LRETURNWAIT) { + p->p_lflag |= P_LRETURNWAITER; + do { + msleep(&p->p_lflag, &p->p_mlock, 0, + "thread_check_setup_complete", NULL); + } while (p->p_lflag & P_LRETURNWAIT); + 
p->p_lflag &= ~P_LRETURNWAITER; + } + + proc_unlock(p); + thread_bootstrap_return(); +} + /* * posix_spawn * @@ -1890,7 +1967,6 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) struct vnode_attr *origvap; struct uthread *uthread = 0; /* compiler complains if not set to 0*/ int error, sig; - char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */ int is_64 = IS_64BIT_PROCESS(p); struct vfs_context context; struct user__posix_spawn_args_desc px_args; @@ -1926,9 +2002,9 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) imgp->ip_origvattr = origvap; imgp->ip_vfs_context = &context; imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE); - imgp->ip_p_comm = alt_p_comm; /* for PowerPC */ imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32); imgp->ip_mac_return = 0; + imgp->ip_reserved = NULL; if (uap->adesc != USER_ADDR_NULL) { if(is_64) { @@ -1950,6 +2026,10 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions); px_args.mac_extensions_size = px_args32.mac_extensions_size; px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions); + px_args.coal_info_size = px_args32.coal_info_size; + px_args.coal_info = CAST_USER_ADDR_T(px_args32.coal_info); + px_args.reserved = 0; + px_args.reserved_size = 0; } if (error) goto bad; @@ -2019,6 +2099,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) goto bad; } } + #if CONFIG_MACF if (px_args.mac_extensions_size != 0) { if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0) @@ -2049,50 +2130,110 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) */ if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){ - /* - * Set the new task's coalition, if it is requested. 
- * TODO: privilege check - 15365900 - */ - coalition_t coal = COALITION_NULL; + /* Set the new task's coalition, if it is requested. */ + coalition_t coal[COALITION_NUM_TYPES] = { COALITION_NULL }; #if CONFIG_COALITIONS - if (imgp->ip_px_sa) { - uint64_t cid = px_sa.psa_coalitionid; + int i, ncoals; + kern_return_t kr = KERN_SUCCESS; + struct _posix_spawn_coalition_info coal_info; + int coal_role[COALITION_NUM_TYPES]; + + if (imgp->ip_px_sa == NULL || !px_args.coal_info) + goto do_fork1; + + memset(&coal_info, 0, sizeof(coal_info)); + + if (px_args.coal_info_size > sizeof(coal_info)) + px_args.coal_info_size = sizeof(coal_info); + error = copyin(px_args.coal_info, + &coal_info, px_args.coal_info_size); + if (error != 0) + goto bad; + + ncoals = 0; + for (i = 0; i < COALITION_NUM_TYPES; i++) { + uint64_t cid = coal_info.psci_info[i].psci_id; if (cid != 0) { -#if COALITION_DEBUG - printf("%s: searching for coalition ID %llu\n", __func__, cid); -#endif - coal = coalition_find_and_activate_by_id(cid); - if (coal == COALITION_NULL) { -#if COALITION_DEBUG - printf("%s: could not find coalition ID %llu (perhaps it has been terminated or reaped)\n", __func__, cid); -#endif + /* + * don't allow tasks which are not in a + * privileged coalition to spawn processes + * into coalitions other than their own + */ + if (!task_is_in_privileged_coalition(p->task, i)) { + coal_dbg("ERROR: %d not in privilegd " + "coalition of type %d", + p->p_pid, i); + spawn_coalitions_release_all(coal); + error = EPERM; + goto bad; + } + + coal_dbg("searching for coalition id:%llu", cid); + /* + * take a reference and activation on the + * coalition to guard against free-while-spawn + * races + */ + coal[i] = coalition_find_and_activate_by_id(cid); + if (coal[i] == COALITION_NULL) { + coal_dbg("could not find coalition id:%llu " + "(perhaps it has been terminated or reaped)", cid); + /* + * release any other coalition's we + * may have a reference to + */ + spawn_coalitions_release_all(coal); 
error = ESRCH; goto bad; } + if (coalition_type(coal[i]) != i) { + coal_dbg("coalition with id:%lld is not of type:%d" + " (it's type:%d)", cid, i, coalition_type(coal[i])); + error = ESRCH; + goto bad; + } + coal_role[i] = coal_info.psci_info[i].psci_role; + ncoals++; } } + if (ncoals < COALITION_NUM_TYPES) { + /* + * If the user is attempting to spawn into a subset of + * the known coalition types, then make sure they have + * _at_least_ specified a resource coalition. If not, + * the following fork1() call will implicitly force an + * inheritance from 'p' and won't actually spawn the + * new task into the coalitions the user specified. + * (also the call to coalitions_set_roles will panic) + */ + if (coal[COALITION_TYPE_RESOURCE] == COALITION_NULL) { + spawn_coalitions_release_all(coal); + error = EINVAL; + goto bad; + } + } +do_fork1: #endif /* CONFIG_COALITIONS */ error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal); - if (error != 0) { - if (coal != COALITION_NULL) { #if CONFIG_COALITIONS - coalition_remove_active(coal); - coalition_release(coal); + /* set the roles of this task within each given coalition */ + if (error == 0) { + kr = coalitions_set_roles(coal, get_threadtask(imgp->ip_new_thread), coal_role); + if (kr != KERN_SUCCESS) + error = EINVAL; + } + + /* drop our references and activations - fork1() now holds them */ + spawn_coalitions_release_all(coal); #endif /* CONFIG_COALITIONS */ - } + if (error != 0) { goto bad; } imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */ spawn_no_exec = TRUE; /* used in later tests */ - if (coal != COALITION_NULL) { -#if CONFIG_COALITIONS - coalition_remove_active(coal); - coalition_release(coal); -#endif /* CONFIG_COALITIONS */ - } } if (spawn_no_exec) { @@ -2209,16 +2350,20 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) } } +#if !SECURE_KERNEL /* * Disable ASLR for the spawned process. 
- */ - /* - * But only do so if we are not embedded; embedded allows for a - * boot-arg (-disable_aslr) to deal with this (which itself is - * only honored on DEVELOPMENT or DEBUG builds of xnu). + * + * But only do so if we are not embedded + RELEASE. + * While embedded allows for a boot-arg (-disable_aslr) + * to deal with this (which itself is only honored on + * DEVELOPMENT or DEBUG builds of xnu), it is often + * useful or necessary to disable ASLR on a per-process + * basis for unit testing and debugging. */ if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR) OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag); +#endif /* !SECURE_KERNEL */ /* * Forcibly disallow execution from data pages for the spawned process @@ -2328,15 +2473,11 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) * Always treat a CPU monitor activation coming from spawn as entitled. Requiring * an entitlement to configure the monitor a certain way seems silly, since * whomever is turning it on could just as easily choose not to do so. - * - * XXX - Ignore the parameters that we get from userland. The spawnattr method of - * activating the monitor always gets the system default parameters. Once we have - * an explicit spawn SPI for configuring the defaults, we can revert this to - * respect the params passed in from userland. */ error = proc_set_task_ruse_cpu(p->task, TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC, - PROC_POLICY_CPUMON_DEFAULTS, 0, + px_sa.psa_cpumonitor_percent, + px_sa.psa_cpumonitor_interval * NSEC_PER_SEC, 0, TRUE); } } @@ -2370,11 +2511,36 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM /* Has jetsam attributes? 
*/ if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) { - memorystatus_update(p, px_sa.psa_priority, 0, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY), - TRUE, px_sa.psa_high_water_mark, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND), - (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_FATAL)); + /* + * With 2-level high-water-mark support, POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is no + * longer relevant, as background limits are described via the inactive limit slots. + * At the kernel layer, the flag is ignored. + * + * That said, however, if the POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is passed in, + * we attempt to mimic previous behavior by forcing the BG limit data into the + * inactive/non-fatal mode and force the active slots to hold system_wide/fatal mode. + * The kernel layer will flag this mapping. + */ + if (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND) { + memorystatus_update(p, px_sa.psa_priority, 0, + (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY), + TRUE, + -1, TRUE, + px_sa.psa_memlimit_inactive, FALSE, + (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND)); + } else { + memorystatus_update(p, px_sa.psa_priority, 0, + (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY), + TRUE, + px_sa.psa_memlimit_active, + (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL), + px_sa.psa_memlimit_inactive, + (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL), + (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND)); + } + } -#endif +#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM*/ } /* @@ -2422,7 +2588,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) if (error == 0 && imgp->ip_px_sa != NULL) { struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa; - exec_handle_spawnattr_policy(p, psa->psa_apptype, psa->psa_qos_clamp, + 
exec_handle_spawnattr_policy(p, psa->psa_apptype, psa->psa_qos_clamp, psa->psa_darwin_role, portwatch_ports, portwatch_count); } @@ -2479,7 +2645,6 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) FREE(imgp->ip_px_sfa, M_TEMP); if (imgp->ip_px_spa != NULL) FREE(imgp->ip_px_spa, M_TEMP); - #if CONFIG_MACF if (imgp->ip_px_smpx != NULL) spawn_free_macpolicyinfo(imgp->ip_px_smpx); @@ -2555,6 +2720,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) p->exit_thread = current_thread(); proc_unlock(p); exit1(p, 1, (int *)NULL); + proc_clear_return_wait(p, imgp->ip_new_thread); if (exec_done == FALSE) { task_deallocate(get_threadtask(imgp->ip_new_thread)); thread_deallocate(imgp->ip_new_thread); @@ -2562,6 +2728,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) } else { /* someone is doing it for us; just skip it */ proc_unlock(p); + proc_clear_return_wait(p, imgp->ip_new_thread); } } else { @@ -2574,7 +2741,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) * queue references on them, so we should be fine * with the delayed resume of the thread here. */ - (void)thread_resume(imgp->ip_new_thread); + proc_clear_return_wait(p, imgp->ip_new_thread); } } if (bufp != NULL) { @@ -2658,7 +2825,6 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) struct vnode_attr *vap; struct vnode_attr *origvap; int error; - char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */ int is_64 = IS_64BIT_PROCESS(p); struct vfs_context context; struct uthread *uthread; @@ -2686,7 +2852,6 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) imgp->ip_origvattr = origvap; imgp->ip_vfs_context = &context; imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE); - imgp->ip_p_comm = alt_p_comm; /* for PowerPC */ imgp->ip_seg = (is_64 ? 
UIO_USERSPACE64 : UIO_USERSPACE32); imgp->ip_mac_return = 0; @@ -2752,7 +2917,7 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) if (imgp->ip_flags & IMGPF_VFORK_EXEC) { vfork_return(p, retval, p->p_pid); - (void)thread_resume(imgp->ip_new_thread); + proc_clear_return_wait(p, imgp->ip_new_thread); } } else { DTRACE_PROC1(exec__failure, int, error); @@ -3929,8 +4094,101 @@ create_unix_stack(vm_map_t map, load_result_t* load_result, #include +/* + * load_init_program_at_path + * + * Description: Load the "init" program; in most cases, this will be "launchd" + * + * Parameters: p Process to call execve() to create + * the "init" program + * scratch_addr Page in p, scratch space + * path NULL terminated path + * + * Returns: KERN_SUCCESS Success + * !KERN_SUCCESS See execve/mac_execve for error codes + * + * Notes: The process that is passed in is the first manufactured + * process on the system, and gets here via bsd_ast() firing + * for the first time. This is done to ensure that bsd_init() + * has run to completion. + * + * The address map of the first manufactured process is 32 bit. + * WHEN this becomes 64b, this code will fail; it needs to be + * made 64b capable. + */ +static int +load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path) +{ + uint32_t argv[3]; + uint32_t argc = 0; + int retval[2]; + struct execve_args init_exec_args; + + /* + * Validate inputs and pre-conditions + */ + assert(p); + assert(scratch_addr); + assert(path); + + if (IS_64BIT_PROCESS(p)) { + panic("Init against 64b primordial proc not implemented"); + } + + /* + * Copy out program name. + */ + size_t path_length = strlen(path) + 1; + (void) copyout(path, scratch_addr, path_length); + + argv[argc++] = (uint32_t)scratch_addr; + scratch_addr = USER_ADDR_ALIGN(scratch_addr + path_length, 16); + + /* + * Put out first (and only) argument, similarly. + * Assumes everything fits in a page as allocated above. 
+ */ + if (boothowto & RB_SINGLE) { + const char *init_args = "-s"; + size_t init_args_length = strlen(init_args)+1; + + copyout(init_args, scratch_addr, init_args_length); + + argv[argc++] = (uint32_t)scratch_addr; + scratch_addr = USER_ADDR_ALIGN(scratch_addr + init_args_length, 16); + } + + /* + * Null-end the argument list + */ + argv[argc] = 0; + + /* + * Copy out the argument list. + */ + (void) copyout(argv, scratch_addr, sizeof(argv)); + + /* + * Set up argument block for fake call to execve. + */ + init_exec_args.fname = CAST_USER_ADDR_T(argv[0]); + init_exec_args.argp = scratch_addr; + init_exec_args.envp = USER_ADDR_NULL; + + /* + * So that init task is set with uid,gid 0 token + */ + set_security_token(p); + + return execve(p, &init_exec_args, retval); +} + static const char * init_programs[] = { +#if DEBUG + "/usr/local/sbin/launchd.debug", +#endif #if DEVELOPMENT || DEBUG + /* Remove DEBUG conditional when is fixed */ "/usr/local/sbin/launchd.development", #endif "/sbin/launchd", @@ -3950,81 +4208,71 @@ static const char * init_programs[] = { * process on the system, and gets here via bsd_ast() firing * for the first time. This is done to ensure that bsd_init() * has run to completion. + * + * In DEBUG & DEVELOPMENT builds, the launchdsuffix boot-arg + * may be used to select a specific launchd executable. As with + * the kcsuffix boot-arg, setting launchdsuffix to "" or "release" + * will force /sbin/launchd to be selected. + * + * The DEBUG kernel will continue to check for a .development + * version until is fixed. 
+ * + * Search order by build: + * + * DEBUG DEVELOPMENT RELEASE PATH + * ---------------------------------------------------------------------------------- + * 1 1 NA /usr/local/sbin/launchd.$LAUNCHDSUFFIX + * 2 NA NA /usr/local/sbin/launchd.debug + * 3 2 NA /usr/local/sbin/launchd.development + * 4 3 1 /sbin/launchd */ void load_init_program(proc_t p) { - vm_offset_t init_addr, addr; - int argc; - uint32_t argv[3]; - unsigned int i; - int error; - int retval[2]; - const char *init_program_name; - struct execve_args init_exec_args; - - init_addr = VM_MIN_ADDRESS; - (void) vm_allocate(current_map(), &init_addr, PAGE_SIZE, VM_FLAGS_ANYWHERE); - if (init_addr == 0) - init_addr++; - - for (i = 0; i < sizeof(init_programs)/sizeof(init_programs[0]); i++) { - - init_program_name = init_programs[i]; - addr = init_addr; - argc = 0; - - /* - * Copy out program name. - */ - (void) copyout(init_program_name, CAST_USER_ADDR_T(addr), strlen(init_program_name)+1); - - argv[argc++] = (uint32_t)addr; - addr += strlen(init_program_name)+1; - addr = (vm_offset_t)ROUND_PTR(char, addr); - - /* - * Put out first (and only) argument, similarly. - * Assumes everything fits in a page as allocated above. 
- */ - if (boothowto & RB_SINGLE) { - const char *init_args = "-s"; + uint32_t i; + int error; + vm_offset_t scratch_addr = VM_MIN_ADDRESS; - copyout(init_args, CAST_USER_ADDR_T(addr), strlen(init_args)+1); + (void) vm_allocate(current_map(), &scratch_addr, PAGE_SIZE, VM_FLAGS_ANYWHERE); +#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM + (void) memorystatus_init_at_boot_snapshot(); +#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ + +#if DEBUG || DEVELOPMENT + /* Check for boot-arg suffix first */ + char launchd_suffix[64]; + if (PE_parse_boot_argn("launchdsuffix", launchd_suffix, sizeof(launchd_suffix))) { + char launchd_path[128]; + boolean_t is_release_suffix = ((launchd_suffix[0] == 0) || + (strcmp(launchd_suffix, "release") == 0)); + + if (is_release_suffix) { + error = load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), "/sbin/launchd"); + if (!error) + return; + + panic("Process 1 exec of launchd.release failed, errno %d", error); + } else { + strlcpy(launchd_path, "/usr/local/sbin/launchd.", sizeof(launchd_path)); + strlcat(launchd_path, launchd_suffix, sizeof(launchd_path)); - argv[argc++] = (uint32_t)addr; - addr += strlen(init_args)+1; - addr = (vm_offset_t)ROUND_PTR(char, addr); + /* All the error data is lost in the loop below, don't + * attempt to save it. */ + if (!load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), launchd_path)) { + return; + } } + } +#endif - /* - * Null-end the argument list - */ - argv[argc] = 0; - - /* - * Copy out the argument list. - */ - (void) copyout(argv, CAST_USER_ADDR_T(addr), sizeof(argv)); - - /* - * Set up argument block for fake call to execve. 
- */ - init_exec_args.fname = CAST_USER_ADDR_T(argv[0]); - init_exec_args.argp = CAST_USER_ADDR_T((char **)addr); - init_exec_args.envp = CAST_USER_ADDR_T(0); - - /* - * So that init task is set with uid,gid 0 token - */ - set_security_token(p); - - error = execve(p, &init_exec_args, retval); + error = ENOENT; + for (i = 0; i < sizeof(init_programs)/sizeof(init_programs[0]); i++) { + error = load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), init_programs[i]); if (!error) return; } - panic("Process 1 exec of %s failed, errno %d", init_program_name, error); + panic("Process 1 exec of %s failed, errno %d", ((i == 0) ? "" : init_programs[i-1]), error); } /* @@ -4316,6 +4564,16 @@ taskgated_required(proc_t p, boolean_t *require_success) void *blob; int error; + if (cs_debug > 2) + csvnode_print_debug(p->p_textvp); + + const int can_skip_taskgated = csproc_get_platform_binary(p) && !csproc_get_platform_path(p); + if (can_skip_taskgated) { + if (cs_debug) printf("taskgated not required for: %s\n", p->p_name); + *require_success = FALSE; + return FALSE; + } + if ((p->p_csflags & CS_VALID) == 0) { *require_success = FALSE; return TRUE; @@ -4337,11 +4595,13 @@ taskgated_required(proc_t p, boolean_t *require_success) return FALSE; } + if (cs_debug) printf("taskgated required for: %s\n", p->p_name); + return TRUE; } *require_success = FALSE; - return 0; + return FALSE; } /* diff --git a/bsd/kern/kern_exit.c b/bsd/kern/kern_exit.c index 3d17f687c..fca7ab329 100644 --- a/bsd/kern/kern_exit.c +++ b/bsd/kern/kern_exit.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2011, 2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -94,6 +94,8 @@ #include #include #include +#include +#include #include #include #include @@ -113,6 +115,7 @@ #include #include #include +#include #include #include #include @@ -150,13 +153,18 @@ extern void dtrace_lazy_dofs_destroy(proc_t); #include extern boolean_t init_task_died; -extern char init_task_failure_data[]; void proc_prepareexit(proc_t p, int rv, boolean_t perf_notify); void vfork_exit(proc_t p, int rv); void vproc_exit(proc_t p); __private_extern__ void munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p); __private_extern__ void munge_user32_rusage(struct rusage *a_rusage_p, struct user32_rusage *a_user_rusage_p); static int reap_child_locked(proc_t parent, proc_t child, int deadparent, int reparentedtoinit, int locked, int droplock); +static void populate_corpse_crashinfo(proc_t p, void *crash_info_ptr, struct rusage_superset *rup, mach_exception_data_type_t code, mach_exception_data_type_t subcode); +extern int proc_pidpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); + +static __attribute__((noinline)) void launchd_crashed_panic(proc_t p, int rv); +extern void proc_piduniqidentifierinfo(proc_t p, struct proc_uniqidentifierinfo *p_uniqidinfo); + /* * Things which should have prototypes in headers, but don't @@ -222,6 +230,170 @@ copyoutsiginfo(user_siginfo_t *native, boolean_t is64, user_addr_t uaddr) } } +static void populate_corpse_crashinfo(proc_t p, void *crash_info_ptr, struct rusage_superset *rup, mach_exception_data_type_t code, mach_exception_data_type_t subcode) +{ + mach_vm_address_t uaddr = 0; + mach_exception_data_type_t exc_codes[EXCEPTION_CODE_MAX]; + exc_codes[0] = code; + exc_codes[1] = subcode; + cpu_type_t cputype; + struct proc_uniqidentifierinfo p_uniqidinfo; + struct proc_workqueueinfo pwqinfo; + int retval = 0; + uint64_t crashed_threadid = thread_tid(current_thread()); + unsigned int pflags = 0; + +#if 
CONFIG_MEMORYSTATUS + int memstat_dirty_flags = 0; +#endif + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_EXCEPTION_CODES, sizeof(exc_codes), &uaddr)) { + copyout(exc_codes, uaddr, sizeof(exc_codes)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PID, sizeof(p->p_pid), &uaddr)) { + copyout(&p->p_pid, uaddr, sizeof(p->p_pid)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PPID, sizeof(p->p_ppid), &uaddr)) { + copyout(&p->p_ppid, uaddr, sizeof(p->p_ppid)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_CRASHED_THREADID, sizeof(uint64_t), &uaddr)) { + copyout(&crashed_threadid, uaddr, sizeof(uint64_t)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_RUSAGE, sizeof(struct rusage), &uaddr)) { + copyout(&rup->ru, uaddr, sizeof(struct rusage)); + } + + if (KERN_SUCCESS == + kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_BSDINFOWITHUNIQID, sizeof(struct proc_uniqidentifierinfo), &uaddr)) { + proc_piduniqidentifierinfo(p, &p_uniqidinfo); + copyout(&p_uniqidinfo, uaddr, sizeof(struct proc_uniqidentifierinfo)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_RUSAGE_INFO, sizeof(rusage_info_current), &uaddr)) { + copyout(&rup->ri, uaddr, sizeof(rusage_info_current)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PROC_CSFLAGS, sizeof(p->p_csflags), &uaddr)) { + copyout(&p->p_csflags, uaddr, sizeof(p->p_csflags)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PROC_NAME, sizeof(p->p_comm), &uaddr)) { + copyout(&p->p_comm, uaddr, sizeof(p->p_comm)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PROC_STARTTIME, sizeof(p->p_start), &uaddr)) { + struct timeval64 t64; + t64.tv_sec = (int64_t)p->p_start.tv_sec; + t64.tv_usec = (int64_t)p->p_start.tv_usec; + 
copyout(&t64, uaddr, sizeof(t64)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_USERSTACK, sizeof(p->user_stack), &uaddr)) { + copyout(&p->user_stack, uaddr, sizeof(p->user_stack)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_ARGSLEN, sizeof(p->p_argslen), &uaddr)) { + copyout(&p->p_argslen, uaddr, sizeof(p->p_argslen)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PROC_ARGC, sizeof(p->p_argc), &uaddr)) { + copyout(&p->p_argc, uaddr, sizeof(p->p_argc)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PROC_PATH, MAXPATHLEN, &uaddr)) { + proc_pidpathinfo(p, 0, uaddr, MAXPATHLEN, &retval); + } + + pflags = p->p_flag & (P_LP64 | P_SUGID); + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PROC_FLAGS, sizeof(pflags), &uaddr)) { + copyout(&pflags, uaddr, sizeof(pflags)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_UID, sizeof(p->p_uid), &uaddr)) { + copyout(&p->p_uid, uaddr, sizeof(p->p_uid)); + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_GID, sizeof(p->p_gid), &uaddr)) { + copyout(&p->p_gid, uaddr, sizeof(p->p_gid)); + } + + cputype = cpu_type() & ~CPU_ARCH_MASK; + if (IS_64BIT_PROCESS(p)) + cputype |= CPU_ARCH_ABI64; + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_CPUTYPE, sizeof(cpu_type_t), &uaddr)) { + copyout(&cputype, uaddr, sizeof(cpu_type_t)); + } + + bzero(&pwqinfo, sizeof(struct proc_workqueueinfo)); + retval = fill_procworkqueue(p, &pwqinfo); + if (retval == 0) { + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_WORKQUEUEINFO, sizeof(struct proc_workqueueinfo), &uaddr)) { + copyout(&pwqinfo, uaddr, sizeof(struct proc_workqueueinfo)); + } + } + + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_RESPONSIBLE_PID, sizeof(p->p_responsible_pid), 
&uaddr)) { + copyout(&p->p_responsible_pid, uaddr, sizeof(p->p_responsible_pid)); + } + +#if CONFIG_MEMORYSTATUS + memstat_dirty_flags = memorystatus_dirty_get(p); + if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_DIRTY_FLAGS, sizeof(memstat_dirty_flags), &uaddr)) { + copyout(&memstat_dirty_flags, uaddr, sizeof(memstat_dirty_flags)); + } +#endif + +} + +static __attribute__((noinline)) void +launchd_crashed_panic(proc_t p, int rv) +{ + printf("pid 1 exited (signal %d, exit %d)\n", + WTERMSIG(rv), WEXITSTATUS(rv)); + +#if (DEVELOPMENT || DEBUG) + /* + * For debugging purposes, generate a core file of initproc before + * panicking. Leave at least 300 MB free on the root volume, and ignore + * the process's corefile ulimit. fsync() the file to ensure it lands on disk + * before the panic hits. + */ + + int err; + uint64_t coredump_start = mach_absolute_time(); + uint64_t coredump_end; + clock_sec_t tv_sec; + clock_usec_t tv_usec; + uint32_t tv_msec; + + err = coredump(p, 300, COREDUMP_IGNORE_ULIMIT | COREDUMP_FULLFSYNC); + + coredump_end = mach_absolute_time(); + + absolutetime_to_microtime(coredump_end - coredump_start, &tv_sec, &tv_usec); + + tv_msec = tv_usec / 1000; + + if (err != 0) { + printf("Failed to generate initproc core file: error %d, took %d.%03d seconds\n", + err, (uint32_t)tv_sec, tv_msec); + } else { + printf("Generated initproc core file in %d.%03d seconds\n", + (uint32_t)tv_sec, tv_msec); + } +#endif + + sync(p, (void *)NULL, (int *)NULL); + + panic_plain("%s exited (signal %d, exit status %d %s)", (p->p_name[0] != '\0' ? p->p_name : "initproc"), WTERMSIG(rv), + WEXITSTATUS(rv), ((p->p_csflags & CS_KILLED) ? "CS_KILLED" : "")); +} + /* * exit -- * Death of process. 
@@ -337,32 +509,11 @@ exit1_internal(proc_t p, int rv, int *retval, boolean_t thread_can_terminate, bo } sig_lock_to_exit(p); } + if (p == initproc && current_proc() == p) { - proc_unlock(p); - printf("pid 1 exited (signal %d, exit %d)", - WTERMSIG(rv), WEXITSTATUS(rv)); -#if (DEVELOPMENT || DEBUG) - int err; - /* - * For debugging purposes, generate a core file of initproc before - * panicking. Leave at least 300 MB free on the root volume, and ignore - * the process's corefile ulimit. - */ - if ((err = coredump(p, 300, 1)) != 0) { - printf("Failed to generate initproc core file: error %d", err); - } else { - printf("Generated initproc core file"); - sync(p, (void *)NULL, (int *)NULL); - } -#endif init_task_died = TRUE; - panic("%s died\nState at Last Exception:\n\n%s", - (p->p_comm[0] != '\0' ? - p->p_comm : - "launchd"), - init_task_failure_data); } - + p->p_lflag |= P_LEXIT; p->p_xstat = rv; p->p_lflag |= jetsam_flags; @@ -381,11 +532,19 @@ exit1_internal(proc_t p, int rv, int *retval, boolean_t thread_can_terminate, bo void proc_prepareexit(proc_t p, int rv, boolean_t perf_notify) { - mach_exception_data_type_t code, subcode; + mach_exception_data_type_t code = 0, subcode = 0; + struct uthread *ut; thread_t self = current_thread(); ut = get_bsdthread_info(self); struct rusage_superset *rup; + int kr = 0; + int create_corpse = FALSE; + + if (p == initproc) { + launchd_crashed_panic(p, rv); + /* NOTREACHED */ + } /* If a core should be generated, notify crash reporter */ if (hassigprop(WTERMSIG(rv), SA_CORE) || ((p->p_csflags & CS_KILLED) != 0)) { @@ -407,7 +566,13 @@ proc_prepareexit(proc_t p, int rv, boolean_t perf_notify) ((ut->uu_exception & 0x0f) << 20) | ((int)ut->uu_code & 0xfffff); subcode = ut->uu_subcode; - (void) task_exception_notify(EXC_CRASH, code, subcode); + + kr = task_exception_notify(EXC_CRASH, code, subcode); + + /* Nobody handled EXC_CRASH?? 
remember to make corpse */ + if (kr != 0) { + create_corpse = TRUE; + } } skipcheck: @@ -416,6 +581,25 @@ proc_prepareexit(proc_t p, int rv, boolean_t perf_notify) (void)sys_perf_notify(self, p->p_pid); } + + /* stash the usage into corpse data if create_corpse == TRUE */ + if (create_corpse == TRUE) { + kr = task_mark_corpse(current_task()); + if (kr != KERN_SUCCESS) { + if (kr == KERN_NO_SPACE) { + printf("Process[%d] has no vm space for corpse info.\n", p->p_pid); + } else if (kr == KERN_NOT_SUPPORTED) { + printf("Process[%d] was destined to be corpse. But corpse is disabled by config.\n", p->p_pid); + } else { + printf("Process[%d] crashed: %s. Too many corpses being created.\n", p->p_pid, p->p_comm); + } + create_corpse = FALSE; + } else { + /* XXX: Need to sync ATM buffer before crash */ + kr = task_send_trace_memory(current_task(), p->p_pid, p->p_uniqueid); + } + } + /* * Before this process becomes a zombie, stash resource usage * stats in the proc for external observers to query @@ -436,7 +620,9 @@ proc_prepareexit(proc_t p, int rv, boolean_t perf_notify) */ p->p_ru = rup; } - + if (create_corpse) { + populate_corpse_crashinfo(p, task_get_corpseinfo(current_task()), rup, code, subcode); + } /* * Remove proc from allproc queue and from pidhash chain. * Need to do this before we do anything that can block. @@ -576,6 +762,8 @@ proc_exit(proc_t p) workqueue_mark_exiting(p); workqueue_exit(p); + kqueue_dealloc(p->p_wqkqueue); + p->p_wqkqueue = NULL; _aio_exit( p ); @@ -1128,13 +1316,8 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int reparentedtoi * and refernce is dropped after these calls down below * (locking protection is provided by list lock held in chgproccnt) */ - (void)chgproccnt(kauth_cred_getruid(child->p_ucred), -1); -#if CONFIG_LCTX - ALLLCTX_LOCK; - leavelctx(child); - ALLLCTX_UNLOCK; -#endif + (void)chgproccnt(kauth_cred_getruid(child->p_ucred), -1); /* * Free up credentials. 
diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c index 9417b0911..ff5d6dda6 100644 --- a/bsd/kern/kern_fork.c +++ b/bsd/kern/kern_fork.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2007, 2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -93,6 +93,7 @@ #include #include #include + #if CONFIG_DTRACE /* Do not include dtrace.h, it redefines kmem_[alloc/free] */ extern void dtrace_fasttrap_fork(proc_t, proc_t); @@ -149,10 +150,10 @@ void thread_set_child(thread_t child, int pid); void *act_thread_csave(void); -thread_t cloneproc(task_t, coalition_t, proc_t, int, int); +thread_t cloneproc(task_t, coalition_t *, proc_t, int, int); proc_t forkproc(proc_t); void forkproc_free(proc_t); -thread_t fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child, int inherit_memory, int is64bit); +thread_t fork_create_child(task_t parent_task, coalition_t *parent_coalitions, proc_t child, int inherit_memory, int is64bit); void proc_vfork_begin(proc_t parent_proc); void proc_vfork_end(proc_t parent_proc); @@ -286,7 +287,7 @@ vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval) thread_t child_thread; int err; - if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK, COALITION_NULL)) != 0) { + if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK, NULL)) != 0) { retval[1] = 0; } else { uthread_t ut = get_bsdthread_info(current_thread()); @@ -323,11 +324,12 @@ vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval) * Mach thread_t of the child process * breated * kind kind of creation being requested - * coalition if spawn, coalition the child process - * should join, or COALITION_NULL to + * coalitions if spawn, the set of coalitions the + * child process should join, or NULL to * inherit the parent's. On non-spawns, * this param is ignored and the child - * always inherits the parent's coalition. 
+ * always inherits the parent's + * coalitions. * * Notes: Permissable values for 'kind': * @@ -359,7 +361,7 @@ vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval) * back to the other information. */ int -fork1(proc_t parent_proc, thread_t *child_threadp, int kind, coalition_t coalition) +fork1(proc_t parent_proc, thread_t *child_threadp, int kind, coalition_t *coalitions) { thread_t parent_thread = (thread_t)current_thread(); uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread); @@ -392,6 +394,7 @@ fork1(proc_t parent_proc, thread_t *child_threadp, int kind, coalition_t coaliti * always less than what an rlim_t can hold. * (locking protection is provided by list lock held in chgproccnt) */ + count = chgproccnt(uid, 1); if (uid != 0 && (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) { @@ -552,7 +555,7 @@ fork1(proc_t parent_proc, thread_t *child_threadp, int kind, coalition_t coaliti * differences. Contrarily, spawned processes do not inherit. */ if ((child_thread = cloneproc(parent_proc->task, - spawn ? coalition : COALITION_NULL, + spawn ? coalitions : NULL, parent_proc, spawn ? FALSE : TRUE, FALSE)) == NULL) { @@ -758,7 +761,7 @@ vfork_return(proc_t child_proc, int32_t *retval, int rval) * process * * Parameters: parent_task parent task - * parent_coalition parent_coalition + * parent_coalitions parent's set of coalitions * child_proc child process * inherit_memory TRUE, if the parents address space is * to be inherited by the child @@ -772,7 +775,7 @@ vfork_return(proc_t child_proc, int32_t *retval, int rval) * vfork() equivalent call, and in the system bootstrap case. * * It creates a new task and thread (and as a side effect of the - * thread creation, a uthread) in the parent coalition, which is + * thread creation, a uthread) in the parent coalition set, which is * then associated with the process 'child'. 
If the parent * process address space is to be inherited, then a flag * indicates that the newly created task should inherit this from @@ -783,7 +786,7 @@ vfork_return(proc_t child_proc, int32_t *retval, int rval) * in this case, 'inherit_memory' MUST be FALSE. */ thread_t -fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child_proc, int inherit_memory, int is64bit) +fork_create_child(task_t parent_task, coalition_t *parent_coalitions, proc_t child_proc, int inherit_memory, int is64bit) { thread_t child_thread = NULL; task_t child_task; @@ -791,7 +794,7 @@ fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child /* Create a new task for the child process */ result = task_create_internal(parent_task, - parent_coalition, + parent_coalitions, inherit_memory, is64bit, &child_task); @@ -825,7 +828,7 @@ fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child resetpriority(child_proc); /* Create a new thread for the child process */ - result = thread_create(child_task, &child_thread); + result = thread_create_with_continuation(child_task, &child_thread, (thread_continue_t)proc_wait_to_return); if (result != KERN_SUCCESS) { printf("%s: thread_create failed. 
Code: %d\n", __func__, result); @@ -887,7 +890,7 @@ fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval) retval[1] = 0; /* flag parent return for user space */ - if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK, COALITION_NULL)) == 0) { + if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK, NULL)) == 0) { task_t child_task; proc_t child_proc; @@ -913,7 +916,7 @@ fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval) #endif /* "Return" to the child */ - (void)thread_resume(child_thread); + proc_clear_return_wait(child_proc, child_thread); /* drop the extra references we got during the creation */ if ((child_task = (task_t)get_threadtask(child_thread)) != NULL) { @@ -965,7 +968,7 @@ fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval) * live with this being somewhat awkward. */ thread_t -cloneproc(task_t parent_task, coalition_t parent_coalition, proc_t parent_proc, int inherit_memory, int memstat_internal) +cloneproc(task_t parent_task, coalition_t *parent_coalitions, proc_t parent_proc, int inherit_memory, int memstat_internal) { #if !CONFIG_MEMORYSTATUS #pragma unused(memstat_internal) @@ -979,7 +982,7 @@ cloneproc(task_t parent_task, coalition_t parent_coalition, proc_t parent_proc, goto bad; } - child_thread = fork_create_child(parent_task, parent_coalition, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64)); + child_thread = fork_create_child(parent_task, parent_coalitions, child_proc, inherit_memory, (parent_task == TASK_NULL) ? 
FALSE : (parent_proc->p_flag & P_LP64)); if (child_thread == NULL) { /* @@ -1196,6 +1199,7 @@ forkproc(proc_t parent_proc) } nprocs++; child_proc->p_pid = nextpid; + child_proc->p_responsible_pid = nextpid; /* initially responsible for self */ child_proc->p_idversion = nextpidversion++; /* kernel process is handcrafted and not from fork, so start from 1 */ child_proc->p_uniqueid = ++nextuniqueid; @@ -1232,7 +1236,7 @@ forkproc(proc_t parent_proc) * Increase reference counts on shared objects. * The p_stats and p_sigacts substructs are set in vm_fork. */ - child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY | P_DISABLE_ASLR | P_DELAYIDLESLEEP)); + child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_DISABLE_ASLR | P_DELAYIDLESLEEP | P_SUGID)); if (parent_proc->p_flag & P_PROFIL) startprofclock(child_proc); @@ -1325,6 +1329,7 @@ forkproc(proc_t parent_proc) */ proc_signalstart(child_proc, 0); proc_transstart(child_proc, 0, 0); + proc_set_return_wait(child_proc); child_proc->p_pcaction = 0; @@ -1359,25 +1364,13 @@ forkproc(proc_t parent_proc) if ((parent_proc->p_lflag & P_LREGISTER) != 0) { child_proc->p_lflag |= P_LREGISTER; } + child_proc->p_wqkqueue = NULL; child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset; child_proc->p_dispatchqueue_serialno_offset = parent_proc->p_dispatchqueue_serialno_offset; #if PSYNCH pth_proc_hashinit(child_proc); #endif /* PSYNCH */ -#if CONFIG_LCTX - child_proc->p_lctx = NULL; - /* Add new process to login context (if any). */ - if (parent_proc->p_lctx != NULL) { - /* - * This should probably be delayed in the - * vfork() or posix_spawn() cases. 
- */ - LCTX_LOCK(parent_proc->p_lctx); - enterlctx(child_proc, parent_proc->p_lctx, 0); - } -#endif - #if CONFIG_MEMORYSTATUS /* Memorystatus + jetsam init */ child_proc->p_memstat_state = 0; @@ -1517,30 +1510,53 @@ uthread_alloc(task_t task, thread_t thread, int noinherit) if (p->p_dtrace_ptss_pages != NULL) { uth->t_dtrace_scratch = dtrace_ptss_claim_entry(p); } -#endif -#if CONFIG_MACF - mac_thread_label_init(uth); #endif } return (ut); } +/* + * This routine frees the thread name field of the uthread_t structure. Split out of + * uthread_cleanup() so it can be called separately on the threads of a corpse after + * the corpse notification has been sent, and the handler has had a chance to extract + * the thread names. + */ +void +uthread_cleanup_name(void *uthread) +{ + uthread_t uth = (uthread_t)uthread; + + /* + * + * Set pth_name to NULL before calling free(). + * Previously there was a race condition in the + * case this code was executing during a stackshot + * where the stackshot could try and copy pth_name + * after it had been freed and before it was marked + * as null. + */ + if (uth->pth_name != NULL) { + void *pth_name = uth->pth_name; + uth->pth_name = NULL; + kfree(pth_name, MAXTHREADNAMESIZE); + } + return; +} /* * This routine frees all the BSD context in uthread except the credential. * It does not free the uthread structure as well */ void -uthread_cleanup(task_t task, void *uthread, void * bsd_info) +uthread_cleanup(task_t task, void *uthread, void * bsd_info, boolean_t is_corpse) { struct _select *sel; uthread_t uth = (uthread_t)uthread; proc_t p = (proc_t)bsd_info; - void *pth_name; if (uth->uu_lowpri_window || uth->uu_throttle_info) { - /* + /* * task is marked as a low priority I/O type * and we've somehow managed to not dismiss the throttle * through the normal exit paths back to user space... 
@@ -1573,25 +1589,20 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info) uth->uu_cdir = NULLVP; } - if (uth->uu_allocsize && uth->uu_wqset){ - kfree(uth->uu_wqset, uth->uu_allocsize); - uth->uu_allocsize = 0; - uth->uu_wqset = 0; + if (uth->uu_wqset) { + if (waitq_set_is_valid(uth->uu_wqset)) + waitq_set_deinit(uth->uu_wqset); + FREE(uth->uu_wqset, M_SELECT); + uth->uu_wqset = NULL; + uth->uu_wqstate_sz = 0; } - - /* - * - * Set pth_name to NULL before calling free(). - * Previously there was a race condition in the - * case this code was executing during a stackshot - * where the stackshot could try and copy pth_name - * after it had been freed and before if was marked - * as null. + + /* + * defer the removal of the thread name on process corpses until the corpse has + * been autopsied. */ - if (uth->pth_name != NULL) { - pth_name = uth->pth_name; - uth->pth_name = NULL; - kfree(pth_name, MAXTHREADNAMESIZE); + if (!is_corpse) { + uthread_cleanup_name(uth); } if ((task != kernel_task) && p) { @@ -1615,9 +1626,6 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info) if (tmpptr != NULL) { dtrace_ptss_release_entry(p, tmpptr); } -#endif -#if CONFIG_MACF - mac_thread_label_destroy(uth); #endif } } diff --git a/bsd/kern/kern_guarded.c b/bsd/kern/kern_guarded.c index c8223153a..dad131eb4 100644 --- a/bsd/kern/kern_guarded.c +++ b/bsd/kern/kern_guarded.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Apple Inc. All rights reserved. + * Copyright (c) 2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -131,15 +131,15 @@ guarded_fileproc_free(struct fileproc *fp) static int fp_lookup_guarded(proc_t p, int fd, guardid_t guard, - struct guarded_fileproc **gfpp) + struct guarded_fileproc **gfpp, int locked) { struct fileproc *fp; int error; - if ((error = fp_lookup(p, fd, &fp, 1)) != 0) + if ((error = fp_lookup(p, fd, &fp, locked)) != 0) return (error); if (FILEPROC_TYPE(fp) != FTYPE_GUARDED) { - (void) fp_drop(p, fd, fp, 1); + (void) fp_drop(p, fd, fp, locked); return (EINVAL); } struct guarded_fileproc *gfp = FP_TO_GFP(fp); @@ -148,7 +148,7 @@ fp_lookup_guarded(proc_t p, int fd, guardid_t guard, panic("%s: corrupt fp %p", __func__, fp); if (guard != gfp->gf_guard) { - (void) fp_drop(p, fd, fp, 1); + (void) fp_drop(p, fd, fp, locked); return (EPERM); /* *not* a mismatch exception */ } if (gfpp) @@ -175,7 +175,7 @@ fp_isguarded(struct fileproc *fp, u_int attrs) if (GUARDED_FILEPROC_MAGIC != gfp->gf_magic) panic("%s: corrupt gfp %p flags %x", __func__, gfp, fp->f_flags); - return ((attrs & gfp->gf_attrs) ? 1 : 0); + return ((attrs & gfp->gf_attrs) == attrs); } return (0); } @@ -322,6 +322,10 @@ fd_guard_ast(thread_t t) * requires close-on-fork; O_CLOEXEC must be set in flags. * This setting is immutable; attempts to clear the flag will * cause a guard exception. + * + * XXX It's somewhat broken that change_fdguard_np() can completely + * remove the guard and thus revoke down the immutability + * promises above. Ick. 
*/ int guarded_open_np(proc_t p, struct guarded_open_np_args *uap, int32_t *retval) @@ -383,10 +387,6 @@ guarded_open_dprotected_np(proc_t p, struct guarded_open_dprotected_np_args *uap if ((uap->flags & O_CLOEXEC) == 0) return (EINVAL); -#define GUARD_REQUIRED (GUARD_DUP) -#define GUARD_ALL (GUARD_REQUIRED | \ - (GUARD_CLOSE | GUARD_SOCKET_IPC | GUARD_FILEPORT | GUARD_WRITE)) - if (((uap->guardflags & GUARD_REQUIRED) != GUARD_REQUIRED) || ((uap->guardflags & ~GUARD_ALL) != 0)) return (EINVAL); @@ -429,12 +429,17 @@ guarded_open_dprotected_np(proc_t p, struct guarded_open_dprotected_np_args *uap VATTR_SET(&va, va_dataprotect_class, uap->dpclass); } - if (uap->dpflags & O_DP_GETRAWENCRYPTED) { + if (uap->dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) { if ( uap->flags & (O_RDWR | O_WRONLY)) { /* Not allowed to write raw encrypted bytes */ return EINVAL; } - VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED); + if (uap->dpflags & O_DP_GETRAWENCRYPTED) { + VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED); + } + if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) { + VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED); + } } return (open1(ctx, &nd, uap->flags | O_CLOFORK, &va, @@ -447,10 +452,8 @@ guarded_open_dprotected_np(proc_t p, struct guarded_open_dprotected_np_args *uap * Create a guarded kqueue descriptor with guardid and guardflags. * * Same restrictions on guardflags as for guarded_open_np(). - * All kqueues are -always- close-on-exec and close-on-fork by themselves. - * - * XXX Is it ever sensible to allow a kqueue fd (guarded or not) to - * be sent to another process via a fileport or socket? + * All kqueues are -always- close-on-exec and close-on-fork by themselves + * and are not sendable. 
*/ int guarded_kqueue_np(proc_t p, struct guarded_kqueue_np_args *uap, int32_t *retval) @@ -492,7 +495,7 @@ guarded_close_np(proc_t p, struct guarded_close_np_args *uap, return (error); proc_fdlock(p); - if ((error = fp_lookup_guarded(p, fd, uguard, &gfp)) != 0) { + if ((error = fp_lookup_guarded(p, fd, uguard, &gfp, 1)) != 0) { proc_fdunlock(p); return (error); } @@ -535,6 +538,9 @@ guarded_close_np(proc_t p, struct guarded_close_np_args *uap, * the GUARD_CLOSE flag is being cleared, it is still possible to continue * to keep FD_CLOFORK on the descriptor by passing FD_CLOFORK via fdflagsp. * + * (File descriptors whose underlying fileglobs are marked FG_CONFINED are + * still close-on-fork, regardless of the setting of FD_CLOFORK.) + * * Example 1: Guard an unguarded descriptor during a set of operations, * then restore the original state of the descriptor. * @@ -552,14 +558,10 @@ guarded_close_np(proc_t p, struct guarded_close_np_args *uap, * // do things with 'fd' with a different guard * change_fdguard_np(fd, &myg, GUARD_CLOSE, &gd, gdflags, &sav_flags); * // back to original guarded state + * + * XXX This SPI is too much of a chainsaw and should be revised. 
*/ -#define FDFLAGS_GET(p, fd) (*fdflags(p, fd) & (UF_EXCLOSE|UF_FORKCLOSE)) -#define FDFLAGS_SET(p, fd, bits) \ - (*fdflags(p, fd) |= ((bits) & (UF_EXCLOSE|UF_FORKCLOSE))) -#define FDFLAGS_CLR(p, fd, bits) \ - (*fdflags(p, fd) &= ~((bits) & (UF_EXCLOSE|UF_FORKCLOSE))) - int change_fdguard_np(proc_t p, struct change_fdguard_np_args *uap, __unused int32_t *retval) @@ -620,12 +622,9 @@ change_fdguard_np(proc_t p, struct change_fdguard_np_args *uap, */ if (0 == newg) error = EINVAL; /* guards cannot contain zero */ - else if (0 == uap->nguardflags) - error = EINVAL; /* attributes cannot be zero */ else if (((uap->nguardflags & GUARD_REQUIRED) != GUARD_REQUIRED) || - ((uap->guardflags & ~GUARD_ALL) != 0)) + ((uap->nguardflags & ~GUARD_ALL) != 0)) error = EINVAL; /* must have valid attributes too */ - if (0 != error) goto dropout; @@ -655,6 +654,7 @@ change_fdguard_np(proc_t p, struct change_fdguard_np_args *uap, FDFLAGS_SET(p, fd, UF_FORKCLOSE); FDFLAGS_SET(p, fd, (nfdflags & FD_CLOFORK) ? UF_FORKCLOSE : 0); + /* FG_CONFINED enforced regardless */ } else { error = EPERM; } @@ -741,6 +741,7 @@ change_fdguard_np(proc_t p, struct change_fdguard_np_args *uap, FDFLAGS_CLR(p, fd, UF_FORKCLOSE | UF_EXCLOSE); FDFLAGS_SET(p, fd, (nfdflags & FD_CLOFORK) ? UF_FORKCLOSE : 0); + /* FG_CONFINED enforced regardless */ FDFLAGS_SET(p, fd, (nfdflags & FD_CLOEXEC) ? 
UF_EXCLOSE : 0); (void) fp_drop(p, fd, nfp, 1); @@ -792,7 +793,7 @@ guarded_write_np(struct proc *p, struct guarded_write_np_args *uap, user_ssize_t if ((error = copyin(uap->guard, &uguard, sizeof (uguard))) != 0) return (error); - error = fp_lookup_guarded(p, fd, uguard, &gfp); + error = fp_lookup_guarded(p, fd, uguard, &gfp, 0); if (error) return(error); @@ -837,7 +838,7 @@ guarded_write_np(struct proc *p, struct guarded_write_np_args *uap, user_ssize_t if ((error = copyin(uap->guard, &uguard, sizeof (uguard))) != 0) return (error); - error = fp_lookup_guarded(p, fd, uguard, &gfp); + error = fp_lookup_guarded(p, fd, uguard, &gfp, 0); if (error) return(error); @@ -928,12 +929,15 @@ guarded_writev_np(struct proc *p, struct guarded_writev_np_args *uap, user_ssize /* finalize uio_t for use and do the IO */ - uio_calculateresid(auio); + error = uio_calculateresid(auio); + if (error) { + goto ExitThisRoutine; + } if ((error = copyin(uap->guard, &uguard, sizeof (uguard))) != 0) goto ExitThisRoutine; - error = fp_lookup_guarded(p, uap->fd, uguard, &gfp); + error = fp_lookup_guarded(p, uap->fd, uguard, &gfp, 0); if (error) goto ExitThisRoutine; diff --git a/bsd/kern/kern_kpc.c b/bsd/kern/kern_kpc.c index dde93bbce..3e3443fc3 100644 --- a/bsd/kern/kern_kpc.c +++ b/bsd/kern/kern_kpc.c @@ -53,6 +53,7 @@ #define REQ_PERIOD (10) #define REQ_ACTIONID (11) #define REQ_SW_INC (14) +#define REQ_PMU_VERSION (15) /* Type-munging casts */ typedef int (*getint_t)(void); @@ -70,11 +71,6 @@ static void *sysctl_buffer = NULL; typedef int (*setget_func_t)(int); -/* init our stuff */ -extern void kpc_arch_init(void); -extern void kpc_common_init(void); -extern void kpc_thread_init(void); /* osfmk/kern/kpc_thread.c */ - void kpc_init(void) { @@ -247,6 +243,9 @@ sysctl_kpc_get_config(uint32_t classes, void* buf) static int sysctl_kpc_set_config(uint32_t classes, void* buf) { + /* userspace cannot reconfigure the power class */ + if (classes & KPC_CLASS_POWER_MASK) + return (EPERM); return 
kpc_set_config( classes, buf); } @@ -259,6 +258,9 @@ sysctl_kpc_get_period(uint32_t classes, void* buf) static int sysctl_kpc_set_period(uint32_t classes, void* buf) { + /* userspace cannot reconfigure the power class */ + if (classes & KPC_CLASS_POWER_MASK) + return (EPERM); return kpc_set_period( classes, buf); } @@ -500,6 +502,10 @@ kpc_sysctl SYSCTL_HANDLER_ARGS ret = sysctl_set_int( req, (setget_func_t)kpc_set_sw_inc ); break; + case REQ_PMU_VERSION: + ret = sysctl_get_int(oidp, req, kpc_get_pmu_version()); + break; + default: ret = ENOENT; break; @@ -533,6 +539,11 @@ SYSCTL_PROC(_kpc, OID_AUTO, thread_counting, (void*)REQ_THREAD_COUNTING, sizeof(int), kpc_sysctl, "I", "Thread accumulation"); +SYSCTL_PROC(_kpc, OID_AUTO, pmu_version, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_ANYBODY, + (void *)REQ_PMU_VERSION, + sizeof(int), kpc_sysctl, "I", "PMU version for hardware"); + /* faux values */ SYSCTL_PROC(_kpc, OID_AUTO, config_count, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, diff --git a/bsd/kern/kern_lockf.c b/bsd/kern/kern_lockf.c index 3bbf77a45..46c4f2e77 100644 --- a/bsd/kern/kern_lockf.c +++ b/bsd/kern/kern_lockf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2015 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -78,23 +78,33 @@ #include #include +#include + /* * This variable controls the maximum number of processes that will * be checked in doing deadlock detection. 
*/ static int maxlockdepth = MAXDEPTH; +#if (DEVELOPMENT || DEBUG) +#define LOCKF_DEBUGGING 1 +#endif + #ifdef LOCKF_DEBUGGING #include -#include -#include void lf_print(const char *tag, struct lockf *lock); void lf_printlist(const char *tag, struct lockf *lock); -static int lockf_debug = 2; + +#define LF_DBG_LOCKOP (1 << 0) /* setlk, getlk, clearlk */ +#define LF_DBG_LIST (1 << 1) /* split, coalesce */ +#define LF_DBG_IMPINH (1 << 2) /* importance inheritance */ +#define LF_DBG_TRACE (1 << 3) /* errors, exit */ + +static int lockf_debug = 0; /* was 2, could be 3 ;-) */ SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &lockf_debug, 0, ""); /* - * If there is no mask bit selector, or there is on, and the selector is + * If there is no mask bit selector, or there is one, and the selector is * set, then output the debugging diagnostic. */ #define LOCKF_DEBUG(mask, ...) \ @@ -138,6 +148,7 @@ static void lf_wakelock(struct lockf *, boolean_t); static void lf_hold_assertion(task_t, struct lockf *); static void lf_jump_to_queue_head(struct lockf *, struct lockf *); static void lf_drop_assertion(struct lockf *); +static void lf_boost_blocking_proc(struct lockf *, struct lockf *); #endif /* IMPORTANCE_INHERITANCE */ /* @@ -185,7 +196,9 @@ lf_advlock(struct vnop_advlock_args *ap) if (*head == (struct lockf *)0) { if (ap->a_op != F_SETLK) { fl->l_type = F_UNLCK; - LOCKF_DEBUG(0, "lf_advlock: '%s' unlock without lock\n", vfs_context_proc(context)->p_comm); + LOCKF_DEBUG(LF_DBG_TRACE, + "lf_advlock: '%s' unlock without lock\n", + vfs_context_proc(context)->p_comm); return (0); } } @@ -213,7 +226,8 @@ lf_advlock(struct vnop_advlock_args *ap) * do this because we will use size to force range checks. 
*/ if ((error = vnode_size(vp, (off_t *)&size, context))) { - LOCKF_DEBUG(0, "lf_advlock: vnode_getattr failed: %d\n", error); + LOCKF_DEBUG(LF_DBG_TRACE, + "lf_advlock: vnode_getattr failed: %d\n", error); return (error); } @@ -225,22 +239,26 @@ lf_advlock(struct vnop_advlock_args *ap) break; default: - LOCKF_DEBUG(0, "lf_advlock: unknown whence %d\n", fl->l_whence); + LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: unknown whence %d\n", + fl->l_whence); return (EINVAL); } if (start < 0) { - LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start); + LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: start < 0 (%qd)\n", + start); return (EINVAL); } if (fl->l_len < 0) { if (start == 0) { - LOCKF_DEBUG(0, "lf_advlock: len < 0 & start == 0\n"); + LOCKF_DEBUG(LF_DBG_TRACE, + "lf_advlock: len < 0 & start == 0\n"); return (EINVAL); } end = start - 1; start += fl->l_len; if (start < 0) { - LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start); + LOCKF_DEBUG(LF_DBG_TRACE, + "lf_advlock: start < 0 (%qd)\n", start); return (EINVAL); } } else if (fl->l_len == 0) @@ -248,7 +266,7 @@ lf_advlock(struct vnop_advlock_args *ap) else { oadd = fl->l_len - 1; if (oadd > (off_t)(OFF_MAX - start)) { - LOCKF_DEBUG(0, "lf_advlock: overflow\n"); + LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: overflow\n"); return (EOVERFLOW); } end = start + oadd; @@ -270,7 +288,11 @@ lf_advlock(struct vnop_advlock_args *ap) lock->lf_flags = ap->a_flags; #if IMPORTANCE_INHERITANCE lock->lf_boosted = LF_NOT_BOOSTED; -#endif /* IMPORTANCE_INHERITANCE */ +#endif + if (ap->a_flags & F_POSIX) + lock->lf_owner = (struct proc *)lock->lf_id; + else + lock->lf_owner = NULL; if (ap->a_flags & F_FLOCK) lock->lf_flags |= F_WAKE1_SAFE; @@ -281,6 +303,19 @@ lf_advlock(struct vnop_advlock_args *ap) */ switch(ap->a_op) { case F_SETLK: + /* + * For F_OFD_* locks, lf_id is the fileglob. + * Record an "lf_owner" iff this is a confined fd + * i.e. it cannot escape this process and will be + * F_UNLCKed before the owner exits. 
(This is + * the implicit guarantee needed to ensure lf_owner + * remains a valid reference here.) + */ + if (ap->a_flags & F_OFD_LOCK) { + struct fileglob *fg = (void *)lock->lf_id; + if (fg->fg_lflags & FG_CONFINED) + lock->lf_owner = current_proc(); + } error = lf_setlock(lock, ap->a_timeout); break; @@ -302,7 +337,7 @@ lf_advlock(struct vnop_advlock_args *ap) } lck_mtx_unlock(&vp->v_lock); /* done manipulating the list */ - LOCKF_DEBUG(0, "lf_advlock: normal exit: %d\n\n", error); + LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: normal exit: %d\n", error); return (error); } @@ -389,7 +424,7 @@ lf_coalesce_adjacent(struct lockf *lock) ((*lf)->lf_end + 1) == lock->lf_start) { struct lockf *adjacent = *lf; - LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent previous\n"); + LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent previous\n"); lock->lf_start = (*lf)->lf_start; *lf = lock; lf = &(*lf)->lf_next; @@ -404,7 +439,7 @@ lf_coalesce_adjacent(struct lockf *lock) (lock->lf_end + 1) == (*lf)->lf_start) { struct lockf *adjacent = *lf; - LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent following\n"); + LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent following\n"); lock->lf_end = (*lf)->lf_end; lock->lf_next = (*lf)->lf_next; lf = &lock->lf_next; @@ -420,7 +455,6 @@ lf_coalesce_adjacent(struct lockf *lock) } } - /* * lf_setlock * @@ -457,12 +491,9 @@ lf_setlock(struct lockf *lock, struct timespec *timeout) int priority, needtolink, error; struct vnode *vp = lock->lf_vnode; overlap_t ovcase; -#if IMPORTANCE_INHERITANCE - task_t boosting_task, block_task; -#endif /* IMPORTANCE_INHERITANCE */ #ifdef LOCKF_DEBUGGING - if (lockf_debug & 1) { + if (lockf_debug & LF_DBG_LOCKOP) { lf_print("lf_setlock", lock); lf_printlist("lf_setlock(in)", lock); } @@ -491,7 +522,11 @@ lf_setlock(struct lockf *lock, struct timespec *timeout) /* * We are blocked. Since flock style locks cover * the whole file, there is no chance for deadlock. 
- * For byte-range locks we must check for deadlock. + * + * OFD byte-range locks currently do NOT support + * deadlock detection. + * + * For POSIX byte-range locks we must check for deadlock. * * Deadlock detection is done by looking through the * wait channels to see if there are any cycles that @@ -506,7 +541,7 @@ lf_setlock(struct lockf *lock, struct timespec *timeout) int i = 0; /* The block is waiting on something */ - wproc = (struct proc *)block->lf_id; + wproc = block->lf_owner; proc_lock(wproc); TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) { /* @@ -536,7 +571,7 @@ lf_setlock(struct lockf *lock, struct timespec *timeout) /* * Make sure it's an advisory range - * lock and not an overall file lock; + * lock and not any other kind of lock; * if we mix lock types, it's our own * fault. */ @@ -549,8 +584,8 @@ lf_setlock(struct lockf *lock, struct timespec *timeout) * getting the requested lock, then we * would deadlock, so error out. */ - bproc = (struct proc *)waitblock->lf_id; - if (bproc == (struct proc *)lock->lf_id) { + bproc = waitblock->lf_owner; + if (bproc == lock->lf_owner) { proc_unlock(wproc); FREE(lock, M_LOCKF); return (EDEADLK); @@ -584,43 +619,37 @@ lf_setlock(struct lockf *lock, struct timespec *timeout) if ( !(lock->lf_flags & F_FLOCK)) block->lf_flags &= ~F_WAKE1_SAFE; +#if IMPORTANCE_INHERITANCE + /* + * Importance donation is done only for cases where the + * owning task can be unambiguously determined. + * + * POSIX type locks are not inherited by child processes; + * we maintain a 1:1 mapping between a lock and its owning + * process. + * + * Flock type locks are inherited across fork() and there is + * no 1:1 mapping in the general case. However, the fileglobs + * used by OFD locks *may* be confined to the process that + * created them, and thus have an "owner", in which case + * we also attempt importance donation. 
+ */ + if ((lock->lf_flags & block->lf_flags & F_POSIX) != 0) + lf_boost_blocking_proc(lock, block); + else if ((lock->lf_flags & block->lf_flags & F_OFD_LOCK) && + lock->lf_owner != block->lf_owner && + NULL != lock->lf_owner && NULL != block->lf_owner) + lf_boost_blocking_proc(lock, block); +#endif /* IMPORTANCE_INHERITANCE */ + #ifdef LOCKF_DEBUGGING - if (lockf_debug & 1) { + if (lockf_debug & LF_DBG_LOCKOP) { lf_print("lf_setlock: blocking on", block); lf_printlist("lf_setlock(block)", block); } #endif /* LOCKF_DEBUGGING */ DTRACE_FSINFO(advlock__wait, vnode_t, vp); -#if IMPORTANCE_INHERITANCE - /* - * Posix type of locks are not inherited by child processes and - * it maintains one to one mapping between lock and its owner, while - * Flock type of locks are inherited across forks and it does not - * maintian any one to one mapping between the lock and the lock - * owner. Thus importance donation is done only for Posix type of - * locks. - */ - if ((lock->lf_flags & F_POSIX) && (block->lf_flags & F_POSIX)) { - block_task = proc_task((proc_t) block->lf_id); - boosting_task = proc_task((proc_t) lock->lf_id); - - /* Check if current task can donate importance. The - * check of imp_donor bit is done without holding - * any lock. The value may change after you read it, - * but it is ok to boost a task while someone else is - * unboosting you. - * - * TODO: Support live inheritance on file locks. 
- */ - if (task_is_importance_donor(boosting_task)) { - if (block->lf_boosted != LF_BOOSTED && - task_is_importance_receiver_type(block_task)) { - lf_hold_assertion(block_task, block); - } - lf_jump_to_queue_head(block, lock); - } - } -#endif /* IMPORTANCE_INHERITANCE */ + error = msleep(lock, &vp->v_lock, priority, lockstr, timeout); if (error == 0 && (lock->lf_flags & F_ABORT) != 0) @@ -797,7 +826,7 @@ lf_setlock(struct lockf *lock, struct timespec *timeout) /* Coalesce adjacent locks with identical attributes */ lf_coalesce_adjacent(lock); #ifdef LOCKF_DEBUGGING - if (lockf_debug & 1) { + if (lockf_debug & LF_DBG_LOCKOP) { lf_print("lf_setlock: got the lock", lock); lf_printlist("lf_setlock(out)", lock); } @@ -835,7 +864,7 @@ lf_clearlock(struct lockf *unlock) #ifdef LOCKF_DEBUGGING if (unlock->lf_type != F_UNLCK) panic("lf_clearlock: bad type"); - if (lockf_debug & 1) + if (lockf_debug & LF_DBG_LOCKOP) lf_print("lf_clearlock", unlock); #endif /* LOCKF_DEBUGGING */ prev = head; @@ -892,7 +921,7 @@ lf_clearlock(struct lockf *unlock) break; } #ifdef LOCKF_DEBUGGING - if (lockf_debug & 1) + if (lockf_debug & LF_DBG_LOCKOP) lf_printlist("lf_clearlock", unlock); #endif /* LOCKF_DEBUGGING */ return (0); @@ -927,7 +956,7 @@ lf_getlock(struct lockf *lock, struct flock *fl, pid_t matchpid) struct lockf *block; #ifdef LOCKF_DEBUGGING - if (lockf_debug & 1) + if (lockf_debug & LF_DBG_LOCKOP) lf_print("lf_getlock", lock); #endif /* LOCKF_DEBUGGING */ @@ -939,9 +968,13 @@ lf_getlock(struct lockf *lock, struct flock *fl, pid_t matchpid) fl->l_len = 0; else fl->l_len = block->lf_end - block->lf_start + 1; - if (block->lf_flags & F_POSIX) - fl->l_pid = proc_pid((struct proc *)(block->lf_id)); - else + if (NULL != block->lf_owner) { + /* + * lf_owner is only non-NULL when the lock + * "owner" can be unambiguously determined + */ + fl->l_pid = proc_pid(block->lf_owner); + } else fl->l_pid = -1; } else { fl->l_type = F_UNLCK; @@ -977,12 +1010,14 @@ lf_getblock(struct lockf *lock, 
pid_t matchpid) * Found an overlap. * * If we're matching pids, and it's a record lock, + * or it's an OFD lock on a process-confined fd, * but the pid doesn't match, then keep on looking .. */ if (matchpid != -1 && - (overlap->lf_flags & F_POSIX) != 0 && - proc_pid((struct proc *)(overlap->lf_id)) != matchpid) + (overlap->lf_flags & (F_POSIX|F_OFD_LOCK)) != 0 && + proc_pid(overlap->lf_owner) != matchpid) continue; + /* * does it block us? */ @@ -1048,7 +1083,7 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, if (lf == NOLOCKF) return (0); #ifdef LOCKF_DEBUGGING - if (lockf_debug & 2) + if (lockf_debug & LF_DBG_LIST) lf_print("lf_findoverlap: looking for overlap in", lock); #endif /* LOCKF_DEBUGGING */ start = lock->lf_start; @@ -1079,7 +1114,7 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, } #ifdef LOCKF_DEBUGGING - if (lockf_debug & 2) + if (lockf_debug & LF_DBG_LIST) lf_print("\tchecking", lf); #endif /* LOCKF_DEBUGGING */ /* @@ -1088,7 +1123,7 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, if ((lf->lf_end != -1 && start > lf->lf_end) || (end != -1 && lf->lf_start > end)) { /* Case 0 */ - LOCKF_DEBUG(2, "no overlap\n"); + LOCKF_DEBUG(LF_DBG_LIST, "no overlap\n"); /* * NOTE: assumes that locks for the same process are @@ -1101,30 +1136,30 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, continue; } if ((lf->lf_start == start) && (lf->lf_end == end)) { - LOCKF_DEBUG(2, "overlap == lock\n"); + LOCKF_DEBUG(LF_DBG_LIST, "overlap == lock\n"); return (OVERLAP_EQUALS_LOCK); } if ((lf->lf_start <= start) && (end != -1) && ((lf->lf_end >= end) || (lf->lf_end == -1))) { - LOCKF_DEBUG(2, "overlap contains lock\n"); + LOCKF_DEBUG(LF_DBG_LIST, "overlap contains lock\n"); return (OVERLAP_CONTAINS_LOCK); } if (start <= lf->lf_start && (end == -1 || (lf->lf_end != -1 && end >= lf->lf_end))) { - LOCKF_DEBUG(2, "lock contains overlap\n"); + LOCKF_DEBUG(LF_DBG_LIST, "lock contains overlap\n"); return 
(OVERLAP_CONTAINED_BY_LOCK); } if ((lf->lf_start < start) && ((lf->lf_end >= start) || (lf->lf_end == -1))) { - LOCKF_DEBUG(2, "overlap starts before lock\n"); + LOCKF_DEBUG(LF_DBG_LIST, "overlap starts before lock\n"); return (OVERLAP_STARTS_BEFORE_LOCK); } if ((lf->lf_start > start) && (end != -1) && ((lf->lf_end > end) || (lf->lf_end == -1))) { - LOCKF_DEBUG(2, "overlap ends after lock\n"); + LOCKF_DEBUG(LF_DBG_LIST, "overlap ends after lock\n"); return (OVERLAP_ENDS_AFTER_LOCK); } panic("lf_findoverlap: default"); @@ -1162,13 +1197,13 @@ lf_split(struct lockf *lock1, struct lockf *lock2) struct lockf *splitlock; #ifdef LOCKF_DEBUGGING - if (lockf_debug & 2) { + if (lockf_debug & LF_DBG_LIST) { lf_print("lf_split", lock1); lf_print("splitting from", lock2); } #endif /* LOCKF_DEBUGGING */ /* - * Check to see if spliting into only two pieces. + * Check to see if splitting into only two pieces. */ if (lock1->lf_start == lock2->lf_start) { lock1->lf_start = lock2->lf_end + 1; @@ -1236,7 +1271,7 @@ lf_wakelock(struct lockf *listhead, boolean_t force_all) wakelock->lf_next = NOLOCKF; #ifdef LOCKF_DEBUGGING - if (lockf_debug & 2) + if (lockf_debug & LF_DBG_LOCKOP) lf_print("lf_wakelock: awakening", wakelock); #endif /* LOCKF_DEBUGGING */ if (wake_all == FALSE) { @@ -1268,6 +1303,8 @@ lf_wakelock(struct lockf *listhead, boolean_t force_all) #ifdef LOCKF_DEBUGGING +#define GET_LF_OWNER_PID(lf) (proc_pid((lf)->lf_owner)) + /* * lf_print DEBUG * @@ -1284,7 +1321,11 @@ lf_print(const char *tag, struct lockf *lock) { printf("%s: lock %p for ", tag, (void *)lock); if (lock->lf_flags & F_POSIX) - printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid); + printf("proc %p (owner %d)", + lock->lf_id, GET_LF_OWNER_PID(lock)); + else if (lock->lf_flags & F_OFD_LOCK) + printf("fg %p (owner %d)", + lock->lf_id, GET_LF_OWNER_PID(lock)); else printf("id %p", (void *)lock->lf_id); if (lock->lf_vnode != 0) @@ -1332,8 +1373,11 @@ lf_printlist(const char *tag, struct lockf *lock) for 
(lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) { printf("\tlock %p for ",(void *)lf); if (lf->lf_flags & F_POSIX) - printf("proc %ld", - (long)((struct proc *)lf->lf_id)->p_pid); + printf("proc %p (owner %d)", + lf->lf_id, GET_LF_OWNER_PID(lf)); + else if (lf->lf_flags & F_OFD_LOCK) + printf("fg %p (owner %d)", + lf->lf_id, GET_LF_OWNER_PID(lf)); else printf("id %p", (void *)lf->lf_id); printf(", %s, start 0x%016llx, end 0x%016llx", @@ -1344,8 +1388,11 @@ lf_printlist(const char *tag, struct lockf *lock) TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) { printf("\n\t\tlock request %p for ", (void *)blk); if (blk->lf_flags & F_POSIX) - printf("proc %ld", - (long)((struct proc *)blk->lf_id)->p_pid); + printf("proc %p (owner %d)", + blk->lf_id, GET_LF_OWNER_PID(blk)); + else if (blk->lf_flags & F_OFD_LOCK) + printf("fg %p (owner %d)", + blk->lf_id, GET_LF_OWNER_PID(blk)); else printf("id %p", (void *)blk->lf_id); printf(", %s, start 0x%016llx, end 0x%016llx", @@ -1387,6 +1434,9 @@ lf_hold_assertion(task_t block_task, struct lockf *block) { if (task_importance_hold_file_lock_assertion(block_task, 1)) { block->lf_boosted = LF_BOOSTED; + LOCKF_DEBUG(LF_DBG_IMPINH, + "lf: importance hold file lock assert on pid %d lock %p\n", + proc_pid(block->lf_owner), block); } } @@ -1425,11 +1475,39 @@ lf_jump_to_queue_head(struct lockf *block, struct lockf *lock) static void lf_drop_assertion(struct lockf *block) { - task_t current_task; + LOCKF_DEBUG(LF_DBG_IMPINH, "lf: %d: dropping assertion for lock %p\n", + proc_pid(block->lf_owner), block); - current_task = proc_task((proc_t) block->lf_id); + task_t current_task = proc_task(block->lf_owner); task_importance_drop_file_lock_assertion(current_task, 1); block->lf_boosted = LF_NOT_BOOSTED; } +static void +lf_boost_blocking_proc(struct lockf *lock, struct lockf *block) +{ + task_t ltask = proc_task(lock->lf_owner); + task_t btask = proc_task(block->lf_owner); + + /* + * Check if ltask can donate importance. 
The + * check of imp_donor bit is done without holding + * any lock. The value may change after you read it, + * but it is ok to boost a task while someone else is + * unboosting you. + * + * TODO: Support live inheritance on file locks. + */ + if (task_is_importance_donor(ltask)) { + LOCKF_DEBUG(LF_DBG_IMPINH, + "lf: %d: attempt to boost pid %d that holds lock %p\n", + proc_pid(lock->lf_owner), proc_pid(block->lf_owner), block); + + if (block->lf_boosted != LF_BOOSTED && + task_is_importance_receiver_type(btask)) { + lf_hold_assertion(btask, block); + } + lf_jump_to_queue_head(block, lock); + } +} #endif /* IMPORTANCE_INHERITANCE */ diff --git a/bsd/kern/kern_malloc.c b/bsd/kern/kern_malloc.c index 860a232dd..d1adaabec 100644 --- a/bsd/kern/kern_malloc.c +++ b/bsd/kern/kern_malloc.c @@ -280,7 +280,7 @@ const char *memname[] = { "mactemp", /* 104 M_MACTEMP */ "sbuf", /* 105 M_SBUF */ "extattr", /* 106 M_EXTATTR */ - "lctx", /* 107 M_LCTX */ + "select", /* 107 M_SELECT */ #if TRAFFIC_MGT "traffic_mgt", /* 108 M_TRAFFIC_MGT */ #else @@ -317,6 +317,7 @@ const char *memname[] = { #endif "fdvnodedata" /* 122 M_FD_VN_DATA */ "fddirbuf", /* 123 M_FD_DIRBUF */ + "netagent", /* 124 M_NETAGENT */ "" }; @@ -484,7 +485,7 @@ struct kmzones { { 0, KMZ_MALLOC, FALSE }, /* 104 M_MACTEMP */ { 0, KMZ_MALLOC, FALSE }, /* 105 M_SBUF */ { 0, KMZ_MALLOC, FALSE }, /* 106 M_HFS_EXTATTR */ - { 0, KMZ_MALLOC, FALSE }, /* 107 M_LCTX */ + { 0, KMZ_MALLOC, FALSE }, /* 107 M_SELECT */ { 0, KMZ_MALLOC, FALSE }, /* 108 M_TRAFFIC_MGT */ #if HFS_COMPRESSION { SOS(decmpfs_cnode),KMZ_CREATEZONE , FALSE}, /* 109 M_DECMPFS_CNODE */ @@ -514,6 +515,9 @@ struct kmzones { { 0, KMZ_MALLOC, FALSE }, /* 120 M_NECP_SOCKET_POLICY */ { 0, KMZ_MALLOC, FALSE }, /* 121 M_NECP_IP_POLICY */ #endif /* NECP */ + { 0, KMZ_MALLOC, FALSE }, /* 122 M_FD_VN_DATA */ + { 0, KMZ_MALLOC, FALSE }, /* 123 M_FD_DIRBUF */ + { 0, KMZ_MALLOC, FALSE }, /* 124 M_NETAGENT */ #undef SOS #undef SOX }; @@ -580,11 +584,28 @@ struct _mhead { 
char dat[0]; }; + void * -_MALLOC( +_MALLOC_external( + size_t size, + int type, + int flags); +void * +_MALLOC_external( size_t size, int type, int flags) +{ + static vm_allocation_site_t site = { VM_KERN_MEMORY_KALLOC, VM_TAG_BT }; + return (__MALLOC(size, type, flags, &site)); +} + +void * +__MALLOC( + size_t size, + int type, + int flags, + vm_allocation_site_t *site) { struct _mhead *hdr = NULL; size_t memsize = sizeof (*hdr) + size; @@ -599,7 +620,7 @@ _MALLOC( if (size > memsize) /* overflow detected */ return (NULL); else - hdr = (void *)kalloc_noblock(memsize); + hdr = (void *)kalloc_canblock(memsize, FALSE, site); } else { if (size > memsize) { /* @@ -610,7 +631,7 @@ _MALLOC( panic("_MALLOC: overflow detected, size %llu ", (uint64_t) size); } else - hdr = (void *)kalloc(memsize); + hdr = (void *)kalloc_canblock(memsize, TRUE, site); if (hdr == NULL) { @@ -656,11 +677,12 @@ _FREE( } void * -_REALLOC( +__REALLOC( void *addr, size_t size, int type, - int flags) + int flags, + vm_allocation_site_t *site) { struct _mhead *hdr; void *newaddr; @@ -668,10 +690,10 @@ _REALLOC( /* realloc(NULL, ...) is equivalent to malloc(...) 
*/ if (addr == NULL) - return (_MALLOC(size, type, flags)); + return (__MALLOC(size, type, flags, site)); /* Allocate a new, bigger (or smaller) block */ - if ((newaddr = _MALLOC(size, type, flags)) == NULL) + if ((newaddr = __MALLOC(size, type, flags, site)) == NULL) return (NULL); hdr = addr; @@ -686,10 +708,25 @@ _REALLOC( } void * -_MALLOC_ZONE( +_MALLOC_ZONE_external( + size_t size, + int type, + int flags); +void * +_MALLOC_ZONE_external( size_t size, int type, int flags) +{ + return (__MALLOC_ZONE(size, type, flags, NULL)); +} + +void * +__MALLOC_ZONE( + size_t size, + int type, + int flags, + vm_allocation_site_t *site) { struct kmzones *kmz; void *elem; @@ -713,9 +750,9 @@ _MALLOC_ZONE( } else if (flags & M_NOWAIT) { - elem = (void *)kalloc_noblock(size); + elem = (void *)kalloc_canblock(size, FALSE, site); } else { - elem = (void *)kalloc(size); + elem = (void *)kalloc_canblock(size, TRUE, site); } return (elem); diff --git a/bsd/kern/kern_memorystatus.c b/bsd/kern/kern_memorystatus.c index 0d46cec14..13dcc2607 100644 --- a/bsd/kern/kern_memorystatus.c +++ b/bsd/kern/kern_memorystatus.c @@ -36,11 +36,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -104,6 +106,70 @@ do { \ #define MEMORYSTATUS_DEBUG(cond, format, ...) #endif +/* + * Active / Inactive limit support + * proc list must be locked + * + * The SET_*** macros are used to initialize a limit + * for the first time. + * + * The CACHE_*** macros are use to cache the limit that will + * soon be in effect down in the ledgers. 
+ */ + +#define SET_ACTIVE_LIMITS_LOCKED(p, limit, is_fatal) \ +MACRO_BEGIN \ +(p)->p_memstat_memlimit_active = (limit); \ + (p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_ACTIVE_EXC_TRIGGERED; \ + if (is_fatal) { \ + (p)->p_memstat_state |= P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL; \ + } else { \ + (p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL; \ + } \ +MACRO_END + +#define SET_INACTIVE_LIMITS_LOCKED(p, limit, is_fatal) \ +MACRO_BEGIN \ +(p)->p_memstat_memlimit_inactive = (limit); \ + (p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_INACTIVE_EXC_TRIGGERED; \ + if (is_fatal) { \ + (p)->p_memstat_state |= P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL; \ + } else { \ + (p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL; \ + } \ +MACRO_END + +#define CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception) \ +MACRO_BEGIN \ +(p)->p_memstat_memlimit = (p)->p_memstat_memlimit_active; \ + if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL) { \ + (p)->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; \ + } else { \ + (p)->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; \ + } \ + if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_EXC_TRIGGERED) { \ + trigger_exception = FALSE; \ + } else { \ + trigger_exception = TRUE; \ + } \ +MACRO_END + +#define CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception) \ +MACRO_BEGIN \ +(p)->p_memstat_memlimit = (p)->p_memstat_memlimit_inactive; \ + if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) { \ + (p)->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; \ + } else { \ + (p)->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; \ + } \ + if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_EXC_TRIGGERED) { \ + trigger_exception = FALSE; \ + } else { \ + trigger_exception = TRUE; \ + } \ +MACRO_END + + /* General tunables */ unsigned long delta_percentage = 5; @@ -161,6 +227,7 @@ void memorystatus_send_low_swap_note(void); int memorystatus_wakeup = 0; unsigned int memorystatus_level = 0; +unsigned int memorystatus_early_boot_level = 0; static int 
memorystatus_list_count = 0; @@ -177,6 +244,10 @@ uint64_t memstat_idle_demotion_deadline = 0; static unsigned int memorystatus_dirty_count = 0; +#if CONFIG_JETSAM +SYSCTL_INT(_kern, OID_AUTO, max_task_pmem, CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED, &max_task_footprint_mb, 0, ""); +#endif // CONFIG_JETSAM + int memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret) @@ -201,6 +272,16 @@ static void memorystatus_thread(void *param __unused, wait_result_t wr __unused) #if CONFIG_JETSAM +static int memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit); + +static int memorystatus_cmd_set_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval); + +static int memorystatus_set_memlimit_properties(pid_t pid, memorystatus_memlimit_properties_t *entry); + +static int memorystatus_cmd_get_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval); + +static boolean_t proc_jetsam_state_is_active_locked(proc_t); + int proc_get_memstat_priority(proc_t, boolean_t); /* Kill processes exceeding their limit either under memory pressure (1), or as soon as possible (0) */ @@ -208,7 +289,7 @@ int proc_get_memstat_priority(proc_t, boolean_t); static boolean_t memorystatus_idle_snapshot = 0; -static int memorystatus_highwater_enabled = 1; +static int memorystatus_highwater_enabled = 1; /* Update the cached memlimit data. This should be removed. 
*/ unsigned int memorystatus_delta = 0; @@ -216,22 +297,49 @@ static unsigned int memorystatus_available_pages_critical_base = 0; //static unsigned int memorystatus_last_foreground_pressure_pages = (unsigned int)-1; static unsigned int memorystatus_available_pages_critical_idle_offset = 0; +/* Jetsam Loop Detection */ +static boolean_t memorystatus_jld_enabled = TRUE; /* Enables jetsam loop detection on all devices */ +static uint32_t memorystatus_jld_eval_period_msecs = 0; /* Init pass sets this based on device memory size */ +static int memorystatus_jld_eval_aggressive_count = 3; /* Raise the priority max after 'n' aggressive loops */ +static int memorystatus_jld_eval_aggressive_priority_band_max = 15; /* Kill aggressively up through this band */ + +#if DEVELOPMENT || DEBUG +/* + * Jetsam Loop Detection tunables. + */ + +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_period_msecs, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jld_eval_period_msecs, 0, ""); +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_aggressive_count, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jld_eval_aggressive_count, 0, ""); +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_aggressive_priority_band_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jld_eval_aggressive_priority_band_max, 0, ""); +#endif /* DEVELOPMENT || DEBUG */ + #if DEVELOPMENT || DEBUG static unsigned int memorystatus_jetsam_panic_debug = 0; static unsigned int memorystatus_jetsam_policy = kPolicyDefault; static unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0; +static unsigned int memorystatus_debug_dump_this_bucket = 0; #endif static unsigned int memorystatus_thread_wasted_wakeup = 0; static uint32_t kill_under_pressure_cause = 0; +/* + * default jetsam snapshot support + */ static memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot; #define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot->entries - static unsigned int memorystatus_jetsam_snapshot_count = 0; static unsigned int 
memorystatus_jetsam_snapshot_max = 0; +static uint64_t memorystatus_jetsam_snapshot_last_timestamp = 0; +static uint64_t memorystatus_jetsam_snapshot_timeout = 0; +#define JETSAM_SNAPSHOT_TIMEOUT_SECS 30 + +/* + * snapshot support for memstats collected at boot. + */ +static memorystatus_jetsam_snapshot_t memorystatus_at_boot_snapshot; static void memorystatus_clear_errors(void); static void memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages); @@ -240,7 +348,8 @@ static void memorystatus_update_levels_locked(boolean_t critical_only); //static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured); static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause); -static boolean_t memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors); +static boolean_t memorystatus_kill_top_process(boolean_t any, boolean_t sort_flag, uint32_t cause, int32_t *priority, uint32_t *errors); +static boolean_t memorystatus_kill_top_process_aggressive(boolean_t any, uint32_t cause, int aggr_count, int32_t priority_max, uint32_t *errors); #if LEGACY_HIWATER static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors); #endif @@ -248,6 +357,17 @@ static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors); static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause); static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause); +/* Priority Band Sorting Routines */ +static int memorystatus_sort_bucket(unsigned int bucket_index, int sort_order); +static int memorystatus_sort_by_largest_coalition_locked(unsigned int bucket_index, int coal_sort_order); +static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_index); +static int memorystatus_move_list_locked(unsigned int bucket_index, pid_t *pid_list, int list_sz); + +/* qsort routines */ +typedef int 
(*cmpfunc_t)(const void *a, const void *b); +extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp); +static int memstat_asc_cmp(const void *a, const void *b); + #endif /* CONFIG_JETSAM */ /* VM pressure */ @@ -300,6 +420,10 @@ static int memorystatus_send_note(int event_code, void *data, size_t data_length boolean_t memorystatus_freeze_enabled = FALSE; int memorystatus_freeze_wakeup = 0; +lck_grp_attr_t *freezer_lck_grp_attr; +lck_grp_t *freezer_lck_grp; +static lck_mtx_t freezer_mutex; + static inline boolean_t memorystatus_can_freeze_processes(void); static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low); @@ -313,6 +437,8 @@ static unsigned int memorystatus_freeze_pages_max = 0; static unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT; +static unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT; + /* Stats */ static uint64_t memorystatus_freeze_count = 0; static uint64_t memorystatus_freeze_pageouts = 0; @@ -327,6 +453,10 @@ static uint64_t memorystatus_freeze_throttle_count = 0; static unsigned int memorystatus_suspended_footprint_total = 0; +extern uint64_t vm_swap_get_free_space(void); + +static boolean_t memorystatus_freeze_update_throttle(); + #endif /* CONFIG_FREEZE */ /* Debug */ @@ -337,6 +467,89 @@ extern struct knote *vm_find_knote_from_pid(pid_t, struct klist *); #if CONFIG_JETSAM +static void +memorystatus_debug_dump_bucket_locked (unsigned int bucket_index) +{ + proc_t p = NULL; + uint32_t pages = 0; + uint32_t pages_in_mb = 0; + unsigned int b = bucket_index; + boolean_t traverse_all_buckets = FALSE; + + if (bucket_index >= MEMSTAT_BUCKET_COUNT) { + traverse_all_buckets = TRUE; + b = 0; + } else { + traverse_all_buckets = FALSE; + b = bucket_index; + } + + /* + * Missing from this dump is the value actually + * stored in the ledger... also, format could be better. 
+ */ + printf("memorystatus_debug_dump ***START***\n"); + printf("bucket [pid] [pages/pages-mb] state [EP / RP] dirty deadline [C-limit / A-limit / IA-limit] name\n"); + p = memorystatus_get_first_proc_locked(&b, traverse_all_buckets); + while (p) { + memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); + pages_in_mb = (pages * 4096) /1024 / 1024; + printf("%d [%d] [%d/%dMB] 0x%x [%d / %d] 0x%x %lld [%d%s / %d%s / %d%s] %s\n", + b, p->p_pid, pages, pages_in_mb, + p->p_memstat_state, p->p_memstat_effectivepriority, p->p_memstat_requestedpriority, p->p_memstat_dirty, p->p_memstat_idledeadline, + p->p_memstat_memlimit, + (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), + p->p_memstat_memlimit_active, + (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL ? "F " : "NF"), + p->p_memstat_memlimit_inactive, + (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL ? "F " : "NF"), + (p->p_comm ? p->p_comm : "unknown")); + p = memorystatus_get_next_proc_locked(&b, p, traverse_all_buckets); + } + printf("memorystatus_debug_dump ***END***\n"); +} + +static int +sysctl_memorystatus_debug_dump_bucket SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2) + int bucket_index = 0; + int error; + error = SYSCTL_OUT(req, arg1, sizeof(int)); + if (error || !req->newptr) { + return (error); + } + error = SYSCTL_IN(req, &bucket_index, sizeof(int)); + if (error || !req->newptr) { + return (error); + } + if (bucket_index >= MEMSTAT_BUCKET_COUNT) { + /* + * All jetsam buckets will be dumped. + */ + } else { + /* + * Only a single bucket will be dumped. + */ + } + + proc_list_lock(); + memorystatus_debug_dump_bucket_locked(bucket_index); + proc_list_unlock(); + memorystatus_debug_dump_this_bucket = bucket_index; + return (error); +} + +/* + * Debug aid to look at jetsam buckets and proc jetsam fields. + * Use this sysctl to act on a particular jetsam bucket. + * Writing the sysctl triggers the dump. 
+ * Usage: sysctl kern.memorystatus_debug_dump_this_bucket= + */ + +SYSCTL_PROC(_kern, OID_AUTO, memorystatus_debug_dump_this_bucket, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_debug_dump_this_bucket, 0, sysctl_memorystatus_debug_dump_bucket, "I", ""); + + /* Debug aid to aid determination of limit */ static int @@ -346,7 +559,6 @@ sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS proc_t p; unsigned int b = 0; int error, enable = 0; - int32_t memlimit; error = SYSCTL_OUT(req, arg1, sizeof(int)); if (error || !req->newptr) { @@ -366,25 +578,35 @@ sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS p = memorystatus_get_first_proc_locked(&b, TRUE); while (p) { + boolean_t trigger_exception; + if (enable) { - if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) { - memlimit = -1; + /* + * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore. + * Background limits are described via the inactive limit slots. + */ + + if (proc_jetsam_state_is_active_locked(p) == TRUE) { + CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception); } else { - memlimit = p->p_memstat_memlimit; + CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception); } + } else { - memlimit = -1; - } - task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE); - - if (memlimit == -1) { - p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; - } else { - if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) { - p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; - } + /* + * Disabling limits does not touch the stored variants. + * Set the cached limit fields to system_wide defaults. + */ + p->p_memstat_memlimit = -1; + p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; + trigger_exception = TRUE; } - + + /* + * Enforce the cached limit by writing to the ledger. + */ + task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? 
p->p_memstat_memlimit: -1, NULL, trigger_exception); + p = memorystatus_get_next_proc_locked(&b, p, TRUE); } @@ -393,6 +615,7 @@ sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS proc_list_unlock(); return 0; + } SYSCTL_INT(_kern, OID_AUTO, memorystatus_idle_snapshot, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_idle_snapshot, 0, ""); @@ -496,6 +719,7 @@ sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS int error = 0, pid = 0; int ret = 0; struct knote *kn = NULL; + boolean_t found_knote = FALSE; error = sysctl_handle_int(oidp, &pid, 0, req); if (error || !req->newptr) @@ -514,17 +738,27 @@ sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS */ memorystatus_klist_lock(); - kn = vm_find_knote_from_pid(pid, &memorystatus_klist); - if (kn) { - /* - * Forcibly send this pid a "warning" memory pressure notification. - */ - kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN; - KNOTE(&memorystatus_klist, kMemorystatusPressure); - ret = 0; + + SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) { + proc_t knote_proc = kn->kn_kq->kq_p; + pid_t knote_pid = knote_proc->p_pid; + + if (knote_pid == pid) { + /* + * Forcibly send this pid a "warning" memory pressure notification. 
+ */ + kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN; + found_knote = TRUE; + } + } + + if (found_knote) { + KNOTE(&memorystatus_klist, 0); + ret = 0; } else { ret = vm_dispatch_pressure_note_to_pid(pid, FALSE); } + memorystatus_klist_unlock(); return ret; @@ -539,6 +773,8 @@ SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_ #if CONFIG_FREEZE +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, ""); + SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, ""); SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, ""); @@ -559,7 +795,6 @@ static int sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS { #pragma unused(arg1, arg2) - int error, pid = 0; proc_t p; @@ -571,6 +806,14 @@ sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS if (error || !req->newptr) return (error); + if (pid == 2) { + vm_pageout_anonymous_pages(); + + return 0; + } + + lck_mtx_lock(&freezer_mutex); + p = proc_find(pid); if (p != NULL) { uint32_t purgeable, wired, clean, dirty; @@ -578,17 +821,42 @@ sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS uint32_t max_pages = 0; if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { - max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max); + + unsigned int avail_swap_space = 0; /* in pages. */ + + if (DEFAULT_FREEZER_IS_ACTIVE) { + /* + * Freezer backed by default pager and swap file(s). + */ + avail_swap_space = default_pager_swap_pages_free(); + } else { + /* + * Freezer backed by the compressor and swap file(s) + * while will hold compressed data. + */ + avail_swap_space = vm_swap_get_free_space() / PAGE_SIZE_64; + } + + max_pages = MIN(avail_swap_space, memorystatus_freeze_pages_max); + } else { + /* + * We only have the compressor without any swap. 
+ */ max_pages = UINT32_MAX - 1; } + error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE); proc_rele(p); if (error) error = EIO; + + lck_mtx_unlock(&freezer_mutex); return error; } + + lck_mtx_unlock(&freezer_mutex); return EINVAL; } @@ -637,6 +905,64 @@ extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation thread_t *new_thread); #if CONFIG_JETSAM +/* + * Picks the sorting routine for a given jetsam priority band. + * + * Input: + * bucket_index - jetsam priority band to be sorted. + * sort_order - JETSAM_SORT_xxx from kern_memorystatus.h + * Currently sort_order is only meaningful when handling + * coalitions. + * + * Return: + * 0 on success + * non-0 on failure + */ +static int memorystatus_sort_bucket(unsigned int bucket_index, int sort_order) +{ + int coal_sort_order; + + /* + * Verify the jetsam priority + */ + if (bucket_index >= MEMSTAT_BUCKET_COUNT) { + return(EINVAL); + } + +#if DEVELOPMENT || DEBUG + if (sort_order == JETSAM_SORT_DEFAULT) { + coal_sort_order = COALITION_SORT_DEFAULT; + } else { + coal_sort_order = sort_order; /* only used for testing scenarios */ + } +#else + /* Verify default */ + if (sort_order == JETSAM_SORT_DEFAULT) { + coal_sort_order = COALITION_SORT_DEFAULT; + } else { + return(EINVAL); + } +#endif + + proc_list_lock(); + switch (bucket_index) { + case JETSAM_PRIORITY_FOREGROUND: + if (memorystatus_sort_by_largest_coalition_locked(bucket_index, coal_sort_order) == 0) { + /* + * Fall back to per process sorting when zero coalitions are found. + */ + memorystatus_sort_by_largest_process_locked(bucket_index); + } + break; + default: + memorystatus_sort_by_largest_process_locked(bucket_index); + break; + } + proc_list_unlock(); + + return(0); +} + /* * Sort processes by size for a single jetsam bucket. 
*/ @@ -644,6 +970,7 @@ extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_index) { proc_t p = NULL, insert_after_proc = NULL, max_proc = NULL; + proc_t next_p = NULL, prev_max_proc = NULL; uint32_t pages = 0, max_pages = 0; memstat_bucket_t *current_bucket; @@ -655,48 +982,36 @@ static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_inde p = TAILQ_FIRST(&current_bucket->list); - if (p) { + while (p) { memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); max_pages = pages; - insert_after_proc = NULL; - - p = TAILQ_NEXT(p, p_memstat_list); - -restart: - while (p) { - + max_proc = p; + prev_max_proc = p; + + while ((next_p = TAILQ_NEXT(p, p_memstat_list)) != NULL) { + /* traversing list until we find next largest process */ + p=next_p; memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); - if (pages > max_pages) { max_pages = pages; max_proc = p; } - - p = TAILQ_NEXT(p, p_memstat_list); } - if (max_proc) { - + if (prev_max_proc != max_proc) { + /* found a larger process, place it in the list */ TAILQ_REMOVE(&current_bucket->list, max_proc, p_memstat_list); - if (insert_after_proc == NULL) { TAILQ_INSERT_HEAD(&current_bucket->list, max_proc, p_memstat_list); } else { TAILQ_INSERT_AFTER(&current_bucket->list, insert_after_proc, max_proc, p_memstat_list); } + prev_max_proc = max_proc; + } - insert_after_proc = max_proc; - - /* Reset parameters for the new search. 
*/ - p = TAILQ_NEXT(max_proc, p_memstat_list); - if (p) { - memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); - max_pages = pages; - } - max_proc = NULL; + insert_after_proc = max_proc; - goto restart; - } + p = TAILQ_NEXT(max_proc, p_memstat_list); } } @@ -774,6 +1089,9 @@ memorystatus_init(void) assert(freeze_threshold_percentage < 100); #if CONFIG_JETSAM + /* device tree can request to take snapshots for idle-exit kills by default */ + PE_get_default("kern.jetsam_idle_snapshot", &memorystatus_idle_snapshot, sizeof(memorystatus_idle_snapshot)); + memorystatus_delta = delta_percentage * atop_64(max_mem) / 100; memorystatus_available_pages_critical_idle_offset = idle_offset_percentage * atop_64(max_mem) / 100; memorystatus_available_pages_critical_base = (critical_threshold_percentage / delta_percentage) * memorystatus_delta; @@ -786,8 +1104,21 @@ memorystatus_init(void) panic("Could not allocate memorystatus_jetsam_snapshot"); } + nanoseconds_to_absolutetime((uint64_t)JETSAM_SNAPSHOT_TIMEOUT_SECS * NSEC_PER_SEC, &memorystatus_jetsam_snapshot_timeout); + + memset(&memorystatus_at_boot_snapshot, 0, sizeof(memorystatus_jetsam_snapshot_t)); + /* No contention at this point */ memorystatus_update_levels_locked(FALSE); + + /* Jetsam Loop Detection */ + if (max_mem <= (512 * 1024 * 1024)) { + /* 512 MB devices */ + memorystatus_jld_eval_period_msecs = 8000; /* 8000 msecs == 8 second window */ + } else { + /* 1GB and larger devices */ + memorystatus_jld_eval_period_msecs = 6000; /* 6000 msecs == 6 second window */ + } #endif #if CONFIG_FREEZE @@ -852,9 +1183,7 @@ memorystatus_do_kill(proc_t p, uint32_t cause) { KERNEL_DEBUG_CONSTANT( (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_END, victim_pid, cause, vm_page_free_count, error, 0); - if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { - vm_wake_compactor_swapper(); - } + vm_wake_compactor_swapper(); return (error == 0); } @@ -931,7 +1260,7 @@ 
memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state) present_in_deferred_bucket = TRUE; } - MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for process %d (dirty:0x%x, set_state %d, demotions %d).\n", + MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for pid %d (dirty:0x%x, set_state %d, demotions %d).\n", p->p_pid, p->p_memstat_dirty, set_state, memorystatus_scheduled_idle_demotions); assert((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED); @@ -959,7 +1288,7 @@ memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state) assert(p->p_memstat_idledeadline); } - MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for process %d (clear_state %d, demotions %d).\n", + MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for pid %d (clear_state %d, demotions %d).\n", p->p_pid, clear_state, memorystatus_scheduled_idle_demotions); @@ -1007,7 +1336,7 @@ memorystatus_add(proc_t p, boolean_t locked) { memstat_bucket_t *bucket; - MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding process %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority); + MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding pid %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority); if (!locked) { proc_list_lock(); @@ -1039,6 +1368,14 @@ memorystatus_add(proc_t p, boolean_t locked) return 0; } +/* + * Description: + * Moves a process from one jetsam bucket to another. + * which changes the LRU position of the process. + * + * Monitors transition between buckets and if necessary + * will update cached memory limits accordingly. 
+ */ static void memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert) { @@ -1051,7 +1388,7 @@ memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_inser return; } - MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting process %d to priority %d, inserting at %s\n", + MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting pid %d to priority %d, inserting at %s\n", p->p_pid, priority, head_insert ? "head" : "tail"); old_bucket = &memstat_bucket[p->p_memstat_effectivepriority]; @@ -1068,45 +1405,134 @@ memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_inser else TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list); new_bucket->count++; - + #if CONFIG_JETSAM - if (memorystatus_highwater_enabled && (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND)) { + if (memorystatus_highwater_enabled) { + boolean_t trigger_exception; + + /* + * If cached limit data is updated, then the limits + * will be enforced by writing to the ledgers. + */ + boolean_t ledger_update_needed = TRUE; /* - * Adjust memory limit based on if the task is going to/from foreground and background. + * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore. + * Background limits are described via the inactive limit slots. + * + * Here, we must update the cached memory limit if the task + * is transitioning between: + * active <--> inactive + * FG <--> BG + * but: + * dirty <--> clean is ignored + * + * We bypass processes that have opted into dirty tracking because + * a move between buckets does not imply a transition between the + * dirty <--> clean state. + * Setting limits on processes opted into dirty tracking is handled + * in memorystatus_dirty_set() where the transition is very clear. 
*/ - if (((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) || - ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND))) { - int32_t memlimit = (priority >= JETSAM_PRIORITY_FOREGROUND) ? -1 : p->p_memstat_memlimit; - task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE); - - if (memlimit <= 0) { - p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; - } else { - p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; - } + if (p->p_memstat_dirty & P_DIRTY_TRACK) { + + ledger_update_needed = FALSE; + + } else if ((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) { + /* + * inactive --> active + * BG --> FG + * assign active state + */ + CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception); + + } else if ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) { + /* + * active --> inactive + * FG --> BG + * assign inactive state + */ + CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception); + } else { + /* + * The transition between jetsam priority buckets apparently did + * not affect active/inactive state. + * This is not unusual... especially during startup when + * processes are getting established in their respective bands. + */ + ledger_update_needed = FALSE; + } + + /* + * Enforce the new limits by writing to the ledger + */ + if (ledger_update_needed) { + task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1, NULL, trigger_exception); + + MEMORYSTATUS_DEBUG(3, "memorystatus_update_priority_locked: new limit on pid %d (%dMB %s) priority old --> new (%d --> %d) dirty?=0x%x %s\n", + p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1), + (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? 
"F " : "NF"), p->p_memstat_effectivepriority, priority, p->p_memstat_dirty, + (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : "")); } } -#endif + +#endif /* CONFIG_JETSAM */ p->p_memstat_effectivepriority = priority; memorystatus_check_levels_locked(); } +/* + * + * Description: Update the jetsam priority and memory limit attributes for a given process. + * + * Parameters: + * p init this process's jetsam information. + * priority The jetsam priority band + * user_data user specific data, unused by the kernel + * effective guards against race if process's update already occurred + * update_memlimit When true we know this is the init step via the posix_spawn path. + * + * memlimit_active Value in megabytes; The monitored footprint level while the + * process is active. Exceeding it may result in termination + * based on its associated fatal flag. + * + * memlimit_active_is_fatal When a process is active and exceeds its memory footprint, + * this describes whether or not it should be immediately fatal. + * + * memlimit_inactive Value in megabytes; The monitored footprint level while the + * process is inactive. Exceeding it may result in termination + * based on its associated fatal flag. + * + * memlimit_inactive_is_fatal When a process is inactive and exceeds its memory footprint, + * this describes whether or not it should be immediately fatal. + * + * memlimit_background This process has a high-water-mark while in the background. + * No longer meaningful. Background limits are described via + * the inactive slots. Flag is ignored. 
+ * + * + * Returns: 0 Success + * non-0 Failure + */ + int -memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background, boolean_t is_fatal_limit) +memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, + int32_t memlimit_active, boolean_t memlimit_active_is_fatal, + int32_t memlimit_inactive, boolean_t memlimit_inactive_is_fatal, + __unused boolean_t memlimit_background) { int ret; boolean_t head_insert = false; #if !CONFIG_JETSAM -#pragma unused(update_memlimit, memlimit, memlimit_background, is_fatal_limit) -#endif +#pragma unused(update_memlimit, memlimit_active, memlimit_inactive) +#pragma unused(memlimit_active_is_fatal, memlimit_inactive_is_fatal) +#endif /* !CONFIG_JETSAM */ + + MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing pid %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data); - MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing process %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data); - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0); if (priority == -1) { @@ -1118,13 +1544,13 @@ memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effect } else if (priority == JETSAM_PRIORITY_IDLE_HEAD) { /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle queue */ priority = JETSAM_PRIORITY_IDLE; - head_insert = true; + head_insert = TRUE; } else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) { /* Sanity check */ ret = EINVAL; goto out; } - + proc_list_lock(); assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL)); @@ -1151,37 +1577,114 @@ memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effect #if CONFIG_JETSAM if (update_memlimit) { - p->p_memstat_memlimit = memlimit; + boolean_t trigger_exception; + + /* + * 
Posix_spawn'd processes come through this path to instantiate ledger limits. + * Forked processes do not come through this path, so no ledger limits exist. + * (That's why forked processes can consume unlimited memory.) + */ + + MEMORYSTATUS_DEBUG(3, "memorystatus_update(enter): pid %d, priority %d, dirty=0x%x, Active(%dMB %s), Inactive(%dMB, %s)\n", + p->p_pid, priority, p->p_memstat_dirty, + memlimit_active, (memlimit_active_is_fatal ? "F " : "NF"), + memlimit_inactive, (memlimit_inactive_is_fatal ? "F " : "NF")); + if (memlimit_background) { - /* Will be set as priority is updated */ - p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND; - /* Cannot have a background memory limit and be fatal. */ - is_fatal_limit = FALSE; + /* + * With 2-level HWM support, we no longer honor P_MEMSTAT_MEMLIMIT_BACKGROUND. + * Background limits are described via the inactive limit slots. + */ + + // p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND; - } else { - /* Otherwise, apply now */ - if (memorystatus_highwater_enabled) { - task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE); +#if DEVELOPMENT || DEBUG + printf("memorystatus_update: WARNING %s[%d] set unused flag P_MEMSTAT_MEMLIMIT_BACKGROUND [A==%dMB %s] [IA==%dMB %s]\n", + (p->p_comm ? p->p_comm : "unknown"), p->p_pid, + memlimit_active, (memlimit_active_is_fatal ? "F " : "NF"), + memlimit_inactive, (memlimit_inactive_is_fatal ? "F " : "NF")); +#endif /* DEVELOPMENT || DEBUG */ + } + + if (memlimit_active <= 0) { + /* + * This process will have a system_wide task limit when active. + * System_wide task limit is always fatal. + * It's quite common to see non-fatal flag passed in here. + * It's not an error, we just ignore it. + */ + + /* + * For backward compatibility with some unexplained launchd behavior, + * we allow a zero sized limit. But we still enforce system_wide limit + * when written to the ledgers. 
+ */ + + if (memlimit_active < 0) { + memlimit_active = -1; /* enforces system_wide task limit */ } + memlimit_active_is_fatal = TRUE; } - - if (is_fatal_limit || memlimit <= 0) { - p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; - } else { - p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; + + if (memlimit_inactive <= 0) { + /* + * This process will have a system_wide task limit when inactive. + * System_wide task limit is always fatal. + */ + + memlimit_inactive = -1; + memlimit_inactive_is_fatal = TRUE; } - } -#endif - /* - * We can't add to the JETSAM_PRIORITY_IDLE_DEFERRED bucket here. - * But, we could be removing it from the bucket. - * Check and take appropriate steps if so. - */ - - if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { - + /* + * Initialize the active limit variants for this process. + */ + SET_ACTIVE_LIMITS_LOCKED(p, memlimit_active, memlimit_active_is_fatal); + + /* + * Initialize the inactive limit variants for this process. + */ + SET_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive, memlimit_inactive_is_fatal); + + /* + * Initialize the cached limits for target process. + * When the target process is dirty tracked, it's typically + * in a clean state. Non dirty tracked processes are + * typically active (Foreground or above). + * But just in case, we don't make assumptions... + */ + + if (proc_jetsam_state_is_active_locked(p) == TRUE) { + CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception); + } else { + CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception); + } + + /* + * Enforce the cached limit by writing to the ledger. + */ + if (memorystatus_highwater_enabled) { + /* apply now */ + assert(trigger_exception == TRUE); + task_set_phys_footprint_limit_internal(p->task, ((p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1), NULL, trigger_exception); + + MEMORYSTATUS_DEBUG(3, "memorystatus_update: init: limit on pid %d (%dMB %s) targeting priority(%d) dirty?=0x%x %s\n", + p->p_pid, (p->p_memstat_memlimit > 0 ? 
p->p_memstat_memlimit : -1), + (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), priority, p->p_memstat_dirty, + (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : "")); + } + } +#endif /* CONFIG_JETSAM */ + + /* + * We can't add to the JETSAM_PRIORITY_IDLE_DEFERRED bucket here. + * But, we could be removing it from the bucket. + * Check and take appropriate steps if so. + */ + + if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { + memorystatus_invalidate_idle_demotion_locked(p, TRUE); } @@ -1202,7 +1705,7 @@ memorystatus_remove(proc_t p, boolean_t locked) int ret; memstat_bucket_t *bucket; - MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing process %d\n", p->p_pid); + MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing pid %d\n", p->p_pid); if (!locked) { proc_list_lock(); @@ -1252,38 +1755,46 @@ memorystatus_remove(proc_t p, boolean_t locked) return ret; } -static boolean_t +/* + * Validate dirty tracking flags with process state. + * + * Return: + * 0 on success + * non-0 on failure + */ + +static int memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) { /* See that the process isn't marked for termination */ if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) { - return FALSE; + return EBUSY; } /* Idle exit requires that process be tracked */ if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) && !(pcontrol & PROC_DIRTY_TRACK)) { - return FALSE; + return EINVAL; } /* 'Launch in progress' tracking requires that process have enabled dirty tracking too. 
*/ if ((pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) && !(pcontrol & PROC_DIRTY_TRACK)) { - return FALSE; + return EINVAL; } /* Deferral is only relevant if idle exit is specified */ if ((pcontrol & PROC_DIRTY_DEFER) && !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) { - return FALSE; + return EINVAL; } - return TRUE; + return(0); } static void memorystatus_update_idle_priority_locked(proc_t p) { int32_t priority; - + MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty); if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) { @@ -1320,7 +1831,7 @@ memorystatus_dirty_track(proc_t p, uint32_t pcontrol) { boolean_t reschedule = FALSE; boolean_t already_deferred = FALSE; boolean_t defer_now = FALSE; - int ret; + int ret = 0; KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_TRACK), p->p_pid, p->p_memstat_dirty, pcontrol, 0, 0); @@ -1340,10 +1851,10 @@ memorystatus_dirty_track(proc_t p, uint32_t pcontrol) { goto exit; } - if (!memorystatus_validate_track_flags(p, pcontrol)) { - ret = EINVAL; + if ((ret = memorystatus_validate_track_flags(p, pcontrol)) != 0) { + /* error */ goto exit; - } + } old_dirty = p->p_memstat_dirty; @@ -1374,7 +1885,7 @@ memorystatus_dirty_track(proc_t p, uint32_t pcontrol) { defer_now = TRUE; } - MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / defer %s / dirty %s for process %d\n", + MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / defer %s / dirty %s for pid %d\n", ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N", defer_now ? "Y" : "N", p->p_memstat_dirty & P_DIRTY ? 
"Y" : "N", @@ -1496,7 +2007,7 @@ memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) { memorystatus_dirty_count++; ret = 0; } else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) { - if ((flag == P_DIRTY_SHUTDOWN) && (!p->p_memstat_dirty & P_DIRTY)) { + if ((flag == P_DIRTY_SHUTDOWN) && (!(p->p_memstat_dirty & P_DIRTY))) { /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */ p->p_memstat_dirty |= P_DIRTY_TERMINATED; kill = true; @@ -1516,7 +2027,7 @@ memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) { if (ret != 0) { goto exit; } - + if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) now_dirty = TRUE; @@ -1584,17 +2095,72 @@ memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) { } } } - + memorystatus_update_idle_priority_locked(p); + +#if CONFIG_JETSAM + if (memorystatus_highwater_enabled) { + boolean_t trigger_exception; + /* + * We are in this path because this process transitioned between + * dirty <--> clean state. Update the cached memory limits. + */ + + if (proc_jetsam_state_is_active_locked(p) == TRUE) { + /* + * process is dirty + */ + CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception); + } else { + /* + * process is clean + */ + CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception); + } + + /* + * Enforce the new limits by writing to the ledger. + * + * This is a hot path and holding the proc_list_lock while writing to the ledgers, + * (where the task lock is taken) is bad. So, we temporarily drop the proc_list_lock. + * We aren't traversing the jetsam bucket list here, so we should be safe. + * See rdar://21394491. 
+ */ + + if (proc_ref_locked(p) == p) { + int ledger_limit; + if (p->p_memstat_memlimit > 0) { + ledger_limit = p->p_memstat_memlimit; + } else { + ledger_limit = -1; + } + proc_list_unlock(); + task_set_phys_footprint_limit_internal(p->task, ledger_limit, NULL, trigger_exception); + proc_list_lock(); + proc_rele_locked(p); + + MEMORYSTATUS_DEBUG(3, "memorystatus_dirty_set: new limit on pid %d (%dMB %s) priority(%d) dirty?=0x%x %s\n", + p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1), + (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, p->p_memstat_dirty, + (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : "")); + } + + } +#endif /* CONFIG_JETSAM */ /* If the deferral state changed, reschedule the demotion timer */ if (reschedule) { memorystatus_reschedule_idle_demotion_locked(); } } - + if (kill) { - psignal(p, SIGKILL); + if (proc_ref_locked(p) == p) { + proc_list_unlock(); + psignal(p, SIGKILL); + proc_list_lock(); + proc_rele_locked(p); + } } exit: @@ -1868,10 +2434,25 @@ static void memorystatus_thread(void *param __unused, wait_result_t wr __unused) { static boolean_t is_vm_privileged = FALSE; + #if CONFIG_JETSAM boolean_t post_snapshot = FALSE; uint32_t errors = 0; uint32_t hwm_kill = 0; + boolean_t sort_flag = TRUE; + + /* Jetsam Loop Detection - locals */ + memstat_bucket_t *bucket; + int jld_bucket_count = 0; + struct timeval jld_now_tstamp = {0,0}; + uint64_t jld_now_msecs = 0; + + /* Jetsam Loop Detection - statics */ + static uint64_t jld_timestamp_msecs = 0; + static int jld_idle_kill_candidates = 0; /* Number of available processes in band 0,1 at start */ + static int jld_idle_kills = 0; /* Number of procs killed during eval period */ + static int jld_eval_aggressive_count = 0; /* Bumps the max priority in aggressive loop */ + static int32_t jld_priority_band_max = JETSAM_PRIORITY_UI_SUPPORT; #endif if (is_vm_privileged == FALSE) { @@ -1882,13 +2463,16 @@ 
memorystatus_thread(void *param __unused, wait_result_t wr __unused) thread_wire(host_priv_self(), current_thread(), TRUE); is_vm_privileged = TRUE; + if (vm_restricted_to_single_processor == TRUE) + thread_vm_bind_group_add(); + memorystatus_thread_block(0, memorystatus_thread); } #if CONFIG_JETSAM KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START, - memorystatus_available_pages, 0, 0, 0, 0); + memorystatus_available_pages, memorystatus_jld_enabled, memorystatus_jld_eval_period_msecs, memorystatus_jld_eval_aggressive_count,0); /* * Jetsam aware version. @@ -1942,14 +2526,104 @@ memorystatus_thread(void *param __unused, wait_result_t wr __unused) break; } #endif + if (memorystatus_jld_enabled == TRUE) { + + /* + * Jetsam Loop Detection: attempt to detect + * rapid daemon relaunches in the lower bands. + */ + + microuptime(&jld_now_tstamp); + + /* + * Ignore usecs in this calculation. + * msecs granularity is close enough. + */ + jld_now_msecs = (jld_now_tstamp.tv_sec * 1000); + + proc_list_lock(); + bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; + jld_bucket_count = bucket->count; + bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED]; + jld_bucket_count += bucket->count; + proc_list_unlock(); + + /* + * memorystatus_jld_eval_period_msecs is a tunable + * memorystatus_jld_eval_aggressive_count is a tunable + * memorystatus_jld_eval_aggressive_priority_band_max is a tunable + */ + if ( (jld_bucket_count == 0) || + (jld_now_msecs > (jld_timestamp_msecs + memorystatus_jld_eval_period_msecs))) { + + /* + * Refresh evaluation parameters + */ + jld_timestamp_msecs = jld_now_msecs; + jld_idle_kill_candidates = jld_bucket_count; + jld_idle_kills = 0; + jld_eval_aggressive_count = 0; + jld_priority_band_max = JETSAM_PRIORITY_UI_SUPPORT; + } + + if (jld_idle_kills > jld_idle_kill_candidates) { + jld_eval_aggressive_count++; + if (jld_eval_aggressive_count > memorystatus_jld_eval_aggressive_count) { + /* + * Bump up the jetsam priority 
limit (eg: the bucket index) + * Enforce bucket index sanity. + */ + if ((memorystatus_jld_eval_aggressive_priority_band_max < 0) || + (memorystatus_jld_eval_aggressive_priority_band_max >= MEMSTAT_BUCKET_COUNT)) { + /* + * Do nothing. Stick with the default level. + */ + } else { + jld_priority_band_max = memorystatus_jld_eval_aggressive_priority_band_max; + } + } + + killed = memorystatus_kill_top_process_aggressive( + TRUE, + kMemorystatusKilledVMThrashing, + jld_eval_aggressive_count, + jld_priority_band_max, + &errors); + + + if (killed) { + /* Always generate logs after aggressive kill */ + post_snapshot = TRUE; + goto done; + } + } + } /* LRU */ - killed = memorystatus_kill_top_process(TRUE, cause, &priority, &errors); + killed = memorystatus_kill_top_process(TRUE, sort_flag, cause, &priority, &errors); + sort_flag = FALSE; + if (killed) { - /* Don't generate logs for steady-state idle-exit kills (unless overridden for debug) */ + /* + * Don't generate logs for steady-state idle-exit kills, + * unless it is overridden for debug or by the device + * tree. + */ if ((priority != JETSAM_PRIORITY_IDLE) || memorystatus_idle_snapshot) { post_snapshot = TRUE; } + + /* Jetsam Loop Detection */ + if (memorystatus_jld_enabled == TRUE) { + if ((priority == JETSAM_PRIORITY_IDLE) || (priority == JETSAM_PRIORITY_IDLE_DEFERRED)) { + jld_idle_kills++; + } else { + /* + * We've reached into bands beyond idle deferred. 
+ * We make no attempt to monitor them + */ + } + } goto done; } @@ -1991,10 +2665,19 @@ memorystatus_thread(void *param __unused, wait_result_t wr __unused) if (post_snapshot) { size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count); - memorystatus_jetsam_snapshot->notification_time = mach_absolute_time(); - memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size)); + uint64_t timestamp_now = mach_absolute_time(); + memorystatus_jetsam_snapshot->notification_time = timestamp_now; + if (memorystatus_jetsam_snapshot_last_timestamp == 0 || + timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout) { + int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size)); + if (!ret) { + proc_list_lock(); + memorystatus_jetsam_snapshot_last_timestamp = timestamp_now; + proc_list_unlock(); + } + } } - + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END, memorystatus_available_pages, 0, 0, 0, 0); @@ -2034,24 +2717,58 @@ boolean_t memorystatus_idle_exit_from_VM(void) { void memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footprint_mb) { + boolean_t is_active; + boolean_t is_fatal; + proc_t p = current_proc(); - if (warning == FALSE) { - printf("process %d (%s) exceeded physical memory footprint limit of %d MB\n", - p->p_pid, p->p_comm, max_footprint_mb); + proc_list_lock(); + + is_active = proc_jetsam_state_is_active_locked(p); + is_fatal = (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT); + + if (warning == FALSE) { + /* + * We only want the EXC_RESOURCE to trigger once per lifetime + * of the active/inactive limit state. So, here, we detect the + * active/inactive state of the process and mark the + * state as exception has been triggered. 
+ */ + if (is_active == TRUE) { + /* + * turn off exceptions for active state + */ + p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_ACTIVE_EXC_TRIGGERED; + } else { + /* + * turn off exceptions for inactive state + */ + p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_INACTIVE_EXC_TRIGGERED; + } + + /* + * Soft memory limit is a non-fatal high-water-mark + * Hard memory limit is a fatal custom-task-limit or system-wide per-task memory limit. + */ + printf("process %d (%s) exceeded physical memory footprint, the %s%sMemoryLimit of %d MB\n", + p->p_pid, p->p_comm, (is_active ? "Active" : "Inactive"), + (is_fatal ? "Hard" : "Soft"), max_footprint_mb); + } + proc_list_unlock(); + #if VM_PRESSURE_EVENTS if (warning == TRUE) { if (memorystatus_warn_process(p->p_pid, TRUE /* critical? */) != TRUE) { /* Print warning, since it's possible that task has not registered for pressure notifications */ - printf("task_exceeded_footprint: failed to warn the current task (exiting, or no handler registered?).\n"); + printf("task_exceeded_footprint: failed to warn the current task (exiting, or no handler registered?).\n"); } return; } #endif /* VM_PRESSURE_EVENTS */ - if ((p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT) == P_MEMSTAT_FATAL_MEMLIMIT) { + if (is_fatal) { /* * If this process has no high watermark or has a fatal task limit, then we have been invoked because the task * has violated either the system-wide per-task memory limit OR its own task limit. @@ -2068,6 +2785,32 @@ memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footp } } +/* + * Toggle the P_MEMSTAT_TERMINATED state. + * Takes the proc_list_lock. 
+ */ +void +proc_memstat_terminated(proc_t p, boolean_t set) +{ +#if DEVELOPMENT || DEBUG + if (p) { + proc_list_lock(); + if (set == TRUE) { + p->p_memstat_state |= P_MEMSTAT_TERMINATED; + } else { + p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; + } + proc_list_unlock(); + } +#else +#pragma unused(p, set) + /* + * do nothing + */ +#endif /* DEVELOPMENT || DEBUG */ + return; +} + /* * This is invoked when cpulimits have been exceeded while in fatal mode. * The jetsam_flags do not apply as those are for memory related kills. @@ -2109,9 +2852,8 @@ memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *ma } } - static void -memorystatus_update_snapshot_locked(proc_t p, uint32_t kill_cause) +memorystatus_update_jetsam_snapshot_entry_locked(proc_t p, uint32_t kill_cause) { unsigned int i; @@ -2170,7 +2912,7 @@ void memorystatus_pages_update(unsigned int pages_avail) } static boolean_t -memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry) +memorystatus_init_jetsam_snapshot_entry_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry) { clock_sec_t tv_sec; clock_usec_t tv_usec; @@ -2194,56 +2936,100 @@ memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jets } static void -memorystatus_jetsam_snapshot_procs_locked(void) +memorystatus_init_snapshot_vmstats(memorystatus_jetsam_snapshot_t *snapshot) { - proc_t p, next_p; - unsigned int b = 0, i = 0; kern_return_t kr = KERN_SUCCESS; - mach_msg_type_number_t count = HOST_VM_INFO64_COUNT; vm_statistics64_data_t vm_stat; if ((kr = host_statistics64(host_self(), HOST_VM_INFO64, (host_info64_t)&vm_stat, &count) != KERN_SUCCESS)) { - printf("memorystatus_jetsam_snapshot_procs_locked: host_statistics64 failed with %d\n", kr); - memset(&memorystatus_jetsam_snapshot->stats, 0, sizeof(memorystatus_jetsam_snapshot->stats)); + printf("memorystatus_init_jetsam_snapshot_stats: host_statistics64 failed with %d\n", kr); + 
memset(&snapshot->stats, 0, sizeof(snapshot->stats)); } else { - memorystatus_jetsam_snapshot->stats.free_pages = vm_stat.free_count; - memorystatus_jetsam_snapshot->stats.active_pages = vm_stat.active_count; - memorystatus_jetsam_snapshot->stats.inactive_pages = vm_stat.inactive_count; - memorystatus_jetsam_snapshot->stats.throttled_pages = vm_stat.throttled_count; - memorystatus_jetsam_snapshot->stats.purgeable_pages = vm_stat.purgeable_count; - memorystatus_jetsam_snapshot->stats.wired_pages = vm_stat.wire_count; - - memorystatus_jetsam_snapshot->stats.speculative_pages = vm_stat.speculative_count; - memorystatus_jetsam_snapshot->stats.filebacked_pages = vm_stat.external_page_count; - memorystatus_jetsam_snapshot->stats.anonymous_pages = vm_stat.internal_page_count; - memorystatus_jetsam_snapshot->stats.compressions = vm_stat.compressions; - memorystatus_jetsam_snapshot->stats.decompressions = vm_stat.decompressions; - memorystatus_jetsam_snapshot->stats.compressor_pages = vm_stat.compressor_page_count; - memorystatus_jetsam_snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor; + snapshot->stats.free_pages = vm_stat.free_count; + snapshot->stats.active_pages = vm_stat.active_count; + snapshot->stats.inactive_pages = vm_stat.inactive_count; + snapshot->stats.throttled_pages = vm_stat.throttled_count; + snapshot->stats.purgeable_pages = vm_stat.purgeable_count; + snapshot->stats.wired_pages = vm_stat.wire_count; + + snapshot->stats.speculative_pages = vm_stat.speculative_count; + snapshot->stats.filebacked_pages = vm_stat.external_page_count; + snapshot->stats.anonymous_pages = vm_stat.internal_page_count; + snapshot->stats.compressions = vm_stat.compressions; + snapshot->stats.decompressions = vm_stat.decompressions; + snapshot->stats.compressor_pages = vm_stat.compressor_page_count; + snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor; + } +} + +/* + * Collect 
vm statistics at boot. + * Called only once (see kern_exec.c) + * Data can be consumed at any time. + */ +void +memorystatus_init_at_boot_snapshot() { + memorystatus_init_snapshot_vmstats(&memorystatus_at_boot_snapshot); + memorystatus_at_boot_snapshot.entry_count = 0; + memorystatus_at_boot_snapshot.notification_time = 0; /* updated when consumed */ + memorystatus_at_boot_snapshot.snapshot_time = mach_absolute_time(); +} + +static void +memorystatus_init_jetsam_snapshot_locked(memorystatus_jetsam_snapshot_t *od_snapshot, uint32_t ods_list_count ) +{ + proc_t p, next_p; + unsigned int b = 0, i = 0; + + memorystatus_jetsam_snapshot_t *snapshot = NULL; + memorystatus_jetsam_snapshot_entry_t *snapshot_list = NULL; + unsigned int snapshot_max = 0; + + if (od_snapshot) { + /* + * This is an on_demand snapshot + */ + snapshot = od_snapshot; + snapshot_list = od_snapshot->entries; + snapshot_max = ods_list_count; + } else { + /* + * This is a jetsam event snapshot + */ + snapshot = memorystatus_jetsam_snapshot; + snapshot_list = memorystatus_jetsam_snapshot->entries; + snapshot_max = memorystatus_jetsam_snapshot_max; } + memorystatus_init_snapshot_vmstats(snapshot); + next_p = memorystatus_get_first_proc_locked(&b, TRUE); while (next_p) { p = next_p; next_p = memorystatus_get_next_proc_locked(&b, p, TRUE); - if (FALSE == memorystatus_get_snapshot_properties_for_proc_locked(p, &memorystatus_jetsam_snapshot_list[i])) { + if (FALSE == memorystatus_init_jetsam_snapshot_entry_locked(p, &snapshot_list[i])) { continue; } - MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid = %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", p->p_pid, p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7], p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], 
p->p_uuid[14], p->p_uuid[15]); - if (++i == memorystatus_jetsam_snapshot_max) { + if (++i == snapshot_max) { break; } } - memorystatus_jetsam_snapshot->snapshot_time = mach_absolute_time(); - memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = i; + snapshot->snapshot_time = mach_absolute_time(); + snapshot->entry_count = i; + + if (!od_snapshot) { + /* update the system buffer count */ + memorystatus_jetsam_snapshot_count = i; + } } #if DEVELOPMENT || DEBUG @@ -2272,6 +3058,30 @@ memorystatus_cmd_set_panic_bits(user_addr_t buffer, uint32_t buffer_size) { return ret; } +/* + * Triggers a sort_order on a specified jetsam priority band. + * This is for testing only, used to force a path through the sort + * function. + */ +static int +memorystatus_cmd_test_jetsam_sort(int priority, int sort_order) { + + int error = 0; + + unsigned int bucket_index = 0; + + if (priority == -1) { + /* Use as shorthand for default priority */ + bucket_index = JETSAM_PRIORITY_DEFAULT; + } else { + bucket_index = (unsigned int)priority; + } + + error = memorystatus_sort_bucket(bucket_index, sort_order); + + return (error); +} + #endif /* @@ -2289,17 +3099,17 @@ memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) { return FALSE; } - printf("memorystatus: specifically killing pid %d [%s] (%s) - memorystatus_available_pages: %d\n", + printf("memorystatus: specifically killing pid %d [%s] (%s %d) - memorystatus_available_pages: %d\n", victim_pid, (p->p_comm ? 
p->p_comm : "(unknown)"), - jetsam_kill_cause_name[cause], memorystatus_available_pages); + jetsam_kill_cause_name[cause], p->p_memstat_effectivepriority, memorystatus_available_pages); proc_list_lock(); if (memorystatus_jetsam_snapshot_count == 0) { - memorystatus_jetsam_snapshot_procs_locked(); + memorystatus_init_jetsam_snapshot_locked(NULL,0); } - memorystatus_update_snapshot_locked(p, cause); + memorystatus_update_jetsam_snapshot_entry_locked(p, cause); proc_list_unlock(); killed = memorystatus_do_kill(p, cause); @@ -2312,12 +3122,14 @@ memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) { * Jetsam the first process in the queue. */ static boolean_t -memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors) +memorystatus_kill_top_process(boolean_t any, boolean_t sort_flag, uint32_t cause, int32_t *priority, uint32_t *errors) { pid_t aPid; proc_t p = PROC_NULL, next_p = PROC_NULL; boolean_t new_snapshot = FALSE, killed = FALSE; + int kill_count = 0; unsigned int i = 0; + uint32_t aPid_ep; #ifndef CONFIG_FREEZE #pragma unused(any) @@ -2326,9 +3138,12 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0); - proc_list_lock(); - memorystatus_sort_by_largest_process_locked(JETSAM_PRIORITY_FOREGROUND); + if (sort_flag == TRUE) { + (void)memorystatus_sort_bucket(JETSAM_PRIORITY_FOREGROUND, JETSAM_SORT_DEFAULT); + } + + proc_list_lock(); next_p = memorystatus_get_first_proc_locked(&i, TRUE); while (next_p) { @@ -2346,6 +3161,7 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, #endif /* DEVELOPMENT || DEBUG */ aPid = p->p_pid; + aPid_ep = p->p_memstat_effectivepriority; if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) { continue; @@ -2388,10 +3204,6 @@ memorystatus_kill_top_process(boolean_t any, uint32_t 
cause, int32_t *priority, } else #endif { - if (priority) { - *priority = p->p_memstat_effectivepriority; - } - /* * Capture a snapshot if none exists and: * - priority was not requested (this is something other than an ambient kill) @@ -2399,7 +3211,7 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, */ if ((memorystatus_jetsam_snapshot_count == 0) && (memorystatus_idle_snapshot || ((!priority) || (priority && (*priority != JETSAM_PRIORITY_IDLE))))) { - memorystatus_jetsam_snapshot_procs_locked(); + memorystatus_init_jetsam_snapshot_locked(NULL,0); new_snapshot = TRUE; } @@ -2415,7 +3227,7 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && activeProcess) { MEMORYSTATUS_DEBUG(1, "jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n", aPid, (p->p_comm ? p->p_comm: "(unknown)"), memorystatus_level); - memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic); + memorystatus_update_jetsam_snapshot_entry_locked(p, kMemorystatusKilledDiagnostic); p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED; if (memorystatus_jetsam_policy & kPolicyDiagnoseFirst) { jetsam_diagnostic_suspended_one_active_proc = 1; @@ -2426,6 +3238,9 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, proc_list_unlock(); if (p) { task_suspend(p->task); + if (priority) { + *priority = aPid_ep; + } proc_rele(p); killed = TRUE; } @@ -2435,31 +3250,51 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, #endif /* DEVELOPMENT || DEBUG */ { /* Shift queue, update stats */ - memorystatus_update_snapshot_locked(p, cause); - - p = proc_ref_locked(p); - proc_list_unlock(); - if (p) { - printf("memorystatus: %s %d [%s] (%s) - memorystatus_available_pages: %d\n", - ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) ? 
+ memorystatus_update_jetsam_snapshot_entry_locked(p, cause); + + if (proc_ref_locked(p) == p) { + proc_list_unlock(); + printf("memorystatus: %s %d [%s] (%s %d) - memorystatus_available_pages: %d\n", + ((aPid_ep == JETSAM_PRIORITY_IDLE) ? "idle exiting pid" : "jetsam killing pid"), aPid, (p->p_comm ? p->p_comm : "(unknown)"), - jetsam_kill_cause_name[cause], memorystatus_available_pages); + jetsam_kill_cause_name[cause], aPid_ep, memorystatus_available_pages); + killed = memorystatus_do_kill(p, cause); - } + + /* Success? */ + if (killed) { + if (priority) { + *priority = aPid_ep; + } + proc_rele(p); + kill_count++; + goto exit; + } - /* Success? */ - if (killed) { - proc_rele(p); - goto exit; + /* + * Failure - first unwind the state, + * then fall through to restart the search. + */ + proc_list_lock(); + proc_rele_locked(p); + p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; + p->p_memstat_state |= P_MEMSTAT_ERROR; + *errors += 1; } - /* Failure - unwind and restart. */ - proc_list_lock(); - proc_rele_locked(p); - p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; - p->p_memstat_state |= P_MEMSTAT_ERROR; - *errors += 1; + /* + * Failure - restart the search. + * + * We might have raced with "p" exiting on another core, resulting in no + * ref on "p". Or, we may have failed to kill "p". + * + * Either way, we fall thru to here, leaving the proc in the + * P_MEMSTAT_TERMINATED state. + * + * And, we hold the the proc_list_lock at this point. + */ + i = 0; next_p = memorystatus_get_first_proc_locked(&i, TRUE); } @@ -2475,36 +3310,238 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, } KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END, - memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0); + memorystatus_available_pages, killed ? 
aPid : 0, kill_count, 0, 0); return killed; } -#if LEGACY_HIWATER - +/* + * Jetsam aggressively + */ static boolean_t -memorystatus_kill_hiwat_proc(uint32_t *errors) +memorystatus_kill_top_process_aggressive(boolean_t any, uint32_t cause, int aggr_count, int32_t priority_max, + uint32_t *errors) { - pid_t aPid = 0; + pid_t aPid; proc_t p = PROC_NULL, next_p = PROC_NULL; boolean_t new_snapshot = FALSE, killed = FALSE; + int kill_count = 0; unsigned int i = 0; - - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START, - memorystatus_available_pages, 0, 0, 0, 0); - + int32_t aPid_ep = 0; + +#pragma unused(any) + + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START, + memorystatus_available_pages, priority_max, 0, 0, 0); + proc_list_lock(); - memorystatus_sort_by_largest_process_locked(JETSAM_PRIORITY_FOREGROUND); - + next_p = memorystatus_get_first_proc_locked(&i, TRUE); while (next_p) { - uint32_t footprint; - boolean_t skip; +#if DEVELOPMENT || DEBUG + int activeProcess; + int procSuspendedForDiagnosis; +#endif /* DEVELOPMENT || DEBUG */ - p = next_p; - next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); - + if ((unsigned int)(next_p->p_memstat_effectivepriority) != i) { + + /* + * We have raced with next_p running on another core, as it has + * moved to a different jetsam priority band. This means we have + * lost our place in line while traversing the jetsam list. We + * attempt to recover by rewinding to the beginning of the band + * we were already traversing. By doing this, we do not guarantee + * that no process escapes this aggressive march, but we can make + * skipping an entire range of processes less likely. 
(PR-21069019) + */ + + MEMORYSTATUS_DEBUG(1, "memorystatus: aggressive%d: rewinding %s moved from band %d --> %d\n", + aggr_count, next_p->p_comm, i, next_p->p_memstat_effectivepriority); + + next_p = memorystatus_get_first_proc_locked(&i, TRUE); + continue; + } + + p = next_p; + next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); + + if (p->p_memstat_effectivepriority > priority_max) { + /* + * Bail out of this killing spree if we have + * reached beyond the priority_max jetsam band. + * That is, we kill up to and through the + * priority_max jetsam band. + */ + proc_list_unlock(); + goto exit; + } + +#if DEVELOPMENT || DEBUG + activeProcess = p->p_memstat_state & P_MEMSTAT_FOREGROUND; + procSuspendedForDiagnosis = p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED; +#endif /* DEVELOPMENT || DEBUG */ + + aPid = p->p_pid; + aPid_ep = p->p_memstat_effectivepriority; + + if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) { + continue; + } + +#if DEVELOPMENT || DEBUG + if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) { + printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid); + continue; + } +#endif /* DEVELOPMENT || DEBUG */ + + /* + * Capture a snapshot if none exists. + */ + if (memorystatus_jetsam_snapshot_count == 0) { + memorystatus_init_jetsam_snapshot_locked(NULL,0); + new_snapshot = TRUE; + } + + /* + * Mark as terminated so that if exit1() indicates success, but the process (for example) + * is blocked in task_exception_notify(), it'll be skipped if encountered again - see + * . This is cheaper than examining P_LEXIT, which requires the + * acquisition of the proc lock. + */ + p->p_memstat_state |= P_MEMSTAT_TERMINATED; + + /* Shift queue, update stats */ + memorystatus_update_jetsam_snapshot_entry_locked(p, cause); + + /* + * In order to kill the target process, we will drop the proc_list_lock. 
+ * To guaranteee that p and next_p don't disappear out from under the lock, + * we must take a ref on both. + * If we cannot get a reference, then it's likely we've raced with + * that process exiting on another core. + */ + if (proc_ref_locked(p) == p) { + if (next_p) { + while (next_p && (proc_ref_locked(next_p) != next_p)) { + proc_t temp_p; + + /* + * We must have raced with next_p exiting on another core. + * Recover by getting the next eligible process in the band. + */ + + MEMORYSTATUS_DEBUG(1, "memorystatus: aggressive%d: skipping %d [%s] (exiting?)\n", + aggr_count, next_p->p_pid, (next_p->p_comm ? next_p->p_comm : "(unknown)")); + + temp_p = next_p; + next_p = memorystatus_get_next_proc_locked(&i, temp_p, TRUE); + } + } + proc_list_unlock(); + + printf("memorystatus: aggressive%d: %s %d [%s] (%s %d) - memorystatus_available_pages: %d\n", + aggr_count, + ((aPid_ep == JETSAM_PRIORITY_IDLE) ? "idle exiting pid" : "jetsam killing pid"), + aPid, (p->p_comm ? p->p_comm : "(unknown)"), + jetsam_kill_cause_name[cause], aPid_ep, memorystatus_available_pages); + + killed = memorystatus_do_kill(p, cause); + + /* Success? */ + if (killed) { + proc_rele(p); + kill_count++; + p = NULL; + killed = FALSE; + + /* + * Continue the killing spree. + */ + proc_list_lock(); + if (next_p) { + proc_rele_locked(next_p); + } + continue; + } + + /* + * Failure - first unwind the state, + * then fall through to restart the search. + */ + proc_list_lock(); + proc_rele_locked(p); + if (next_p) { + proc_rele_locked(next_p); + } + p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; + p->p_memstat_state |= P_MEMSTAT_ERROR; + *errors += 1; + p = NULL; + } + + /* + * Failure - restart the search at the beginning of + * the band we were already traversing. + * + * We might have raced with "p" exiting on another core, resulting in no + * ref on "p". Or, we may have failed to kill "p". 
+ * + * Either way, we fall thru to here, leaving the proc in the + * P_MEMSTAT_TERMINATED or P_MEMSTAT_ERROR state. + * + * And, we hold the the proc_list_lock at this point. + */ + + next_p = memorystatus_get_first_proc_locked(&i, TRUE); + } + + proc_list_unlock(); + +exit: + /* Clear snapshot if freshly captured and no target was found */ + if (new_snapshot && (kill_count == 0)) { + memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; + } + + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END, + memorystatus_available_pages, killed ? aPid : 0, kill_count, 0, 0); + + if (kill_count > 0) { + return(TRUE); + } + else { + return(FALSE); + } +} + +#if LEGACY_HIWATER + +static boolean_t +memorystatus_kill_hiwat_proc(uint32_t *errors) +{ + pid_t aPid = 0; + proc_t p = PROC_NULL, next_p = PROC_NULL; + boolean_t new_snapshot = FALSE, killed = FALSE; + int kill_count = 0; + unsigned int i = 0; + uint32_t aPid_ep; + + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START, + memorystatus_available_pages, 0, 0, 0, 0); + + proc_list_lock(); + + next_p = memorystatus_get_first_proc_locked(&i, TRUE); + while (next_p) { + uint32_t footprint; + boolean_t skip; + + p = next_p; + next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); + aPid = p->p_pid; + aPid_ep = p->p_memstat_effectivepriority; if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) { continue; @@ -2514,14 +3551,24 @@ memorystatus_kill_hiwat_proc(uint32_t *errors) if (p->p_memstat_memlimit <= 0) { continue; } - + +#if 0 + /* + * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore. + * Background limits are described via the inactive limit slots. + * Their fatal/non-fatal setting will drive whether or not to be + * considered in this kill path. 
+ */ + /* skip if a currently inapplicable limit is encountered */ if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) { continue; } +#endif footprint = (uint32_t)(get_task_phys_footprint(p->task) / (1024 * 1024)); skip = (((int32_t)footprint) <= p->p_memstat_memlimit); + #if DEVELOPMENT || DEBUG if (!skip && (memorystatus_jetsam_policy & kPolicyDiagnoseActive)) { if (p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED) { @@ -2547,7 +3594,7 @@ memorystatus_kill_hiwat_proc(uint32_t *errors) (memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending": "killing", aPid, p->p_comm, footprint, p->p_memstat_memlimit); if (memorystatus_jetsam_snapshot_count == 0) { - memorystatus_jetsam_snapshot_procs_locked(); + memorystatus_init_jetsam_snapshot_locked(NULL,0); new_snapshot = TRUE; } @@ -2556,7 +3603,7 @@ memorystatus_kill_hiwat_proc(uint32_t *errors) #if DEVELOPMENT || DEBUG if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) { MEMORYSTATUS_DEBUG(1, "jetsam: pid %d suspended for diagnosis - memorystatus_available_pages: %d\n", aPid, memorystatus_available_pages); - memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic); + memorystatus_update_jetsam_snapshot_entry_locked(p, kMemorystatusKilledDiagnostic); p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED; p = proc_ref_locked(p); @@ -2571,28 +3618,46 @@ memorystatus_kill_hiwat_proc(uint32_t *errors) } else #endif /* DEVELOPMENT || DEBUG */ { - memorystatus_update_snapshot_locked(p, kMemorystatusKilledHiwat); + memorystatus_update_jetsam_snapshot_entry_locked(p, kMemorystatusKilledHiwat); - p = proc_ref_locked(p); - proc_list_unlock(); - if (p) { - printf("memorystatus: jetsam killing pid %d [%s] (highwater) - memorystatus_available_pages: %d\n", - aPid, (p->p_comm ? 
p->p_comm : "(unknown)"), memorystatus_available_pages); - killed = memorystatus_do_kill(p, kMemorystatusKilledHiwat); - } + if (proc_ref_locked(p) == p) { + proc_list_unlock(); + + printf("memorystatus: jetsam killing pid %d [%s] (highwater %d) - memorystatus_available_pages: %d\n", + aPid, (p->p_comm ? p->p_comm : "(unknown)"), aPid_ep, memorystatus_available_pages); + + killed = memorystatus_do_kill(p, kMemorystatusKilledHiwat); - /* Success? */ - if (killed) { - proc_rele(p); - goto exit; + /* Success? */ + if (killed) { + proc_rele(p); + kill_count++; + goto exit; + } + + /* + * Failure - first unwind the state, + * then fall through to restart the search. + */ + proc_list_lock(); + proc_rele_locked(p); + p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; + p->p_memstat_state |= P_MEMSTAT_ERROR; + *errors += 1; } - /* Failure - unwind and restart. */ - proc_list_lock(); - proc_rele_locked(p); - p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; - p->p_memstat_state |= P_MEMSTAT_ERROR; - *errors += 1; + /* + * Failure - restart the search. + * + * We might have raced with "p" exiting on another core, resulting in no + * ref on "p". Or, we may have failed to kill "p". + * + * Either way, we fall thru to here, leaving the proc in the + * P_MEMSTAT_TERMINATED state. + * + * And, we hold the the proc_list_lock at this point. + */ + i = 0; next_p = memorystatus_get_first_proc_locked(&i, TRUE); } @@ -2608,7 +3673,7 @@ memorystatus_kill_hiwat_proc(uint32_t *errors) } KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_END, - memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0); + memorystatus_available_pages, killed ? 
aPid : 0, kill_count, 0, 0); return killed; } @@ -2635,7 +3700,7 @@ memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause) { if (victim_pid == -1) { /* No pid, so kill first process */ - res = memorystatus_kill_top_process(TRUE, cause, NULL, &errors); + res = memorystatus_kill_top_process(TRUE, TRUE, cause, NULL, &errors); } else { res = memorystatus_kill_specific_process(victim_pid, cause); } @@ -2648,8 +3713,17 @@ memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause) { /* Fire off snapshot notification */ size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count; - memorystatus_jetsam_snapshot->notification_time = mach_absolute_time(); - memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size)); + uint64_t timestamp_now = mach_absolute_time(); + memorystatus_jetsam_snapshot->notification_time = timestamp_now; + if (memorystatus_jetsam_snapshot_last_timestamp == 0 || + timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout) { + int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size)); + if (!ret) { + proc_list_lock(); + memorystatus_jetsam_snapshot_last_timestamp = timestamp_now; + proc_list_unlock(); + } + } } return res; @@ -2696,6 +3770,11 @@ memorystatus_freeze_init(void) { kern_return_t result; thread_t thread; + + freezer_lck_grp_attr = lck_grp_attr_alloc_init(); + freezer_lck_grp = lck_grp_alloc_init("freezer", freezer_lck_grp_attr); + + lck_mtx_init(&freezer_mutex, freezer_lck_grp, NULL); result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread); if (result == KERN_SUCCESS) { @@ -2705,6 +3784,141 @@ memorystatus_freeze_init(void) } } +/* + * Synchronously freeze the passed proc. Called with a reference to the proc held. + * + * Returns EINVAL or the value returned by task_freeze(). 
+ */ +int +memorystatus_freeze_process_sync(proc_t p) +{ + int ret = EINVAL; + pid_t aPid = 0; + boolean_t memorystatus_freeze_swap_low = FALSE; + + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, + memorystatus_available_pages, 0, 0, 0, 0); + + lck_mtx_lock(&freezer_mutex); + + if (p == NULL) { + goto exit; + } + + if (memorystatus_freeze_enabled == FALSE) { + goto exit; + } + + if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) { + goto exit; + } + + if (memorystatus_freeze_update_throttle()) { + printf("memorystatus_freeze_process_sync: in throttle, ignorning freeze\n"); + memorystatus_freeze_throttle_count++; + goto exit; + } + + proc_list_lock(); + + if (p != NULL) { + uint32_t purgeable, wired, clean, dirty, state; + uint32_t max_pages, pages, i; + boolean_t shared; + + aPid = p->p_pid; + state = p->p_memstat_state; + + /* Ensure the process is eligible for freezing */ + if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FROZEN)) || !(state & P_MEMSTAT_SUSPENDED)) { + proc_list_unlock(); + goto exit; + } + + /* Only freeze processes meeting our minimum resident page criteria */ + memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); + if (pages < memorystatus_freeze_pages_min) { + proc_list_unlock(); + goto exit; + } + + if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { + + unsigned int avail_swap_space = 0; /* in pages. */ + + if (DEFAULT_FREEZER_IS_ACTIVE) { + /* + * Freezer backed by default pager and swap file(s). + */ + avail_swap_space = default_pager_swap_pages_free(); + } else { + /* + * Freezer backed by the compressor and swap file(s) + * while will hold compressed data. 
+ */ + avail_swap_space = vm_swap_get_free_space() / PAGE_SIZE_64; + } + + max_pages = MIN(avail_swap_space, memorystatus_freeze_pages_max); + + if (max_pages < memorystatus_freeze_pages_min) { + proc_list_unlock(); + goto exit; + } + } else { + /* + * We only have the compressor without any swap. + */ + max_pages = UINT32_MAX - 1; + } + + /* Mark as locked temporarily to avoid kill */ + p->p_memstat_state |= P_MEMSTAT_LOCKED; + proc_list_unlock(); + + ret = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE); + + MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_process_sync: task_freeze %s for pid %d [%s] - " + "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n", + (ret == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"), + memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free()); + + proc_list_lock(); + p->p_memstat_state &= ~P_MEMSTAT_LOCKED; + + if (ret == KERN_SUCCESS) { + memorystatus_freeze_entry_t data = { aPid, TRUE, dirty }; + + memorystatus_frozen_count++; + + p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 
0: P_MEMSTAT_NORECLAIM)); + + if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { + /* Update stats */ + for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { + throttle_intervals[i].pageouts += dirty; + } + } + + memorystatus_freeze_pageouts += dirty; + memorystatus_freeze_count++; + + proc_list_unlock(); + + memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); + } else { + proc_list_unlock(); + } + } + +exit: + lck_mtx_unlock(&freezer_mutex); + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, + memorystatus_available_pages, aPid, 0, 0, 0); + + return ret; +} + static int memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low) { @@ -2745,14 +3959,35 @@ memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low) } if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { - /* Ensure there's enough free space to freeze this process. */ - max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max); + + /* Ensure there's enough free space to freeze this process. */ + + unsigned int avail_swap_space = 0; /* in pages. */ + + if (DEFAULT_FREEZER_IS_ACTIVE) { + /* + * Freezer backed by default pager and swap file(s). + */ + avail_swap_space = default_pager_swap_pages_free(); + } else { + /* + * Freezer backed by the compressor and swap file(s) + * while will hold compressed data. + */ + avail_swap_space = vm_swap_get_free_space() / PAGE_SIZE_64; + } + + max_pages = MIN(avail_swap_space, memorystatus_freeze_pages_max); + if (max_pages < memorystatus_freeze_pages_min) { *memorystatus_freeze_swap_low = TRUE; proc_list_unlock(); goto exit; } } else { + /* + * We only have the compressor pool. + */ max_pages = UINT32_MAX - 1; } @@ -2783,11 +4018,13 @@ memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low) p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 
0: P_MEMSTAT_NORECLAIM)); - /* Update stats */ - for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { - throttle_intervals[i].pageouts += dirty; + if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { + /* Update stats */ + for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { + throttle_intervals[i].pageouts += dirty; + } } - + memorystatus_freeze_pageouts += dirty; memorystatus_freeze_count++; @@ -2795,8 +4032,8 @@ memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low) memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); - /* Return the number of reclaimed pages */ - ret = dirty; + /* Return KERN_SUCESS */ + ret = kr; } else { proc_list_unlock(); @@ -2857,6 +4094,8 @@ memorystatus_can_freeze_processes(void) static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low) { + boolean_t can_freeze = TRUE; + /* Only freeze if we're sufficiently low on memory; this holds off freeze right after boot, and is generally is a no-op once we've reached steady state. */ if (memorystatus_available_pages > memorystatus_freeze_threshold) { @@ -2868,27 +4107,68 @@ memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low) return FALSE; } - /* Is swap running low? */ - if (*memorystatus_freeze_swap_low) { - /* If there's been no movement in free swap pages since we last attempted freeze, return. */ - if (default_pager_swap_pages_free() < memorystatus_freeze_pages_min) { - return FALSE; + if (COMPRESSED_PAGER_IS_SWAPLESS || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) { + /* + * In-core compressor used for freezing WITHOUT on-disk swap support. 
+ */ + + if (vm_compressor_low_on_space()) { + if (*memorystatus_freeze_swap_low) { + *memorystatus_freeze_swap_low = TRUE; + } + + can_freeze = FALSE; + + } else { + if (*memorystatus_freeze_swap_low) { + *memorystatus_freeze_swap_low = FALSE; + } + + can_freeze = TRUE; + } + } else { + /* + * Freezing WITH on-disk swap support. + */ + + if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { + /* + * In-core compressor fronts the swap. + */ + if (vm_swap_low_on_space()) { + if (*memorystatus_freeze_swap_low) { + *memorystatus_freeze_swap_low = TRUE; + } + + can_freeze = FALSE; + } + + } else if (DEFAULT_FREEZER_IS_ACTIVE) { + /* + * Legacy freeze mode with no compressor support. + */ + if (default_pager_swap_pages_free() < memorystatus_freeze_pages_min) { + if (*memorystatus_freeze_swap_low) { + *memorystatus_freeze_swap_low = TRUE; + } + + can_freeze = FALSE; + } + } else { + panic("Not a valid freeze configuration.\n"); } - - /* Pages have been freed - we can retry. */ - *memorystatus_freeze_swap_low = FALSE; } - /* OK */ - return TRUE; + return can_freeze; } static void memorystatus_freeze_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval) { + unsigned int freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE); if (CMP_MACH_TIMESPEC(ts, &interval->ts) >= 0) { if (!interval->max_pageouts) { - interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * FREEZE_DAILY_PAGEOUTS_MAX) / (24 * 60))); + interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * freeze_daily_pageouts_max) / (24 * 60))); } else { printf("memorystatus_freeze_update_throttle_interval: %d minute throttle timeout, resetting\n", interval->mins); } @@ -2949,12 +4229,12 @@ static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused) { static boolean_t memorystatus_freeze_swap_low = FALSE; - + + lck_mtx_lock(&freezer_mutex); if (memorystatus_freeze_enabled) { if 
(memorystatus_can_freeze(&memorystatus_freeze_swap_low)) { - /* Only freeze if we've not exceeded our pageout budgets or we're not backed by swap. */ - if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS || - !memorystatus_freeze_update_throttle()) { + /* Only freeze if we've not exceeded our pageout budgets.*/ + if (!memorystatus_freeze_update_throttle()) { memorystatus_freeze_top_process(&memorystatus_freeze_swap_low); } else { printf("memorystatus_freeze_thread: in throttle, ignoring freeze\n"); @@ -2962,6 +4242,7 @@ memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused) } } } + lck_mtx_unlock(&freezer_mutex); assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT); thread_block((thread_continue_t) memorystatus_freeze_thread); @@ -3000,6 +4281,7 @@ boolean_t memorystatus_warn_process(pid_t pid, boolean_t critical) { boolean_t ret = FALSE; + boolean_t found_knote = FALSE; struct knote *kn = NULL; /* @@ -3007,34 +4289,72 @@ memorystatus_warn_process(pid_t pid, boolean_t critical) { */ memorystatus_klist_lock(); - kn = vm_find_knote_from_pid(pid, &memorystatus_klist); - if (kn) { - /* - * By setting the "fflags" here, we are forcing - * a process to deal with the case where it's - * bumping up into its memory limits. If we don't - * do this here, we will end up depending on the - * system pressure snapshot evaluation in - * filt_memorystatus(). - */ + + SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) { + proc_t knote_proc = kn->kn_kq->kq_p; + pid_t knote_pid = knote_proc->p_pid; + + if (knote_pid == pid) { + /* + * By setting the "fflags" here, we are forcing + * a process to deal with the case where it's + * bumping up into its memory limits. If we don't + * do this here, we will end up depending on the + * system pressure snapshot evaluation in + * filt_memorystatus(). 
+ */ - if (critical) { - kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; - } else { - kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN; + if (critical) { + if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) { + kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; + } else if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) { + kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN; + } + } else { + if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) { + kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN; + } + } + + found_knote = TRUE; } - KNOTE(&memorystatus_klist, kMemorystatusPressure); - ret = TRUE; + } + + if (found_knote) { + KNOTE(&memorystatus_klist, 0); + ret = TRUE; } else { if (vm_dispatch_pressure_note_to_pid(pid, FALSE) == 0) { ret = TRUE; } } + memorystatus_klist_unlock(); return ret; } +/* + * Can only be set by the current task on itself. + */ +int +memorystatus_low_mem_privileged_listener(uint32_t op_flags) +{ + boolean_t set_privilege = FALSE; + /* + * Need an entitlement check here? + */ + if (op_flags == MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE) { + set_privilege = TRUE; + } else if (op_flags == MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_DISABLE) { + set_privilege = FALSE; + } else { + return EINVAL; + } + + return (task_low_mem_privileged_listener(current_task(), set_privilege, NULL)); +} + int memorystatus_send_pressure_note(pid_t pid) { MEMORYSTATUS_DEBUG(1, "memorystatus_send_pressure_note(): pid %d\n", pid); @@ -3045,13 +4365,19 @@ void memorystatus_send_low_swap_note(void) { struct knote *kn = NULL; - + memorystatus_klist_lock(); SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) { + /* We call is_knote_registered_modify_task_pressure_bits to check if the sfflags for the + * current note contain NOTE_MEMORYSTATUS_LOW_SWAP. 
Once we find one note in the memorystatus_klist + * that has the NOTE_MEMORYSTATUS_LOW_SWAP flags in its sfflags set, we call KNOTE with + * kMemorystatusLowSwap as the hint to process and update all knotes on the memorystatus_klist accordingly. */ if (is_knote_registered_modify_task_pressure_bits(kn, NOTE_MEMORYSTATUS_LOW_SWAP, NULL, 0, 0) == TRUE) { - KNOTE(&memorystatus_klist, kMemorystatusLowSwap); + KNOTE(&memorystatus_klist, kMemorystatusLowSwap); + break; } } + memorystatus_klist_unlock(); } @@ -3151,6 +4477,7 @@ kern_return_t memorystatus_update_vm_pressure(boolean_t target_foreground_process) { struct knote *kn_max = NULL; + struct knote *kn_cur = NULL, *kn_temp = NULL; /* for safe list traversal */ pid_t target_pid = -1; struct klist dispatch_klist = { NULL }; proc_t target_proc = PROC_NULL; @@ -3181,7 +4508,15 @@ memorystatus_update_vm_pressure(boolean_t target_foreground_process) break; } idle_kill_counter++; - delay(1000000); /* 1 second */ + + if (memorystatus_manual_testing_on == TRUE) { + /* + * Skip the delay when testing + * the pressure notification scheme.
+ */ + } else { + delay(1000000); /* 1 second */ + } } #endif /* !CONFIG_JETSAM */ @@ -3253,7 +4588,6 @@ memorystatus_update_vm_pressure(boolean_t target_foreground_process) continue; } proc_list_unlock(); - memorystatus_klist_unlock(); target_pid = target_proc->p_pid; @@ -3285,19 +4619,27 @@ memorystatus_update_vm_pressure(boolean_t target_foreground_process) } if (found_candidate == FALSE) { + proc_rele(target_proc); + memorystatus_klist_unlock(); continue; } - memorystatus_klist_lock(); - KNOTE_DETACH(&memorystatus_klist, kn_max); - KNOTE_ATTACH(&dispatch_klist, kn_max); - memorystatus_klist_unlock(); + SLIST_FOREACH_SAFE(kn_cur, &memorystatus_klist, kn_selnext, kn_temp) { + proc_t knote_proc = kn_cur->kn_kq->kq_p; + pid_t knote_pid = knote_proc->p_pid; + if (knote_pid == target_pid) { + KNOTE_DETACH(&memorystatus_klist, kn_cur); + KNOTE_ATTACH(&dispatch_klist, kn_cur); + } + } KNOTE(&dispatch_klist, (level_snapshot != kVMPressureNormal) ? kMemorystatusPressure : kMemorystatusNoPressure); - memorystatus_klist_lock(); - KNOTE_DETACH(&dispatch_klist, kn_max); - KNOTE_ATTACH(&memorystatus_klist, kn_max); + SLIST_FOREACH_SAFE(kn_cur, &dispatch_klist, kn_selnext, kn_temp) { + KNOTE_DETACH(&dispatch_klist, kn_cur); + KNOTE_ATTACH(&memorystatus_klist, kn_cur); + } + memorystatus_klist_unlock(); microuptime(&target_proc->vm_pressure_last_notify_tstamp); @@ -3593,12 +4935,18 @@ memorystatus_get_priority_list(memorystatus_priority_entry_t **list_ptr, size_t list_entry->priority = p->p_memstat_effectivepriority; list_entry->user_data = p->p_memstat_userdata; #if LEGACY_HIWATER - if (((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) || - (p->p_memstat_memlimit <= 0)) { - task_get_phys_footprint_limit(p->task, &list_entry->limit); - } else { - list_entry->limit = p->p_memstat_memlimit; - } + + /* + * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore. 
+ * Background limits are described via the inactive limit slots. + * So, here, the cached limit should always be valid. + */ + + if (p->p_memstat_memlimit <= 0) { + task_get_phys_footprint_limit(p->task, &list_entry->limit); + } else { + list_entry->limit = p->p_memstat_memlimit; + } #else task_get_phys_footprint_limit(p->task, &list_entry->limit); #endif @@ -3719,10 +5067,94 @@ memorystatus_update_levels_locked(boolean_t critical_only) { #endif } +/* + * Get the at_boot snapshot + */ static int -memorystatus_get_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) { +memorystatus_get_at_boot_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) { size_t input_size = *snapshot_size; + + /* + * The at_boot snapshot has no entry list. + */ + *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t); + + if (size_only) { + return 0; + } + + /* + * Validate the size of the snapshot buffer + */ + if (input_size < *snapshot_size) { + return EINVAL; + } + + /* + * Update the notification_time only + */ + memorystatus_at_boot_snapshot.notification_time = mach_absolute_time(); + *snapshot = &memorystatus_at_boot_snapshot; + + MEMORYSTATUS_DEBUG(7, "memorystatus_get_at_boot_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%d)\n", + (long)input_size, (long)*snapshot_size, 0); + return 0; +} + +static int +memorystatus_get_on_demand_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) { + size_t input_size = *snapshot_size; + uint32_t ods_list_count = memorystatus_list_count; + memorystatus_jetsam_snapshot_t *ods = NULL; /* The on_demand snapshot buffer */ + + *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (ods_list_count)); + + if (size_only) { + return 0; + } + + /* + * Validate the size of the snapshot buffer. + * This is inherently racey. 
May want to revisit + * this error condition and trim the output when + * it doesn't fit. + */ + if (input_size < *snapshot_size) { + return EINVAL; + } + + /* + * Allocate and initialize a snapshot buffer. + */ + ods = (memorystatus_jetsam_snapshot_t *)kalloc(*snapshot_size); + if (!ods) { + return (ENOMEM); + } + + memset(ods, 0, *snapshot_size); + + proc_list_lock(); + memorystatus_init_jetsam_snapshot_locked(ods, ods_list_count); + proc_list_unlock(); + + /* + * Return the kernel allocated, on_demand buffer. + * The caller of this routine will copy the data out + * to user space and then free the kernel allocated + * buffer. + */ + *snapshot = ods; + + MEMORYSTATUS_DEBUG(7, "memorystatus_get_on_demand_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n", + (long)input_size, (long)*snapshot_size, (long)ods_list_count); + return 0; +} + +static int +memorystatus_get_jetsam_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) { + size_t input_size = *snapshot_size; + if (memorystatus_jetsam_snapshot_count > 0) { *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count)); } else { @@ -3738,30 +5170,99 @@ memorystatus_get_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *sna } *snapshot = memorystatus_jetsam_snapshot; - - MEMORYSTATUS_DEBUG(1, "memorystatus_snapshot: returning %ld for size\n", (long)*snapshot_size); - + + MEMORYSTATUS_DEBUG(7, "memorystatus_get_jetsam_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n", + (long)input_size, (long)*snapshot_size, (long)memorystatus_jetsam_snapshot_count); + return 0; } static int -memorystatus_cmd_get_jetsam_snapshot(user_addr_t buffer, size_t buffer_size, int32_t *retval) { +memorystatus_cmd_get_jetsam_snapshot(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval) { int error = EINVAL; boolean_t size_only; + boolean_t 
is_default_snapshot = FALSE; + boolean_t is_on_demand_snapshot = FALSE; + boolean_t is_at_boot_snapshot = FALSE; memorystatus_jetsam_snapshot_t *snapshot; - + size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE); - - error = memorystatus_get_snapshot(&snapshot, &buffer_size, size_only); + + if (flags == 0) { + /* Default */ + is_default_snapshot = TRUE; + error = memorystatus_get_jetsam_snapshot(&snapshot, &buffer_size, size_only); + } else { + if (flags & ~(MEMORYSTATUS_SNAPSHOT_ON_DEMAND | MEMORYSTATUS_SNAPSHOT_AT_BOOT)) { + /* + * Unsupported bit set in flag. + */ + return EINVAL; + } + + if ((flags & (MEMORYSTATUS_SNAPSHOT_ON_DEMAND | MEMORYSTATUS_SNAPSHOT_AT_BOOT)) == + (MEMORYSTATUS_SNAPSHOT_ON_DEMAND | MEMORYSTATUS_SNAPSHOT_AT_BOOT)) { + /* + * Can't have both set at the same time. + */ + return EINVAL; + } + + if (flags & MEMORYSTATUS_SNAPSHOT_ON_DEMAND) { + is_on_demand_snapshot = TRUE; + /* + * When not requesting the size only, the following call will allocate + * an on_demand snapshot buffer, which is freed below. + */ + error = memorystatus_get_on_demand_snapshot(&snapshot, &buffer_size, size_only); + + } else if (flags & MEMORYSTATUS_SNAPSHOT_AT_BOOT) { + is_at_boot_snapshot = TRUE; + error = memorystatus_get_at_boot_snapshot(&snapshot, &buffer_size, size_only); + } else { + /* + * Invalid flag setting. + */ + return EINVAL; + } + } + if (error) { goto out; } - /* Copy out and reset */ + /* + * Copy the data out to user space and clear the snapshot buffer. + * If working with the jetsam snapshot, + * clearing the buffer means, reset the count. + * If working with an on_demand snapshot + * clearing the buffer means, free it. + * If working with the at_boot snapshot + * there is nothing to clear or update. 
+ */ if (!size_only) { if ((error = copyout(snapshot, buffer, buffer_size)) == 0) { - snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; + if (is_default_snapshot) { + /* + * The jetsam snapshot is never freed, its count is simply reset. + */ + snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; + + proc_list_lock(); + memorystatus_jetsam_snapshot_last_timestamp = 0; + proc_list_unlock(); + } + } + + if (is_on_demand_snapshot) { + /* + * The on_demand snapshot is always freed, + * even if the copyout failed. + */ + if(snapshot) { + kfree(snapshot, buffer_size); + } } } @@ -3965,65 +5466,120 @@ memorystatus_cmd_grp_set_properties(int32_t flags, user_addr_t buffer, size_t bu /* - * This routine is meant solely for the purpose of adjusting jetsam priorities and bands. - * It is _not_ meant to be used for the setting of memory limits, especially, since we can't - * tell if the memory limit being set is fatal or not. - * - * So the the last 5 args to the memorystatus_update() call below, related to memory limits, are all 0 or FALSE. + * This routine is used to update a process's jetsam priority position and stored user_data. + * It is not used for the setting of memory limits, which is why the last 6 args to the + * memorystatus_update() call are 0 or FALSE. 
*/ static int memorystatus_cmd_set_priority_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) { - const uint32_t MAX_ENTRY_COUNT = 2; /* Cap the entry count */ - - int error; - uint32_t i; - uint32_t entry_count; - memorystatus_priority_properties_t *entries; - + int error = 0; + memorystatus_priority_properties_t mpp_entry; + /* Validate inputs */ - if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size == 0)) { + if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_priority_properties_t))) { return EINVAL; } - /* Make sure the buffer is a multiple of the entry size, and that an excessive size isn't specified */ - entry_count = (buffer_size / sizeof(memorystatus_priority_properties_t)); - if (((buffer_size % sizeof(memorystatus_priority_properties_t)) != 0) || (entry_count > MAX_ENTRY_COUNT)) { - return EINVAL; - } - - entries = (memorystatus_priority_properties_t *)kalloc(buffer_size); - - error = copyin(buffer, entries, buffer_size); - - for (i = 0; i < entry_count; i++) { + error = copyin(buffer, &mpp_entry, buffer_size); + + if (error == 0) { proc_t p; - if (error) { - break; - } - p = proc_find(pid); if (!p) { - error = ESRCH; - break; + return ESRCH; } if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { - error = EPERM; proc_rele(p); - break; + return EPERM; } - error = memorystatus_update(p, entries[i].priority, entries[i].user_data, FALSE, FALSE, 0, 0, FALSE); + error = memorystatus_update(p, mpp_entry.priority, mpp_entry.user_data, FALSE, FALSE, 0, 0, FALSE, FALSE, FALSE); proc_rele(p); } - kfree(entries, buffer_size); - - return error; + return(error); +} + +static int +memorystatus_cmd_set_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) { + int error = 0; + memorystatus_memlimit_properties_t mmp_entry; + + /* Validate inputs */ + if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_memlimit_properties_t))) 
{ + return EINVAL; + } + + error = copyin(buffer, &mmp_entry, buffer_size); + + if (error == 0) { + error = memorystatus_set_memlimit_properties(pid, &mmp_entry); + } + + return(error); +} + +/* + * When getting the memlimit settings, we can't simply call task_get_phys_footprint_limit(). + * That gets the proc's cached memlimit and there is no guarantee that the active/inactive + * limits will be the same in the no-limit case. Instead we convert limits <= 0 using + * task_convert_phys_footprint_limit(). It computes the same limit value that would be written + * to the task's ledgers via task_set_phys_footprint_limit(). + */ +static int +memorystatus_cmd_get_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) { + int error = 0; + memorystatus_memlimit_properties_t mmp_entry; + + /* Validate inputs */ + if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_memlimit_properties_t))) { + return EINVAL; + } + + memset (&mmp_entry, 0, sizeof(memorystatus_memlimit_properties_t)); + + proc_t p = proc_find(pid); + if (!p) { + return ESRCH; + } + + /* + * Get the active limit and attributes. + * No locks taken since we hold a reference to the proc. 
+ */ + + if (p->p_memstat_memlimit_active > 0 ) { + mmp_entry.memlimit_active = p->p_memstat_memlimit_active; + } else { + task_convert_phys_footprint_limit(-1, &mmp_entry.memlimit_active); + } + + if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL) { + mmp_entry.memlimit_active_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; + } + + /* + * Get the inactive limit and attributes + */ + if (p->p_memstat_memlimit_inactive <= 0) { + task_convert_phys_footprint_limit(-1, &mmp_entry.memlimit_inactive); + } else { + mmp_entry.memlimit_inactive = p->p_memstat_memlimit_inactive; + } + if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) { + mmp_entry.memlimit_inactive_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; + } + proc_rele(p); + + error = copyout(&mmp_entry, buffer, buffer_size); + + return(error); } + static int memorystatus_cmd_get_pressure_status(int32_t *retval) { int error; @@ -4040,51 +5596,157 @@ memorystatus_cmd_get_pressure_status(int32_t *retval) { return error; } +int +memorystatus_get_pressure_status_kdp() { + return (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0; +} + /* * Every process, including a P_MEMSTAT_INTERNAL process (currently only pid 1), is allowed to set a HWM. + * + * This call is inflexible -- it does not distinguish between active/inactive, fatal/non-fatal + * So, with 2-level HWM preserving previous behavior will map as follows. + * - treat the limit passed in as both an active and inactive limit. + * - treat the is_fatal_limit flag as though it applies to both active and inactive limits. 
+ * + * When invoked via MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK + * - the is_fatal_limit is FALSE, meaning the active and inactive limits are non-fatal/soft + * - so mapping is (active/non-fatal, inactive/non-fatal) + * + * When invoked via MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT + * - the is_fatal_limit is TRUE, meaning the process's active and inactive limits are fatal/hard + * - so mapping is (active/fatal, inactive/fatal) */ static int memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit) { int error = 0; + memorystatus_memlimit_properties_t entry; + + entry.memlimit_active = high_water_mark; + entry.memlimit_active_attr = 0; + entry.memlimit_inactive = high_water_mark; + entry.memlimit_inactive_attr = 0; + + if (is_fatal_limit == TRUE) { + entry.memlimit_active_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; + entry.memlimit_inactive_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; + } + + error = memorystatus_set_memlimit_properties(pid, &entry); + return (error); +} + +static int +memorystatus_set_memlimit_properties(pid_t pid, memorystatus_memlimit_properties_t *entry) { + + int32_t memlimit_active; + boolean_t memlimit_active_is_fatal; + int32_t memlimit_inactive; + boolean_t memlimit_inactive_is_fatal; + uint32_t valid_attrs = 0; + int error = 0; proc_t p = proc_find(pid); if (!p) { return ESRCH; } - - if (high_water_mark <= 0) { - high_water_mark = -1; /* Disable */ + + /* + * Check for valid attribute flags. 
+ */ + valid_attrs |= (MEMORYSTATUS_MEMLIMIT_ATTR_FATAL); + if ((entry->memlimit_active_attr & (~valid_attrs)) != 0) { + proc_rele(p); + return EINVAL; + } + if ((entry->memlimit_inactive_attr & (~valid_attrs)) != 0) { + proc_rele(p); + return EINVAL; } - - proc_list_lock(); - - p->p_memstat_memlimit = high_water_mark; - if (memorystatus_highwater_enabled) { - if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) { - memorystatus_update_priority_locked(p, p->p_memstat_effectivepriority, false); - - /* - * The update priority call above takes care to set/reset the fatal memory limit state - * IF the process is transitioning between foreground <-> background and has a background - * memory limit. - * Here, however, the process won't be doing any such transitions and so we explicitly tackle - * the fatal limit state. - */ - is_fatal_limit = FALSE; + /* + * Setup the active memlimit properties + */ + memlimit_active = entry->memlimit_active; + if (entry->memlimit_active_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL) { + memlimit_active_is_fatal = TRUE; + } else { + memlimit_active_is_fatal = FALSE; + } - } else { - error = (task_set_phys_footprint_limit_internal(p->task, high_water_mark, NULL, TRUE) == 0) ? 0 : EINVAL; - } + /* + * Setup the inactive memlimit properties + */ + memlimit_inactive = entry->memlimit_inactive; + if (entry->memlimit_inactive_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL) { + memlimit_inactive_is_fatal = TRUE; + } else { + memlimit_inactive_is_fatal = FALSE; } - if (error == 0) { - if (is_fatal_limit == TRUE) { - p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; + /* + * Setting a limit of <= 0 implies that the process has no + * high-water-mark and has no per-task-limit. That means + * the system_wide task limit is in place, which by the way, + * is always fatal. + */ + + if (memlimit_active <= 0) { + /* + * Enforce the fatal system_wide task limit while process is active. 
+ */ + memlimit_active = -1; + memlimit_active_is_fatal = TRUE; + } + + if (memlimit_inactive <= 0) { + /* + * Enforce the fatal system_wide task limit while process is inactive. + */ + memlimit_inactive = -1; + memlimit_inactive_is_fatal = TRUE; + } + + proc_list_lock(); + + /* + * Store the active limit variants in the proc. + */ + SET_ACTIVE_LIMITS_LOCKED(p, memlimit_active, memlimit_active_is_fatal); + + /* + * Store the inactive limit variants in the proc. + */ + SET_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive, memlimit_inactive_is_fatal); + + /* + * Enforce appropriate limit variant by updating the cached values + * and writing the ledger. + * Limit choice is based on process active/inactive state. + */ + + if (memorystatus_highwater_enabled) { + boolean_t trigger_exception; + /* + * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore. + * Background limits are described via the inactive limit slots. + */ + + if (proc_jetsam_state_is_active_locked(p) == TRUE) { + CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception); } else { - p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; + CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception); } + + /* Enforce the limit by writing to the ledgers */ + assert(trigger_exception == TRUE); + error = (task_set_phys_footprint_limit_internal(p->task, ((p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1), NULL, trigger_exception) == 0) ? 0 : EINVAL; + + MEMORYSTATUS_DEBUG(3, "memorystatus_set_memlimit_properties: new limit on pid %d (%dMB %s) current priority (%d) dirty_state?=0x%x %s\n", + p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1), + (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, p->p_memstat_dirty, + (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? 
"isdirty" : "isclean") : "")); } proc_list_unlock(); @@ -4109,6 +5771,60 @@ proc_get_memstat_priority(proc_t p, boolean_t effective_priority) } return 0; } + +/* + * Description: + * Evaluates active vs. inactive process state. + * Processes that opt into dirty tracking are evaluated + * based on clean vs dirty state. + * dirty ==> active + * clean ==> inactive + * + * Processes that do not opt into dirty tracking are + * evaluated based on priority level. + * Foreground or above ==> active + * Below Foreground ==> inactive + * + * Return: TRUE if active + * False if inactive + */ + +static boolean_t +proc_jetsam_state_is_active_locked(proc_t p) { + + if (p->p_memstat_dirty & P_DIRTY_TRACK) { + /* + * process has opted into dirty tracking + * active state is based on dirty vs. clean + */ + if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { + /* + * process is dirty + * implies active state + */ + return TRUE; + } else { + /* + * process is clean + * implies inactive state + */ + return FALSE; + } + } else if (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND) { + /* + * process is Foreground or higher + * implies active state + */ + return TRUE; + } else { + /* + * process found below Foreground + * implies inactive state + */ + return FALSE; + } +} + #endif /* CONFIG_JETSAM */ int @@ -4139,19 +5855,35 @@ memorystatus_control(struct proc *p __unused, struct memorystatus_control_args * case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES: error = memorystatus_cmd_set_priority_properties(args->pid, args->buffer, args->buffersize, ret); break; + case MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES: + error = memorystatus_cmd_set_memlimit_properties(args->pid, args->buffer, args->buffersize, ret); + break; + case MEMORYSTATUS_CMD_GET_MEMLIMIT_PROPERTIES: + error = memorystatus_cmd_get_memlimit_properties(args->pid, args->buffer, args->buffersize, ret); + break; case MEMORYSTATUS_CMD_GRP_SET_PROPERTIES: error = memorystatus_cmd_grp_set_properties((int32_t)args->flags,
args->buffer, args->buffersize, ret); break; case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT: - error = memorystatus_cmd_get_jetsam_snapshot(args->buffer, args->buffersize, ret); + error = memorystatus_cmd_get_jetsam_snapshot((int32_t)args->flags, args->buffer, args->buffersize, ret); break; case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS: error = memorystatus_cmd_get_pressure_status(ret); break; case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK: + /* + * This call does not distinguish between active and inactive limits. + * Default behavior in 2-level HWM world is to set both. + * Non-fatal limit is also assumed for both. + */ error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, FALSE); break; case MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT: + /* + * This call does not distinguish between active and inactive limits. + * Default behavior in 2-level HWM world is to set both. + * Fatal limit is also assumed for both. + */ error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, TRUE); break; /* Test commands */ @@ -4159,11 +5891,18 @@ memorystatus_control(struct proc *p __unused, struct memorystatus_control_args * case MEMORYSTATUS_CMD_TEST_JETSAM: error = memorystatus_kill_process_sync(args->pid, kMemorystatusKilled) ? 
0 : EINVAL; break; + case MEMORYSTATUS_CMD_TEST_JETSAM_SORT: + error = memorystatus_cmd_test_jetsam_sort(args->pid, (int32_t)args->flags); + break; case MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS: error = memorystatus_cmd_set_panic_bits(args->buffer, args->buffersize); break; #endif /* DEVELOPMENT || DEBUG */ #endif /* CONFIG_JETSAM */ + case MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE: + case MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_DISABLE: + error = memorystatus_low_mem_privileged_listener(args->command); + break; default: break; } @@ -4193,24 +5932,24 @@ filt_memorystatus(struct knote *kn __unused, long hint) switch (hint) { case kMemorystatusNoPressure: if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { - kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_NORMAL; + kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_NORMAL; } break; case kMemorystatusPressure: if (memorystatus_vm_pressure_level == kVMPressureWarning || memorystatus_vm_pressure_level == kVMPressureUrgent) { if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) { - kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN; + kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN; } } else if (memorystatus_vm_pressure_level == kVMPressureCritical) { if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) { - kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; + kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; } } break; case kMemorystatusLowSwap: if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) { - kn->kn_fflags |= NOTE_MEMORYSTATUS_LOW_SWAP; + kn->kn_fflags = NOTE_MEMORYSTATUS_LOW_SWAP; } break; default: @@ -4245,13 +5984,8 @@ memorystatus_knote_register(struct knote *kn) { if (kn->kn_sfflags & (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL | NOTE_MEMORYSTATUS_LOW_SWAP)) { - if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) { - error = suser(kauth_cred_get(), 0); - } + KNOTE_ATTACH(&memorystatus_klist, kn); - if (error == 0) { - KNOTE_ATTACH(&memorystatus_klist, 
kn); - } } else { error = ENOTSUP; } @@ -4280,3 +6014,267 @@ memorystatus_issue_pressure_kevent(boolean_t pressured) { } #endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */ #endif /* 0 */ + +#if CONFIG_JETSAM +/* Coalition support */ + +/* sorting info for a particular priority bucket */ +typedef struct memstat_sort_info { + coalition_t msi_coal; + uint64_t msi_page_count; + pid_t msi_pid; + int msi_ntasks; +} memstat_sort_info_t; + +/* + * qsort from smallest page count to largest page count + * + * return < 0 for a < b + * 0 for a == b + * > 0 for a > b + */ +static int memstat_asc_cmp(const void *a, const void *b) +{ + const memstat_sort_info_t *msA = (const memstat_sort_info_t *)a; + const memstat_sort_info_t *msB = (const memstat_sort_info_t *)b; + + return (int)((uint64_t)msA->msi_page_count - (uint64_t)msB->msi_page_count); +} + +/* + * Return the number of pids rearranged during this sort. + */ +static int +memorystatus_sort_by_largest_coalition_locked(unsigned int bucket_index, int coal_sort_order) +{ +#define MAX_SORT_PIDS 80 +#define MAX_COAL_LEADERS 10 + + unsigned int b = bucket_index; + int nleaders = 0; + int ntasks = 0; + proc_t p = NULL; + coalition_t coal = COALITION_NULL; + int pids_moved = 0; + int total_pids_moved = 0; + int i; + + /* + * The system is typically under memory pressure when in this + * path, hence, we want to avoid dynamic memory allocation. 
+ */ + memstat_sort_info_t leaders[MAX_COAL_LEADERS]; + pid_t pid_list[MAX_SORT_PIDS]; + + if (bucket_index >= MEMSTAT_BUCKET_COUNT) { + return(0); + } + + /* + * Clear the array that holds coalition leader information + */ + for (i=0; i < MAX_COAL_LEADERS; i++) { + leaders[i].msi_coal = COALITION_NULL; + leaders[i].msi_page_count = 0; /* will hold total coalition page count */ + leaders[i].msi_pid = 0; /* will hold coalition leader pid */ + leaders[i].msi_ntasks = 0; /* will hold the number of tasks in a coalition */ + } + + p = memorystatus_get_first_proc_locked(&b, FALSE); + while (p) { + if (coalition_is_leader(p->task, COALITION_TYPE_JETSAM, &coal)) { + if (nleaders < MAX_COAL_LEADERS) { + int coal_ntasks = 0; + uint64_t coal_page_count = coalition_get_page_count(coal, &coal_ntasks); + leaders[nleaders].msi_coal = coal; + leaders[nleaders].msi_page_count = coal_page_count; + leaders[nleaders].msi_pid = p->p_pid; /* the coalition leader */ + leaders[nleaders].msi_ntasks = coal_ntasks; + nleaders++; + } else { + /* + * We've hit MAX_COAL_LEADERS meaning we can handle no more coalitions. + * Abandoned coalitions will linger at the tail of the priority band + * when this sort session ends. + * TODO: should this be an assert? + */ + printf("%s: WARNING: more than %d leaders in priority band [%d]\n", + __FUNCTION__, MAX_COAL_LEADERS, bucket_index); + break; + } + } + p=memorystatus_get_next_proc_locked(&b, p, FALSE); + } + + if (nleaders == 0) { + /* Nothing to sort */ + return(0); + } + + /* + * Sort the coalition leader array, from smallest coalition page count + * to largest coalition page count. When inserted in the priority bucket, + * smallest coalition is handled first, resulting in the last to be jetsammed. 
+ */ + if (nleaders > 1) { + qsort(leaders, nleaders, sizeof(memstat_sort_info_t), memstat_asc_cmp); + } + +#if 0 + for (i = 0; i < nleaders; i++) { + printf("%s: coal_leader[%d of %d] pid[%d] pages[%llu] ntasks[%d]\n", + __FUNCTION__, i, nleaders, leaders[i].msi_pid, leaders[i].msi_page_count, + leaders[i].msi_ntasks); + } +#endif + + /* + * During coalition sorting, processes in a priority band are rearranged + * by being re-inserted at the head of the queue. So, when handling a + * list, the first process that gets moved to the head of the queue, + * ultimately gets pushed toward the queue tail, and hence, jetsams last. + * + * So, for example, the coalition leader is expected to jetsam last, + * after its coalition members. Therefore, the coalition leader is + * inserted at the head of the queue first. + * + * After processing a coalition, the jetsam order is as follows: + * undefs(jetsam first), extensions, xpc services, leader(jetsam last) + */ + + /* + * Coalition members are rearranged in the priority bucket here, + * based on their coalition role. + */ + total_pids_moved = 0; + for (i=0; i < nleaders; i++) { + + /* a bit of bookkeeping */ + pids_moved = 0; + + /* Coalition leaders are jetsammed last, so move into place first */ + pid_list[0] = leaders[i].msi_pid; + pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, 1); + + /* xpc services should jetsam after extensions */ + ntasks = coalition_get_pid_list (leaders[i].msi_coal, COALITION_ROLEMASK_XPC, + coal_sort_order, pid_list, MAX_SORT_PIDS); + + if (ntasks > 0) { + pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, + (ntasks <= MAX_SORT_PIDS ? 
ntasks : MAX_SORT_PIDS)); + } + + /* extensions should jetsam after unmarked processes */ + ntasks = coalition_get_pid_list (leaders[i].msi_coal, COALITION_ROLEMASK_EXT, + coal_sort_order, pid_list, MAX_SORT_PIDS); + + if (ntasks > 0) { + pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, + (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS)); + } + + /* undefined coalition members should be the first to jetsam */ + ntasks = coalition_get_pid_list (leaders[i].msi_coal, COALITION_ROLEMASK_UNDEF, + coal_sort_order, pid_list, MAX_SORT_PIDS); + + if (ntasks > 0) { + pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, + (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS)); + } + +#if 0 + if (pids_moved == leaders[i].msi_ntasks) { + /* + * All the pids in the coalition were found in this band. + */ + printf("%s: pids_moved[%d] equal total coalition ntasks[%d] \n", __FUNCTION__, + pids_moved, leaders[i].msi_ntasks); + } else if (pids_moved > leaders[i].msi_ntasks) { + /* + * Apparently new coalition members showed up during the sort? + */ + printf("%s: pids_moved[%d] were greater than expected coalition ntasks[%d] \n", __FUNCTION__, + pids_moved, leaders[i].msi_ntasks); + } else { + /* + * Apparently not all the pids in the coalition were found in this band? + */ + printf("%s: pids_moved[%d] were less than expected coalition ntasks[%d] \n", __FUNCTION__, + pids_moved, leaders[i].msi_ntasks); + } +#endif + + total_pids_moved += pids_moved; + + } /* end for */ + + return(total_pids_moved); +} + + +/* + * Traverse a list of pids, searching for each within the priority band provided. + * If pid is found, move it to the front of the priority band. + * Never searches outside the priority band provided. + * + * Input: + * bucket_index - jetsam priority band. + * pid_list - pointer to a list of pids. + * list_sz - number of pids in the list. + * + * Pid list ordering is important in that, + * pid_list[n] is expected to jetsam ahead of pid_list[n+1]. 
+ * The sort_order is set by the coalition default. + * + * Return: + * the number of pids found and hence moved within the priority band. + */ +static int +memorystatus_move_list_locked(unsigned int bucket_index, pid_t *pid_list, int list_sz) +{ + memstat_bucket_t *current_bucket; + int i; + int found_pids = 0; + + if ((pid_list == NULL) || (list_sz <= 0)) { + return(0); + } + + if (bucket_index >= MEMSTAT_BUCKET_COUNT) { + return(0); + } + + current_bucket = &memstat_bucket[bucket_index]; + for (i=0; i < list_sz; i++) { + unsigned int b = bucket_index; + proc_t p = NULL; + proc_t aProc = NULL; + pid_t aPid; + int list_index; + + list_index = ((list_sz - 1) - i); + aPid = pid_list[list_index]; + + /* never search beyond bucket_index provided */ + p = memorystatus_get_first_proc_locked(&b, FALSE); + while (p) { + if (p->p_pid == aPid) { + aProc = p; + break; + } + p = memorystatus_get_next_proc_locked(&b, p, FALSE); + } + + if (aProc == NULL) { + /* pid not found in this band, just skip it */ + continue; + } else { + TAILQ_REMOVE(&current_bucket->list, aProc, p_memstat_list); + TAILQ_INSERT_HEAD(&current_bucket->list, aProc, p_memstat_list); + found_pids++; + } + } + return(found_pids); +} +#endif /* CONFIG_JETSAM */ diff --git a/bsd/kern/kern_mib.c b/bsd/kern/kern_mib.c index b3bbdd9d6..ed3d86e6e 100644 --- a/bsd/kern/kern_mib.c +++ b/bsd/kern/kern_mib.c @@ -98,6 +98,7 @@ #include #include #include +#include #include #include @@ -331,6 +332,15 @@ sysctl_pagesize return sysctl_io_number(req, l, sizeof(l), NULL, NULL); } +static int +sysctl_pagesize32 +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + long long l; + l = (long long) PAGE_SIZE; + return sysctl_io_number(req, l, sizeof(l), NULL, NULL); +} + static int sysctl_tbfrequency (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) @@ -356,6 +366,7 @@ SYSCTL_INT (_hw, OID_AUTO, cpufamily, CTLFLAG_RD | CTLFLAG_KERN |
CTLFLAG_LO SYSCTL_OPAQUE (_hw, OID_AUTO, cacheconfig, CTLFLAG_RD | CTLFLAG_LOCKED, &cacheconfig, sizeof(cacheconfig), "Q", ""); SYSCTL_OPAQUE (_hw, OID_AUTO, cachesize, CTLFLAG_RD | CTLFLAG_LOCKED, &cachesize, sizeof(cachesize), "Q", ""); SYSCTL_PROC (_hw, OID_AUTO, pagesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, 0, sysctl_pagesize, "Q", ""); +SYSCTL_PROC (_hw, OID_AUTO, pagesize32, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, 0, sysctl_pagesize32, "Q", ""); SYSCTL_QUAD (_hw, OID_AUTO, busfrequency, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.bus_frequency_hz, ""); SYSCTL_QUAD (_hw, OID_AUTO, busfrequency_min, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.bus_frequency_min_hz, ""); SYSCTL_QUAD (_hw, OID_AUTO, busfrequency_max, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.bus_frequency_max_hz, ""); diff --git a/bsd/kern/kern_mman.c b/bsd/kern/kern_mman.c index 27bee276c..1dd5bd3f0 100644 --- a/bsd/kern/kern_mman.c +++ b/bsd/kern/kern_mman.c @@ -115,6 +115,7 @@ #include #include #include +#include #include @@ -164,6 +165,13 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) int fd = uap->fd; int num_retries = 0; + /* + * Note that for UNIX03 conformance, there is additional parameter checking for + * mmap() system call in libsyscall prior to entering the kernel. The sanity + * checks and argument validation done in this function are not the only places + * one can get returned errnos. 
+ */ + user_map = current_map(); user_addr = (vm_map_offset_t)uap->addr; user_size = (vm_map_size_t) uap->len; @@ -212,9 +220,26 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) user_size = vm_map_round_page(user_size, vm_map_page_mask(user_map)); /* hi end */ - if ((flags & MAP_JIT) && ((flags & MAP_FIXED) || (flags & MAP_SHARED) || !(flags & MAP_ANON))){ - return EINVAL; + if (flags & MAP_JIT) { + if ((flags & MAP_FIXED) || + (flags & MAP_SHARED) || + !(flags & MAP_ANON) || + (flags & MAP_RESILIENT_CODESIGN)) { + return EINVAL; + } + } + + if ((flags & MAP_RESILIENT_CODESIGN) || + (flags & MAP_RESILIENT_MEDIA)) { + assert(!(flags & MAP_JIT)); + if (flags & MAP_ANON) { + return EINVAL; + } + if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) { + return EPERM; + } } + /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). @@ -404,7 +429,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) handle = (void *)vp; #if CONFIG_MACF error = mac_file_check_mmap(vfs_context_ucred(ctx), - fp->f_fglob, prot, flags, &maxprot); + fp->f_fglob, prot, flags, file_pos, &maxprot); if (error) { (void)vnode_put(vp); goto bad; @@ -420,8 +445,6 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) } } #endif /* CONFIG_PROTECT */ - - } } @@ -475,9 +498,14 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) if (flags & MAP_NOCACHE) alloc_flags |= VM_FLAGS_NO_CACHE; - if (flags & MAP_JIT){ + if (flags & MAP_JIT) { alloc_flags |= VM_FLAGS_MAP_JIT; } + + if (flags & MAP_RESILIENT_CODESIGN) { + alloc_flags |= VM_FLAGS_RESILIENT_CODESIGN; + } + /* * Lookup/allocate object. 
*/ @@ -568,7 +596,19 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) maxprot |= VM_PROT_READ; #endif /* radar 3777787 */ + map_file_retry: + if ((flags & MAP_RESILIENT_CODESIGN) || + (flags & MAP_RESILIENT_MEDIA)) { + if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) { + assert(!mapanon); + vnode_put(vp); + error = EPERM; + goto bad; + } + /* strictly limit access to "prot" */ + maxprot &= prot; + } result = vm_map_enter_mem_object_control(user_map, &user_addr, user_size, 0, alloc_flags, @@ -909,6 +949,13 @@ madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval) case MADV_CAN_REUSE: new_behavior = VM_BEHAVIOR_CAN_REUSE; break; + case MADV_PAGEOUT: +#if MACH_ASSERT + new_behavior = VM_BEHAVIOR_PAGEOUT; + break; +#else /* MACH_ASSERT */ + return ENOTSUP; +#endif /* MACH_ASSERT */ default: return(EINVAL); } @@ -916,6 +963,7 @@ madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval) start = (mach_vm_offset_t) uap->addr; size = (mach_vm_size_t) uap->len; + user_map = current_map(); result = mach_vm_behavior_set(user_map, start, size, new_behavior); @@ -1060,7 +1108,7 @@ mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval) size = vm_map_round_page(size+pageoff, vm_map_page_mask(user_map)); /* have to call vm_map_wire directly to pass "I don't know" protections */ - result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE, TRUE); + result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK), TRUE); if (result == KERN_RESOURCE_SHORTAGE) return EAGAIN; @@ -1114,7 +1162,7 @@ mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __u uint32_t cryptid; cpu_type_t cputype; cpu_subtype_t cpusubtype; - pager_crypt_info_t crypt_info; + pager_crypt_info_t crypt_info; const char * cryptname = 0; char *vpath; int len, ret; @@ -1188,13 +1236,19 @@ mremap_encrypted(__unused struct 
proc *p, struct mremap_encrypted_args *uap, __u kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n", __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size); #endif - + /* set up decrypter first */ crypt_file_data_t crypt_data = { .filename = vpath, .cputype = cputype, .cpusubtype = cpusubtype }; result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data); +#if DEVELOPMENT || DEBUG + printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n", + p->p_pid, p->p_comm, + user_map, (uint64_t) user_addr, (uint64_t) (user_addr + user_size), + __FUNCTION__, vpath, result); +#endif /* DEVELOPMENT || DEBUG */ FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI); if(result) { @@ -1209,13 +1263,20 @@ mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __u } /* now remap using the decrypter */ - result = vm_map_apple_protected(user_map, user_addr, user_addr+user_size, &crypt_info); + vm_object_offset_t crypto_backing_offset; + crypto_backing_offset = -1; /* i.e. 
use map entry's offset */ + result = vm_map_apple_protected(user_map, + user_addr, + user_addr+user_size, + crypto_backing_offset, + &crypt_info); if (result) { printf("%s: mapping failed with %d\n", __FUNCTION__, result); - crypt_info.crypt_end(crypt_info.crypt_ops); + } + + if (result) { return (EPERM); } - return 0; } #endif /* CONFIG_CODE_DECRYPTION */ diff --git a/bsd/kern/kern_newsysctl.c b/bsd/kern/kern_newsysctl.c index a0a72cb5c..fdd86a948 100644 --- a/bsd/kern/kern_newsysctl.c +++ b/bsd/kern/kern_newsysctl.c @@ -1752,7 +1752,7 @@ kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, s name2mib_oid[1] = 3; oidlen = sizeof(oid); - error = kernel_sysctl(current_proc(), name2mib_oid, 2, oid, &oidlen, (void *)name, strlen(name)); + error = kernel_sysctl(current_proc(), name2mib_oid, 2, oid, &oidlen, __DECONST(void *, name), strlen(name)); oidlen /= sizeof(int); /* now use the OID */ diff --git a/bsd/kern/kern_proc.c b/bsd/kern/kern_proc.c index 7b8aace09..9213b82f3 100644 --- a/bsd/kern/kern_proc.c +++ b/bsd/kern/kern_proc.c @@ -147,27 +147,6 @@ struct proclist allproc; struct proclist zombproc; extern struct tty cons; -#if CONFIG_LCTX -/* - * Login Context - */ -static pid_t lastlcid = 1; -static int alllctx_cnt; - -#define LCID_MAX 8192 /* Does this really need to be large? 
*/ -static int maxlcid = LCID_MAX; - -LIST_HEAD(lctxlist, lctx); -static struct lctxlist alllctx; - -lck_mtx_t alllctx_lock; -lck_grp_t * lctx_lck_grp; -lck_grp_attr_t * lctx_lck_grp_attr; -lck_attr_t * lctx_lck_attr; - -static void lctxinit(void); -#endif - extern int cs_debug; #if DEBUG @@ -176,6 +155,10 @@ extern int cs_debug; /* Name to give to core files */ __XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN+1] = {"/cores/core.%P"}; +#if PROC_REF_DEBUG +extern uint32_t fastbacktrace(uintptr_t* bt, uint32_t max_frames) __attribute__((noinline)); +#endif + static void orphanpg(struct pgrp *pg); void proc_name_kdp(task_t t, char * buf, int size); int proc_threadname_kdp(void *uth, char *buf, size_t size); @@ -210,9 +193,6 @@ procinit(void) pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash); sesshashtbl = hashinit(maxproc / 4, M_PROC, &sesshash); uihashtbl = hashinit(maxproc / 16, M_PROC, &uihash); -#if CONFIG_LCTX - lctxinit(); -#endif } /* @@ -385,6 +365,56 @@ proc_findthread(thread_t thread) return(p); } +#if PROC_REF_DEBUG +void +uthread_reset_proc_refcount(void *uthread) { + uthread_t uth; + + if (proc_ref_tracking_disabled) { + return; + } + + uth = (uthread_t) uthread; + + uth->uu_proc_refcount = 0; + uth->uu_pindex = 0; +} + +int +uthread_get_proc_refcount(void *uthread) { + uthread_t uth; + + if (proc_ref_tracking_disabled) { + return 0; + } + + uth = (uthread_t) uthread; + + return uth->uu_proc_refcount; +} + +static void +record_procref(proc_t p, int count) { + uthread_t uth; + + if (proc_ref_tracking_disabled) { + return; + } + + uth = current_uthread(); + uth->uu_proc_refcount += count; + + if (count == 1) { + if (uth->uu_pindex < NUM_PROC_REFS_TO_TRACK) { + fastbacktrace((uintptr_t *) &uth->uu_proc_pcs[uth->uu_pindex], PROC_REF_STACK_DEPTH); + + uth->uu_proc_ps[uth->uu_pindex] = p; + uth->uu_pindex++; + } + } +} +#endif + int proc_rele(proc_t p) { @@ -419,8 +449,12 @@ proc_ref_locked(proc_t p) if ((p == PROC_NULL) || ((p->p_listflag & 
P_LIST_INCREATE) != 0)) return (PROC_NULL); /* do not return process marked for termination */ - if ((p->p_stat != SZOMB) && ((p->p_listflag & P_LIST_EXITED) == 0) && ((p->p_listflag & (P_LIST_DRAINWAIT | P_LIST_DRAIN | P_LIST_DEAD)) == 0)) + if ((p->p_stat != SZOMB) && ((p->p_listflag & P_LIST_EXITED) == 0) && ((p->p_listflag & (P_LIST_DRAINWAIT | P_LIST_DRAIN | P_LIST_DEAD)) == 0)) { p->p_refcount++; +#if PROC_REF_DEBUG + record_procref(p, 1); +#endif + } else p1 = PROC_NULL; @@ -433,6 +467,9 @@ proc_rele_locked(proc_t p) if (p->p_refcount > 0) { p->p_refcount--; +#if PROC_REF_DEBUG + record_procref(p, -1); +#endif if ((p->p_refcount == 0) && ((p->p_listflag & P_LIST_DRAINWAIT) == P_LIST_DRAINWAIT)) { p->p_listflag &= ~P_LIST_DRAINWAIT; wakeup(&p->p_refcount); @@ -609,13 +646,17 @@ proc_checkdeadrefs(__unused proc_t p) int proc_pid(proc_t p) { - return (p->p_pid); + if (p != NULL) + return (p->p_pid); + return -1; } -int +int proc_ppid(proc_t p) { - return (p->p_ppid); + if (p != NULL) + return (p->p_ppid); + return -1; } int @@ -715,9 +756,13 @@ void proc_name_kdp(task_t t, char * buf, int size) { proc_t p = get_bsdtask_info(t); + if (p == PROC_NULL) + return; - if (p != PROC_NULL) - strlcpy(buf, &p->p_comm[0], size); + if ((size_t)size > sizeof(p->p_comm)) + strlcpy(buf, &p->p_name[0], MIN((int)sizeof(p->p_name), size)); + else + strlcpy(buf, &p->p_comm[0], MIN((int)sizeof(p->p_comm), size)); } @@ -907,14 +952,15 @@ proc_puniqueid(proc_t p) return(p->p_puniqueid); } -uint64_t -proc_coalitionid(__unused proc_t p) +void +proc_coalitionids(__unused proc_t p, __unused uint64_t ids[COALITION_NUM_TYPES]) { #if CONFIG_COALITIONS - return(task_coalition_id(p->task)); + task_coalition_ids(p->task, ids); #else - return 0; + memset(ids, 0, sizeof(uint64_t [COALITION_NUM_TYPES])); #endif + return; } uint64_t @@ -1552,13 +1598,10 @@ orphanpg(struct pgrp * pgrp) return; } - - -/* XXX should be __private_extern__ */ int -proc_is_classic(proc_t p) +proc_is_classic(proc_t p 
__unused) { - return (p->p_flag & P_TRANSLATED) ? 1 : 0; + return (0); } /* XXX Why does this function exist? Need to kill it off... */ @@ -1641,221 +1684,6 @@ proc_core_name(const char *name, uid_t uid, pid_t pid, char *cf_name, return (1); } -#if CONFIG_LCTX - -static void -lctxinit(void) -{ - LIST_INIT(&alllctx); - alllctx_cnt = 0; - - /* allocate lctx lock group attribute and group */ - lctx_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(lctx_lck_grp_attr); - - lctx_lck_grp = lck_grp_alloc_init("lctx", lctx_lck_grp_attr); - /* Allocate lctx lock attribute */ - lctx_lck_attr = lck_attr_alloc_init(); - - lck_mtx_init(&alllctx_lock, lctx_lck_grp, lctx_lck_attr); -} - -/* - * Locate login context by number. - */ -struct lctx * -lcfind(pid_t lcid) -{ - struct lctx *l; - - ALLLCTX_LOCK; - LIST_FOREACH(l, &alllctx, lc_list) { - if (l->lc_id == lcid) { - LCTX_LOCK(l); - break; - } - } - ALLLCTX_UNLOCK; - return (l); -} - -#define LCID_INC \ - do { \ - lastlcid++; \ - if (lastlcid > maxlcid) \ - lastlcid = 1; \ - } while (0) \ - -struct lctx * -lccreate(void) -{ - struct lctx *l; - pid_t newlcid; - - /* Not very efficient but this isn't a common operation. */ - while ((l = lcfind(lastlcid)) != NULL) { - LCTX_UNLOCK(l); - LCID_INC; - } - newlcid = lastlcid; - LCID_INC; - - MALLOC(l, struct lctx *, sizeof(struct lctx), M_LCTX, M_WAITOK|M_ZERO); - l->lc_id = newlcid; - LIST_INIT(&l->lc_members); - lck_mtx_init(&l->lc_mtx, lctx_lck_grp, lctx_lck_attr); -#if CONFIG_MACF - l->lc_label = mac_lctx_label_alloc(); -#endif - ALLLCTX_LOCK; - LIST_INSERT_HEAD(&alllctx, l, lc_list); - alllctx_cnt++; - ALLLCTX_UNLOCK; - - return (l); -} - -/* - * Call with proc protected (either by being invisible - * or by having the all-login-context lock held) and - * the lctx locked. - * - * Will unlock lctx on return. 
- */ -void -enterlctx (proc_t p, struct lctx *l, __unused int create) -{ - if (l == NULL) - return; - - p->p_lctx = l; - LIST_INSERT_HEAD(&l->lc_members, p, p_lclist); - l->lc_mc++; - -#if CONFIG_MACF - if (create) - mac_lctx_notify_create(p, l); - else - mac_lctx_notify_join(p, l); -#endif - LCTX_UNLOCK(l); - - return; -} - -/* - * Remove process from login context (if any). Called with p protected by - * the alllctx lock. - */ -void -leavelctx (proc_t p) -{ - struct lctx *l; - - if (p->p_lctx == NULL) { - return; - } - - LCTX_LOCK(p->p_lctx); - l = p->p_lctx; - p->p_lctx = NULL; - LIST_REMOVE(p, p_lclist); - l->lc_mc--; -#if CONFIG_MACF - mac_lctx_notify_leave(p, l); -#endif - if (LIST_EMPTY(&l->lc_members)) { - LIST_REMOVE(l, lc_list); - alllctx_cnt--; - LCTX_UNLOCK(l); - lck_mtx_destroy(&l->lc_mtx, lctx_lck_grp); -#if CONFIG_MACF - mac_lctx_label_free(l->lc_label); - l->lc_label = NULL; -#endif - FREE(l, M_LCTX); - } else { - LCTX_UNLOCK(l); - } - return; -} - -static int -sysctl_kern_lctx SYSCTL_HANDLER_ARGS -{ - int *name = (int*) arg1; - u_int namelen = arg2; - struct kinfo_lctx kil; - struct lctx *l; - int error; - - error = 0; - - switch (oidp->oid_number) { - case KERN_LCTX_ALL: - ALLLCTX_LOCK; - /* Request for size. */ - if (!req->oldptr) { - error = SYSCTL_OUT(req, 0, - sizeof(struct kinfo_lctx) * (alllctx_cnt + 1)); - goto out; - } - break; - - case KERN_LCTX_LCID: - /* No space */ - if (req->oldlen < sizeof(struct kinfo_lctx)) - return (ENOMEM); - /* No argument */ - if (namelen != 1) - return (EINVAL); - /* No login context */ - l = lcfind((pid_t)name[0]); - if (l == NULL) - return (ENOENT); - kil.id = l->lc_id; - kil.mc = l->lc_mc; - LCTX_UNLOCK(l); - return (SYSCTL_OUT(req, (caddr_t)&kil, sizeof(kil))); - - default: - return (EINVAL); - } - - /* Provided buffer is too small. 
*/ - if (req->oldlen < (sizeof(struct kinfo_lctx) * alllctx_cnt)) { - error = ENOMEM; - goto out; - } - - LIST_FOREACH(l, &alllctx, lc_list) { - LCTX_LOCK(l); - kil.id = l->lc_id; - kil.mc = l->lc_mc; - LCTX_UNLOCK(l); - error = SYSCTL_OUT(req, (caddr_t)&kil, sizeof(kil)); - if (error) - break; - } -out: - ALLLCTX_UNLOCK; - - return (error); -} - -SYSCTL_NODE(_kern, KERN_LCTX, lctx, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Login Context"); - -SYSCTL_PROC(_kern_lctx, KERN_LCTX_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT | CTLFLAG_LOCKED, - 0, 0, sysctl_kern_lctx, "S,lctx", - "Return entire login context table"); -SYSCTL_NODE(_kern_lctx, KERN_LCTX_LCID, lcid, CTLFLAG_RD | CTLFLAG_LOCKED, - sysctl_kern_lctx, "Login Context Table"); -SYSCTL_INT(_kern_lctx, OID_AUTO, last, CTLFLAG_RD | CTLFLAG_LOCKED, &lastlcid, 0, ""); -SYSCTL_INT(_kern_lctx, OID_AUTO, count, CTLFLAG_RD | CTLFLAG_LOCKED, &alllctx_cnt, 0, ""); -SYSCTL_INT(_kern_lctx, OID_AUTO, max, CTLFLAG_RW | CTLFLAG_LOCKED, &maxlcid, 0, ""); - -#endif /* LCTX */ - /* Code Signing related routines */ int @@ -1962,6 +1790,8 @@ csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user retflags = pt->p_csflags; if (cs_enforcement(pt)) retflags |= CS_ENFORCEMENT; + if (csproc_get_platform_binary(pt)) + retflags |= CS_PLATFORM_BINARY; proc_unlock(pt); if (uaddr != USER_ADDR_NULL) @@ -2158,14 +1988,6 @@ csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user break; } - case CS_OPS_SIGPUP_INSTALL: - error = sigpup_install(uaddr); - break; - - case CS_OPS_SIGPUP_DROP: - error = sigpup_drop(); - break; - default: error = EINVAL; break; @@ -3309,3 +3131,26 @@ int proc_shadow_max(void) return max; } #endif /* VM_SCAN_FOR_SHADOW_CHAIN */ + +void proc_set_responsible_pid(proc_t target_proc, pid_t responsible_pid); +void proc_set_responsible_pid(proc_t target_proc, pid_t responsible_pid) +{ + if (target_proc != NULL) { + target_proc->p_responsible_pid = responsible_pid; + } + return; +} + +int 
+proc_chrooted(proc_t p) +{ + int retval = 0; + + if (p) { + proc_fdlock(p); + retval = (p->p_fd->fd_rdir != NULL) ? 1 : 0; + proc_fdunlock(p); + } + + return retval; +} diff --git a/bsd/kern/kern_prot.c b/bsd/kern/kern_prot.c index 23c602e8b..5df82a23f 100644 --- a/bsd/kern/kern_prot.c +++ b/bsd/kern/kern_prot.c @@ -96,11 +96,9 @@ #include #include -#include +#define chgproccnt_ok(p) 1 -#if CONFIG_LCTX -#include -#endif +#include #if CONFIG_MACF #include @@ -780,7 +778,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval) * may be able to decrement the proc count of B before we can increment it. This results in a panic. * Incrementing the proc count of the target ruid, B, before setting the process credentials prevents this race. */ - if (ruid != KAUTH_UID_NONE) { + if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) { (void)chgproccnt(ruid, 1); } @@ -799,7 +797,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval) * We didn't successfully switch to the new ruid, so decrement * the procs/uid count that we incremented above. */ - if (ruid != KAUTH_UID_NONE) { + if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) { (void)chgproccnt(ruid, -1); } kauth_cred_unref(&my_new_cred); @@ -818,7 +816,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval) * If we've updated the ruid, decrement the count of procs running * under the previous ruid */ - if (ruid != KAUTH_UID_NONE) { + if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) { (void)chgproccnt(my_pcred->cr_ruid, -1); } } @@ -1028,7 +1026,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval) * may be able to decrement the proc count of B before we can increment it. This results in a panic. * Incrementing the proc count of the target ruid, B, before setting the process credentials prevents this race. 
*/ - if (ruid != KAUTH_UID_NONE) { + if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) { (void)chgproccnt(ruid, 1); } @@ -1043,7 +1041,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval) */ if (p->p_ucred != my_cred) { proc_unlock(p); - if (ruid != KAUTH_UID_NONE) { + if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) { /* * We didn't successfully switch to the new ruid, so decrement * the procs/uid count that we incremented above. @@ -1063,7 +1061,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval) OSBitOrAtomic(P_SUGID, &p->p_flag); proc_unlock(p); - if (ruid != KAUTH_UID_NONE) { + if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) { /* * We switched to a new ruid, so decrement the count of procs running * under the previous ruid @@ -2059,6 +2057,18 @@ set_security_token(proc_t p) } +int get_audit_token_pid(audit_token_t *audit_token); + +int +get_audit_token_pid(audit_token_t *audit_token) +{ + /* keep in-sync with set_security_token (above) */ + if (audit_token) + return (int)audit_token->val[5]; + return -1; +} + + /* * Fill in a struct xucred based on a kauth_cred_t. */ @@ -2074,170 +2084,3 @@ cru2x(kauth_cred_t cr, struct xucred *xcr) xcr->cr_ngroups = pcr->cr_ngroups; bcopy(pcr->cr_groups, xcr->cr_groups, sizeof(xcr->cr_groups)); } - -#if CONFIG_LCTX - -/* - * Set Login Context ID - */ -/* - * MPSAFE - assignment of (visible) process to context protected by ALLLCTX_LOCK, - * LCTX by its own locks. 
- */ -int -setlcid(proc_t p0, struct setlcid_args *uap, __unused int32_t *retval) -{ - proc_t p; - struct lctx *l; - int error = 0; - int refheld = 0; - - AUDIT_ARG(pid, uap->pid); - AUDIT_ARG(value32, uap->lcid); - if (uap->pid == LCID_PROC_SELF) { /* Create/Join/Leave */ - p = p0; - } else { /* Adopt/Orphan */ - p = proc_find(uap->pid); - if (p == NULL) - return (ESRCH); - refheld = 1; - } - -#if CONFIG_MACF - error = mac_proc_check_setlcid(p0, p, uap->pid, uap->lcid); - if (error) - goto out; -#endif - - switch (uap->lcid) { - /* Leave/Orphan */ - case LCID_REMOVE: - - /* Only root may Leave/Orphan. */ - if (!kauth_cred_issuser(kauth_cred_get())) { - error = EPERM; - goto out; - } - - /* Process not in login context. */ - if (p->p_lctx == NULL) { - error = ENOATTR; - goto out; - } - - l = NULL; - - break; - - /* Create */ - case LCID_CREATE: - - /* Create only valid for self! */ - if (uap->pid != LCID_PROC_SELF) { - error = EPERM; - goto out; - } - - /* Already in a login context. */ - if (p->p_lctx != NULL) { - error = EPERM; - goto out; - } - - l = lccreate(); - if (l == NULL) { - error = ENOMEM; - goto out; - } - - LCTX_LOCK(l); - - break; - - /* Join/Adopt */ - default: - - /* Only root may Join/Adopt. */ - if (!kauth_cred_issuser(kauth_cred_get())) { - error = EPERM; - goto out; - } - - l = lcfind(uap->lcid); - if (l == NULL) { - error = ENOATTR; - goto out; - } - - break; - } - - ALLLCTX_LOCK; - leavelctx(p); - enterlctx(p, l, (uap->lcid == LCID_CREATE) ? 1 : 0); - ALLLCTX_UNLOCK; - -out: - if (refheld != 0) - proc_rele(p); - return (error); -} - -/* - * Get Login Context ID - */ -/* - * MPSAFE - membership of (visible) process in a login context - * protected by the all-context lock. 
- */ -int -getlcid(proc_t p0, struct getlcid_args *uap, int32_t *retval) -{ - proc_t p; - int error = 0; - int refheld = 0; - - AUDIT_ARG(pid, uap->pid); - if (uap->pid == LCID_PROC_SELF) { - p = p0; - } else { - p = proc_find(uap->pid); - if (p == NULL) - return (ESRCH); - refheld = 1; - } - -#if CONFIG_MACF - error = mac_proc_check_getlcid(p0, p, uap->pid); - if (error) - goto out; -#endif - ALLLCTX_LOCK; - if (p->p_lctx == NULL) { - error = ENOATTR; - ALLLCTX_UNLOCK; - goto out; - } - *retval = p->p_lctx->lc_id; - ALLLCTX_UNLOCK; - out: - if (refheld != 0) - proc_rele(p); - - return (error); -} -#else /* LCTX */ -int -setlcid(proc_t p0, struct setlcid_args *uap, int32_t *retval) -{ - - return (ENOSYS); -} - -int -getlcid(proc_t p0, struct getlcid_args *uap, int32_t *retval) -{ - - return (ENOSYS); -} -#endif /* !LCTX */ diff --git a/bsd/kern/kern_resource.c b/bsd/kern/kern_resource.c index 2900cd52b..a994b8bd6 100644 --- a/bsd/kern/kern_resource.c +++ b/bsd/kern/kern_resource.c @@ -108,6 +108,7 @@ #include #include +#include int donice(struct proc *curp, struct proc *chgp, int n); int dosetrlimit(struct proc *p, u_int which, struct rlimit *limp); @@ -611,23 +612,8 @@ proc_set_darwin_role(proc_t curp, proc_t targetp, int priority) integer_t role = 0; - switch (priority) { - case PRIO_DARWIN_ROLE_DEFAULT: - role = TASK_UNSPECIFIED; - break; - case PRIO_DARWIN_ROLE_UI_FOCAL: - role = TASK_FOREGROUND_APPLICATION; - break; - case PRIO_DARWIN_ROLE_UI: - role = TASK_BACKGROUND_APPLICATION; - break; - case PRIO_DARWIN_ROLE_NON_UI: - role = TASK_NONUI_APPLICATION; - break; - default: - error = EINVAL; - goto out; - } + if ((error = proc_darwin_role_to_task_role(priority, &role))) + goto out; proc_set_task_policy(proc_task(targetp), THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, role); @@ -665,21 +651,7 @@ proc_get_darwin_role(proc_t curp, proc_t targetp, int *priority) role = proc_get_task_policy(proc_task(targetp), THREAD_NULL, TASK_POLICY_ATTRIBUTE, 
TASK_POLICY_ROLE); - switch (role) { - case TASK_FOREGROUND_APPLICATION: - *priority = PRIO_DARWIN_ROLE_UI_FOCAL; - break; - case TASK_BACKGROUND_APPLICATION: - *priority = PRIO_DARWIN_ROLE_UI; - break; - case TASK_NONUI_APPLICATION: - *priority = PRIO_DARWIN_ROLE_NON_UI; - break; - case TASK_UNSPECIFIED: - default: - *priority = PRIO_DARWIN_ROLE_DEFAULT; - break; - } + *priority = proc_task_role_to_darwin_role(role); out: kauth_cred_unref(&target_cred); @@ -1632,8 +1604,13 @@ iopolicysys_vfs(struct proc *p, int cmd, int scope, int policy, struct _iopol_pa switch(cmd) { case IOPOL_CMD_SET: if (0 == kauth_cred_issuser(kauth_cred_get())) { - error = EPERM; - goto out; + /* If it's a non-root process, it needs to have the entitlement to set the policy */ + boolean_t entitled = FALSE; + entitled = IOTaskHasEntitlement(current_task(), "com.apple.private.iopol.case_sensitivity"); + if (!entitled) { + error = EPERM; + goto out; + } } switch (policy) { diff --git a/bsd/kern/kern_shutdown.c b/bsd/kern/kern_shutdown.c index 4e231826d..3858ce83e 100644 --- a/bsd/kern/kern_shutdown.c +++ b/bsd/kern/kern_shutdown.c @@ -74,7 +74,6 @@ uint32_t system_inshutdown = 0; /* XXX should be in a header file somewhere, but isn't */ -extern void md_prepare_for_shutdown(int, int, char *); extern void (*unmountroot_pre_hook)(void); unsigned int proc_shutdown_exitcount = 0; @@ -83,7 +82,7 @@ static int sd_openlog(vfs_context_t); static int sd_closelog(vfs_context_t); static void sd_log(vfs_context_t, const char *, ...); static void proc_shutdown(void); - +static void kernel_hwm_panic_info(void); extern void IOSystemShutdownNotification(void); struct sd_filterargs{ @@ -109,10 +108,37 @@ static int sd_callback1(proc_t p, void * arg); static int sd_callback2(proc_t p, void * arg); static int sd_callback3(proc_t p, void * arg); +extern boolean_t panic_include_zprint; +extern vm_offset_t panic_kext_memory_info; +extern vm_size_t panic_kext_memory_size; + +static void +kernel_hwm_panic_info(void) 
+{ + mach_memory_info_t *memory_info; + unsigned int num_sites; + kern_return_t kr; + + panic_include_zprint = TRUE; + panic_kext_memory_info = 0; + panic_kext_memory_size = 0; + + num_sites = VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT; + panic_kext_memory_size = round_page(num_sites * sizeof(mach_zone_info_t)); + + kr = kmem_alloc(kernel_map, (vm_offset_t *) &panic_kext_memory_info, panic_kext_memory_size, VM_KERN_MEMORY_OSFMK); + if (kr != KERN_SUCCESS) { + panic_kext_memory_info = 0; + return; + } + memory_info = (mach_memory_info_t *)panic_kext_memory_info; + vm_page_diagnose(memory_info, num_sites); + return; +} + int -boot(int paniced, int howto, char *command) +reboot_kernel(int howto, char *message) { - struct proc *p = current_proc(); /* XXX */ int hostboot_option=0; if (!OSCompareAndSwap(0, 1, &system_inshutdown)) { @@ -126,12 +152,10 @@ boot(int paniced, int howto, char *command) */ IOSystemShutdownNotification(); - md_prepare_for_shutdown(paniced, howto, command); - if ((howto&RB_QUICK)==RB_QUICK) { printf("Quick reboot...\n"); if ((howto&RB_NOSYNC)==0) { - sync(p, (void *)NULL, (int *)NULL); + sync((proc_t)NULL, (void *)NULL, (int *)NULL); } } else if ((howto&RB_NOSYNC)==0) { @@ -143,7 +167,7 @@ boot(int paniced, int howto, char *command) * Release vnodes held by texts before sync. */ - /* handle live procs (deallocate their root and current directories). 
*/ + /* handle live procs (deallocate their root and current directories), suspend initproc */ proc_shutdown(); #if CONFIG_AUDIT @@ -153,15 +177,7 @@ boot(int paniced, int howto, char *command) if (unmountroot_pre_hook != NULL) unmountroot_pre_hook(); - sync(p, (void *)NULL, (int *)NULL); - - /* - * Now that all processes have been terminated and system is - * sync'ed up, suspend init - */ - - if (initproc && p != initproc) - task_suspend(initproc->task); + sync((proc_t)NULL, (void *)NULL, (int *)NULL); if (kdebug_enable) kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace"); @@ -194,12 +210,18 @@ boot(int paniced, int howto, char *command) #endif /* NETWORKING */ force_reboot: + + if (howto & RB_PANIC) { + if (strncmp(message, "Kernel memory has exceeded limits", 33) == 0) { + kernel_hwm_panic_info(); + } + panic ("userspace panic: %s", message); + } + if (howto & RB_POWERDOWN) hostboot_option = HOST_REBOOT_HALT; if (howto & RB_HALT) hostboot_option = HOST_REBOOT_HALT; - if (paniced == RB_PANIC) - hostboot_option = HOST_REBOOT_HALT; if (howto & RB_UPSDELAY) { hostboot_option = HOST_REBOOT_UPSDELAY; @@ -568,6 +590,11 @@ proc_shutdown(void) sd_closelog(ctx); + /* + * Now that all other processes have been terminated, suspend init + */ + task_suspend_internal(initproc->task); + /* drop the ref on initproc */ proc_rele(initproc); printf("continuing\n"); diff --git a/bsd/kern/kern_sig.c b/bsd/kern/kern_sig.c index bb44111fe..5f3e5960c 100644 --- a/bsd/kern/kern_sig.c +++ b/bsd/kern/kern_sig.c @@ -111,6 +111,7 @@ #include #include +#include /* * Missing prototypes that Mach should export @@ -121,8 +122,6 @@ extern int thread_enable_fpe(thread_t act, int onoff); extern thread_t port_name_to_thread(mach_port_name_t port_name); extern kern_return_t get_signalact(task_t , thread_t *, int); extern unsigned int get_useraddr(void); -extern kern_return_t task_suspend_internal(task_t); -extern kern_return_t task_resume_internal(task_t); /* * --- @@ -134,10 +133,11 @@ 
extern void doexception(int exc, mach_exception_code_t code, static void stop(proc_t, proc_t); int cansignal(proc_t, kauth_cred_t, proc_t, int, int); int killpg1(proc_t, int, int, int, int); -int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart); static void psignal_uthread(thread_t, int); +static void psignal_try_thread(proc_t, thread_t, int signum); kern_return_t do_bsdexception(int, int, int); void __posix_sem_syscall_return(kern_return_t); +char *proc_name_address(void *p); /* implementations in osfmk/kern/sync_sema.c. We do not want port.h in this scope, so void * them */ kern_return_t semaphore_timedwait_signal_trap_internal(mach_port_name_t, mach_port_name_t, unsigned int, clock_res_t, void (*)(kern_return_t)); @@ -148,7 +148,7 @@ kern_return_t semaphore_wait_trap_internal(mach_port_name_t, void (*)(kern_retur static int filt_sigattach(struct knote *kn); static void filt_sigdetach(struct knote *kn); static int filt_signal(struct knote *kn, long hint); -static void filt_signaltouch(struct knote *kn, struct kevent64_s *kev, +static void filt_signaltouch(struct knote *kn, struct kevent_internal_s *kev, long type); struct filterops sig_filtops = { @@ -185,6 +185,7 @@ static kern_return_t get_signalthread(proc_t, int, thread_t *); #define PSIG_LOCKED 0x1 #define PSIG_VFORK 0x2 #define PSIG_THREAD 0x4 +#define PSIG_TRY_THREAD 0x8 static void psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum); @@ -305,6 +306,10 @@ cansignal(proc_t p, kauth_cred_t uc, proc_t q, int signum, int zombie) if (p == q) return(1); + /* you can't send launchd SIGKILL, even if root */ + if (signum == SIGKILL && q == initproc) + return(0); + if (!suser(uc, NULL)) return (1); /* root can always signal */ @@ -349,6 +354,53 @@ cansignal(proc_t p, kauth_cred_t uc, proc_t q, int signum, int zombie) return (0); } +/* + * Some signals can be restricted from being handled, + * forcing the default action for that signal. 
This behavior applies only to + * non-root (EUID != 0) processes, and is configured with the "sigrestrict=x" + * bootarg: + * + * 0 (default): Disallow use of restricted signals. Trying to register a handler + * returns ENOTSUP, which userspace may use to take special action (e.g. abort). + * 1: As above, but return EINVAL. Restricted signals behave similarly to SIGKILL. + * 2: Usual POSIX semantics. + */ +unsigned sigrestrict_arg = 0; + +#if PLATFORM_WatchOS || PLATFORM_AppleTVOS +static int +sigrestrictmask(void) +{ + if (kauth_getuid() != 0 && sigrestrict_arg != 2) { + return SIGRESTRICTMASK; + } + return 0; +} + +static int +signal_is_restricted(proc_t p, int signum) +{ + if (sigmask(signum) & sigrestrictmask()) { + if (sigrestrict_arg == 0 && + task_get_apptype(p->task) == TASK_APPTYPE_APP_DEFAULT) { + return ENOTSUP; + } else { + return EINVAL; + } + } + return 0; +} + +#else + +static inline int +signal_is_restricted(proc_t p, int signum) +{ + (void)p; + (void)signum; + return 0; +} +#endif /* !(PLATFORM_WatchOS || PLATFORM_AppleTVOS) */ /* * Returns: 0 Success @@ -375,9 +427,17 @@ sigaction(proc_t p, struct sigaction_args *uap, __unused int32_t *retval) signum = uap->signum; if (signum <= 0 || signum >= NSIG || - signum == SIGKILL || signum == SIGSTOP) + signum == SIGKILL || signum == SIGSTOP) return (EINVAL); + if ((error = signal_is_restricted(p, signum))) { + if (error == ENOTSUP) { + printf("%s(%d): denied attempt to register action for signal %d\n", + proc_name_address(p), proc_pid(p), signum); + } + return error; + } + if (uap->osa) { sa->sa_handler = ps->ps_sigact[signum]; sa->sa_mask = ps->ps_catchmask[signum]; @@ -1662,7 +1722,7 @@ get_signalthread(proc_t p, int signum, thread_t * thr) thread_t sig_thread; struct task * sig_task = p->task; kern_return_t kret; - + *thr = THREAD_NULL; if ((p->p_lflag & P_LINVFORK) && p->p_vforkact) { @@ -1673,9 +1733,10 @@ get_signalthread(proc_t p, int signum, thread_t * thr) return(KERN_SUCCESS); }else 
return(KERN_FAILURE); - } + } proc_lock(p); + TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) { if(((uth->uu_flag & UT_NO_SIGMASK)== 0) && (((uth->uu_sigmask & mask) == 0) || (uth->uu_sigwait & mask))) { @@ -1733,6 +1794,12 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) } #endif /* SIGNAL_DEBUG */ + /* catch unexpected initproc kills early for easier debuggging */ + if (signum == SIGKILL && p == initproc) + panic_plain("unexpected SIGKILL of %s %s", + (p->p_name[0] != '\0' ? p->p_name : "initproc"), + ((p->p_csflags & CS_KILLED) ? "(CS_KILLED)" : "")); + /* * We will need the task pointer later. Grab it now to * check for a zombie process. Also don't send signals @@ -1746,6 +1813,10 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) sig_task = get_threadtask(thread); sig_thread = thread; sig_proc = (proc_t)get_bsdtask_info(sig_task); + } else if (flavor & PSIG_TRY_THREAD) { + sig_task = p->task; + sig_thread = thread; + sig_proc = p; } else { sig_task = p->task; sig_thread = (struct thread *)0; @@ -1782,7 +1853,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) * the corresponding task data structures around too. This * reference is released by thread_deallocate. 
*/ - + if (((flavor & PSIG_VFORK) == 0) && ((sig_proc->p_lflag & P_LTRACED) == 0) && (sig_proc->p_sigignore & mask)) { DTRACE_PROC3(signal__discard, thread_t, sig_thread, proc_t, sig_proc, int, signum); @@ -1793,6 +1864,16 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) action = SIG_DFL; act_set_astbsd(sig_thread); kret = KERN_SUCCESS; + } else if (flavor & PSIG_TRY_THREAD) { + uth = get_bsdthread_info(sig_thread); + if (((uth->uu_flag & UT_NO_SIGMASK) == 0) && + (((uth->uu_sigmask & mask) == 0) || (uth->uu_sigwait & mask)) && + ((kret = check_actforsig(sig_proc->task, sig_thread, 1)) == KERN_SUCCESS)) { + /* deliver to specified thread */ + } else { + /* deliver to any willing thread */ + kret = get_signalthread(sig_proc, signum, &sig_thread); + } } else if (flavor & PSIG_THREAD) { /* If successful return with ast set */ kret = check_actforsig(sig_task, sig_thread, 1); @@ -1807,7 +1888,6 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) goto psigout; } - uth = get_bsdthread_info(sig_thread); /* @@ -1838,7 +1918,6 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) } } - proc_lock(sig_proc); if (sig_proc->p_nice > NZERO && action == SIG_DFL && (prop & SA_KILL) && @@ -2192,6 +2271,12 @@ psignal_uthread(thread_t thread, int signum) psignal_internal(PROC_NULL, TASK_NULL, thread, PSIG_THREAD, signum); } +/* same as psignal(), but prefer delivery to 'thread' if possible */ +static void +psignal_try_thread(proc_t p, thread_t thread, int signum) +{ + psignal_internal(p, NULL, thread, PSIG_TRY_THREAD, signum); +} /* * If the current process has received a signal (should be caught or cause @@ -2395,21 +2480,6 @@ issignal_locked(proc_t p) switch ((long)p->p_sigacts->ps_sigact[signum]) { case (long)SIG_DFL: - /* - * Don't take default actions on system processes. - */ - if (p->p_ppid == 0) { -#if DIAGNOSTIC - /* - * Are you sure you want to ignore SIGSEGV - * in init? 
XXX - */ - printf("Process (pid %d) got signal %d\n", - p->p_pid, signum); -#endif - break; /* == ignore */ - } - /* * If there is a pending stop signal to process * with default action, stop here, @@ -2557,21 +2627,6 @@ CURSIG(proc_t p) switch ((long)p->p_sigacts->ps_sigact[signum]) { case (long)SIG_DFL: - /* - * Don't take default actions on system processes. - */ - if (p->p_ppid == 0) { -#if DIAGNOSTIC - /* - * Are you sure you want to ignore SIGSEGV - * in init? XXX - */ - printf("Process (pid %d) got signal %d\n", - p->p_pid, signum); -#endif - break; /* == ignore */ - } - /* * If there is a pending stop signal to process * with default action, stop here, @@ -2840,7 +2895,7 @@ filt_signal(struct knote *kn, long hint) } static void -filt_signaltouch(struct knote *kn, struct kevent64_s *kev, long type) +filt_signaltouch(struct knote *kn, struct kevent_internal_s *kev, long type) { proc_klist_lock(); switch (type) { @@ -2856,7 +2911,7 @@ filt_signaltouch(struct knote *kn, struct kevent64_s *kev, long type) } break; default: - panic("filt_machporttouch() - invalid type (%ld)", type); + panic("filt_signaltouch() - invalid type (%ld)", type); break; } proc_klist_unlock(); @@ -2891,7 +2946,7 @@ bsd_ast(thread_t thread) else task_vtimer_clear(p->task, TASK_VTIMER_USER); - psignal(p, SIGVTALRM); + psignal_try_thread(p, thread, SIGVTALRM); } } @@ -2906,7 +2961,7 @@ bsd_ast(thread_t thread) else task_vtimer_clear(p->task, TASK_VTIMER_PROF); - psignal(p, SIGPROF); + psignal_try_thread(p, thread, SIGPROF); } } @@ -2927,7 +2982,7 @@ bsd_ast(thread_t thread) task_vtimer_clear(p->task, TASK_VTIMER_RLIM); - psignal(p, SIGXCPU); + psignal_try_thread(p, thread, SIGXCPU); } } diff --git a/bsd/kern/kern_subr.c b/bsd/kern/kern_subr.c index eada997ca..610c94936 100644 --- a/bsd/kern/kern_subr.c +++ b/bsd/kern/kern_subr.c @@ -1004,9 +1004,6 @@ void uio_update( uio_t a_uio, user_size_t a_count ) a_uio->uio_iovs.uiovp->iov_base += a_count; a_uio->uio_iovs.uiovp->iov_len -= a_count; } - 
if (a_uio->uio_resid_64 < 0) { - a_uio->uio_resid_64 = 0; - } if (a_count > (user_size_t)a_uio->uio_resid_64) { a_uio->uio_offset += a_uio->uio_resid_64; a_uio->uio_resid_64 = 0; @@ -1040,9 +1037,6 @@ void uio_update( uio_t a_uio, user_size_t a_count ) a_uio->uio_iovs.kiovp->iov_base += a_count; a_uio->uio_iovs.kiovp->iov_len -= a_count; } - if (a_uio->uio_resid_64 < 0) { - a_uio->uio_resid_64 = 0; - } if (a_count > (user_size_t)a_uio->uio_resid_64) { a_uio->uio_offset += a_uio->uio_resid_64; a_uio->uio_resid_64 = 0; diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c index 0e9d6c9c6..9cd79fca4 100644 --- a/bsd/kern/kern_symfile.c +++ b/bsd/kern/kern_symfile.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -58,7 +59,7 @@ #include #include #include -#include +#include /* This function is called from kern_sysctl in the current process context; * it is exported with the System6.0.exports, but this appears to be a legacy @@ -79,6 +80,7 @@ struct kern_direct_file_io_ref_t dev_t device; uint32_t blksize; off_t filelength; + char cf; char pinned; }; @@ -99,7 +101,7 @@ static int device_ioctl(void * p1, __unused void * p2, u_long theIoctl, caddr_t static int kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, off_t offset, off_t end) { - int error; + int error = 0; int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result); void * p1; void * p2; @@ -125,6 +127,18 @@ kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, p2 = ref->ctx; do_ioctl = &device_ioctl; } + + if (_DKIOCCSPINEXTENT == theIoctl) { + /* Tell CS the image size, so it knows whether to place the subsequent pins SSD/HDD */ + pin.cp_extent.length = end; + pin.cp_flags = _DKIOCCSHIBERNATEIMGSIZE; + (void) do_ioctl(p1, p2, _DKIOCCSPINEXTENT, (caddr_t)&pin); + } else if (_DKIOCCSUNPINEXTENT == theIoctl) { + /* Tell CS hibernation is done, so it can stop blocking overlapping writes */ + pin.cp_flags = 
_DKIOCCSPINDISCARDBLACKLIST; + (void) do_ioctl(p1, p2, _DKIOCCSUNPINEXTENT, (caddr_t)&pin); + } + while (offset < end) { if (ref->vp->v_type == VREG) @@ -161,28 +175,40 @@ kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, error = do_ioctl(p1, p2, theIoctl, (caddr_t)&pin); if (error && (ENOTTY != error)) { - printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n", - error, pin.cp_extent.offset, pin.cp_extent.length); + printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n", error, pin.cp_extent.offset, pin.cp_extent.length); + } + } + else if (_DKIOCCSUNPINEXTENT == theIoctl) + { + pin.cp_extent.offset = fileblk; + pin.cp_extent.length = filechunk; + pin.cp_flags = _DKIOCCSPINFORHIBERNATION; + error = do_ioctl(p1, p2, theIoctl, (caddr_t)&pin); + if (error && (ENOTTY != error)) + { + printf("_DKIOCCSUNPINEXTENT(%d) 0x%qx, 0x%qx\n", error, pin.cp_extent.offset, pin.cp_extent.length); } } else error = EINVAL; - if (error) break; + if (error) break; offset += filechunk; } return (error); } +extern uint32_t freespace_mb(vnode_t vp); struct kern_direct_file_io_ref_t * kern_open_file_for_direct_io(const char * name, - boolean_t create_file, + boolean_t create_file, kern_get_file_extents_callback_t callback, void * callback_ref, off_t set_file_size, + off_t fs_free_size, off_t write_file_offset, - caddr_t write_file_addr, - vm_size_t write_file_len, + void * write_file_addr, + size_t write_file_len, dev_t * partition_device_result, dev_t * image_device_result, uint64_t * partitionbase_result, @@ -191,20 +217,24 @@ kern_open_file_for_direct_io(const char * name, { struct kern_direct_file_io_ref_t * ref; - proc_t p; - struct vnode_attr va; - int error; - off_t f_offset; - uint64_t fileblk; - size_t filechunk; - uint64_t physoffset; - dev_t device; - dev_t target = 0; - int isssd = 0; - uint32_t flags = 0; - uint32_t blksize; - off_t maxiocount, count, segcount; - boolean_t locked = FALSE; + proc_t p; + struct vnode_attr va; + int error; + off_t f_offset; + 
uint64_t fileblk; + size_t filechunk; + uint64_t physoffset; + dev_t device; + dev_t target = 0; + int isssd = 0; + uint32_t flags = 0; + uint32_t blksize; + off_t maxiocount, count, segcount; + boolean_t locked = FALSE; + int fmode, cmode; + struct nameidata nd; + u_int32_t ndflags; + off_t mpFree; int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result); void * p1 = NULL; @@ -221,12 +251,19 @@ kern_open_file_for_direct_io(const char * name, bzero(ref, sizeof(*ref)); p = kernproc; - ref->ctx = vfs_context_create(vfs_context_current()); + ref->ctx = vfs_context_create(vfs_context_kernel()); - if ((error = vnode_open(name, (create_file) ? (O_CREAT | FWRITE) : FWRITE, - (0), 0, &ref->vp, ref->ctx))) - goto out; + fmode = (create_file) ? (O_CREAT | FWRITE) : FWRITE; + cmode = S_IRUSR | S_IWUSR; + ndflags = NOFOLLOW; + NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(name), ref->ctx); + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, cmode); + VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED); + VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_D); + if ((error = vn_open_auth(&nd, &fmode, &va))) goto out; + ref->vp = nd.ni_vp; if (ref->vp->v_type == VREG) { vnode_lock_spin(ref->vp); @@ -236,8 +273,7 @@ kern_open_file_for_direct_io(const char * name, if (write_file_addr && write_file_len) { - if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len, 0))) - goto out; + if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len, 0))) goto out; } VATTR_INIT(&va); @@ -247,18 +283,17 @@ kern_open_file_for_direct_io(const char * name, VATTR_WANTED(&va, va_data_alloc); VATTR_WANTED(&va, va_nlink); error = EFAULT; - if (vnode_getattr(ref->vp, &va, ref->ctx)) - goto out; + if (vnode_getattr(ref->vp, &va, ref->ctx)) goto out; - kprintf("vp va_rdev major %d minor %d\n", major(va.va_rdev), minor(va.va_rdev)); - kprintf("vp va_fsid major %d minor %d\n", major(va.va_fsid), 
minor(va.va_fsid)); - kprintf("vp size %qd alloc %qd\n", va.va_data_size, va.va_data_alloc); + mpFree = freespace_mb(ref->vp); + mpFree <<= 20; + kprintf("kern_direct_file(%s): vp size %qd, alloc %qd, mp free %qd, keep free %qd\n", + name, va.va_data_size, va.va_data_alloc, mpFree, fs_free_size); if (ref->vp->v_type == VREG) { - /* Don't dump files with links. */ - if (va.va_nlink != 1) - goto out; + /* Don't dump files with links. */ + if (va.va_nlink != 1) goto out; device = va.va_fsid; ref->filelength = va.va_data_size; @@ -267,14 +302,21 @@ kern_open_file_for_direct_io(const char * name, p2 = p; do_ioctl = &file_ioctl; - if (set_file_size) - { - error = vnode_setsize(ref->vp, set_file_size, - IO_NOZEROFILL | IO_NOAUTH, ref->ctx); - if (error) - goto out; - ref->filelength = set_file_size; + if (set_file_size) + { + if (fs_free_size) + { + mpFree += va.va_data_alloc; + if ((mpFree < set_file_size) || ((mpFree - set_file_size) < fs_free_size)) + { + error = ENOSPC; + goto out; } + } + error = vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL | IO_NOAUTH, ref->ctx); + if (error) goto out; + ref->filelength = set_file_size; + } } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { @@ -288,11 +330,17 @@ kern_open_file_for_direct_io(const char * name, else { /* Don't dump to non-regular files. 
*/ - error = EFAULT; + error = EFAULT; goto out; } ref->device = device; + // probe for CF + dk_corestorage_info_t cs_info; + memset(&cs_info, 0, sizeof(dk_corestorage_info_t)); + error = do_ioctl(p1, p2, DKIOCCORESTORAGE, (caddr_t)&cs_info); + ref->cf = (error == 0) && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES); + // get block size error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize); @@ -302,8 +350,7 @@ kern_open_file_for_direct_io(const char * name, if (ref->vp->v_type != VREG) { error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk); - if (error) - goto out; + if (error) goto out; ref->filelength = fileblk * ref->blksize; } @@ -316,8 +363,7 @@ kern_open_file_for_direct_io(const char * name, // generate the block list error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL); - if (error) - goto out; + if (error) goto out; locked = TRUE; f_offset = 0; @@ -330,8 +376,7 @@ kern_open_file_for_direct_io(const char * name, error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, &filechunk, NULL, VNODE_WRITE, NULL); - if (error) - goto out; + if (error) goto out; fileblk = blkno * ref->blksize; } @@ -350,8 +395,7 @@ kern_open_file_for_direct_io(const char * name, getphysreq.offset = fileblk + physoffset; getphysreq.length = (filechunk - physoffset); error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq); - if (error) - goto out; + if (error) goto out; if (!target) { target = getphysreq.dev; @@ -376,8 +420,13 @@ kern_open_file_for_direct_io(const char * name, } callback(callback_ref, 0ULL, 0ULL); - if (ref->vp->v_type == VREG) - p1 = &target; + if (ref->vp->v_type == VREG) p1 = &target; + else + { + p1 = &target; + p2 = p; + do_ioctl = &file_ioctl; + } // get partition base @@ -446,7 +495,7 @@ kern_open_file_for_direct_io(const char * name, error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd); if (!error && isssd) - flags |= kIOHibernateOptionSSD; + flags |= kIOPolledFileSSD; if (partition_device_result) *partition_device_result = 
device; @@ -455,8 +504,16 @@ kern_open_file_for_direct_io(const char * name, if (oflags) *oflags = flags; + if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) + { + vnode_close(ref->vp, FWRITE, ref->ctx); + ref->vp = NULLVP; + vfs_context_rele(ref->ctx); + ref->ctx = NULL; + } + out: - kprintf("kern_open_file_for_direct_io(%d)\n", error); + printf("kern_open_file_for_direct_io(%d)\n", error); if (error && locked) { @@ -466,17 +523,9 @@ kern_open_file_for_direct_io(const char * name, if (error && ref) { - if (ref->pinned) - { - _dk_cs_pin_t pin; - bzero(&pin, sizeof(pin)); - - pin.cp_flags = _DKIOCCSPINDISCARDBLACKLIST; - p1 = &device; - (void) do_ioctl(p1, p2, _DKIOCCSUNPINEXTENT, (caddr_t)&pin); - } if (ref->vp) { + (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, 0, (ref->pinned && ref->cf) ? ref->filelength : 0); vnode_close(ref->vp, FWRITE, ref->ctx); ref->vp = NULLVP; } @@ -489,7 +538,7 @@ kern_open_file_for_direct_io(const char * name, } int -kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len, int ioflag) +kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag) { return (vn_rdwr(UIO_WRITE, ref->vp, addr, len, offset, @@ -498,14 +547,29 @@ kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t ad vfs_context_proc(ref->ctx))); } +int +kern_read_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag) +{ + return (vn_rdwr(UIO_READ, ref->vp, + addr, len, offset, + UIO_SYSSPACE, ioflag|IO_SYNC|IO_NODELOCKED|IO_UNIT, + vfs_context_ucred(ref->ctx), (int *) 0, + vfs_context_proc(ref->ctx))); +} + + +struct mount * +kern_file_mount(struct kern_direct_file_io_ref_t * ref) +{ + return (ref->vp->v_mount); +} void kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, - off_t write_offset, caddr_t addr, vm_size_t write_length, + off_t write_offset, void * addr, size_t write_length, 
off_t discard_offset, off_t discard_end) { int error; - _dk_cs_pin_t pin; kprintf("kern_close_file_for_direct_io\n"); if (!ref) return; @@ -531,18 +595,21 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, } (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL); - if (ref->pinned) - { - bzero(&pin, sizeof(pin)); - pin.cp_flags = _DKIOCCSPINDISCARDBLACKLIST; - (void) do_ioctl(p1, p2, _DKIOCCSUNPINEXTENT, (caddr_t)&pin); - } + //XXX If unmapping extents then don't also need to unpin; except ... + //XXX if file unaligned (HFS 4k / Fusion 128k) then pin is superset and + //XXX unmap is subset, so save extra walk over file extents (and the risk + //XXX that CF drain starts) vs leaving partial units pinned to SSD + //XXX (until whatever was sharing also unmaps). Err on cleaning up fully. + boolean_t will_unmap = (!ref->pinned || ref->cf) && (discard_end > discard_offset); + boolean_t will_unpin = (ref->pinned && ref->cf /* && !will_unmap */); - - if (discard_offset && discard_end && !ref->pinned) + (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, 0, (will_unpin) ? ref->filelength : 0); + + if (will_unmap) { - (void) kern_ioctl_file_extents(ref, DKIOCUNMAP, discard_offset, discard_end); + (void) kern_ioctl_file_extents(ref, DKIOCUNMAP, discard_offset, (ref->cf) ? 
ref->filelength : discard_end); } + if (addr && write_length) { (void) kern_write_file(ref, write_offset, addr, write_length, 0); @@ -553,7 +620,10 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, ref->vp = NULLVP; kprintf("vnode_close(%d)\n", error); } - vfs_context_rele(ref->ctx); - ref->ctx = NULL; + if (ref->ctx) + { + vfs_context_rele(ref->ctx); + ref->ctx = NULL; + } kfree(ref, sizeof(struct kern_direct_file_io_ref_t)); } diff --git a/bsd/kern/kern_sysctl.c b/bsd/kern/kern_sysctl.c index c7978d82e..8239a10fd 100644 --- a/bsd/kern/kern_sysctl.c +++ b/bsd/kern/kern_sysctl.c @@ -102,6 +102,8 @@ #include #include #include +#include +#include #include #include @@ -115,6 +117,7 @@ #include #include #include +#include #include #include #include @@ -240,9 +243,6 @@ STATIC int sysdoproc_filt_KERN_PROC_PGRP(proc_t p, void * arg); STATIC int sysdoproc_filt_KERN_PROC_TTY(proc_t p, void * arg); STATIC int sysdoproc_filt_KERN_PROC_UID(proc_t p, void * arg); STATIC int sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg); -#if CONFIG_LCTX -STATIC int sysdoproc_filt_KERN_PROC_LCID(proc_t p, void * arg); -#endif int sysdoproc_callback(proc_t p, void *arg); @@ -294,6 +294,7 @@ STATIC int sysctl_sysctl_native(struct sysctl_oid *oidp, void *arg1, int arg2, s STATIC int sysctl_sysctl_cputype(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); STATIC int sysctl_safeboot(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); STATIC int sysctl_singleuser(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_minimalboot(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); STATIC int sysctl_slide(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); @@ -459,6 +460,14 @@ sysctl_sched_stats_enable(__unused struct sysctl_oid *oidp, __unused void *arg1, SYSCTL_PROC(_kern, OID_AUTO, sched_stats_enable, CTLFLAG_LOCKED | CTLFLAG_WR, 0, 0, 
sysctl_sched_stats_enable, "-", ""); +extern uint32_t sched_debug_flags; +SYSCTL_INT(_debug, OID_AUTO, sched, CTLFLAG_RW | CTLFLAG_LOCKED, &sched_debug_flags, 0, "scheduler debug"); + +#if (DEBUG || DEVELOPMENT) +extern boolean_t doprnt_hide_pointers; +SYSCTL_INT(_debug, OID_AUTO, hide_kernel_pointers, CTLFLAG_RW | CTLFLAG_LOCKED, &doprnt_hide_pointers, 0, "hide kernel pointers from log"); +#endif + extern int get_kernel_symfile(proc_t, char **); #if COUNT_SYSCALLS @@ -662,18 +671,6 @@ sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg) return(1); } -#if CONFIG_LCTX -STATIC int -sysdoproc_filt_KERN_PROC_LCID(proc_t p, void * arg) -{ - if ((p->p_lctx == NULL) || - (p->p_lctx->lc_id != (pid_t)*(int*)arg)) - return(0); - else - return(1); -} -#endif - /* * try over estimating by 5 procs */ @@ -779,11 +776,6 @@ sysctl_prochandle SYSCTL_HANDLER_ARGS ruidcheck = 1; break; -#if CONFIG_LCTX - case KERN_PROC_LCID: - filterfn = sysdoproc_filt_KERN_PROC_LCID; - break; -#endif case KERN_PROC_ALL: break; @@ -914,10 +906,6 @@ fill_user32_eproc(proc_t p, struct user32_eproc *__restrict ep) if (sessp != SESSION_NULL && sessp->s_ttyvp) ep->e_flag = EPROC_CTTY; } -#if CONFIG_LCTX - if (p->p_lctx) - ep->e_lcid = p->p_lctx->lc_id; -#endif ep->e_ppid = p->p_ppid; if (p->p_ucred) { my_cred = kauth_cred_proc_ref(p); @@ -974,10 +962,6 @@ fill_user64_eproc(proc_t p, struct user64_eproc *__restrict ep) if (sessp != SESSION_NULL && sessp->s_ttyvp) ep->e_flag = EPROC_CTTY; } -#if CONFIG_LCTX - if (p->p_lctx) - ep->e_lcid = p->p_lctx->lc_id; -#endif ep->e_ppid = p->p_ppid; if (p->p_ucred) { my_cred = kauth_cred_proc_ref(p); @@ -1164,8 +1148,8 @@ sysctl_kdebug_ops SYSCTL_HANDLER_ARGS case KERN_KDSETREG: case KERN_KDGETREG: case KERN_KDREADTR: - case KERN_KDWRITETR: - case KERN_KDWRITEMAP: + case KERN_KDWRITETR: + case KERN_KDWRITEMAP: case KERN_KDPIDTR: case KERN_KDTHRMAP: case KERN_KDPIDEX: @@ -1176,9 +1160,12 @@ sysctl_kdebug_ops SYSCTL_HANDLER_ARGS case KERN_KDDISABLE_BG_TRACE: case 
KERN_KDREADCURTHRMAP: case KERN_KDSET_TYPEFILTER: - case KERN_KDBUFWAIT: + case KERN_KDBUFWAIT: case KERN_KDCPUMAP: - + case KERN_KDWAIT_BG_TRACE_RESET: + case KERN_KDSET_BG_TYPEFILTER: + case KERN_KDWRITEMAP_V3: + case KERN_KDWRITETR_V3: ret = kdbg_control(name, namelen, oldp, oldlenp); break; default: @@ -1386,7 +1373,7 @@ sysctl_procargsx(int *name, u_int namelen, user_addr_t where, return(EINVAL); - ret = kmem_alloc(kernel_map, &copy_start, round_page(arg_size)); + ret = kmem_alloc(kernel_map, &copy_start, round_page(arg_size), VM_KERN_MEMORY_BSD); if (ret != KERN_SUCCESS) { vm_map_deallocate(proc_map); return(ENOMEM); @@ -1424,6 +1411,20 @@ sysctl_procargsx(int *name, u_int namelen, user_addr_t where, size = arg_size; } + /* + * When these sysctls were introduced, the first string in the strings + * section was just the bare path of the executable. However, for security + * reasons we now prefix this string with executable_path= so it can be + * parsed getenv style. To avoid binary compatability issues with exising + * callers of this sysctl, we strip it off here if present. 
+ * (rdar://problem/13746466) + */ +#define EXECUTABLE_KEY "executable_path=" + if (strncmp(EXECUTABLE_KEY, data, strlen(EXECUTABLE_KEY)) == 0){ + data += strlen(EXECUTABLE_KEY); + size -= strlen(EXECUTABLE_KEY); + } + if (argc_yes) { /* Put processes argc as the first word in the copyout buffer */ suword(where, p->p_argc); @@ -1856,6 +1857,10 @@ SYSCTL_INT(_kern, OID_AUTO, ignore_is_ssd, CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, &ignore_is_ssd, 0, ""); +SYSCTL_INT(_kern, OID_AUTO, root_is_CF_drive, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &root_is_CF_drive, 0, ""); + SYSCTL_UINT(_kern, OID_AUTO, preheat_max_bytes, CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, &preheat_max_bytes, 0, ""); @@ -2597,6 +2602,16 @@ SYSCTL_PROC(_kern, OID_AUTO, singleuser, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_singleuser, "I", ""); +STATIC int sysctl_minimalboot +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + return sysctl_io_number(req, minimalboot, sizeof(int), NULL, NULL); +} + +SYSCTL_PROC(_kern, OID_AUTO, minimalboot, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_minimalboot, "I", ""); + /* * Controls for debugging affinity sets - see osfmk/kern/affinity.c */ @@ -2666,8 +2681,12 @@ SYSCTL_INT(_vm, OID_AUTO, vm_page_filecache_min, CTLFLAG_RW | CTLFLAG_LOCKED, &v extern int vm_compressor_mode; extern int vm_compressor_is_active; +extern int vm_compressor_available; +extern uint32_t vm_ripe_target_age; extern uint32_t swapout_target_age; extern int64_t compressor_bytes_used; +extern int64_t c_segment_input_bytes; +extern int64_t c_segment_compressed_bytes; extern uint32_t compressor_eval_period_in_msecs; extern uint32_t compressor_sample_min_in_msecs; extern uint32_t compressor_sample_max_in_msecs; @@ -2678,10 +2697,16 @@ extern uint32_t vm_compressor_majorcompact_threshold_divisor; extern uint32_t vm_compressor_unthrottle_threshold_divisor; extern uint32_t 
vm_compressor_catchup_threshold_divisor; +SYSCTL_QUAD(_vm, OID_AUTO, compressor_input_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_input_bytes, ""); +SYSCTL_QUAD(_vm, OID_AUTO, compressor_compressed_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_compressed_bytes, ""); +SYSCTL_QUAD(_vm, OID_AUTO, compressor_bytes_used, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_bytes_used, ""); + SYSCTL_INT(_vm, OID_AUTO, compressor_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_mode, 0, ""); SYSCTL_INT(_vm, OID_AUTO, compressor_is_active, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_is_active, 0, ""); -SYSCTL_QUAD(_vm, OID_AUTO, compressor_bytes_used, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_bytes_used, ""); SYSCTL_INT(_vm, OID_AUTO, compressor_swapout_target_age, CTLFLAG_RD | CTLFLAG_LOCKED, &swapout_target_age, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_available, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_available, 0, ""); + +SYSCTL_INT(_vm, OID_AUTO, vm_ripe_target_age_in_secs, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_ripe_target_age, 0, ""); SYSCTL_INT(_vm, OID_AUTO, compressor_eval_period_in_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_eval_period_in_msecs, 0, ""); SYSCTL_INT(_vm, OID_AUTO, compressor_sample_min_in_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_sample_min_in_msecs, 0, ""); @@ -2709,12 +2734,12 @@ SYSCTL_INT(_vm, OID_AUTO, phantom_cache_thrashing_threshold_ssd, CTLFLAG_RW | CT #if (DEVELOPMENT || DEBUG) SYSCTL_UINT(_vm, OID_AUTO, vm_page_creation_throttled_hard, - CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, - &vm_page_creation_throttled_hard, 0, ""); + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + &vm_page_creation_throttled_hard, 0, ""); SYSCTL_UINT(_vm, OID_AUTO, vm_page_creation_throttled_soft, - CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, - &vm_page_creation_throttled_soft, 0, ""); + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + &vm_page_creation_throttled_soft, 0, ""); #endif /* DEVELOPMENT || DEBUG */ @@ -2747,13 +2772,6 @@ SYSCTL_INT(_kern, OID_AUTO, 
ipc_portbt, * Scheduler sysctls */ -/* - * See osfmk/kern/sched_prim.c for the corresponding definition - * in osfmk/. If either version changes, update the other. - */ -#define SCHED_STRING_MAX_LENGTH (48) - -extern char sched_string[SCHED_STRING_MAX_LENGTH]; SYSCTL_STRING(_kern, OID_AUTO, sched, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, sched_string, sizeof(sched_string), @@ -2944,3 +2962,5 @@ SYSCTL_INT(_kern, OID_AUTO, hv_support, CTLFLAG_KERN | CTLFLAG_RD | CTLFLAG_LOCKED, &hv_support_available, 0, ""); #endif + + diff --git a/bsd/kern/kern_tests.c b/bsd/kern/kern_tests.c deleted file mode 100644 index 8499a3545..000000000 --- a/bsd/kern/kern_tests.c +++ /dev/null @@ -1,48 +0,0 @@ -/*************************************************************** - * Test Declarations Go Here * - ***************************************************************/ -#include -#include -#include -#include - -/*************************************************************** - * End Test Declarations * - ***************************************************************/ -typedef int (*xnu_test_func_t)(void); - -typedef struct xnu_test { - xnu_test_func_t t_func; - const char *t_name; -} xnu_test_t; - -#define DEFINE_XNU_TEST(func) { func, #func } - -xnu_test_t xnu_tests[] = { -}; - -#define NUM_XNU_TESTS (sizeof(xnu_tests) / sizeof(xnu_test_t)) - -static int -run_xnu_tests -(struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) -{ - unsigned i; - int result; - - for (i = 0; i < NUM_XNU_TESTS; i++) { - result = xnu_tests[i].t_func(); - if (result == 0) { - kprintf("xnu_tests: %s passed.\n", xnu_tests[i].t_name); - } else{ - panic("xnu_tests: %s failed.\n", xnu_tests[i].t_name); - } - } - - return sysctl_handle_int(oidp, NULL, 0, req); -} - -SYSCTL_PROC(_kern, OID_AUTO, kern_tests, - CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, - 0, 0, run_xnu_tests, "I", ""); - diff --git a/bsd/kern/kern_xxx.c b/bsd/kern/kern_xxx.c index fc94a14e0..dde94f5a2 100644 --- 
a/bsd/kern/kern_xxx.c +++ b/bsd/kern/kern_xxx.c @@ -88,7 +88,7 @@ int reboot(struct proc *p, register struct reboot_args *uap, __unused int32_t *retval) { - char command[64]; + char message[128]; int error=0; size_t dummy=0; #if CONFIG_MACF @@ -97,14 +97,23 @@ reboot(struct proc *p, register struct reboot_args *uap, __unused int32_t *retva AUDIT_ARG(cmd, uap->opt); - command[0] = '\0'; + message[0] = '\0'; if ((error = suser(kauth_cred_get(), &p->p_acflag))) return(error); if (uap->opt & RB_COMMAND) - error = copyinstr(uap->command, - (void *)command, sizeof(command), (size_t *)&dummy); + return ENOSYS; + + if (uap->opt & RB_PANIC) { +#if !(DEVELOPMENT || DEBUG) + if (p != initproc) { + return EPERM; + } +#endif + error = copyinstr(uap->command, (void *)message, sizeof(message), (size_t *)&dummy); + } + #if CONFIG_MACF if (error) return (error); @@ -114,7 +123,7 @@ reboot(struct proc *p, register struct reboot_args *uap, __unused int32_t *retva #endif if (!error) { OSBitOrAtomic(P_REBOOT, &p->p_flag); /* No more signals for this proc */ - error = boot(RB_BOOT, uap->opt, command); + error = reboot_kernel(uap->opt, message); } return(error); } diff --git a/bsd/kern/kpi_mbuf.c b/bsd/kern/kpi_mbuf.c index faaf98c29..c40ca8189 100644 --- a/bsd/kern/kpi_mbuf.c +++ b/bsd/kern/kpi_mbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2014 Apple Inc. All rights reserved. + * Copyright (c) 2004-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -320,7 +320,7 @@ errno_t mbuf_dup(const mbuf_t src, mbuf_how_t how, mbuf_t *new_mbuf) errno_t mbuf_prepend(mbuf_t *orig, size_t len, mbuf_how_t how) { /* Must set *orig to NULL in failure case */ - *orig = m_prepend_2(*orig, len, how); + *orig = m_prepend_2(*orig, len, how, 0); return (*orig == NULL) ? 
ENOMEM : 0; } @@ -1369,3 +1369,16 @@ mbuf_get_driver_scratch(mbuf_t m, u_int8_t **area, size_t *area_len) *area_len = m_scratch_get(m, area); return (0); } + +errno_t +mbuf_get_unsent_data_bytes(const mbuf_t m, u_int32_t *unsent_data) +{ + if (m == NULL || unsent_data == NULL || !(m->m_flags & M_PKTHDR)) + return (EINVAL); + + if (!(m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA)) + return (EINVAL); + + *unsent_data = m->m_pkthdr.pkt_unsent_databytes; + return (0); +} diff --git a/bsd/kern/kpi_socket.c b/bsd/kern/kpi_socket.c index 07c65bf39..09818e3b0 100644 --- a/bsd/kern/kpi_socket.c +++ b/bsd/kern/kpi_socket.c @@ -1038,14 +1038,14 @@ sock_set_tcp_stream_priority(socket_t sock) * Caller must have ensured socket is valid and won't be going away. */ void -socket_set_traffic_mgt_flags_locked(socket_t sock, u_int32_t flags) +socket_set_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags) { - (void) OSBitOrAtomic(flags, &sock->so_traffic_mgt_flags); + (void) OSBitOrAtomic8(flags, &sock->so_traffic_mgt_flags); sock_set_tcp_stream_priority(sock); } void -socket_set_traffic_mgt_flags(socket_t sock, u_int32_t flags) +socket_set_traffic_mgt_flags(socket_t sock, u_int8_t flags) { socket_lock(sock, 1); socket_set_traffic_mgt_flags_locked(sock, flags); @@ -1056,14 +1056,14 @@ socket_set_traffic_mgt_flags(socket_t sock, u_int32_t flags) * Caller must have ensured socket is valid and won't be going away. 
*/ void -socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int32_t flags) +socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags) { - (void) OSBitAndAtomic(~flags, &sock->so_traffic_mgt_flags); + (void) OSBitAndAtomic8(~flags, &sock->so_traffic_mgt_flags); sock_set_tcp_stream_priority(sock); } void -socket_clear_traffic_mgt_flags(socket_t sock, u_int32_t flags) +socket_clear_traffic_mgt_flags(socket_t sock, u_int8_t flags) { socket_lock(sock, 1); socket_clear_traffic_mgt_flags_locked(sock, flags); diff --git a/bsd/kern/kpi_socketfilter.c b/bsd/kern/kpi_socketfilter.c index ac9dfcb1c..bdebcc182 100644 --- a/bsd/kern/kpi_socketfilter.c +++ b/bsd/kern/kpi_socketfilter.c @@ -1256,12 +1256,16 @@ sflt_register(const struct sflt_filter *filter, int domain, int type, struct socket_filter *sock_filt = NULL; struct socket_filter *match = NULL; int error = 0; - struct protosw *pr = pffindproto(domain, protocol, type); + struct protosw *pr; unsigned int len; struct socket *so; struct inpcb *inp; struct solist *solisthead = NULL, *solist = NULL; + if ((domain != PF_INET) && (domain != PF_INET6)) + return (ENOTSUP); + + pr = pffindproto(domain, protocol, type); if (pr == NULL) return (ENOENT); diff --git a/bsd/kern/mach_loader.c b/bsd/kern/mach_loader.c index 81419cb5f..b5666e881 100644 --- a/bsd/kern/mach_loader.c +++ b/bsd/kern/mach_loader.c @@ -89,6 +89,8 @@ extern pmap_t pmap_create(ledger_t ledger, vm_map_size_t size, boolean_t is_64bit); +extern kern_return_t machine_thread_neon_state_initialize(thread_t thread); + /* XXX should have prototypes in a shared header file */ extern int get_map_nentries(vm_map_t); @@ -112,6 +114,7 @@ static load_result_t load_result_null = { .validentry = 0, .using_lcmain = 0, .csflags = 0, + .has_pagezero = 0, .uuid = { 0 }, .min_vm_addr = MACH_VM_MAX_ADDRESS, .max_vm_addr = MACH_VM_MIN_ADDRESS, @@ -171,9 +174,10 @@ set_code_unprotect( caddr_t addr, vm_map_t map, int64_t slide, - struct vnode *vp, + struct vnode *vp, + off_t 
macho_offset, cpu_type_t cputype, - cpu_subtype_t cpusubtype); + cpu_subtype_t cpusubtype); #endif static @@ -286,6 +290,7 @@ note_all_image_info_section(const struct segment_command_64 *scp, } } + load_return_t load_machfile( struct image_params *imgp, @@ -344,15 +349,19 @@ load_machfile( } pmap = pmap_create(get_task_ledger(ledger_task), (vm_map_size_t) 0, - (imgp->ip_flags & IMGPF_IS_64BIT)); + ((imgp->ip_flags & IMGPF_IS_64BIT) != 0)); pal_switch_pmap(thread, pmap, imgp->ip_flags & IMGPF_IS_64BIT); map = vm_map_create(pmap, 0, - vm_compute_max_offset((imgp->ip_flags & IMGPF_IS_64BIT)), + vm_compute_max_offset(((imgp->ip_flags & IMGPF_IS_64BIT) == IMGPF_IS_64BIT)), TRUE); } else map = new_map; +#if (__ARM_ARCH_7K__ >= 2) && defined(PLATFORM_WatchOS) + /* enforce 16KB alignment for watch targets with new ABI */ + vm_map_set_page_shift(map, SIXTEENK_PAGE_SHIFT); +#endif /* __arm64__ */ #ifndef CONFIG_ENFORCE_SIGNED_CODE /* This turns off faulting for executable pages, which allows @@ -412,12 +421,14 @@ load_machfile( /* * Check to see if the page zero is enforced by the map->min_offset. 
*/ - if (enforce_hard_pagezero && (vm_map_has_hard_pagezero(map, 0x1000) == FALSE)) { - if (create_map) { - vm_map_deallocate(map); /* will lose pmap reference too */ + if (enforce_hard_pagezero && + (vm_map_has_hard_pagezero(map, 0x1000) == FALSE)) { + { + if (create_map) { + vm_map_deallocate(map); /* will lose pmap reference too */ + } + return (LOAD_BADMACHO); } - printf("Cannot enforce a hard page-zero for %s\n", imgp->ip_strings); - return (LOAD_BADMACHO); } /* @@ -464,6 +475,8 @@ load_machfile( workqueue_mark_exiting(p); task_complete_halt(task); workqueue_exit(p); + kqueue_dealloc(p->p_wqkqueue); + p->p_wqkqueue = NULL; } old_map = swap_task_map(old_task, thread, map, !spawn); vm_map_deallocate(old_map); @@ -471,6 +484,14 @@ load_machfile( return(LOAD_SUCCESS); } +int macho_printf = 0; +#define MACHO_PRINTF(args) \ + do { \ + if (macho_printf) { \ + printf args; \ + } \ + } while (0) + /* * The file size of a mach-o file is limited to 32 bits; this is because * this is the limit on the kalloc() of enough bytes for a mach_header and @@ -511,7 +532,7 @@ parse_machfile( int pass; proc_t p = current_proc(); /* XXXX */ int error; - int resid=0; + int resid = 0; size_t mach_header_sz = sizeof(struct mach_header); boolean_t abi64; boolean_t got_code_signatures = FALSE; @@ -613,12 +634,20 @@ parse_machfile( /* * Scan through the commands, processing each one as necessary. * We parse in three passes through the headers: + * 0: determine if TEXT and DATA boundary can be page-aligned * 1: thread state, uuid, code signature * 2: segments * 3: dyld, encryption, check entry point */ - for (pass = 1; pass <= 3; pass++) { + for (pass = 0; pass <= 3; pass++) { + + if (pass == 0) { + /* see if we need to adjust the slide to re-align... 
*/ + /* no re-alignment needed on X86_64 or ARM32 kernel */ + continue; + } else if (pass == 1) { + } /* * Check that the entry point is contained in an executable segments @@ -667,6 +696,10 @@ parse_machfile( */ switch(lcp->cmd) { case LC_SEGMENT: + if (pass == 0) { + break; + } + if (pass != 2) break; @@ -819,7 +852,7 @@ parse_machfile( break; ret = set_code_unprotect( (struct encryption_info_command *) lcp, - addr, map, slide, vp, + addr, map, slide, vp, file_offset, header->cputype, header->cpusubtype); if (ret != LOAD_SUCCESS) { printf("proc %d: set_code_unprotect() error %d " @@ -836,7 +869,7 @@ parse_machfile( proc_lock(p); p->p_lflag |= P_LTERM_DECRYPTFAIL; proc_unlock(p); - } + } psignal(p, SIGKILL); } break; @@ -858,34 +891,34 @@ parse_machfile( if (cs_enforcement(NULL)) { ret = LOAD_FAILURE; } else { - /* - * No embedded signatures: look for detached by taskgated, - * this is only done on OSX, on embedded platforms we expect everything - * to be have embedded signatures. - */ + /* + * No embedded signatures: look for detached by taskgated, + * this is only done on OSX, on embedded platforms we expect everything + * to be have embedded signatures. 
+ */ struct cs_blob *blob; blob = ubc_cs_blob_get(vp, -1, file_offset); if (blob != NULL) { - unsigned int cs_flag_data = blob->csb_flags; - if(0 != ubc_cs_generation_check(vp)) { - if (0 != ubc_cs_blob_revalidate(vp, blob, 0)) { - /* clear out the flag data if revalidation fails */ - cs_flag_data = 0; - result->csflags &= ~CS_VALID; + unsigned int cs_flag_data = blob->csb_flags; + if(0 != ubc_cs_generation_check(vp)) { + if (0 != ubc_cs_blob_revalidate(vp, blob, 0)) { + /* clear out the flag data if revalidation fails */ + cs_flag_data = 0; + result->csflags &= ~CS_VALID; + } } - } - /* get flags to be applied to the process */ - result->csflags |= cs_flag_data; + /* get flags to be applied to the process */ + result->csflags |= cs_flag_data; } } } /* Make sure if we need dyld, we got it */ - if ((ret == LOAD_SUCCESS) && result->needs_dynlinker && !dlp) { + if (result->needs_dynlinker && !dlp) { ret = LOAD_FAILURE; } - + if ((ret == LOAD_SUCCESS) && (dlp != 0)) { /* * load the dylinker, and slide it by the independent DYLD ASLR @@ -910,7 +943,7 @@ parse_machfile( #if CONFIG_CODE_DECRYPTION -#define APPLE_UNPROTECTED_HEADER_SIZE (3 * PAGE_SIZE_64) +#define APPLE_UNPROTECTED_HEADER_SIZE (3 * 4096) static load_return_t unprotect_dsmos_segment( @@ -953,9 +986,20 @@ unprotect_dsmos_segment( crypt_info.crypt_end = NULL; #pragma unused(vp, macho_offset) crypt_info.crypt_ops = (void *)0x2e69cf40; + vm_map_offset_t crypto_backing_offset; + crypto_backing_offset = -1; /* i.e. 
use map entry's offset */ +#if DEVELOPMENT || DEBUG + struct proc *p; + p = current_proc(); + printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s)\n", + p->p_pid, p->p_comm, map, + (uint64_t) map_addr, (uint64_t) (map_addr + map_size), + __FUNCTION__, vp->v_name); +#endif /* DEVELOPMENT || DEBUG */ kr = vm_map_apple_protected(map, map_addr, map_addr + map_size, + crypto_backing_offset, &crypt_info); } @@ -979,29 +1023,166 @@ unprotect_dsmos_segment( } #endif /* CONFIG_CODE_DECRYPTION */ + +/* + * map_segment: + * Maps a Mach-O segment, taking care of mis-alignment (wrt the system + * page size) issues. + * + * The mapping might result in 1, 2 or 3 map entries: + * 1. for the first page, which could be overlap with the previous + * mapping, + * 2. for the center (if applicable), + * 3. for the last page, which could overlap with the next mapping. + * + * For each of those map entries, we might have to interpose a + * "fourk_pager" to deal with mis-alignment wrt the system page size, + * either in the mapping address and/or size or the file offset and/or + * size. + * The "fourk_pager" itself would be mapped with proper alignment + * wrt the system page size and would then be populated with the + * information about the intended mapping, with a "4KB" granularity. + */ +static kern_return_t +map_segment( + vm_map_t map, + vm_map_offset_t vm_start, + vm_map_offset_t vm_end, + memory_object_control_t control, + vm_map_offset_t file_start, + vm_map_offset_t file_end, + vm_prot_t initprot, + vm_prot_t maxprot) +{ + int extra_vm_flags, cur_extra_vm_flags; + vm_map_offset_t cur_offset, cur_start, cur_end; + kern_return_t ret; + vm_map_offset_t effective_page_mask; + + if (vm_end < vm_start || + file_end < file_start) { + return LOAD_BADMACHO; + } + if (vm_end == vm_start || + file_end == file_start) { + /* nothing to map... 
*/ + return LOAD_SUCCESS; + } + + effective_page_mask = MAX(PAGE_MASK, vm_map_page_mask(map)); + + extra_vm_flags = 0; + if (vm_map_page_aligned(vm_start, effective_page_mask) && + vm_map_page_aligned(vm_end, effective_page_mask) && + vm_map_page_aligned(file_start, effective_page_mask) && + vm_map_page_aligned(file_end, effective_page_mask)) { + /* all page-aligned and map-aligned: proceed */ + } else { + panic("map_segment: unexpected mis-alignment " + "vm[0x%llx:0x%llx] file[0x%llx:0x%llx]\n", + (uint64_t) vm_start, + (uint64_t) vm_end, + (uint64_t) file_start, + (uint64_t) file_end); + } + + cur_offset = 0; + cur_start = vm_start; + cur_end = vm_start; + if (cur_end >= vm_start + (file_end - file_start)) { + /* all mapped: done */ + goto done; + } + if (vm_map_round_page(cur_end, effective_page_mask) >= + vm_map_trunc_page(vm_start + (file_end - file_start), + effective_page_mask)) { + /* no middle */ + } else { + cur_start = cur_end; + if ((vm_start & effective_page_mask) != + (file_start & effective_page_mask)) { + /* one 4K pager for the middle */ + cur_extra_vm_flags = extra_vm_flags; + } else { + /* regular mapping for the middle */ + cur_extra_vm_flags = 0; + } + cur_end = vm_map_trunc_page(vm_start + (file_end - + file_start), + effective_page_mask); + if (control != MEMORY_OBJECT_CONTROL_NULL) { + ret = vm_map_enter_mem_object_control( + map, + &cur_start, + cur_end - cur_start, + (mach_vm_offset_t)0, + VM_FLAGS_FIXED | cur_extra_vm_flags, + control, + file_start + cur_offset, + TRUE, /* copy */ + initprot, maxprot, + VM_INHERIT_DEFAULT); + } else { + ret = vm_map_enter_mem_object( + map, + &cur_start, + cur_end - cur_start, + (mach_vm_offset_t)0, + VM_FLAGS_FIXED | cur_extra_vm_flags, + IPC_PORT_NULL, + 0, /* offset */ + TRUE, /* copy */ + initprot, maxprot, + VM_INHERIT_DEFAULT); + } + if (ret != KERN_SUCCESS) { + return (LOAD_NOSPACE); + } + cur_offset += cur_end - cur_start; + } + if (cur_end >= vm_start + (file_end - file_start)) { + /* all mapped: 
done */ + goto done; + } + cur_start = cur_end; +done: + assert(cur_end >= vm_start + (file_end - file_start)); + return LOAD_SUCCESS; +} + static load_return_t load_segment( - struct load_command *lcp, - uint32_t filetype, - void * control, - off_t pager_offset, - off_t macho_size, - struct vnode *vp, - vm_map_t map, - int64_t slide, - load_result_t *result -) + struct load_command *lcp, + uint32_t filetype, + void * control, + off_t pager_offset, + off_t macho_size, + struct vnode *vp, + vm_map_t map, + int64_t slide, + load_result_t *result) { struct segment_command_64 segment_command, *scp; kern_return_t ret; - vm_map_offset_t map_addr, map_offset; - vm_map_size_t map_size, seg_size, delta_size; + vm_map_size_t delta_size; vm_prot_t initprot; vm_prot_t maxprot; size_t segment_command_size, total_section_size, single_section_size; - + vm_map_offset_t file_offset, file_size; + vm_map_offset_t vm_offset, vm_size; + vm_map_offset_t vm_start, vm_end, vm_end_aligned; + vm_map_offset_t file_start, file_end; + kern_return_t kr; + boolean_t verbose; + vm_map_size_t effective_page_size; + vm_map_offset_t effective_page_mask; + + effective_page_size = MAX(PAGE_SIZE, vm_map_page_size(map)); + effective_page_mask = MAX(PAGE_MASK, vm_map_page_mask(map)); + + verbose = FALSE; if (LC_SEGMENT_64 == lcp->cmd) { segment_command_size = sizeof(struct segment_command_64); single_section_size = sizeof(struct section_64); @@ -1013,31 +1194,57 @@ load_segment( return (LOAD_BADMACHO); total_section_size = lcp->cmdsize - segment_command_size; - if (LC_SEGMENT_64 == lcp->cmd) + if (LC_SEGMENT_64 == lcp->cmd) { scp = (struct segment_command_64 *)lcp; - else { + } else { scp = &segment_command; widen_segment_command((struct segment_command *)lcp, scp); } + if (verbose) { + MACHO_PRINTF(("+++ load_segment %s " + "vm[0x%llx:0x%llx] file[0x%llx:0x%llx] " + "prot %d/%d flags 0x%x\n", + scp->segname, + (uint64_t)(slide + scp->vmaddr), + (uint64_t)(slide + scp->vmaddr + scp->vmsize), + 
pager_offset + scp->fileoff, + pager_offset + scp->fileoff + scp->filesize, + scp->initprot, + scp->maxprot, + scp->flags)); + } + /* * Make sure what we get from the file is really ours (as specified * by macho_size). */ if (scp->fileoff + scp->filesize < scp->fileoff || - scp->fileoff + scp->filesize > (uint64_t)macho_size) + scp->fileoff + scp->filesize > (uint64_t)macho_size) { return (LOAD_BADMACHO); + } /* * Ensure that the number of sections specified would fit * within the load command size. */ - if (total_section_size / single_section_size < scp->nsects) + if (total_section_size / single_section_size < scp->nsects) { return (LOAD_BADMACHO); + } /* * Make sure the segment is page-aligned in the file. */ - if ((scp->fileoff & PAGE_MASK_64) != 0) + file_offset = pager_offset + scp->fileoff; /* limited to 32 bits */ + file_size = scp->filesize; + if ((file_offset & PAGE_MASK_64) != 0 || + /* we can't mmap() it if it's not page-aligned in the file */ + (file_offset & vm_map_page_mask(map)) != 0) { + /* + * The 1st test would have failed if the system's page size + * was what this process believe is the page size, so let's + * fail here too for the sake of consistency. + */ return (LOAD_BADMACHO); + } /* * If we have a code signature attached for this slice @@ -1053,21 +1260,14 @@ load_segment( return LOAD_BADMACHO; } - /* - * Round sizes to page size. 
- */ - seg_size = round_page_64(scp->vmsize); - map_size = round_page_64(scp->filesize); - map_addr = trunc_page_64(scp->vmaddr); /* JVXXX note that in XNU TOT this is round instead of trunc for 64 bits */ - - seg_size = vm_map_round_page(seg_size, vm_map_page_mask(map)); - map_size = vm_map_round_page(map_size, vm_map_page_mask(map)); - - if (seg_size == 0) - return (KERN_SUCCESS); - if (map_addr == 0 && - map_size == 0 && - seg_size != 0 && + vm_offset = scp->vmaddr + slide; + vm_size = scp->vmsize; + + if (vm_size == 0) + return (LOAD_SUCCESS); + if (scp->vmaddr == 0 && + file_size == 0 && + vm_size != 0 && (scp->initprot & VM_PROT_ALL) == VM_PROT_NONE && (scp->maxprot & VM_PROT_ALL) == VM_PROT_NONE) { /* @@ -1076,9 +1276,6 @@ load_segment( * between the end of page zero and the beginning of the first * slid segment. */ - seg_size += slide; - slide = 0; - /* * This is a "page zero" segment: it starts at address 0, * is not mapped from the binary file and is not accessible. @@ -1086,53 +1283,89 @@ load_segment( * make it completely off limits by raising the VM map's * minimum offset. 
*/ - ret = vm_map_raise_min_offset(map, seg_size); + vm_end = vm_offset + vm_size; + if (vm_end < vm_offset) { + return (LOAD_BADMACHO); + } + if (verbose) { + MACHO_PRINTF(("++++++ load_segment: " + "page_zero up to 0x%llx\n", + (uint64_t) vm_end)); + } + { + vm_end = vm_map_round_page(vm_end, + PAGE_MASK_64); + vm_end_aligned = vm_end; + } + ret = vm_map_raise_min_offset(map, + vm_end_aligned); + if (ret != KERN_SUCCESS) { return (LOAD_FAILURE); } return (LOAD_SUCCESS); + } else { } - /* If a non-zero slide was specified by the caller, apply now */ - map_addr += slide; + { + file_start = vm_map_trunc_page(file_offset, + effective_page_mask); + file_end = vm_map_round_page(file_offset + file_size, + effective_page_mask); + vm_start = vm_map_trunc_page(vm_offset, + effective_page_mask); + vm_end = vm_map_round_page(vm_offset + vm_size, + effective_page_mask); + } - if (map_addr < result->min_vm_addr) - result->min_vm_addr = map_addr; - if (map_addr+seg_size > result->max_vm_addr) - result->max_vm_addr = map_addr+seg_size; + if (vm_start < result->min_vm_addr) + result->min_vm_addr = vm_start; + if (vm_end > result->max_vm_addr) + result->max_vm_addr = vm_end; if (map == VM_MAP_NULL) return (LOAD_SUCCESS); - map_offset = pager_offset + scp->fileoff; /* limited to 32 bits */ - - if (map_size > 0) { + if (vm_size > 0) { initprot = (scp->initprot) & VM_PROT_ALL; maxprot = (scp->maxprot) & VM_PROT_ALL; /* * Map a copy of the file into the address space. 
*/ - ret = vm_map_enter_mem_object_control(map, - &map_addr, map_size, (mach_vm_offset_t)0, - VM_FLAGS_FIXED, control, map_offset, TRUE, - initprot, maxprot, - VM_INHERIT_DEFAULT); - if (ret != KERN_SUCCESS) { - return (LOAD_NOSPACE); + if (verbose) { + MACHO_PRINTF(("++++++ load_segment: " + "mapping at vm [0x%llx:0x%llx] of " + "file [0x%llx:0x%llx]\n", + (uint64_t) vm_start, + (uint64_t) vm_end, + (uint64_t) file_start, + (uint64_t) file_end)); } - + ret = map_segment(map, + vm_start, + vm_end, + control, + file_start, + file_end, + initprot, + maxprot); + if (ret) { + return LOAD_NOSPACE; + } + +#if FIXME /* * If the file didn't end on a page boundary, * we need to zero the leftover. */ delta_size = map_size - scp->filesize; -#if FIXME if (delta_size > 0) { mach_vm_offset_t tmp; - ret = mach_vm_allocate(kernel_map, &tmp, delta_size, VM_FLAGS_ANYWHERE); - if (ret != KERN_SUCCESS) + ret = mach_vm_allocate(kernel_map, &tmp, delta_size, VM_FLAGS_ANYWHERE| VM_MAKE_TAG(VM_KERN_MEMORY_BSD)); + if (ret != KERN_SUCCESS) { return(LOAD_RESOURCE); + } if (copyout(tmp, map_addr + scp->filesize, delta_size)) { @@ -1151,40 +1384,66 @@ load_segment( * than the size from the file, we need to allocate * zero fill memory for the rest. 
*/ - delta_size = seg_size - map_size; + if ((vm_end - vm_start) > (file_end - file_start)) { + delta_size = (vm_end - vm_start) - (file_end - file_start); + } else { + delta_size = 0; + } if (delta_size > 0) { - mach_vm_offset_t tmp = map_addr + map_size; - - ret = mach_vm_map(map, &tmp, delta_size, 0, VM_FLAGS_FIXED, - NULL, 0, FALSE, - scp->initprot, scp->maxprot, - VM_INHERIT_DEFAULT); - if (ret != KERN_SUCCESS) + mach_vm_offset_t tmp; + + tmp = vm_start + (file_end - file_start); + if (verbose) { + MACHO_PRINTF(("++++++ load_segment: " + "delta mapping vm [0x%llx:0x%llx]\n", + (uint64_t) tmp, + (uint64_t) (tmp + delta_size))); + } + kr = map_segment(map, + tmp, + tmp + delta_size, + MEMORY_OBJECT_CONTROL_NULL, + 0, + delta_size, + scp->initprot, + scp->maxprot); + if (kr != KERN_SUCCESS) { return(LOAD_NOSPACE); + } } if ( (scp->fileoff == 0) && (scp->filesize != 0) ) - result->mach_header = map_addr; + result->mach_header = vm_offset; if (scp->flags & SG_PROTECTED_VERSION_1) { - ret = unprotect_dsmos_segment(scp->fileoff, - scp->filesize, - vp, - pager_offset, - map, - map_addr, - map_size); + ret = unprotect_dsmos_segment(file_start, + file_end - file_start, + vp, + pager_offset, + map, + vm_start, + vm_end - vm_start); + if (ret != LOAD_SUCCESS) { + return ret; + } } else { ret = LOAD_SUCCESS; } - if (LOAD_SUCCESS == ret && filetype == MH_DYLINKER && - result->all_image_info_addr == MACH_VM_MIN_ADDRESS) + + if (LOAD_SUCCESS == ret && + filetype == MH_DYLINKER && + result->all_image_info_addr == MACH_VM_MIN_ADDRESS) { note_all_image_info_section(scp, - LC_SEGMENT_64 == lcp->cmd, single_section_size, - (const char *)lcp + segment_command_size, slide, result); + LC_SEGMENT_64 == lcp->cmd, + single_section_size, + ((const char *)lcp + + segment_command_size), + slide, + result); + } if (result->entry_point != MACH_VM_MIN_ADDRESS) { - if ((result->entry_point >= map_addr) && (result->entry_point < (map_addr + map_size))) { + if ((result->entry_point >= vm_offset) 
&& (result->entry_point < (vm_offset + vm_size))) { if ((scp->initprot & (VM_PROT_READ|VM_PROT_EXECUTE)) == (VM_PROT_READ|VM_PROT_EXECUTE)) { result->validentry = 1; } else { @@ -1274,6 +1533,7 @@ load_main( return(LOAD_FAILURE); } + result->unixproc = TRUE; result->thread_count++; @@ -1350,6 +1610,7 @@ load_unixthread( if (ret != LOAD_SUCCESS) return (ret); + result->unixproc = TRUE; result->thread_count++; @@ -1747,8 +2008,9 @@ load_code_signature( cputype, macho_offset, addr, - lcp->datasize, - 0)) { + lcp->datasize, + 0, + &blob)) { ret = LOAD_FAILURE; goto out; } else { @@ -1760,11 +2022,12 @@ load_code_signature( ubc_cs_validation_bitmap_allocate( vp ); #endif - blob = ubc_cs_blob_get(vp, cputype, macho_offset); - ret = LOAD_SUCCESS; out: if (ret == LOAD_SUCCESS) { + if (blob == NULL) + panic("sucess, but no blob!"); + result->csflags |= blob->csb_flags; result->platform_binary = blob->csb_platform_binary; result->cs_end_offset = blob->csb_end_offset; @@ -1782,15 +2045,16 @@ load_code_signature( static load_return_t set_code_unprotect( - struct encryption_info_command *eip, - caddr_t addr, - vm_map_t map, - int64_t slide, - struct vnode *vp, - cpu_type_t cputype, - cpu_subtype_t cpusubtype) + struct encryption_info_command *eip, + caddr_t addr, + vm_map_t map, + int64_t slide, + struct vnode *vp, + off_t macho_offset, + cpu_type_t cputype, + cpu_subtype_t cpusubtype) { - int result, len; + int error, len; pager_crypt_info_t crypt_info; const char * cryptname = 0; char *vpath; @@ -1799,6 +2063,7 @@ set_code_unprotect( struct segment_command_64 *seg64; struct segment_command *seg32; vm_map_offset_t map_offset, map_size; + vm_object_offset_t crypto_backing_offset; kern_return_t kr; if (eip->cmdsize < sizeof(*eip)) return LOAD_BADMACHO; @@ -1826,8 +2091,8 @@ set_code_unprotect( if(vpath == NULL) return LOAD_FAILURE; len = MAXPATHLEN; - result = vn_getpath(vp, vpath, &len); - if(result) { + error = vn_getpath(vp, vpath, &len); + if (error) { FREE_ZONE(vpath, 
MAXPATHLEN, M_NAMEI); return LOAD_FAILURE; } @@ -1838,6 +2103,12 @@ set_code_unprotect( .cputype = cputype, .cpusubtype = cpusubtype}; kr=text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data); +#if DEVELOPMENT || DEBUG + struct proc *p; + p = current_proc(); + printf("APPLE_PROTECT: %d[%s] map %p %s(%s) -> 0x%x\n", + p->p_pid, p->p_comm, map, __FUNCTION__, vpath, kr); +#endif /* DEVELOPMENT || DEBUG */ FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI); if(kr) { @@ -1876,6 +2147,7 @@ set_code_unprotect( eip->cryptoff+eip->cryptsize)) { map_offset = seg64->vmaddr + eip->cryptoff - seg64->fileoff + slide; map_size = eip->cryptsize; + crypto_backing_offset = macho_offset + eip->cryptoff; goto remap_now; } case LC_SEGMENT: @@ -1885,6 +2157,7 @@ set_code_unprotect( eip->cryptoff+eip->cryptsize)) { map_offset = seg32->vmaddr + eip->cryptoff - seg32->fileoff + slide; map_size = eip->cryptsize; + crypto_backing_offset = macho_offset + eip->cryptoff; goto remap_now; } } @@ -1895,10 +2168,16 @@ set_code_unprotect( remap_now: /* now remap using the decrypter */ - kr = vm_map_apple_protected(map, map_offset, map_offset+map_size, &crypt_info); - if(kr) { + MACHO_PRINTF(("+++ set_code_unprotect: vm[0x%llx:0x%llx]\n", + (uint64_t) map_offset, + (uint64_t) (map_offset+map_size))); + kr = vm_map_apple_protected(map, + map_offset, + map_offset+map_size, + crypto_backing_offset, + &crypt_info); + if (kr) { printf("set_code_unprotect(): mapping failed with %x\n", kr); - crypt_info.crypt_end(crypt_info.crypt_ops); return LOAD_PROTECT; } diff --git a/bsd/kern/mach_loader.h b/bsd/kern/mach_loader.h index b6ab1feb1..5600cb42f 100644 --- a/bsd/kern/mach_loader.h +++ b/bsd/kern/mach_loader.h @@ -64,6 +64,7 @@ typedef struct _load_result { prog_allocated_stack :1, prog_stack_size : 1, validentry :1, + has_pagezero :1, using_lcmain :1, :0; unsigned int csflags; diff --git a/bsd/kern/mach_process.c b/bsd/kern/mach_process.c index ef8ebffcd..d8bc4f07a 100644 --- a/bsd/kern/mach_process.c +++ 
b/bsd/kern/mach_process.c @@ -156,20 +156,38 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval) * Intercept and deal with "please trace me" request. */ if (uap->req == PT_TRACE_ME) { - proc_lock(p); - SET(p->p_lflag, P_LTRACED); - /* Non-attached case, our tracer is our parent. */ - p->p_oppid = p->p_ppid; - /* Check whether child and parent are allowed to run modified - * code (they'll have to) */ - struct proc *pproc=proc_find(p->p_oppid); - proc_unlock(p); - cs_allow_invalid(p); - if(pproc) { +retry_trace_me:; + proc_t pproc = proc_parent(p); + if (pproc == NULL) + return (EINVAL); +#if CONFIG_MACF + /* + * NB: Cannot call kauth_authorize_process(..., KAUTH_PROCESS_CANTRACE, ...) + * since that assumes the process being checked is the current process + * when, in this case, it is the current process's parent. + * Most of the other checks in cantrace() don't apply either. + */ + if ((error = mac_proc_check_debug(pproc, p)) == 0) { +#endif + proc_lock(p); + /* Make sure the process wasn't re-parented. */ + if (p->p_ppid != pproc->p_pid) { + proc_unlock(p); + proc_rele(pproc); + goto retry_trace_me; + } + SET(p->p_lflag, P_LTRACED); + /* Non-attached case, our tracer is our parent. */ + p->p_oppid = p->p_ppid; + proc_unlock(p); + /* Child and parent will have to be able to run modified code. 
*/ + cs_allow_invalid(p); cs_allow_invalid(pproc); - proc_rele(pproc); +#if CONFIG_MACF } - return(0); +#endif + proc_rele(pproc); + return (error); } if (uap->req == PT_SIGEXC) { proc_lock(p); @@ -200,12 +218,16 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval) task = t->task; if (uap->req == PT_ATTACHEXC) { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-declarations" uap->req = PT_ATTACH; tr_sigexc = 1; } if (uap->req == PT_ATTACH) { +#pragma clang diagnostic pop int err; - + + if ( kauth_authorize_process(proc_ucred(p), KAUTH_PROCESS_CANTRACE, t, (uintptr_t)&err, 0, 0) == 0 ) { /* it's OK to attach */ @@ -403,8 +425,10 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval) goto out; } th_act = port_name_to_thread(CAST_MACH_PORT_TO_NAME(uap->addr)); - if (th_act == THREAD_NULL) - return (ESRCH); + if (th_act == THREAD_NULL) { + error = ESRCH; + goto out; + } ut = (uthread_t)get_bsdthread_info(th_act); if (uap->data) ut->uu_siglist |= sigmask(uap->data); diff --git a/bsd/kern/makesyscalls.sh b/bsd/kern/makesyscalls.sh index 13d56f58a..7317f55b8 100755 --- a/bsd/kern/makesyscalls.sh +++ b/bsd/kern/makesyscalls.sh @@ -502,7 +502,7 @@ s/\$//g argtype[i] == "sigset_t" || argtype[i] == "gid_t" || argtype[i] == "unsigned int" || argtype[i] == "mode_t" || argtype[i] == "key_t" || argtype[i] == "mach_port_name_t" || argtype[i] == "au_asid_t" || - argtype[i] == "associd_t" || argtype[i] == "connid_t") { + argtype[i] == "sae_associd_t" || argtype[i] == "sae_connid_t") { munge32 = munge32 "w" size32 += 4 } @@ -582,7 +582,7 @@ s/\$//g } } - printf("#if CONFIG_REQUIRES_U32_MUNGING\n") > sysent + printf("#if CONFIG_REQUIRES_U32_MUNGING || (__arm__ && (__BIGGEST_ALIGNMENT__ > 4))\n") > sysent printf("\t{ \(sy_call_t *\)%s, %s, %s, %s, %s},", tempname, munge32, munge_ret, argssize, size32) > sysent linesize = length(tempname) + length(munge32) + \ @@ -673,7 +673,7 @@ s/\$//g printf("};\n") > sysent printf("int nsysent 
= sizeof(sysent) / sizeof(sysent[0]);\n") > sysent printf("/* Verify that NUM_SYSENT reflects the latest syscall count */\n") > sysent - printf("int nsysent_size_check[((sizeof(sysent) / sizeof(sysent[0])) == NUM_SYSENT) ? 1 : -1] __unused;\n") > sysent + printf("_Static_assert(((sizeof(sysent) / sizeof(sysent[0])) == NUM_SYSENT), \"NUM_SYSENT needs to be updated to match syscall count\");\n") > sysent printf("};\n") > syscallnamestempfile printf("#define\t%sMAXSYSCALL\t%d\n", syscallprefix, syscall_num) \ diff --git a/bsd/kern/netboot.c b/bsd/kern/netboot.c index 22e6497d2..e8bdddb34 100644 --- a/bsd/kern/netboot.c +++ b/bsd/kern/netboot.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include @@ -603,6 +602,40 @@ find_interface(void) return (ifp); } +static const struct sockaddr_in blank_sin = { + sizeof(struct sockaddr_in), + AF_INET, + 0, + { 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0 } +}; + +static int +inet_aifaddr(struct socket * so, const char * name, + const struct in_addr * addr, + const struct in_addr * mask, + const struct in_addr * broadcast) +{ + struct ifaliasreq ifra; + + bzero(&ifra, sizeof(ifra)); + strlcpy(ifra.ifra_name, name, sizeof(ifra.ifra_name)); + if (addr) { + *((struct sockaddr_in *)(void *)&ifra.ifra_addr) = blank_sin; + ((struct sockaddr_in *)(void *)&ifra.ifra_addr)->sin_addr = *addr; + } + if (mask) { + *((struct sockaddr_in *)(void *)&ifra.ifra_mask) = blank_sin; + ((struct sockaddr_in *)(void *)&ifra.ifra_mask)->sin_addr = *mask; + } + if (broadcast) { + *((struct sockaddr_in *)(void *)&ifra.ifra_broadaddr) = blank_sin; + ((struct sockaddr_in *)(void *)&ifra.ifra_broadaddr)->sin_addr = *broadcast; + } + return (ifioctl(so, SIOCAIFADDR, (caddr_t)&ifra, current_proc())); +} + + int netboot_mountroot(void) { @@ -642,12 +675,8 @@ netboot_mountroot(void) /* grab information from the registry */ if (get_ip_parameters(&iaddr, &netmask, &router) == FALSE) { - /* use DHCP to retrieve IP address, netmask and router */ - error = 
dhcp(ifp, &iaddr, 64, &netmask, &router, procp); - if (error) { - printf("netboot: DHCP failed %d\n", error); - goto failed; - } + printf("netboot: can't retrieve IP parameters\n"); + goto failed; } printf("netboot: IP address " IP_FORMAT, IP_LIST(&iaddr)); if (netmask.s_addr) { diff --git a/bsd/kern/policy_check.c b/bsd/kern/policy_check.c index 95ae2d593..1c8791c9e 100644 --- a/bsd/kern/policy_check.c +++ b/bsd/kern/policy_check.c @@ -118,7 +118,7 @@ common_hook(void) return rv; } -#if (MAC_POLICY_OPS_VERSION != 32) +#if (MAC_POLICY_OPS_VERSION != 37) # error "struct mac_policy_ops doesn't match definition in mac_policy.h" #endif /* @@ -201,15 +201,15 @@ static struct mac_policy_ops policy_ops = { CHECK_SET_HOOK(ipq_label_init) CHECK_SET_HOOK(ipq_label_update) - CHECK_SET_HOOK(lctx_check_label_update) - CHECK_SET_HOOK(lctx_label_destroy) - CHECK_SET_HOOK(lctx_label_externalize) - CHECK_SET_HOOK(lctx_label_init) - CHECK_SET_HOOK(lctx_label_internalize) - CHECK_SET_HOOK(lctx_label_update) - CHECK_SET_HOOK(lctx_notify_create) - CHECK_SET_HOOK(lctx_notify_join) - CHECK_SET_HOOK(lctx_notify_leave) + .mpo_reserved1 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved2 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved3 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved4 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved5 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved6 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved7 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved8 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved9 = (mpo_reserved_hook_t *)common_hook, CHECK_SET_HOOK(mbuf_label_associate_bpfdesc) CHECK_SET_HOOK(mbuf_label_associate_ifnet) @@ -265,13 +265,13 @@ static struct mac_policy_ops policy_ops = { CHECK_SET_HOOK(system_check_sysctlbyname) CHECK_SET_HOOK(proc_check_inherit_ipc_ports) CHECK_SET_HOOK(vnode_check_rename) - .mpo_reserved4 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved5 = (mpo_reserved_hook_t *)common_hook, - 
.mpo_reserved6 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved7 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved8 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved9 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved10 = (mpo_reserved_hook_t *)common_hook, + CHECK_SET_HOOK(kext_check_query) + CHECK_SET_HOOK(iokit_check_nvram_get) + CHECK_SET_HOOK(iokit_check_nvram_set) + CHECK_SET_HOOK(iokit_check_nvram_delete) + CHECK_SET_HOOK(proc_check_expose_task) + CHECK_SET_HOOK(proc_check_set_host_special_port) + CHECK_SET_HOOK(proc_check_set_host_exception_port) .mpo_reserved11 = (mpo_reserved_hook_t *)common_hook, .mpo_reserved12 = (mpo_reserved_hook_t *)common_hook, .mpo_reserved13 = (mpo_reserved_hook_t *)common_hook, @@ -461,8 +461,7 @@ static struct mac_policy_ops policy_ops = { CHECK_SET_HOOK(vnode_check_uipc_bind) CHECK_SET_HOOK(vnode_check_uipc_connect) - /* CHECK_SET_HOOK(proc_check_run_cs_invalid) */ - .mpo_proc_check_run_cs_invalid = (mac_proc_check_run_cs_invalid_t *)common_hook, + CHECK_SET_HOOK(proc_check_run_cs_invalid) CHECK_SET_HOOK(proc_check_suspend_resume) CHECK_SET_HOOK(thread_userret) @@ -486,8 +485,8 @@ static struct mac_policy_ops policy_ops = { CHECK_SET_HOOK(vnode_notify_rename) - CHECK_SET_HOOK(thread_label_init) - CHECK_SET_HOOK(thread_label_destroy) + .mpo_reserved32 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved33 = (mpo_reserved_hook_t *)common_hook, CHECK_SET_HOOK(system_check_kas_info) diff --git a/bsd/kern/posix_shm.c b/bsd/kern/posix_shm.c index 4a0a848a2..e14baf815 100644 --- a/bsd/kern/posix_shm.c +++ b/bsd/kern/posix_shm.c @@ -742,7 +742,8 @@ pshm_truncate(__unused proc_t p, struct fileproc *fp, __unused int fd, pshmobj_next_p = &pshmobj->pshmo_next; } - pinfo->pshm_flags = PSHM_ALLOCATED; + pinfo->pshm_flags |= PSHM_ALLOCATED; + pinfo->pshm_flags &= ~(PSHM_ALLOCATING); pinfo->pshm_length = total_size; PSHM_SUBSYS_UNLOCK(); return(0); diff --git a/bsd/kern/proc_info.c b/bsd/kern/proc_info.c index 
a2b82a6e4..a5da30245 100644 --- a/bsd/kern/proc_info.c +++ b/bsd/kern/proc_info.c @@ -85,6 +85,11 @@ #include #include +/* Needed by proc_listcoalitions() */ +#ifdef CONFIG_COALITIONS +#include +#endif + struct pshmnode; struct psemnode; struct pipe; @@ -133,6 +138,7 @@ int __attribute__ ((noinline)) proc_dirtycontrol(int pid, int flavor, uint64_t a int __attribute__ ((noinline)) proc_terminate(int pid, int32_t * retval); int __attribute__ ((noinline)) proc_pid_rusage(int pid, int flavor, user_addr_t buffer, int32_t * retval); int __attribute__ ((noinline)) proc_pidoriginatorinfo(int pid, int flavor, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) proc_listcoalitions(int flavor, int coaltype, user_addr_t buffer, uint32_t buffersize, int32_t *retval); /* protos for procpidinfo calls */ int __attribute__ ((noinline)) proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval); @@ -154,23 +160,24 @@ void __attribute__ ((noinline)) proc_piduniqidentifierinfo(proc_t p, struct proc void __attribute__ ((noinline)) proc_archinfo(proc_t p, struct proc_archinfo *pai); void __attribute__ ((noinline)) proc_pidcoalitioninfo(proc_t p, struct proc_pidcoalitioninfo *pci); int __attribute__ ((noinline)) proc_pidnoteexit(proc_t p, uint64_t arg, uint32_t *data); +int __attribute__ ((noinline)) proc_pidoriginatorpid_uuid(uuid_t uuid, uint32_t buffersize, pid_t *pid); /* protos for proc_pidfdinfo calls */ -int __attribute__ ((noinline)) pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int __attribute__ ((noinline)) pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int __attribute__ ((noinline)) pid_socketinfo(socket_t so, struct fileproc *fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int __attribute__ 
((noinline)) pid_pseminfo(struct psemnode * psem, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int __attribute__ ((noinline)) pid_pshminfo(struct pshmnode * pshm, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int __attribute__ ((noinline)) pid_pipeinfo(struct pipe * p, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int __attribute__ ((noinline)) pid_kqueueinfo(struct kqueue * kq, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int __attribute__ ((noinline)) pid_atalkinfo(struct atalk * at, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp,proc_t proc, int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp,proc_t proc, int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_socketinfo(socket_t so, struct fileproc *fp,proc_t proc, int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_pseminfo(struct psemnode * psem, struct fileproc * fp, proc_t proc, int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_pshminfo(struct pshmnode * pshm, struct fileproc * fp, proc_t proc, int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_pipeinfo(struct pipe * p, struct fileproc * fp, proc_t proc, int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_kqueueinfo(struct kqueue * kq, struct fileproc * fp, proc_t proc, int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval); 
+int __attribute__ ((noinline)) pid_atalkinfo(struct atalk * at, struct fileproc * fp, proc_t proc, int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval); /* protos for misc */ int fill_vnodeinfo(vnode_t vp, struct vnode_info *vinfo); -void fill_fileinfo(struct fileproc * fp, int closeonexec, struct proc_fileinfo * finfo); +void fill_fileinfo(struct fileproc * fp, proc_t proc, int fd, struct proc_fileinfo * finfo); int proc_security_policy(proc_t targetp, int callnum, int flavor, boolean_t check_same_user); static void munge_vinfo_stat(struct stat64 *sbp, struct vinfo_stat *vsbp); static int proc_piduuidinfo(pid_t pid, uuid_t uuid_buf, uint32_t buffersize); @@ -236,6 +243,9 @@ proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t b return (proc_pid_rusage(pid, flavor, buffer, retval)); case PROC_INFO_CALL_PIDORIGINATORINFO: return (proc_pidoriginatorinfo(pid, flavor, buffer, buffersize, retval)); + case PROC_INFO_CALL_LISTCOALITIONS: + return proc_listcoalitions(pid /* flavor */, flavor /* coaltype */, buffer, + buffersize, retval); default: return(EINVAL); } @@ -1146,10 +1156,10 @@ proc_piduuidinfo(pid_t pid, uuid_t uuid_buf, uint32_t buffersize) } /* - * Function to get the uuid of the originator of the voucher. + * Function to get the uuid and pid of the originator of the voucher. */ int -proc_pidoriginatoruuid(uuid_t uuid, uint32_t buffersize) +proc_pidoriginatorpid_uuid(uuid_t uuid, uint32_t buffersize, pid_t *pid) { pid_t originator_pid; kern_return_t kr; @@ -1171,10 +1181,21 @@ proc_pidoriginatoruuid(uuid_t uuid, uint32_t buffersize) return error; } + *pid = originator_pid; error = proc_piduuidinfo(originator_pid, uuid, buffersize); return error; } +/* + * Function to get the uuid of the originator of the voucher. 
+ */ +int +proc_pidoriginatoruuid(uuid_t uuid, uint32_t buffersize) +{ + pid_t originator_pid; + return (proc_pidoriginatorpid_uuid(uuid, buffersize, &originator_pid)); +} + /***************************** proc_pidoriginatorinfo ***************************/ int @@ -1190,6 +1211,9 @@ proc_pidoriginatorinfo(int pid, int flavor, user_addr_t buffer, uint32_t buffer case PROC_PIDORIGINATOR_BGSTATE: size = PROC_PIDORIGINATOR_BGSTATE_SIZE; break; + case PROC_PIDORIGINATOR_PID_UUID: + size = PROC_PIDORIGINATOR_PID_UUID_SIZE; + break; default: return(EINVAL); } @@ -1214,6 +1238,24 @@ proc_pidoriginatorinfo(int pid, int flavor, user_addr_t buffer, uint32_t buffer } break; + case PROC_PIDORIGINATOR_PID_UUID: { + struct proc_originatorinfo originator_info; + + error = proc_pidoriginatorpid_uuid(originator_info.originator_uuid, + sizeof(uuid_t), &originator_info.originator_pid); + if (error != 0) + goto out; + + originator_info.p_reserve2 = 0; + originator_info.p_reserve3 = 0; + originator_info.p_reserve4 = 0; + + error = copyout(&originator_info, buffer, size); + if (error == 0) + *retval = size; + } + break; + case PROC_PIDORIGINATOR_BGSTATE: { uint32_t is_backgrounded; error = proc_get_originatorbgstate(&is_backgrounded); @@ -1233,6 +1275,106 @@ proc_pidoriginatorinfo(int pid, int flavor, user_addr_t buffer, uint32_t buffer return error; } +/***************************** proc_listcoalitions ***************************/ +int proc_listcoalitions(int flavor, int type, user_addr_t buffer, + uint32_t buffersize, int32_t *retval) +{ +#if CONFIG_COALITIONS + int error = ENOTSUP; + int coal_type; + uint32_t elem_size; + void *coalinfo = NULL; + uint32_t k_buffersize = 0, copyout_sz = 0; + int ncoals = 0, ncoals_ = 0; + + /* struct procinfo_coalinfo; */ + + switch (flavor) { + case LISTCOALITIONS_ALL_COALS: + elem_size = LISTCOALITIONS_ALL_COALS_SIZE; + coal_type = -1; + break; + case LISTCOALITIONS_SINGLE_TYPE: + elem_size = LISTCOALITIONS_SINGLE_TYPE_SIZE; + coal_type = type; + 
break; + default: + return EINVAL; + } + + /* find the total number of coalitions */ + ncoals = coalitions_get_list(coal_type, NULL, 0); + + if (ncoals == 0 || buffer == 0 || buffersize == 0) { + /* + * user just wants buffer size + * or there are no coalitions + */ + error = 0; + *retval = (int)(ncoals * elem_size); + goto out; + } + + k_buffersize = ncoals * elem_size; + coalinfo = kalloc((vm_size_t)k_buffersize); + if (!coalinfo) { + error = ENOMEM; + goto out; + } + bzero(coalinfo, k_buffersize); + + switch (flavor) { + case LISTCOALITIONS_ALL_COALS: + case LISTCOALITIONS_SINGLE_TYPE: + ncoals_ = coalitions_get_list(coal_type, coalinfo, ncoals); + break; + default: + panic("memory corruption?!"); + } + + if (ncoals_ == 0) { + /* all the coalitions disappeared... weird but valid */ + error = 0; + *retval = 0; + goto out; + } + + /* + * Some coalitions may have disappeared between our initial check, + * and the actual list acquisition. + * Only copy out what we really need. + */ + copyout_sz = k_buffersize; + if (ncoals_ < ncoals) + copyout_sz = ncoals_ * elem_size; + + /* + * copy the list up to user space + * (we're guaranteed to have a non-null pointer/size here) + */ + error = copyout(coalinfo, buffer, + copyout_sz < buffersize ? 
copyout_sz : buffersize); + + if (error == 0) + *retval = (int)copyout_sz; + +out: + if (coalinfo) + kfree(coalinfo, k_buffersize); + + return error; +#else + /* no coalition support */ + (void)flavor; + (void)type; + (void)buffer; + (void)buffersize; + (void)retval; + return ENOTSUP; +#endif +} + + /********************************** proc_pidinfo ********************************/ @@ -1365,6 +1507,7 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu case PROC_PIDT_SHORTBSDINFO: case PROC_PIDUNIQIDENTIFIERINFO: case PROC_PIDPATHINFO: + case PROC_PIDCOALITIONINFO: check_same_user = NO_CHECK_SAME_USER; break; default: @@ -1545,7 +1688,8 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu if (error == 0) { *retval = sizeof(struct proc_archinfo); } - } + } + break; case PROC_PIDCOALITIONINFO: { struct proc_pidcoalitioninfo pci; @@ -1557,7 +1701,7 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu } break; - case PROC_PIDNOTEEXIT: { + case PROC_PIDNOTEEXIT: { uint32_t data; error = proc_pidnoteexit(p, arg, &data); if (error == 0) { @@ -1582,8 +1726,8 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu } -int -pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) +int +pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, proc_t proc, int fd, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { struct vnode_fdinfo vfi; int error= 0; @@ -1592,7 +1736,7 @@ pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, u return(error); } bzero(&vfi, sizeof(struct vnode_fdinfo)); - fill_fileinfo(fp, closeonexec, &vfi.pfi); + fill_fileinfo(fp, proc, fd, &vfi.pfi); error = fill_vnodeinfo(vp, &vfi.pvi); vnode_put(vp); if (error == 0) { @@ -1603,8 +1747,8 @@ pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int 
closeonexec, u return(error); } -int -pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) +int +pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, proc_t proc, int fd, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { struct vnode_fdinfowithpath vfip; int count, error= 0; @@ -1613,7 +1757,7 @@ pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexe return(error); } bzero(&vfip, sizeof(struct vnode_fdinfowithpath)); - fill_fileinfo(fp, closeonexec, &vfip.pfi); + fill_fileinfo(fp, proc, fd, &vfip.pfi); error = fill_vnodeinfo(vp, &vfip.pvip.vip_vi) ; if (error == 0) { count = MAXPATHLEN; @@ -1628,8 +1772,8 @@ pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexe return(error); } -void -fill_fileinfo(struct fileproc * fp, int closeonexec, struct proc_fileinfo * fproc) +void +fill_fileinfo(struct fileproc * fp, proc_t proc, int fd, struct proc_fileinfo * fproc) { fproc->fi_openflags = fp->f_fglob->fg_flag; fproc->fi_status = 0; @@ -1637,9 +1781,12 @@ fill_fileinfo(struct fileproc * fp, int closeonexec, struct proc_fileinfo * fpro fproc->fi_type = FILEGLOB_DTYPE(fp->f_fglob); if (fp->f_fglob->fg_count > 1) fproc->fi_status |= PROC_FP_SHARED; - if (closeonexec != 0) - fproc->fi_status |= PROC_FP_CLEXEC; - + if (proc != PROC_NULL) { + if ((FDFLAGS_GET(proc, fd) & UF_EXCLOSE) != 0) + fproc->fi_status |= PROC_FP_CLEXEC; + if ((FDFLAGS_GET(proc, fd) & UF_FORKCLOSE) != 0) + fproc->fi_status |= PROC_FP_CLFORK; + } if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) { fproc->fi_status |= PROC_FP_GUARDED; fproc->fi_guardflags = 0; @@ -1685,34 +1832,34 @@ fill_vnodeinfo(vnode_t vp, struct vnode_info *vinfo) } int -pid_socketinfo(socket_t so, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) +pid_socketinfo(socket_t so, struct fileproc *fp, proc_t proc, 
int fd, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { #if SOCKETS struct socket_fdinfo s; int error = 0; bzero(&s, sizeof(struct socket_fdinfo)); - fill_fileinfo(fp, closeonexec, &s.pfi); + fill_fileinfo(fp, proc, fd, &s.pfi); if ((error = fill_socketinfo(so, &s.psi)) == 0) { if ((error = copyout(&s, buffer, sizeof(struct socket_fdinfo))) == 0) *retval = sizeof(struct socket_fdinfo); } return (error); #else -#pragma unused(so, fp, closeonexec, buffer) +#pragma unused(so, fp, proc, fd, buffer) *retval = 0; return (ENOTSUP); #endif } int -pid_pseminfo(struct psemnode *psem, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) +pid_pseminfo(struct psemnode *psem, struct fileproc *fp, proc_t proc, int fd, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { struct psem_fdinfo pseminfo; int error = 0; - + bzero(&pseminfo, sizeof(struct psem_fdinfo)); - fill_fileinfo(fp, closeonexec, &pseminfo.pfi); + fill_fileinfo(fp, proc, fd, &pseminfo.pfi); if ((error = fill_pseminfo(psem, &pseminfo.pseminfo)) == 0) { if ((error = copyout(&pseminfo, buffer, sizeof(struct psem_fdinfo))) == 0) @@ -1723,13 +1870,13 @@ pid_pseminfo(struct psemnode *psem, struct fileproc *fp, int closeonexec, user_ } int -pid_pshminfo(struct pshmnode *pshm, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) +pid_pshminfo(struct pshmnode *pshm, struct fileproc *fp, proc_t proc, int fd, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { struct pshm_fdinfo pshminfo; int error = 0; - + bzero(&pshminfo, sizeof(struct pshm_fdinfo)); - fill_fileinfo(fp, closeonexec, &pshminfo.pfi); + fill_fileinfo(fp, proc, fd, &pshminfo.pfi); if ((error = fill_pshminfo(pshm, &pshminfo.pshminfo)) == 0) { if ((error = copyout(&pshminfo, buffer, sizeof(struct pshm_fdinfo))) == 0) @@ -1740,13 +1887,13 @@ pid_pshminfo(struct pshmnode *pshm, struct fileproc *fp, int 
closeonexec, user_ } int -pid_pipeinfo(struct pipe * p, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) +pid_pipeinfo(struct pipe * p, struct fileproc *fp, proc_t proc, int fd, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { struct pipe_fdinfo pipeinfo; int error = 0; bzero(&pipeinfo, sizeof(struct pipe_fdinfo)); - fill_fileinfo(fp, closeonexec, &pipeinfo.pfi); + fill_fileinfo(fp, proc, fd, &pipeinfo.pfi); if ((error = fill_pipeinfo(p, &pipeinfo.pipeinfo)) == 0) { if ((error = copyout(&pipeinfo, buffer, sizeof(struct pipe_fdinfo))) == 0) *retval = sizeof(struct pipe_fdinfo); @@ -1756,14 +1903,18 @@ pid_pipeinfo(struct pipe * p, struct fileproc *fp, int closeonexec, user_addr_ } int -pid_kqueueinfo(struct kqueue * kq, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) +pid_kqueueinfo(struct kqueue * kq, struct fileproc *fp, proc_t proc, int fd, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { struct kqueue_fdinfo kqinfo; int error = 0; - + bzero(&kqinfo, sizeof(struct kqueue_fdinfo)); - - fill_fileinfo(fp, closeonexec, &kqinfo.pfi); + + /* not all kq's are associated with a file (e.g. 
workqkq) */ + if (fp) { + assert(fd >= 0); + fill_fileinfo(fp, proc, fd, &kqinfo.pfi); + } if ((error = fill_kqueueinfo(kq, &kqinfo.kqueueinfo)) == 0) { if ((error = copyout(&kqinfo, buffer, sizeof(struct kqueue_fdinfo))) == 0) @@ -1774,7 +1925,7 @@ pid_kqueueinfo(struct kqueue * kq, struct fileproc *fp, int closeonexec, user_a } int -pid_atalkinfo(__unused struct atalk * at, __unused struct fileproc *fp, __unused int closeonexec, __unused user_addr_t buffer, __unused uint32_t buffersize, __unused int32_t * retval) +pid_atalkinfo(__unused struct atalk * at, __unused struct fileproc *fp, __unused proc_t proc, __unused int fd, __unused user_addr_t buffer, __unused uint32_t buffersize, __unused int32_t * retval) { return ENOTSUP; } @@ -1787,9 +1938,8 @@ proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffer { proc_t p; int error = ENOTSUP; - struct fileproc * fp; + struct fileproc * fp = NULL; uint32_t size; - int closeonexec = 0; switch (flavor) { case PROC_PIDFDVNODEINFO: @@ -1813,6 +1963,11 @@ proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffer case PROC_PIDFDKQUEUEINFO: size = PROC_PIDFDKQUEUEINFO_SIZE; break; + case PROC_PIDFDKQUEUE_EXTINFO: + size = PROC_PIDFDKQUEUE_EXTINFO_SIZE; + if (buffer == (user_addr_t)0) + size = 0; + break; case PROC_PIDFDATALKINFO: size = PROC_PIDFDATALKINFO_SIZE; break; @@ -1843,8 +1998,7 @@ proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffer goto out1; } /* no need to be under the fdlock */ - closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE; - error = pid_vnodeinfo(vp, vid, fp, closeonexec, buffer, buffersize, retval); + error = pid_vnodeinfo(vp, vid, fp, p, fd, buffer, buffersize, retval); } break; @@ -1857,8 +2011,7 @@ proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffer } /* no need to be under the fdlock */ - closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE; - error = pid_vnodeinfopath(vp, vid, fp, closeonexec, 
buffer, buffersize, retval); + error = pid_vnodeinfopath(vp, vid, fp, p, fd, buffer, buffersize, retval); } break; @@ -1869,8 +2022,7 @@ proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffer goto out1; } /* no need to be under the fdlock */ - closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE; - error = pid_socketinfo(so, fp, closeonexec, buffer, buffersize, retval); + error = pid_socketinfo(so, fp, p, fd, buffer, buffersize, retval); } break; @@ -1881,8 +2033,7 @@ proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffer goto out1; } /* no need to be under the fdlock */ - closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE; - error = pid_pseminfo(psem, fp, closeonexec, buffer, buffersize, retval); + error = pid_pseminfo(psem, fp, p, fd, buffer, buffersize, retval); } break; @@ -1893,8 +2044,7 @@ proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffer goto out1; } /* no need to be under the fdlock */ - closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE; - error = pid_pshminfo(pshm, fp, closeonexec, buffer, buffersize, retval); + error = pid_pshminfo(pshm, fp, p, fd, buffer, buffersize, retval); } break; @@ -1905,20 +2055,41 @@ proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffer goto out1; } /* no need to be under the fdlock */ - closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE; - error = pid_pipeinfo(cpipe, fp, closeonexec, buffer, buffersize, retval); + error = pid_pipeinfo(cpipe, fp, p, fd, buffer, buffersize, retval); } break; case PROC_PIDFDKQUEUEINFO: { struct kqueue * kq; - if ((error = fp_getfkq(p, fd, &fp, &kq)) !=0) { + if (fd == -1) { + if ((kq = p->p_wqkqueue) == NULL) { + /* wqkqueue is initialized on-demand */ + error = 0; + break; + } + } else if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0) { goto out1; } + /* no need to be under the fdlock */ - closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE; - error = pid_kqueueinfo(kq, fp, 
closeonexec, buffer, buffersize, retval); + error = pid_kqueueinfo(kq, fp, p, fd, buffer, buffersize, retval); + } + break; + + case PROC_PIDFDKQUEUE_EXTINFO: { + struct kqueue * kq; + + if (fd == -1) { + if ((kq = p->p_wqkqueue) == NULL) { + /* wqkqueue is initialized on-demand */ + error = 0; + break; + } + } else if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0) { + goto out1; + } + error = pid_kqueue_extinfo(p, kq, buffer, buffersize, retval); } break; @@ -1928,7 +2099,9 @@ proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffer } } - fp_drop(p, fd, fp , 0); + if (fp) { + fp_drop(p, fd, fp , 0); + } out1 : proc_rele(p); out: @@ -1966,7 +2139,7 @@ proc_fileport_info(__unused mach_port_name_t name, break; } vp = (struct vnode *)fg->fg_data; - error = pid_vnodeinfopath(vp, vnode_vid(vp), fp, 0, + error = pid_vnodeinfopath(vp, vnode_vid(vp), fp, PROC_NULL, 0, fia->fia_buffer, fia->fia_buffersize, fia->fia_retval); } break; @@ -1978,7 +2151,7 @@ proc_fileport_info(__unused mach_port_name_t name, break; } so = (socket_t)fg->fg_data; - error = pid_socketinfo(so, fp, 0, + error = pid_socketinfo(so, fp, PROC_NULL, 0, fia->fia_buffer, fia->fia_buffersize, fia->fia_retval); } break; @@ -1990,7 +2163,7 @@ proc_fileport_info(__unused mach_port_name_t name, break; } pshm = (struct pshmnode *)fg->fg_data; - error = pid_pshminfo(pshm, fp, 0, + error = pid_pshminfo(pshm, fp, PROC_NULL, 0, fia->fia_buffer, fia->fia_buffersize, fia->fia_retval); } break; @@ -2002,7 +2175,7 @@ proc_fileport_info(__unused mach_port_name_t name, break; } cpipe = (struct pipe *)fg->fg_data; - error = pid_pipeinfo(cpipe, fp, 0, + error = pid_pipeinfo(cpipe, fp, PROC_NULL, 0, fia->fia_buffer, fia->fia_buffersize, fia->fia_retval); } break; @@ -2024,7 +2197,7 @@ proc_pidfileportinfo(int pid, int flavor, mach_port_name_t name, uint32_t size; struct fileport_info_args fia; - /* fileport types are restricted by filetype_issendable() */ + /* fileport types are restricted by 
file_issendable() */ switch (flavor) { case PROC_PIDFILEPORTVNODEPATHINFO: @@ -2436,7 +2609,7 @@ void proc_pidcoalitioninfo(proc_t p, struct proc_pidcoalitioninfo *ppci) { bzero(ppci, sizeof(*ppci)); - ppci->coalition_id = proc_coalitionid(p); + proc_coalitionids(p, ppci->coalition_id); } diff --git a/bsd/kern/qsort.c b/bsd/kern/qsort.c index 70ded5774..cfa58910a 100644 --- a/bsd/kern/qsort.c +++ b/bsd/kern/qsort.c @@ -65,6 +65,7 @@ #include //#include +#include __private_extern__ void @@ -199,3 +200,9 @@ loop: SWAPINIT(a, es); } /* qsort(pn - r, r / es, es, cmp);*/ } + +/* private KPI */ +void +kx_qsort (void *array, size_t nm, size_t member_size, int (*cmpf)(const void *, const void *)) { + qsort (array, nm, member_size, cmpf); +} diff --git a/bsd/kern/socket_info.c b/bsd/kern/socket_info.c index 73725bbb4..4713bf260 100644 --- a/bsd/kern/socket_info.c +++ b/bsd/kern/socket_info.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005-2013 Apple Inc. All rights reserved. + * Copyright (c) 2005-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -222,24 +222,8 @@ fill_socketinfo(struct socket *so, struct socket_info *si) ev_pcb->evp_vendor_code_filter; kesi->kesi_class_filter = ev_pcb->evp_class_filter; kesi->kesi_subclass_filter = ev_pcb->evp_subclass_filter; - } else if (SOCK_PROTO(so) == SYSPROTO_CONTROL) { - struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; - struct kern_ctl_info *kcsi = - &si->soi_proto.pri_kern_ctl; - struct kctl *kctl = kcb->kctl; - - si->soi_kind = SOCKINFO_KERN_CTL; - - if (kctl == 0) - break; - kcsi->kcsi_id = kctl->id; - kcsi->kcsi_reg_unit = kctl->id; - kcsi->kcsi_flags = kctl->flags; - kcsi->kcsi_recvbufsize = kctl->recvbufsize; - kcsi->kcsi_sendbufsize = kctl->sendbufsize; - kcsi->kcsi_unit = kcb->unit; - strlcpy(kcsi->kcsi_name, kctl->name, MAX_KCTL_NAME); + kctl_fill_socketinfo(so, si); } break; diff --git a/bsd/kern/subr_prf.c b/bsd/kern/subr_prf.c index 5bb098bbb..d14302bec 100644 --- a/bsd/kern/subr_prf.c +++ b/bsd/kern/subr_prf.c @@ -130,7 +130,8 @@ extern int __doprnt(const char *fmt, va_list argp, void (*)(int, void *), void *arg, - int radix); + int radix, + int is_log); /* * Record cpu that panic'd and lock around panic data @@ -171,7 +172,7 @@ uprintf(const char *fmt, ...) if (pca.tty != NULL) tty_lock(pca.tty); va_start(ap, fmt); - __doprnt(fmt, ap, putchar, &pca, 10); + __doprnt(fmt, ap, putchar, &pca, 10, FALSE); va_end(ap); if (pca.tty != NULL) tty_unlock(pca.tty); @@ -236,7 +237,7 @@ tprintf(tpr_t tpr, const char *fmt, ...) pca.flags = flags; pca.tty = tp; va_start(ap, fmt); - __doprnt(fmt, ap, putchar, &pca, 10); + __doprnt(fmt, ap, putchar, &pca, 10, FALSE); va_end(ap); if (tp != NULL) @@ -265,7 +266,7 @@ ttyprintf(struct tty *tp, const char *fmt, ...) 
pca.tty = tp; va_start(ap, fmt); - __doprnt(fmt, ap, putchar, &pca, 10); + __doprnt(fmt, ap, putchar, &pca, 10, TRUE); va_end(ap); } } @@ -314,7 +315,7 @@ vaddlog(const char *fmt, va_list ap) } bsd_log_lock(); - __doprnt(fmt, ap, putchar, &pca, 10); + __doprnt(fmt, ap, putchar, &pca, 10, TRUE); bsd_log_unlock(); logwakeup(); @@ -334,7 +335,7 @@ _printf(int flags, struct tty *ttyp, const char *format, ...) tty_lock(ttyp); va_start(ap, format); - __doprnt(format, ap, putchar, &pca, 10); + __doprnt(format, ap, putchar, &pca, 10, TRUE); va_end(ap); tty_unlock(ttyp); @@ -349,7 +350,7 @@ prf(const char *fmt, va_list ap, int flags, struct tty *ttyp) pca.flags = flags; pca.tty = ttyp; - __doprnt(fmt, ap, putchar, &pca, 10); + __doprnt(fmt, ap, putchar, &pca, 10, TRUE); return 0; } @@ -442,7 +443,7 @@ vprintf(const char *fmt, va_list ap) pca.flags = TOLOG | TOCONS; pca.tty = NULL; - __doprnt(fmt, ap, putchar, &pca, 10); + __doprnt(fmt, ap, putchar, &pca, 10, TRUE); return 0; } @@ -462,7 +463,7 @@ vsprintf(char *buf, const char *cfmt, va_list ap) info.str = buf; info.remain = 999999; - retval = __doprnt(cfmt, ap, snprintf_func, &info, 10); + retval = __doprnt(cfmt, ap, snprintf_func, &info, 10, FALSE); if (info.remain >= 1) { *info.str++ = '\0'; } @@ -495,7 +496,7 @@ vsnprintf(char *str, size_t size, const char *format, va_list ap) info.str = str; info.remain = size; - retval = __doprnt(format, ap, snprintf_func, &info, 10); + retval = __doprnt(format, ap, snprintf_func, &info, 10, FALSE); if (info.remain >= 1) *info.str++ = '\0'; return retval; @@ -515,7 +516,7 @@ snprintf_func(int ch, void *arg) int kvprintf(char const *fmt, void (*func)(int, void*), void *arg, int radix, va_list ap) { - __doprnt(fmt, ap, func, arg, radix); + __doprnt(fmt, ap, func, arg, radix, TRUE); return 0; } diff --git a/bsd/kern/sys_coalition.c b/bsd/kern/sys_coalition.c index 3255fb0d6..a20ce301f 100644 --- a/bsd/kern/sys_coalition.c +++ b/bsd/kern/sys_coalition.c @@ -30,14 +30,15 @@ 
coalition_create_syscall(user_addr_t cidp, uint32_t flags) kern_return_t kr; uint64_t cid; coalition_t coal; + int type = COALITION_CREATE_FLAGS_GET_TYPE(flags); + boolean_t privileged = !!(flags & COALITION_CREATE_FLAGS_PRIVILEGED); - if ((flags & (~COALITION_CREATE_FLAG_MASK)) != 0) { + if ((flags & (~COALITION_CREATE_FLAGS_MASK)) != 0) + return EINVAL; + if (type < 0 || type > COALITION_TYPE_MAX) return EINVAL; - } - - boolean_t privileged = flags & COALITION_CREATE_FLAG_PRIVILEGED; - kr = coalition_create_internal(&coal, privileged); + kr = coalition_create_internal(type, privileged, &coal); if (kr != KERN_SUCCESS) { /* for now, the only kr is KERN_RESOURCE_SHORTAGE */ error = ENOMEM; @@ -46,9 +47,7 @@ coalition_create_syscall(user_addr_t cidp, uint32_t flags) cid = coalition_id(coal); -#if COALITION_DEBUG - printf("%s(addr, %u) -> %llu\n", __func__, flags, cid); -#endif + coal_dbg("(addr, %u) -> %llu", flags, cid); error = copyout(&cid, cidp, sizeof(cid)); out: return error; @@ -98,17 +97,19 @@ coalition_request_terminate_syscall(user_addr_t cidp, uint32_t flags) break; case KERN_DEFAULT_SET: error = EPERM; + break; case KERN_TERMINATED: error = EALREADY; + break; case KERN_INVALID_NAME: error = ESRCH; + break; default: error = EIO; + break; } -#if COALITION_DEBUG - printf("%s(%llu, %u) -> %d\n", __func__, cid, flags, error); -#endif + coal_dbg("(%llu, %u) -> %d", cid, flags, error); return error; } @@ -160,17 +161,19 @@ coalition_reap_syscall(user_addr_t cidp, uint32_t flags) break; case KERN_DEFAULT_SET: error = EPERM; + break; case KERN_TERMINATED: error = ESRCH; + break; case KERN_FAILURE: error = EBUSY; + break; default: error = EIO; + break; } -#if COALITION_DEBUG - printf("%s(%llu, %u) -> %d\n", __func__, cid, flags, error); -#endif + coal_dbg("(%llu, %u) -> %d", cid, flags, error); return error; } @@ -184,8 +187,9 @@ int coalition(proc_t p, struct coalition_args *cap, __unused int32_t *retval) user_addr_t cidp = cap->cid; uint32_t flags = cap->flags; 
int error = 0; + int type = COALITION_CREATE_FLAGS_GET_TYPE(flags); - if (!task_is_in_privileged_coalition(p->task)) { + if (!task_is_in_privileged_coalition(p->task, type)) { return EPERM; } @@ -279,3 +283,235 @@ int coalition_info(proc_t p, struct coalition_info_args *uap, __unused int32_t * coalition_release(coal); return error; } + +#if defined(DEVELOPMENT) || defined(DEBUG) +static int sysctl_coalition_get_ids SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error, pid; + proc_t tproc; + uint64_t value; + uint64_t ids[COALITION_NUM_TYPES]; + + + error = SYSCTL_IN(req, &value, sizeof(value)); + if (error) + return error; + if (!req->newptr) + pid = req->p->p_pid; + else + pid = (int)value; + + coal_dbg("looking up coalitions for pid:%d", pid); + tproc = proc_find(pid); + if (tproc == NULL) { + coal_dbg("ERROR: Couldn't find pid:%d", pid); + return ESRCH; + } + + task_coalition_ids(tproc->task, ids); + proc_rele(tproc); + + return SYSCTL_OUT(req, ids, sizeof(ids)); +} + +SYSCTL_PROC(_kern, OID_AUTO, coalitions, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_coalition_get_ids, "Q", "coalition ids of a given process"); + + +static int sysctl_coalition_get_roles SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error, pid; + proc_t tproc; + int value; + int roles[COALITION_NUM_TYPES]; + + + error = SYSCTL_IN(req, &value, sizeof(value)); + if (error) + return error; + if (!req->newptr) + pid = req->p->p_pid; + else + pid = (int)value; + + coal_dbg("looking up coalitions for pid:%d", pid); + tproc = proc_find(pid); + if (tproc == NULL) { + coal_dbg("ERROR: Couldn't find pid:%d", pid); + return ESRCH; + } + + task_coalition_roles(tproc->task, roles); + proc_rele(tproc); + + return SYSCTL_OUT(req, roles, sizeof(roles)); +} + +SYSCTL_PROC(_kern, OID_AUTO, coalition_roles, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_coalition_get_roles, "I", "coalition roles of a given process"); + + +static int 
sysctl_coalition_get_page_count SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error, pid; + proc_t tproc; + coalition_t coal; + uint64_t value; + uint64_t pgcount[COALITION_NUM_TYPES]; + + + error = SYSCTL_IN(req, &value, sizeof(value)); + if (error) + return error; + if (!req->newptr) + pid = req->p->p_pid; + else + pid = (int)value; + + coal_dbg("looking up coalitions for pid:%d", pid); + tproc = proc_find(pid); + if (tproc == NULL) { + coal_dbg("ERROR: Couldn't find pid:%d", pid); + return ESRCH; + } + + memset(pgcount, 0, sizeof(pgcount)); + + for (int t = 0; t < COALITION_NUM_TYPES; t++) { + coal = COALITION_NULL; + coalition_is_leader(tproc->task, t, &coal); + if (coal != COALITION_NULL) { + int ntasks = 0; + pgcount[t] = coalition_get_page_count(coal, &ntasks); + coal_dbg("PID:%d, Coalition:%lld, type:%d, pgcount:%lld", + pid, coalition_id(coal), t, pgcount[t]); + } + } + + proc_rele(tproc); + + return SYSCTL_OUT(req, pgcount, sizeof(pgcount)); +} + +SYSCTL_PROC(_kern, OID_AUTO, coalition_page_count, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_coalition_get_page_count, "Q", "coalition page count of a specified process"); + + +static int sysctl_coalition_get_pid_list SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error, type, sort_order, pid; + int value[3]; + int has_pid = 1; + + coalition_t coal = COALITION_NULL; + proc_t tproc = PROC_NULL; + int npids = 0; + int pidlist[100] = { 0, }; + + + error = SYSCTL_IN(req, &value, sizeof(value)); + if (error) { + has_pid = 0; + error = SYSCTL_IN(req, &value, sizeof(value) - sizeof(value[0])); + } + if (error) + return error; + if (!req->newptr) { + type = COALITION_TYPE_RESOURCE; + sort_order = COALITION_SORT_DEFAULT; + pid = req->p->p_pid; + } else { + type = value[0]; + sort_order = value[1]; + if (has_pid) + pid = value[2]; + else + pid = req->p->p_pid; + } + + if (type < 0 || type >= COALITION_NUM_TYPES) + return EINVAL; + + coal_dbg("getting constituent PIDS for 
coalition of type %d " + "containing pid:%d (sort:%d)", type, pid, sort_order); + tproc = proc_find(pid); + if (tproc == NULL) { + coal_dbg("ERROR: Couldn't find pid:%d", pid); + return ESRCH; + } + + (void)coalition_is_leader(tproc->task, type, &coal); + if (coal == COALITION_NULL) { + goto out; + } + + npids = coalition_get_pid_list(coal, COALITION_ROLEMASK_ALLROLES, sort_order, + pidlist, sizeof(pidlist) / sizeof(pidlist[0])); + if (npids > (int)(sizeof(pidlist) / sizeof(pidlist[0]))) { + coal_dbg("Too many members in coalition %llu (from pid:%d): %d!", + coalition_id(coal), pid, npids); + npids = sizeof(pidlist) / sizeof(pidlist[0]); + } + +out: + proc_rele(tproc); + + if (npids == 0) + return ENOENT; + + return SYSCTL_OUT(req, pidlist, sizeof(pidlist[0]) * npids); +} + +SYSCTL_PROC(_kern, OID_AUTO, coalition_pid_list, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_coalition_get_pid_list, "I", "list of PIDS which are members of the coalition of the current process"); + +#if DEVELOPMENT +static int sysctl_coalition_notify SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error, should_set; + coalition_t coal; + uint64_t value[2]; + + should_set = 1; + error = SYSCTL_IN(req, value, sizeof(value)); + if (error) { + error = SYSCTL_IN(req, value, sizeof(value) - sizeof(value[0])); + if (error) + return error; + should_set = 0; + } + if (!req->newptr) + return error; + + coal = coalition_find_by_id(value[0]); + if (coal == COALITION_NULL) { + coal_dbg("Can't find coalition with ID:%lld", value[0]); + return ESRCH; + } + + if (should_set) + coalition_set_notify(coal, (int)value[1]); + + value[0] = (uint64_t)coalition_should_notify(coal); + + coalition_release(coal); + + return SYSCTL_OUT(req, value, sizeof(value[0])); +} + +SYSCTL_PROC(_kern, OID_AUTO, coalition_notify, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_coalition_notify, "Q", "get/set coalition notification flag"); + +extern int unrestrict_coalition_syscalls; 
+SYSCTL_INT(_kern, OID_AUTO, unrestrict_coalitions, + CTLFLAG_RW, &unrestrict_coalition_syscalls, 0, + "unrestrict the coalition interface"); + +#endif /* DEVELOPMENT */ + +#endif /* DEVELOPMENT || DEBUG */ diff --git a/bsd/kern/sys_generic.c b/bsd/kern/sys_generic.c index 1247ff355..d6c46f58d 100644 --- a/bsd/kern/sys_generic.c +++ b/bsd/kern/sys_generic.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -106,6 +106,8 @@ #include #include #include +#include +#include #include #include @@ -134,7 +136,7 @@ #include #include /* for wait queue based select */ -#include +#include #include #include @@ -144,6 +146,7 @@ void evpipefree(struct pipe *); void postpipeevent(struct pipe *, int); void postevent(struct socket *, struct sockbuf *, int); extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp); +extern void delay(int); int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval); int wr_uio(struct proc *p, struct fileproc *fp, uio_t uio, user_ssize_t *retval); @@ -159,16 +162,16 @@ __private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, in /* Conflict wait queue for when selects collide (opaque type) */ -struct wait_queue select_conflict_queue; +struct waitq select_conflict_queue; /* * Init routine called from bsd_init.c */ -void select_wait_queue_init(void); +void select_waitq_init(void); void -select_wait_queue_init(void) +select_waitq_init(void) { - wait_queue_init(&select_conflict_queue, SYNC_POLICY_FIFO); + waitq_init(&select_conflict_queue, SYNC_POLICY_FIFO | SYNC_POLICY_DISABLE_IRQ); } #define f_flag f_fglob->fg_flag @@ -933,7 +936,7 @@ int selwait, nselcoll; extern int selcontinue(int error); extern int selprocess(int error, int sel_pass); static int selscan(struct proc *p, struct _select * sel, struct _select_data * seldata, - int nfd, int32_t *retval, 
int sel_pass, wait_queue_sub_t wqsub); + int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset); static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count); static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount); static int seldrop(struct proc *p, u_int32_t *ibits, int nfd); @@ -957,13 +960,14 @@ int select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval) { int error = 0; - u_int ni, nw, size; + u_int ni, nw; thread_t th_act; struct uthread *uth; struct _select *sel; struct _select_data *seldata; int needzerofill = 1; int count = 0; + size_t sz = 0; th_act = current_thread(); uth = get_bsdthread_info(th_act); @@ -973,6 +977,8 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva seldata->args = uap; seldata->retval = retval; + seldata->wqp = NULL; + seldata->count = 0; if (uap->nd < 0) { return (EINVAL); @@ -1074,28 +1080,57 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva goto continuation; } + /* + * We need an array of waitq pointers. This is due to the new way + * in which waitqs are linked to sets. When a thread selects on a + * file descriptor, a waitq (embedded in a selinfo structure) is + * added to the thread's local waitq set. There is no longer any + * way to directly iterate over all members of a given waitq set. + * The process of linking a waitq into a set may allocate a link + * table object. Because we can't iterate over all the waitqs to + * which our thread waitq set belongs, we need a way of removing + * this link object! + * + * Thus we need a buffer which will hold one waitq pointer + * per FD being selected. During the tear-down phase we can use + * these pointers to dis-associate the underlying selinfo's waitq + * from our thread's waitq set. + * + * Because we also need to allocate a waitq set for this thread, + * we use a bare buffer pointer to hold all the memory. 
Note that + * this memory is cached in the thread pointer and not reaped until + * the thread exists. This is generally OK because threads that + * call select tend to keep calling select repeatedly. + */ + sz = ALIGN(sizeof(struct waitq_set)) + (count * sizeof(uint64_t)); + if (sz > uth->uu_wqstate_sz) { + /* (re)allocate a buffer to hold waitq pointers */ + if (uth->uu_wqset) { + if (waitq_set_is_valid(uth->uu_wqset)) + waitq_set_deinit(uth->uu_wqset); + FREE(uth->uu_wqset, M_SELECT); + } else if (uth->uu_wqstate_sz && !uth->uu_wqset) + panic("select: thread structure corrupt! " + "uu_wqstate_sz:%ld, wqstate_buf == NULL", + uth->uu_wqstate_sz); + uth->uu_wqstate_sz = sz; + MALLOC(uth->uu_wqset, struct waitq_set *, sz, M_SELECT, M_WAITOK); + if (!uth->uu_wqset) + panic("can't allocate %ld bytes for wqstate buffer", + uth->uu_wqstate_sz); + waitq_set_init(uth->uu_wqset, + SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST|SYNC_POLICY_DISABLE_IRQ, NULL); + } + + if (!waitq_set_is_valid(uth->uu_wqset)) + waitq_set_init(uth->uu_wqset, + SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST|SYNC_POLICY_DISABLE_IRQ, NULL); + + /* the last chunk of our buffer is an array of waitq pointers */ + seldata->wqp = (uint64_t *)((char *)(uth->uu_wqset) + ALIGN(sizeof(struct waitq_set))); + bzero(seldata->wqp, sz - ALIGN(sizeof(struct waitq_set))); + seldata->count = count; - size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK); - if (uth->uu_allocsize) { - if (uth->uu_wqset == 0) - panic("select: wql memory smashed"); - /* needed for the select now */ - if (size > uth->uu_allocsize) { - kfree(uth->uu_wqset, uth->uu_allocsize); - uth->uu_allocsize = size; - uth->uu_wqset = (wait_queue_set_t)kalloc(size); - if (uth->uu_wqset == (wait_queue_set_t)NULL) - panic("failed to allocate memory for waitqueue\n"); - } - } else { - uth->uu_allocsize = size; - uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize); - if (uth->uu_wqset == (wait_queue_set_t)NULL) - panic("failed to allocate memory for 
waitqueue\n"); - } - bzero(uth->uu_wqset, size); - seldata->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET; - wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST)); continuation: @@ -1152,40 +1187,31 @@ selprocess(int error, int sel_pass) retval = seldata->retval; if ((error != 0) && (sel_pass == SEL_FIRSTPASS)) - unwind = 0; + unwind = 0; if (seldata->count == 0) - unwind = 0; + unwind = 0; retry: - if (error != 0) { - sel_pass = SEL_FIRSTPASS; /* Reset for seldrop */ + if (error != 0) goto done; - } ncoll = nselcoll; OSBitOrAtomic(P_SELECT, &p->p_flag); + /* skip scans if the select is just for timeouts */ if (seldata->count) { - /* - * Clear out any dangling refs from prior calls; technically - * there should not be any. - */ - if (sel_pass == SEL_FIRSTPASS) - wait_queue_sub_clearrefs(uth->uu_wqset); - - error = selscan(p, sel, seldata, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset); + error = selscan(p, sel, seldata, uap->nd, retval, sel_pass, uth->uu_wqset); if (error || *retval) { goto done; } - if (prepost) { - /* if the select of log, then we canwakeup and discover some one - * else already read the data; go toselct again if time permits - */ - prepost = 0; - doretry = 1; - } - if (somewakeup) { - somewakeup = 0; - doretry = 1; + if (prepost || somewakeup) { + /* + * if the select of log, then we can wakeup and + * discover some one else already read the data; + * go to select again if time permits + */ + prepost = 0; + somewakeup = 0; + doretry = 1; } } @@ -1221,13 +1247,15 @@ selprocess(int error, int sel_pass) OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag); /* if the select is just for timeout skip check */ - if (seldata->count &&(sel_pass == SEL_SECONDPASS)) + if (seldata->count && (sel_pass == SEL_SECONDPASS)) panic("selprocess: 2nd pass assertwaiting"); - /* Wait Queue Subordinate has waitqueue as first element */ - wait_result = wait_queue_assert_wait_with_leeway((wait_queue_t)uth->uu_wqset, - NULL, 
THREAD_ABORTSAFE, - TIMEOUT_URGENCY_USER_NORMAL, seldata->abstime, 0); + /* waitq_set has waitqueue as first element */ + wait_result = waitq_assert_wait64_leeway((struct waitq *)uth->uu_wqset, + NO_EVENT64, THREAD_ABORTSAFE, + TIMEOUT_URGENCY_USER_NORMAL, + seldata->abstime, + TIMEOUT_NO_LEEWAY); if (wait_result != THREAD_AWAKENED) { /* there are no preposted events */ error = tsleep1(NULL, PSOCK | PCATCH, @@ -1245,8 +1273,14 @@ selprocess(int error, int sel_pass) } done: if (unwind) { - wait_subqueue_unlink_all(uth->uu_wqset); seldrop(p, sel->ibits, uap->nd); + waitq_set_deinit(uth->uu_wqset); + /* + * zero out the waitq pointer array to avoid use-after free + * errors in the selcount error path (seldrop_locked) if/when + * the thread re-calls select(). + */ + bzero((void *)uth->uu_wqset, uth->uu_wqstate_sz); } OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag); /* select is not restarted after signals... */ @@ -1276,6 +1310,119 @@ selprocess(int error, int sel_pass) } +/** + * remove the fileproc's underlying waitq from the supplied waitq set; + * clear FP_INSELECT when appropriate + * + * Parameters: + * fp File proc that is potentially currently in select + * wqset Waitq set to which the fileproc may belong + * (usually this is the thread's private waitq set) + * Conditions: + * proc_fdlock is held + */ +static void selunlinkfp(struct fileproc *fp, uint64_t wqp_id, struct waitq_set *wqset) +{ + int valid_set = waitq_set_is_valid(wqset); + int valid_q = !!wqp_id; + + /* + * This could be called (from selcount error path) before we setup + * the thread's wqset. Check the wqset passed in, and only unlink if + * the set is valid. 
+ */ + + /* unlink the underlying waitq from the input set (thread waitq set) */ + if (valid_q && valid_set) + waitq_unlink_by_prepost_id(wqp_id, wqset); + + /* allow passing a NULL/invalid fp for seldrop unwind */ + if (!fp || !(fp->f_flags & (FP_INSELECT|FP_SELCONFLICT))) + return; + + /* + * We can always remove the conflict queue from our thread's set: this + * will not affect other threads that potentially need to be awoken on + * the conflict queue during a fileproc_drain - those sets will still + * be linked with the global conflict queue, and the last waiter + * on the fp clears the CONFLICT marker. + */ + if (valid_set && (fp->f_flags & FP_SELCONFLICT)) + waitq_unlink(&select_conflict_queue, wqset); + + /* jca: TODO: + * This isn't quite right - we don't actually know if this + * fileproc is in another select or not! Here we just assume + * that if we were the first thread to select on the FD, then + * we'll be the one to clear this flag... + */ + if (valid_set && fp->f_wset == (void *)wqset) { + fp->f_flags &= ~FP_INSELECT; + fp->f_wset = NULL; + } +} + +/** + * connect a fileproc to the given wqset, potentially bridging to a waitq + * pointed to indirectly by wq_data + * + * Parameters: + * fp File proc potentially currently in select + * wq_data Pointer to a pointer to a waitq (could be NULL) + * wqset Waitq set to which the fileproc should now belong + * (usually this is the thread's private waitq set) + * + * Conditions: + * proc_fdlock is held + */ +static uint64_t sellinkfp(struct fileproc *fp, void **wq_data, struct waitq_set *wqset) +{ + struct waitq *f_wq = NULL; + + if ((fp->f_flags & FP_INSELECT) != FP_INSELECT) { + if (wq_data) + panic("non-null data:%p on fp:%p not in select?!" + "(wqset:%p)", wq_data, fp, wqset); + return 0; + } + + if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) { + /* + * The conflict queue requires disabling interrupts, so we + * need to explicitly reserve a link object to avoid a + * panic/assert in the waitq code. 
Hopefully this extra step + * can be avoided if we can split the waitq structure into + * blocking and linkage sub-structures. + */ + uint64_t reserved_link = waitq_link_reserve(&select_conflict_queue); + waitq_link(&select_conflict_queue, wqset, WAITQ_SHOULD_LOCK, &reserved_link); + waitq_link_release(reserved_link); + } + + /* + * The wq_data parameter has potentially been set by selrecord called + * from a subsystems fo_select() function. If the subsystem does not + * call selrecord, then wq_data will be NULL + * + * Use memcpy to get the value into a proper pointer because + * wq_data most likely points to a stack variable that could be + * unaligned on 32-bit systems. + */ + if (wq_data) { + memcpy(&f_wq, wq_data, sizeof(f_wq)); + if (!waitq_is_valid(f_wq)) + f_wq = NULL; + } + + /* record the first thread's wqset in the fileproc structure */ + if (!fp->f_wset) + fp->f_wset = (void *)wqset; + + /* handles NULL f_wq */ + return waitq_get_prepost_id(f_wq); +} + + /* * selscan * @@ -1285,7 +1432,7 @@ selprocess(int error, int sel_pass) * retval The per thread system call return area * sel_pass Which pass this is; allowed values are * SEL_FIRSTPASS and SEL_SECONDPASS - * wqsub The per thread wait queue set + * wqset The per thread wait queue set * * Returns: 0 Success * EIO Invalid p->p_fd field XXX Obsolete? @@ -1293,8 +1440,8 @@ selprocess(int error, int sel_pass) * invalid. 
*/ static int -selscan(struct proc *p, struct _select *sel, struct _select_data * seldata, int nfd, int32_t *retval, - int sel_pass, wait_queue_sub_t wqsub) +selscan(struct proc *p, struct _select *sel, struct _select_data * seldata, + int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset) { struct filedesc *fdp = p->p_fd; int msk, i, j, fd; @@ -1306,8 +1453,7 @@ selscan(struct proc *p, struct _select *sel, struct _select_data * seldata, int u_int32_t *iptr, *optr; u_int nw; u_int32_t *ibits, *obits; - char * wql; - char * wql_ptr; + uint64_t reserved_link, *rl_ptr = NULL; int count; struct vfs_context context = *vfs_context_current(); @@ -1321,75 +1467,98 @@ selscan(struct proc *p, struct _select *sel, struct _select_data * seldata, int } ibits = sel->ibits; obits = sel->obits; - wql = seldata->wql; nw = howmany(nfd, NFDBITS); count = seldata->count; nc = 0; - if (count) { - proc_fdlock(p); - for (msk = 0; msk < 3; msk++) { - iptr = (u_int32_t *)&ibits[msk * nw]; - optr = (u_int32_t *)&obits[msk * nw]; + if (!count) { + *retval = 0; + return 0; + } + + proc_fdlock(p); + for (msk = 0; msk < 3; msk++) { + iptr = (u_int32_t *)&ibits[msk * nw]; + optr = (u_int32_t *)&obits[msk * nw]; + + for (i = 0; i < nfd; i += NFDBITS) { + bits = iptr[i/NFDBITS]; - for (i = 0; i < nfd; i += NFDBITS) { - bits = iptr[i/NFDBITS]; + while ((j = ffs(bits)) && (fd = i + --j) < nfd) { + bits &= ~(1 << j); - while ((j = ffs(bits)) && (fd = i + --j) < nfd) { - bits &= ~(1 << j); + if (fd < fdp->fd_nfiles) + fp = fdp->fd_ofiles[fd]; + else + fp = NULL; - if (fd < fdp->fd_nfiles) - fp = fdp->fd_ofiles[fd]; + if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + /* + * If we abort because of a bad + * fd, let the caller unwind... 
+ */ + proc_fdunlock(p); + return(EBADF); + } + if (sel_pass == SEL_SECONDPASS) { + reserved_link = 0; + rl_ptr = NULL; + selunlinkfp(fp, seldata->wqp[nc], wqset); + } else { + reserved_link = waitq_link_reserve((struct waitq *)wqset); + rl_ptr = &reserved_link; + if (fp->f_flags & FP_INSELECT) + /* someone is already in select on this fp */ + fp->f_flags |= FP_SELCONFLICT; else - fp = NULL; - - if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) { - /* - * If we abort because of a bad - * fd, let the caller unwind... - */ - proc_fdunlock(p); - return(EBADF); - } - if (sel_pass == SEL_SECONDPASS) { - wql_ptr = (char *)0; - if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)wqsub)) { - fp->f_flags &= ~FP_INSELECT; - fp->f_waddr = (void *)0; - } - } else { - wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK); - if (fp->f_flags & FP_INSELECT) { - /* someone is already in select on this fp */ - fp->f_flags |= FP_SELCONFLICT; - wait_queue_link(&select_conflict_queue, (wait_queue_set_t)wqsub); - } else { - fp->f_flags |= FP_INSELECT; - fp->f_waddr = (void *)wqsub; - } - } + fp->f_flags |= FP_INSELECT; + } - context.vc_ucred = fp->f_cred; + context.vc_ucred = fp->f_cred; - /* The select; set the bit, if true */ - if (fp->f_ops && fp->f_type - && fo_select(fp, flag[msk], wql_ptr, &context)) { - optr[fd/NFDBITS] |= (1 << (fd % NFDBITS)); - n++; - } - nc++; + /* + * stash this value b/c fo_select may replace + * reserved_link with a pointer to a waitq object + */ + uint64_t rsvd = reserved_link; + + /* The select; set the bit, if true */ + if (fp->f_ops && fp->f_type + && fo_select(fp, flag[msk], rl_ptr, &context)) { + optr[fd/NFDBITS] |= (1 << (fd % NFDBITS)); + n++; + } + if (sel_pass == SEL_FIRSTPASS) { + waitq_link_release(rsvd); + /* + * If the fp's supporting selinfo structure was linked + * to this thread's waitq set, then 'reserved_link' + * will have been updated by selrecord to be a pointer + * to the selinfo's waitq. 
+ */ + if (reserved_link == rsvd) + rl_ptr = NULL; /* fo_select never called selrecord() */ + /* + * Hook up the thread's waitq set either to + * the fileproc structure, or to the global + * conflict queue: but only on the first + * select pass. + */ + seldata->wqp[nc] = sellinkfp(fp, (void **)rl_ptr, wqset); } + nc++; } } - proc_fdunlock(p); } + proc_fdunlock(p); + *retval = n; return (0); } -int poll_callback(struct kqueue *, struct kevent64_s *, void *); +int poll_callback(struct kqueue *, struct kevent_internal_s *, void *); struct poll_continue_args { user_addr_t pca_fds; @@ -1466,7 +1635,6 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval) OSBitOrAtomic(P_SELECT, &p->p_flag); for (i = 0; i < nfds; i++) { short events = fds[i].events; - struct kevent64_s kev; int kerror = 0; /* per spec, ignore fd values below zero */ @@ -1476,13 +1644,10 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval) } /* convert the poll event into a kqueue kevent */ - kev.ident = fds[i].fd; - kev.flags = EV_ADD | EV_ONESHOT | EV_POLL; - kev.udata = CAST_USER_ADDR_T(&fds[i]); - kev.fflags = 0; - kev.data = 0; - kev.ext[0] = 0; - kev.ext[1] = 0; + struct kevent_internal_s kev = { + .ident = fds[i].fd, + .flags = EV_ADD | EV_ONESHOT | EV_POLL, + .udata = CAST_USER_ADDR_T(&fds[i]) }; /* Handle input events */ if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) { @@ -1554,7 +1719,7 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval) } int -poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data) +poll_callback(__unused struct kqueue *kq, struct kevent_internal_s *kevp, void *data) { struct poll_continue_args *cont = (struct poll_continue_args *)data; struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata); @@ -1572,10 +1737,9 @@ poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data) if (fds->revents & POLLHUP) mask = (POLLIN | 
POLLRDNORM | POLLPRI | POLLRDBAND ); else { - if ((kevp->flags & EV_ERROR) == 0 && kevp->data != 0) - mask = (POLLIN | POLLRDNORM ); + mask = (POLLIN | POLLRDNORM); if (kevp->flags & EV_OOBAND) - mask |= ( POLLPRI | POLLRDBAND ); + mask |= (POLLPRI | POLLRDBAND); } fds->revents |= (fds->events & mask); break; @@ -1690,7 +1854,7 @@ selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp) bad: dropcount = 0; - if (n== 0) + if (n == 0) goto out; /* Ignore error return; it's already EBADF */ (void)seldrop_locked(p, ibits, nfd, n, &need_wakeup, 1); @@ -1711,7 +1875,7 @@ selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp) * outstanding per fileproc f_iocount() picked up during the selcount(). * * Parameters: p Process performing the select - * ibits Input pit bector of fd's + * ibits Input bit bector of fd's * nfd Number of fd's * lim Limit to number of vector entries to * consider, or -1 for "all" @@ -1733,7 +1897,7 @@ static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount) { struct filedesc *fdp = p->p_fd; - int msk, i, j, fd; + int msk, i, j, nc, fd; u_int32_t bits; struct fileproc *fp; u_int32_t *iptr; @@ -1741,6 +1905,7 @@ seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wak int error = 0; int dropcount = 0; uthread_t uth = get_bsdthread_info(current_thread()); + struct _select_data *seldata; *need_wakeup = 0; @@ -1753,7 +1918,9 @@ seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wak } nw = howmany(nfd, NFDBITS); + seldata = &uth->uu_kevent.ss_select_data; + nc = 0; for (msk = 0; msk < 3; msk++) { iptr = (u_int32_t *)&ibits[msk * nw]; for (i = 0; i < nfd; i += NFDBITS) { @@ -1768,20 +1935,22 @@ seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wak if ((fromselcount != 0) && (++dropcount > lim)) goto done; + /* + * unlink even potentially NULL fileprocs. 
+ * If the FD was closed from under us, we + * still need to cleanup the waitq links! + */ + selunlinkfp(fp, + seldata->wqp ? seldata->wqp[nc] : 0, + uth->uu_wqset); + + nc++; + if (fp == NULL) { /* skip (now) bad fds */ error = EBADF; continue; } - /* - * Only clear the flag if we set it. We'll - * only find that we set it if we had made - * at least one [partial] pass through selscan(). - */ - if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)uth->uu_wqset)) { - fp->f_flags &= ~FP_INSELECT; - fp->f_waddr = (void *)0; - } fp->f_iocount--; if (fp->f_iocount < 0) @@ -1828,33 +1997,59 @@ seldrop(struct proc *p, u_int32_t *ibits, int nfd) * Record a select request. */ void -selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql) +selrecord(__unused struct proc *selector, struct selinfo *sip, void *s_data) { thread_t cur_act = current_thread(); struct uthread * ut = get_bsdthread_info(cur_act); + /* on input, s_data points to the 64-bit ID of a reserved link object */ + uint64_t *reserved_link = (uint64_t *)s_data; /* need to look at collisions */ /*do not record if this is second pass of select */ - if(p_wql == (void *)0) { + if (!s_data) return; - } if ((sip->si_flags & SI_INITED) == 0) { - wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO); + waitq_init(&sip->si_waitq, SYNC_POLICY_FIFO | SYNC_POLICY_DISABLE_IRQ); sip->si_flags |= SI_INITED; sip->si_flags &= ~SI_CLEAR; } - if (sip->si_flags & SI_RECORDED) { + if (sip->si_flags & SI_RECORDED) sip->si_flags |= SI_COLL; - } else + else sip->si_flags &= ~SI_COLL; sip->si_flags |= SI_RECORDED; - if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqset)) - wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqset, - (wait_queue_link_t)p_wql); + /* note: this checks for pre-existing linkage */ + waitq_link(&sip->si_waitq, ut->uu_wqset, + WAITQ_SHOULD_LOCK, reserved_link); + + /* + * Always consume the reserved link. 
+ * We can always call waitq_link_release() safely because if + * waitq_link is successful, it consumes the link and resets the + * value to 0, in which case our call to release becomes a no-op. + * If waitq_link fails, then the following release call will actually + * release the reserved link object. + */ + waitq_link_release(*reserved_link); + *reserved_link = 0; + + /* + * Use the s_data pointer as an output parameter as well + * This avoids changing the prototype for this function which is + * used by many kexts. We need to surface the waitq object + * associated with the selinfo we just added to the thread's select + * set. New waitq sets do not have back-pointers to set members, so + * the only way to clear out set linkage objects is to go from the + * waitq to the set. We use a memcpy because s_data could be + * pointing to an unaligned value on the stack + * (especially on 32-bit systems) + */ + void *wqptr = (void *)&sip->si_waitq; + memcpy((void *)s_data, (void *)&wqptr, sizeof(void *)); return; } @@ -1877,7 +2072,8 @@ selwakeup(struct selinfo *sip) } if (sip->si_flags & SI_RECORDED) { - wait_queue_wakeup_all(&sip->si_wait_queue, NULL, THREAD_AWAKENED); + waitq_wakeup64_all(&sip->si_waitq, NO_EVENT64, + THREAD_AWAKENED, WAITQ_ALL_PRIORITIES); sip->si_flags &= ~SI_RECORDED; } @@ -1886,6 +2082,7 @@ selwakeup(struct selinfo *sip) void selthreadclear(struct selinfo *sip) { + struct waitq *wq; if ((sip->si_flags & SI_INITED) == 0) { return; @@ -1895,7 +2092,18 @@ selthreadclear(struct selinfo *sip) sip->si_flags &= ~(SI_RECORDED | SI_COLL); } sip->si_flags |= SI_CLEAR; - wait_queue_unlink_all(&sip->si_wait_queue); + sip->si_flags &= ~SI_INITED; + + wq = &sip->si_waitq; + + /* + * Higher level logic may have a handle on this waitq's prepost ID, + * but that's OK because the waitq_deinit will remove/invalidate the + * prepost object (as well as mark the waitq invalid). 
This de-couples + * us from any callers that may have a handle to this waitq via the + * prepost ID. + */ + waitq_deinit(wq); } @@ -2967,3 +3175,385 @@ telemetry(__unused struct proc *p, struct telemetry_args *args, __unused int32_t return (error); } + +#if defined(DEVELOPMENT) || defined(DEBUG) +#if CONFIG_WAITQ_DEBUG +static uint64_t g_wqset_num = 0; +struct g_wqset { + queue_chain_t link; + struct waitq_set *wqset; +}; + +static queue_head_t g_wqset_list; +static struct waitq_set *g_waitq_set = NULL; + +static inline struct waitq_set *sysctl_get_wqset(int idx) +{ + struct g_wqset *gwqs; + + if (!g_wqset_num) + queue_init(&g_wqset_list); + + /* don't bother with locks: this is test-only code! */ + qe_foreach_element(gwqs, &g_wqset_list, link) { + if ((int)(wqset_id(gwqs->wqset) & 0xffffffff) == idx) + return gwqs->wqset; + } + + /* allocate a new one */ + ++g_wqset_num; + gwqs = (struct g_wqset *)kalloc(sizeof(*gwqs)); + assert(gwqs != NULL); + + gwqs->wqset = waitq_set_alloc(SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST|SYNC_POLICY_DISABLE_IRQ); + enqueue_tail(&g_wqset_list, &gwqs->link); + printf("[WQ]: created new waitq set 0x%llx\n", wqset_id(gwqs->wqset)); + + return gwqs->wqset; +} + +#define MAX_GLOBAL_TEST_QUEUES 64 +static int g_wq_init = 0; +static struct waitq g_wq[MAX_GLOBAL_TEST_QUEUES]; + +static inline struct waitq *global_test_waitq(int idx) +{ + if (idx < 0) + return NULL; + + if (!g_wq_init) { + g_wq_init = 1; + for (int i = 0; i < MAX_GLOBAL_TEST_QUEUES; i++) + waitq_init(&g_wq[i], SYNC_POLICY_FIFO|SYNC_POLICY_DISABLE_IRQ); + } + + return &g_wq[idx % MAX_GLOBAL_TEST_QUEUES]; +} + +static int sysctl_waitq_wakeup_one SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error; + int index; + struct waitq *waitq; + kern_return_t kr; + int64_t event64 = 0; + + error = SYSCTL_IN(req, &event64, sizeof(event64)); + if (error) + return error; + + if (!req->newptr) + return SYSCTL_OUT(req, &event64, sizeof(event64)); + + if (event64 < 0) { + index = 
(int)((-event64) & 0xffffffff); + waitq = wqset_waitq(sysctl_get_wqset(index)); + index = -index; + } else { + index = (int)event64; + waitq = global_test_waitq(index); + } + + event64 = 0; + + printf("[WQ]: Waking one thread on waitq [%d] event:0x%llx\n", + index, event64); + kr = waitq_wakeup64_one(waitq, (event64_t)event64, THREAD_AWAKENED, + WAITQ_ALL_PRIORITIES); + printf("[WQ]: \tkr=%d\n", kr); + + return SYSCTL_OUT(req, &kr, sizeof(kr)); +} +SYSCTL_PROC(_kern, OID_AUTO, waitq_wakeup_one, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_waitq_wakeup_one, "Q", "wakeup one thread waiting on given event"); + + +static int sysctl_waitq_wakeup_all SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error; + int index; + struct waitq *waitq; + kern_return_t kr; + int64_t event64 = 0; + + error = SYSCTL_IN(req, &event64, sizeof(event64)); + if (error) + return error; + + if (!req->newptr) + return SYSCTL_OUT(req, &event64, sizeof(event64)); + + if (event64 < 0) { + index = (int)((-event64) & 0xffffffff); + waitq = wqset_waitq(sysctl_get_wqset(index)); + index = -index; + } else { + index = (int)event64; + waitq = global_test_waitq(index); + } + + event64 = 0; + + printf("[WQ]: Waking all threads on waitq [%d] event:0x%llx\n", + index, event64); + kr = waitq_wakeup64_all(waitq, (event64_t)event64, + THREAD_AWAKENED, WAITQ_ALL_PRIORITIES); + printf("[WQ]: \tkr=%d\n", kr); + + return SYSCTL_OUT(req, &kr, sizeof(kr)); +} +SYSCTL_PROC(_kern, OID_AUTO, waitq_wakeup_all, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_waitq_wakeup_all, "Q", "wakeup all threads waiting on given event"); + + +static int sysctl_waitq_wait SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error; + int index; + struct waitq *waitq; + kern_return_t kr; + int64_t event64 = 0; + + error = SYSCTL_IN(req, &event64, sizeof(event64)); + if (error) + return error; + + if (!req->newptr) + return SYSCTL_OUT(req, &event64, sizeof(event64)); + + if (event64 < 0) 
{ + index = (int)((-event64) & 0xffffffff); + waitq = wqset_waitq(sysctl_get_wqset(index)); + index = -index; + } else { + index = (int)event64; + waitq = global_test_waitq(index); + } + + event64 = 0; + + printf("[WQ]: Current thread waiting on waitq [%d] event:0x%llx\n", + index, event64); + kr = waitq_assert_wait64(waitq, (event64_t)event64, THREAD_INTERRUPTIBLE, 0); + if (kr == THREAD_WAITING) + thread_block(THREAD_CONTINUE_NULL); + printf("[WQ]: \tWoke Up: kr=%d\n", kr); + + return SYSCTL_OUT(req, &kr, sizeof(kr)); +} +SYSCTL_PROC(_kern, OID_AUTO, waitq_wait, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_waitq_wait, "Q", "start waiting on given event"); + + +static int sysctl_wqset_select SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error; + struct waitq_set *wqset; + uint64_t event64 = 0; + + error = SYSCTL_IN(req, &event64, sizeof(event64)); + if (error) + return error; + + if (!req->newptr) + goto out; + + wqset = sysctl_get_wqset((int)(event64 & 0xffffffff)); + g_waitq_set = wqset; + + event64 = wqset_id(wqset); + printf("[WQ]: selected wqset 0x%llx\n", event64); + +out: + if (g_waitq_set) + event64 = wqset_id(g_waitq_set); + else + event64 = (uint64_t)(-1); + + return SYSCTL_OUT(req, &event64, sizeof(event64)); +} +SYSCTL_PROC(_kern, OID_AUTO, wqset_select, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_wqset_select, "Q", "select/create a global waitq set"); + + +static int sysctl_waitq_link SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error; + int index; + struct waitq *waitq; + struct waitq_set *wqset; + kern_return_t kr; + uint64_t reserved_link = 0; + int64_t event64 = 0; + + error = SYSCTL_IN(req, &event64, sizeof(event64)); + if (error) + return error; + + if (!req->newptr) + return SYSCTL_OUT(req, &event64, sizeof(event64)); + + if (!g_waitq_set) + g_waitq_set = sysctl_get_wqset(1); + wqset = g_waitq_set; + + if (event64 < 0) { + struct waitq_set *tmp; + index = (int)((-event64) & 
0xffffffff); + tmp = sysctl_get_wqset(index); + if (tmp == wqset) + goto out; + waitq = wqset_waitq(tmp); + index = -index; + } else { + index = (int)event64; + waitq = global_test_waitq(index); + } + + printf("[WQ]: linking waitq [%d] to global wqset (0x%llx)\n", + index, wqset_id(wqset)); + reserved_link = waitq_link_reserve(waitq); + kr = waitq_link(waitq, wqset, WAITQ_SHOULD_LOCK, &reserved_link); + waitq_link_release(reserved_link); + + printf("[WQ]: \tkr=%d\n", kr); + +out: + return SYSCTL_OUT(req, &kr, sizeof(kr)); +} +SYSCTL_PROC(_kern, OID_AUTO, waitq_link, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_waitq_link, "Q", "link global waitq to test waitq set"); + + +static int sysctl_waitq_unlink SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error; + int index; + struct waitq *waitq; + struct waitq_set *wqset; + kern_return_t kr; + uint64_t event64 = 0; + + error = SYSCTL_IN(req, &event64, sizeof(event64)); + if (error) + return error; + + if (!req->newptr) + return SYSCTL_OUT(req, &event64, sizeof(event64)); + + if (!g_waitq_set) + g_waitq_set = sysctl_get_wqset(1); + wqset = g_waitq_set; + + index = (int)event64; + waitq = global_test_waitq(index); + + printf("[WQ]: unlinking waitq [%d] from global wqset (0x%llx)\n", + index, wqset_id(wqset)); + + kr = waitq_unlink(waitq, wqset); + printf("[WQ]: \tkr=%d\n", kr); + + return SYSCTL_OUT(req, &kr, sizeof(kr)); +} +SYSCTL_PROC(_kern, OID_AUTO, waitq_unlink, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_waitq_unlink, "Q", "unlink global waitq from test waitq set"); + + +static int sysctl_waitq_clear_prepost SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + struct waitq *waitq; + uint64_t event64 = 0; + int error, index; + + error = SYSCTL_IN(req, &event64, sizeof(event64)); + if (error) + return error; + + if (!req->newptr) + return SYSCTL_OUT(req, &event64, sizeof(event64)); + + index = (int)event64; + waitq = global_test_waitq(index); + + printf("[WQ]: 
clearing prepost on waitq [%d]\n", index); + waitq_clear_prepost(waitq); + + return SYSCTL_OUT(req, &event64, sizeof(event64)); +} +SYSCTL_PROC(_kern, OID_AUTO, waitq_clear_prepost, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_waitq_clear_prepost, "Q", "clear prepost on given waitq"); + + +static int sysctl_wqset_unlink_all SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error; + struct waitq_set *wqset; + kern_return_t kr; + uint64_t event64 = 0; + + error = SYSCTL_IN(req, &event64, sizeof(event64)); + if (error) + return error; + + if (!req->newptr) + return SYSCTL_OUT(req, &event64, sizeof(event64)); + + if (!g_waitq_set) + g_waitq_set = sysctl_get_wqset(1); + wqset = g_waitq_set; + + printf("[WQ]: unlinking all queues from global wqset (0x%llx)\n", + wqset_id(wqset)); + + kr = waitq_set_unlink_all(wqset); + printf("[WQ]: \tkr=%d\n", kr); + + return SYSCTL_OUT(req, &kr, sizeof(kr)); +} +SYSCTL_PROC(_kern, OID_AUTO, wqset_unlink_all, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_wqset_unlink_all, "Q", "unlink all queues from test waitq set"); + + +static int sysctl_wqset_clear_preposts SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + struct waitq_set *wqset = NULL; + uint64_t event64 = 0; + int error, index; + + error = SYSCTL_IN(req, &event64, sizeof(event64)); + if (error) + return error; + + if (!req->newptr) + goto out; + + index = (int)((event64) & 0xffffffff); + wqset = sysctl_get_wqset(index); + assert(wqset != NULL); + + printf("[WQ]: clearing preposts on wqset 0x%llx\n", wqset_id(wqset)); + waitq_set_clear_preposts(wqset); + +out: + if (wqset) + event64 = wqset_id(wqset); + else + event64 = (uint64_t)(-1); + + return SYSCTL_OUT(req, &event64, sizeof(event64)); +} +SYSCTL_PROC(_kern, OID_AUTO, wqset_clear_preposts, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_wqset_clear_preposts, "Q", "clear preposts on given waitq set"); + +#endif /* CONFIG_WAITQ_DEBUG */ +#endif /* defined(DEVELOPMENT) 
|| defined(DEBUG) */ diff --git a/bsd/kern/sys_pipe.c b/bsd/kern/sys_pipe.c index 374d82381..1e64ce737 100644 --- a/bsd/kern/sys_pipe.c +++ b/bsd/kern/sys_pipe.c @@ -317,7 +317,7 @@ pipe_touch(struct pipe *tpipe, int touch) } } -static const unsigned int pipesize_blocks[] = {128,256,1024,2048,4096, 4096 * 2, PIPE_SIZE , PIPE_SIZE * 4 }; +static const unsigned int pipesize_blocks[] = {512,1024,2048,4096, 4096 * 2, PIPE_SIZE , PIPE_SIZE * 4 }; /* * finds the right size from possible sizes in pipesize_blocks @@ -329,6 +329,12 @@ choose_pipespace(unsigned long current, unsigned long expected) int i = sizeof(pipesize_blocks)/sizeof(unsigned int) -1; unsigned long target; + /* + * assert that we always get an atomic transaction sized pipe buffer, + * even if the system pipe buffer high-water mark has been crossed. + */ + assert(PIPE_BUF == pipesize_blocks[0]); + if (expected > current) target = expected; else diff --git a/bsd/kern/sys_work_interval.c b/bsd/kern/sys_work_interval.c new file mode 100644 index 000000000..45b36c717 --- /dev/null +++ b/bsd/kern/sys_work_interval.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int +work_interval_ctl(__unused proc_t p, struct work_interval_ctl_args *uap, __unused int32_t *retval) +{ + uint32_t operation = uap->operation; + int error = 0; + kern_return_t kret = KERN_SUCCESS; + uint64_t work_interval_id; + struct work_interval_notification notification; + + switch (operation) { + case WORK_INTERVAL_OPERATION_CREATE: + if (uap->arg == USER_ADDR_NULL || uap->work_interval_id != 0) { + return EINVAL; + } + if (uap->len < sizeof(work_interval_id)) { + return ERANGE; + } + + /* + * Privilege check performed up-front, and then the work + * ID is allocated for use by the thread + */ + error = priv_check_cred(kauth_cred_get(), PRIV_WORK_INTERVAL, 0); + if (error) { + return (error); + } + + kret = thread_policy_create_work_interval(current_thread(), + &work_interval_id); + if (kret == KERN_SUCCESS) { + error = copyout(&work_interval_id, uap->arg, sizeof(work_interval_id)); + } else { + error = EINVAL; + } + + break; + case WORK_INTERVAL_OPERATION_DESTROY: + if (uap->arg != USER_ADDR_NULL || uap->work_interval_id == 0) { + return EINVAL; + } + + /* + * No privilege check, we assume a previous WORK_INTERVAL_OPERATION_CREATE + * operation would have allocated a work interval ID for the current + * thread, which the scheduler will validate. 
+ */ + kret = thread_policy_destroy_work_interval(current_thread(), + uap->work_interval_id); + if (kret != KERN_SUCCESS) { + error = EINVAL; + } + + break; + case WORK_INTERVAL_OPERATION_NOTIFY: + if (uap->arg == USER_ADDR_NULL || uap->work_interval_id == 0) { + return EINVAL; + } + if (uap->len < sizeof(notification)) { + return EINVAL; + } + + /* + * No privilege check, we assume a previous WORK_INTERVAL_OPERATION_CREATE + * operation would have allocated a work interval ID for the current + * thread, which the scheduler will validate. + */ + error = copyin(uap->arg, &notification, sizeof(notification)); + if (error) { + break; + } + + kret = sched_work_interval_notify(current_thread(), + uap->work_interval_id, + notification.start, + notification.finish, + notification.deadline, + notification.next_start, + notification.flags); + if (kret != KERN_SUCCESS) { + error = EINVAL; + break; + } + + break; + default: + error = ENOTSUP; + break; + } + + return (error); +} diff --git a/bsd/kern/syscalls.master b/bsd/kern/syscalls.master index 624fd33dc..5f58f9263 100644 --- a/bsd/kern/syscalls.master +++ b/bsd/kern/syscalls.master @@ -266,7 +266,7 @@ 175 AUE_NULL ALL { int nosys(void); } { old gc_control } 176 AUE_NULL ALL { int nosys(void); } { old add_profil } 177 AUE_NULL ALL { int nosys(void); } -178 AUE_NULL ALL { int nosys(void); } +178 AUE_KDEBUGTRACE ALL { uint64_t kdebug_trace_string(uint32_t debugid, uint64_t str_id, const char *str) NO_SYSCALL_STUB; } 179 AUE_KDEBUGTRACE ALL { int kdebug_trace64(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) NO_SYSCALL_STUB; } 180 AUE_KDEBUGTRACE ALL { int kdebug_trace(uint32_t code, u_long arg1, u_long arg2, u_long arg3, u_long arg4) NO_SYSCALL_STUB; } 181 AUE_SETGID ALL { int setgid(gid_t gid); } @@ -567,7 +567,7 @@ #endif 372 AUE_NULL ALL { uint64_t thread_selfid (void) NO_SYSCALL_STUB; } 373 AUE_LEDGER ALL { int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3); } -374 AUE_NULL ALL { int
nosys(void); } +374 AUE_NULL ALL { int kevent_qos(int fd, const struct kevent_qos_s *changelist, int nchanges, struct kevent_qos_s *eventlist, int nevents, void *data_out, size_t *data_available, unsigned int flags); } 375 AUE_NULL ALL { int nosys(void); } 376 AUE_NULL ALL { int nosys(void); } 377 AUE_NULL ALL { int nosys(void); } @@ -585,9 +585,6 @@ 388 AUE_MAC_GET_FD ALL { int __mac_get_fd(int fd, struct mac *mac_p); } 389 AUE_MAC_SET_FD ALL { int __mac_set_fd(int fd, struct mac *mac_p); } 390 AUE_MAC_GET_PID ALL { int __mac_get_pid(pid_t pid, struct mac *mac_p); } -391 AUE_MAC_GET_LCID ALL { int __mac_get_lcid(pid_t lcid, struct mac *mac_p); } -392 AUE_MAC_GET_LCTX ALL { int __mac_get_lctx(struct mac *mac_p); } -393 AUE_MAC_SET_LCTX ALL { int __mac_set_lctx(struct mac *mac_p); } #else 381 AUE_MAC_SYSCALL ALL { int enosys(void); } 382 AUE_MAC_GET_FILE ALL { int nosys(void); } @@ -599,12 +596,12 @@ 388 AUE_MAC_GET_FD ALL { int nosys(void); } 389 AUE_MAC_SET_FD ALL { int nosys(void); } 390 AUE_MAC_GET_PID ALL { int nosys(void); } -391 AUE_MAC_GET_LCID ALL { int nosys(void); } -392 AUE_MAC_GET_LCTX ALL { int nosys(void); } -393 AUE_MAC_SET_LCTX ALL { int nosys(void); } #endif -394 AUE_SETLCID ALL { int setlcid(pid_t pid, pid_t lcid) NO_SYSCALL_STUB; } -395 AUE_GETLCID ALL { int getlcid(pid_t pid) NO_SYSCALL_STUB; } +391 AUE_NULL ALL { int enosys(void); } +392 AUE_NULL ALL { int enosys(void); } +393 AUE_NULL ALL { int enosys(void); } +394 AUE_NULL ALL { int enosys(void); } +395 AUE_NULL ALL { int enosys(void); } 396 AUE_NULL ALL { user_ssize_t read_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; } 397 AUE_NULL ALL { user_ssize_t write_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; } 398 AUE_OPEN_RWTC ALL { int open_nocancel(user_addr_t path, int flags, int mode) NO_SYSCALL_STUB; } @@ -689,9 +686,9 @@ 445 AUE_NULL ALL { int nosys(void); } { old __proc_suppress } 446 AUE_NULL ALL { int proc_rlimit_control(pid_t pid, int 
flavor, void *arg); } #if SOCKETS -447 AUE_CONNECT ALL { int connectx(int s, struct sockaddr *src, socklen_t srclen, struct sockaddr *dsts, socklen_t dstlen, uint32_t ifscope, associd_t aid, connid_t *cid); } -448 AUE_NULL ALL { int disconnectx(int s, associd_t aid, connid_t cid); } -449 AUE_NULL ALL { int peeloff(int s, associd_t aid); } +447 AUE_CONNECT ALL { int connectx(int socket, const sa_endpoints_t *endpoints, sae_associd_t associd, unsigned int flags, const struct iovec *iov, unsigned int iovcnt, size_t *len, sae_connid_t *connid); } +448 AUE_NULL ALL { int disconnectx(int s, sae_associd_t aid, sae_connid_t cid); } +449 AUE_NULL ALL { int peeloff(int s, sae_associd_t aid); } 450 AUE_SOCKET ALL { int socket_delegate(int domain, int type, int protocol, pid_t epid); } #else 447 AUE_NULL ALL { int nosys(void); } @@ -761,7 +758,7 @@ 484 AUE_NULL ALL { int guarded_open_dprotected_np(const char *path, const guardid_t *guard, u_int guardflags, int flags, int dpclass, int dpflags, int mode) NO_SYSCALL_STUB; } 485 AUE_NULL ALL { user_ssize_t guarded_write_np(int fd, const guardid_t *guard, user_addr_t cbuf, user_size_t nbyte); } 486 AUE_PWRITE ALL { user_ssize_t guarded_pwrite_np(int fd, const guardid_t *guard, user_addr_t buf, user_size_t nbyte, off_t offset); } -487 AUE_WRITEV ALL { user_ssize_t guarded_writev_np(int fd, const guardid_t *guard, struct iovec *iovp, u_int iovcnt); } +487 AUE_WRITEV ALL { user_ssize_t guarded_writev_np(int fd, const guardid_t *guard, struct iovec *iovp, int iovcnt); } #if CONFIG_SECLUDED_RENAME 488 AUE_RENAME ALL { int rename_ext(char *from, char *to, u_int flags) NO_SYSCALL_STUB; } #else @@ -772,3 +769,21 @@ #else 489 AUE_NULL ALL { int enosys(void); } #endif +#if NETWORKING +490 AUE_NULL ALL { int netagent_trigger(uuid_t agent_uuid, size_t agent_uuidlen); } +#else +490 AUE_NULL ALL { int nosys(void); } +#endif /* NETWORKING */ +491 AUE_STACKSNAPSHOT ALL { int stack_snapshot_with_config(int stackshot_config_version, user_addr_t 
stackshot_config, size_t stackshot_config_size) NO_SYSCALL_STUB; } +#if CONFIG_TELEMETRY +492 AUE_STACKSNAPSHOT ALL { int microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags) NO_SYSCALL_STUB; } +#else +492 AUE_NULL ALL { int enosys(void); } +#endif /* CONFIG_TELEMETRY */ +493 AUE_NULL ALL { user_ssize_t grab_pgo_data (user_addr_t uuid, int flags, user_addr_t buffer, user_ssize_t size); } +494 AUE_NULL ALL { int enosys(void); } +495 AUE_NULL ALL { int enosys(void); } +496 AUE_NULL ALL { int enosys(void); } +497 AUE_NULL ALL { int enosys(void); } +498 AUE_NULL ALL { int enosys(void); } +499 AUE_NULL ALL { int work_interval_ctl(uint32_t operation, uint64_t work_interval_id, void *arg, size_t len) NO_SYSCALL_STUB; } diff --git a/bsd/kern/sysv_shm.c b/bsd/kern/sysv_shm.c index caed4c433..41a6bb873 100644 --- a/bsd/kern/sysv_shm.c +++ b/bsd/kern/sysv_shm.c @@ -93,6 +93,7 @@ #include #include +#include #include @@ -169,6 +170,7 @@ static int shm_delete_mapping(struct proc *, struct shmmap_state *, int); #define DEFAULT_SHMMNI 32 #define DEFAULT_SHMSEG 8 #define DEFAULT_SHMALL 1024 + struct shminfo shminfo = { DEFAULT_SHMMAX, DEFAULT_SHMMIN, @@ -368,8 +370,8 @@ shmat(struct proc *p, struct shmat_args *uap, user_addr_t *retval) mach_vm_address_t attach_va; /* attach address in/out */ mach_vm_size_t map_size; /* size of map entry */ mach_vm_size_t mapped_size; - vm_prot_t prot; - size_t size; + vm_prot_t prot; + size_t size; kern_return_t rv; int shmat_ret; int vm_flags; @@ -389,6 +391,11 @@ shmat(struct proc *p, struct shmat_args *uap, user_addr_t *retval) if (shmmap_s == NULL) { size = shminfo.shmseg * sizeof(struct shmmap_state); + if (size == 0 || size / shminfo.shmseg != sizeof(struct shmmap_state)) { + /* overflow */ + shmat_ret = ENOMEM; + goto shmat_out; + } MALLOC(shmmap_s, struct shmmap_state *, size, M_SHM, M_WAITOK); if (shmmap_s == NULL) { shmat_ret = ENOMEM; @@ -910,7 +917,7 @@ int shmfork(struct proc *p1, struct proc *p2) { struct 
shmmap_state *shmmap_s; - size_t size; + size_t size; int i; int shmfork_ret = 0; @@ -919,8 +926,12 @@ shmfork(struct proc *p1, struct proc *p2) if (!shm_inited) { shminit(NULL); } - - size = shminfo.shmseg * sizeof(struct shmmap_state); + size = shminfo.shmseg * sizeof(struct shmmap_state); + if (size == 0 || size / shminfo.shmseg != sizeof(struct shmmap_state)) { + /* overflow */ + shmfork_ret = 1; + goto shmfork_out; + } MALLOC(shmmap_s, struct shmmap_state *, size, M_SHM, M_WAITOK); if (shmmap_s != NULL) { bcopy((caddr_t)p1->vm_shm, (caddr_t)shmmap_s, size); @@ -1037,6 +1048,9 @@ sysctl_shminfo(__unused struct sysctl_oid *oidp, void *arg1, int error = 0; int sysctl_shminfo_ret = 0; uint64_t saved_shmmax; + uint64_t saved_shmseg; + uint64_t saved_shmmni; + uint64_t saved_shmall; error = SYSCTL_OUT(req, arg1, sizeof(int64_t)); if (error || req->newptr == USER_ADDR_NULL) @@ -1049,7 +1063,10 @@ sysctl_shminfo(__unused struct sysctl_oid *oidp, void *arg1, sysctl_shminfo_ret = EPERM; goto sysctl_shminfo_out; } - saved_shmmax = shminfo.shmmax; + saved_shmmax = shminfo.shmmax; + saved_shmseg = shminfo.shmseg; + saved_shmmni = shminfo.shmmni; + saved_shmall = shminfo.shmall; if ((error = SYSCTL_IN(req, arg1, sizeof(int64_t))) != 0) { sysctl_shminfo_ret = error; @@ -1064,6 +1081,30 @@ sysctl_shminfo(__unused struct sysctl_oid *oidp, void *arg1, goto sysctl_shminfo_out; } } + else if (arg1 == &shminfo.shmseg) { + /* add a sanity check - 20847256 */ + if (shminfo.shmseg > INT32_MAX || shminfo.shmseg < 0) { + shminfo.shmseg = saved_shmseg; + sysctl_shminfo_ret = EINVAL; + goto sysctl_shminfo_out; + } + } + else if (arg1 == &shminfo.shmmni) { + /* add a sanity check - 20847256 */ + if (shminfo.shmmni > INT32_MAX || shminfo.shmmni < 0) { + shminfo.shmmni = saved_shmmni; + sysctl_shminfo_ret = EINVAL; + goto sysctl_shminfo_out; + } + } + else if (arg1 == &shminfo.shmall) { + /* add a sanity check - 20847256 */ + if (shminfo.shmall > INT32_MAX || shminfo.shmall < 0) { + 
shminfo.shmall = saved_shmall; + sysctl_shminfo_ret = EINVAL; + goto sysctl_shminfo_out; + } + } sysctl_shminfo_ret = 0; sysctl_shminfo_out: SYSV_SHM_SUBSYS_UNLOCK(); diff --git a/bsd/kern/trace.codes b/bsd/kern/trace.codes index 57de6588d..2a5a36206 100644 --- a/bsd/kern/trace.codes +++ b/bsd/kern/trace.codes @@ -222,6 +222,7 @@ 0x1300488 MACH_vm_page_wait_block 0x130048C MACH_vm_page_sleep 0x1300490 MACH_vm_page_expedite +0x13004c0 MACH_vm_pressure_event 0x1400000 MACH_SCHED 0x1400004 MACH_STKATTACH 0x1400008 MACH_STKHANDOFF @@ -234,8 +235,6 @@ 0x1400024 MACH_IDLE 0x1400028 MACH_STACK_DEPTH 0x140002c MACH_MOVED -0x1400030 MACH_FAIRSHARE_ENTER -0x1400034 MACH_FAIRSHARE_EXIT 0x1400038 MACH_FAILSAFE 0x140003C MACH_BLOCK 0x1400040 MACH_WAIT @@ -256,6 +255,14 @@ 0x1400084 MACH_QUANTUM_HANDOFF 0x1400088 MACH_MULTIQ_DEQUEUE 0x140008C MACH_SCHED_THREAD_SWITCH +0x1400094 MACH_SCHED_REMOTE_DEFERRED_AST +0x1400098 MACH_SCHED_REMOTE_CANCEL_AST +0x140009C MACH_SCHED_CHANGE_PRIORITY +0x14000A0 MACH_SCHED_UPDATE_REC_CORES +0x14000A4 MACH_STACK_WAIT +0x14000A8 MACH_THREAD_BIND +0x14000AC MACH_WAITQ_PROMOTE +0x14000B0 MACH_WAITQ_DEMOTE 0x1500000 MACH_MSGID_INVALID 0x1600000 MTX_SLEEP 0x1600004 MTX_SLEEP_DEADLINE @@ -324,6 +331,7 @@ 0x1a20028 SFI_GLOBAL_DEFER 0x1a30004 ENERGY_PERF_GPU_DESCRIPTION 0x1a30008 ENERGY_PERF_GPU_TIME +0x1a40000 SYSDIAGNOSE_notify_user 0x2010000 L_IP_In_Beg 0x2010004 L_IP_Out_Beg 0x2010008 L_IP_In_End @@ -734,6 +742,7 @@ 0x3080058 HFS_syncer_timed 0x308005C HFS_ScanUnmapBlocks 0x3080060 HFS_issue_unmap +0x3080064 HFS_KR 0x30A0000 SMB_vop_mount 0x30A0004 SMB_vop_unmount 0x30A0008 SMB_vop_root @@ -996,7 +1005,7 @@ 0x40c02bc BSC_obs_gc_control 0x40c02c0 BSC_add_profil 0x40c02c4 BSC_#177 -0x40c02c8 BSC_#178 +0x40c02c8 BSC_kdebug_trace_string 0x40c02cc BSC_kdebug_trace64 0x40c02d0 BSC_kdebug_trace 0x40c02d4 BSC_setgid @@ -1206,11 +1215,11 @@ 0x40c0610 BSC_mac_get_fd 0x40c0614 BSC_mac_set_fd 0x40c0618 BSC_mac_get_pid -0x40c061c BSC_mac_get_lcid -0x40c0620 
BSC_mac_get_lctx -0x40c0624 BSC_mac_set_lctx -0x40c0628 BSC_setlcid -0x40c062c BSC_getlcid +0x40c061c BSC_#391 +0x40c0620 BSC_#392 +0x40c0624 BSC_#393 +0x40c0628 BSC_#394 +0x40c062c BSC_#395 0x40c0630 BSC_read_nocancel 0x40c0634 BSC_write_nocancel 0x40c0638 BSC_open_nocancel @@ -1273,6 +1282,8 @@ 0x40c071c BSC_vfs_purge 0x40c0720 BSC_sfi_ctl 0x40c0724 BSC_sfi_pidctl +0x40c0728 BSC_coalition +0x40c072c BSC_coalition_info 0x40c0734 BSC_getattrlistbulk 0x40c073c BSC_openat 0x40c0740 BSC_openat_nocancel @@ -1292,6 +1303,8 @@ 0x40c0784 BSC_sendmsg_x 0x40c0788 BSC_thread_selfusage 0x40c07a4 BSC_mremap_encrypted +0x40c07b8 BSC_reserved +0x40c07cc BSC_work_interval_ctl 0x40e0104 BSC_msync_extended_info 0x40e0264 BSC_pread_extended_info 0x40e0268 BSC_pwrite_extended_info @@ -1567,6 +1580,7 @@ 0x7000004 TRACE_DATA_NEWTHREAD 0x7000008 TRACE_DATA_EXEC 0x700000c TRACE_DATA_THREAD_TERMINATE +0x7010000 TRACE_STRING_GLOBAL 0x7010004 TRACE_STRING_NEWTHREAD 0x7010008 TRACE_STRING_EXEC 0x7020000 TRACE_PANIC @@ -1990,6 +2004,13 @@ 0x25060014 PERF_KPC_ConfReg 0x25060018 PERF_KPC_Data32 0x2506001c PERF_KPC_ConfReg32 +0x25060020 PERF_KPC_Data_Thread +0x25060024 PERF_KPC_Data_Thread32 +0x25070000 PERF_KDBG_Handler +0x25080000 PERF_CS_Handler +0x25090000 PERF_SP_Handler +0x250a0000 PERF_MI_Sample +0x250a0004 PERF_MI_Data 0x26100008 imp_assertion_hold 0x2610000c imp_assertion_hold_ext 0x26100020 imp_assertion_externalize @@ -2069,7 +2090,7 @@ 0x2700E020 PERF_SRAMEMA_DOM2 0x2700E030 PERF_SRAMEMA_DOM3 0x2a100004 ATM_MIN_CALLED -0x2a100008 ATM_MIN_LINK_LIST +0x2a100008 ATM_LINK_LIST_TRIM 0x2a200004 ATM_VALUE_REPLACED 0x2a200008 ATM_VALUE_ADDED 0x2a300004 ATM_VALUE_UNREGISTERED diff --git a/bsd/kern/tty.c b/bsd/kern/tty.c index 2586c482f..b863e27ff 100644 --- a/bsd/kern/tty.c +++ b/bsd/kern/tty.c @@ -341,11 +341,6 @@ tty_unlock(struct tty *tp) int ttyopen(dev_t device, struct tty *tp) { - proc_t p = current_proc(); - struct pgrp *pg, *oldpg; - struct session *sessp, *oldsess; - struct tty 
*oldtp; - TTY_LOCK_OWNED(tp); /* debug assert */ tp->t_dev = device; @@ -357,57 +352,6 @@ ttyopen(dev_t device, struct tty *tp) bzero(&tp->t_winsize, sizeof(tp->t_winsize)); } - pg = proc_pgrp(p); - sessp = proc_session(p); - - /* - * First tty open affter setsid() call makes this tty its controlling - * tty, if the tty does not already have a session associated with it. - */ - if (SESS_LEADER(p, sessp) && /* the process is the session leader */ - sessp->s_ttyvp == NULL && /* but has no controlling tty */ - tp->t_session == NULL ) { /* and tty not controlling */ - session_lock(sessp); - if ((sessp->s_flags & S_NOCTTY) == 0) { /* and no O_NOCTTY */ - oldtp = sessp->s_ttyp; - ttyhold(tp); - sessp->s_ttyp = tp; - OSBitOrAtomic(P_CONTROLT, &p->p_flag); - session_unlock(sessp); - proc_list_lock(); - oldpg = tp->t_pgrp; - oldsess = tp->t_session; - if (oldsess != SESSION_NULL) - oldsess->s_ttypgrpid = NO_PID; - tp->t_session = sessp; - tp->t_pgrp = pg; - sessp->s_ttypgrpid = pg->pg_id; - proc_list_unlock(); - /* SAFE: All callers drop the lock on return */ - tty_unlock(tp); - if (oldpg != PGRP_NULL) - pg_rele(oldpg); - if (oldsess != SESSION_NULL) - session_rele(oldsess); - if (NULL != oldtp) - ttyfree(oldtp); - tty_lock(tp); - goto out; - } - session_unlock(sessp); - } - - /* SAFE: All callers drop the lock on return */ - tty_unlock(tp); - if (sessp != SESSION_NULL) - session_rele(sessp); - if (pg != PGRP_NULL) - pg_rele(pg); - tty_lock(tp); - -out: - - /* XXX may be an error code */ return (0); } @@ -1075,6 +1019,7 @@ int ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p) { int error = 0; + int bogusData = 1; struct uthread *ut; struct pgrp *pg, *oldpg; struct session *sessp, *oldsessp; @@ -1171,7 +1116,6 @@ ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p) } case TIOCSCONS: { /* Set current console device to this line */ - int bogusData = 1; data = (caddr_t) &bogusData; /* No break - Fall through to BSD code */ @@ 
-1408,21 +1352,58 @@ ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p) break; case TIOCSCTTY: /* become controlling tty */ /* Session ctty vnode pointer set in vnode layer. */ - pg = proc_pgrp(p); sessp = proc_session(p); - if (!SESS_LEADER(p, sessp) || - ((sessp->s_ttyvp || tp->t_session) && - (tp->t_session != sessp))) { + if (sessp == SESSION_NULL) { + error = EPERM; + goto out; + } + + /* + * This can only be done by a session leader. + */ + if (!SESS_LEADER(p, sessp)) { /* SAFE: All callers drop the lock on return */ tty_unlock(tp); - if (sessp != SESSION_NULL) - session_rele(sessp); - if (pg != PGRP_NULL) + session_rele(sessp); + tty_lock(tp); + error = EPERM; + goto out; + } + /* + * If this terminal is already the controlling terminal for the + * session, nothing to do here. + */ + if (tp->t_session == sessp) { + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); + session_rele(sessp); + tty_lock(tp); + error = 0; + goto out; + } + pg = proc_pgrp(p); + /* + * Deny if the terminal is already attached to another session or + * the session already has a terminal vnode. 
+ */ + session_lock(sessp); + if (sessp->s_ttyvp || tp->t_session) { + session_unlock(sessp); + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); + if (pg != PGRP_NULL) { pg_rele(pg); + } + session_rele(sessp); tty_lock(tp); error = EPERM; goto out; } + sessp->s_ttypgrpid = pg->pg_id; + oldtp = sessp->s_ttyp; + ttyhold(tp); + sessp->s_ttyp = tp; + session_unlock(sessp); proc_list_lock(); oldsessp = tp->t_session; oldpg = tp->t_pgrp; @@ -1430,14 +1411,8 @@ ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p) oldsessp->s_ttypgrpid = NO_PID; /* do not drop refs on sessp and pg as tp holds them */ tp->t_session = sessp; - sessp->s_ttypgrpid = pg->pg_id; tp->t_pgrp = pg; proc_list_unlock(); - session_lock(sessp); - oldtp = sessp->s_ttyp; - ttyhold(tp); - sessp->s_ttyp = tp; - session_unlock(sessp); OSBitOrAtomic(P_CONTROLT, &p->p_flag); /* SAFE: All callers drop the lock on return */ tty_unlock(tp); diff --git a/bsd/kern/tty_pty.c b/bsd/kern/tty_pty.c index f4bb4dac6..7c4f14e6a 100644 --- a/bsd/kern/tty_pty.c +++ b/bsd/kern/tty_pty.c @@ -148,11 +148,13 @@ pty_init(int n_ptys) int i; int j; + n_ptys = min(n_ptys, NPTY); /* clamp to avoid pt_ioctl overflow */ + /* create the pseudo tty device nodes */ for (j = 0; j < 10; j++) { for (i = 0; i < HEX_BASE; i++) { int m = j * HEX_BASE + i; - if (m == n_ptys) + if (m >= n_ptys) goto done; pt_ioctl[m].pt_devhandle = devfs_make_node(makedev(TTY_MAJOR, m), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, diff --git a/bsd/kern/ubc_subr.c b/bsd/kern/ubc_subr.c index 83785b788..fc62b1afc 100644 --- a/bsd/kern/ubc_subr.c +++ b/bsd/kern/ubc_subr.c @@ -69,6 +69,7 @@ #include /* last */ #include +#include #include #include @@ -144,14 +145,79 @@ cs_valid_range( return TRUE; } +typedef void (*cs_md_init)(void *ctx); +typedef void (*cs_md_update)(void *ctx, const void *data, size_t size); +typedef void (*cs_md_final)(void *hash, void *ctx); + +struct cs_hash { + uint8_t cs_type; + size_t cs_cd_size; + size_t 
cs_size; + size_t cs_digest_size; + cs_md_init cs_init; + cs_md_update cs_update; + cs_md_final cs_final; +}; + +static struct cs_hash cs_hash_sha1 = { + .cs_type = CS_HASHTYPE_SHA1, + .cs_cd_size = CS_SHA1_LEN, + .cs_size = CS_SHA1_LEN, + .cs_digest_size = SHA_DIGEST_LENGTH, + .cs_init = (cs_md_init)SHA1Init, + .cs_update = (cs_md_update)SHA1Update, + .cs_final = (cs_md_final)SHA1Final, +}; +#if CRYPTO_SHA2 +static struct cs_hash cs_hash_sha256 = { + .cs_type = CS_HASHTYPE_SHA256, + .cs_cd_size = SHA256_DIGEST_LENGTH, + .cs_size = SHA256_DIGEST_LENGTH, + .cs_digest_size = SHA256_DIGEST_LENGTH, + .cs_init = (cs_md_init)SHA256_Init, + .cs_update = (cs_md_update)SHA256_Update, + .cs_final = (cs_md_final)SHA256_Final, +}; +static struct cs_hash cs_hash_sha256_truncate = { + .cs_type = CS_HASHTYPE_SHA256_TRUNCATED, + .cs_cd_size = CS_SHA256_TRUNCATED_LEN, + .cs_size = CS_SHA256_TRUNCATED_LEN, + .cs_digest_size = SHA256_DIGEST_LENGTH, + .cs_init = (cs_md_init)SHA256_Init, + .cs_update = (cs_md_update)SHA256_Update, + .cs_final = (cs_md_final)SHA256_Final, +}; +#endif + +static struct cs_hash * +cs_find_md(uint8_t type) +{ + if (type == CS_HASHTYPE_SHA1) { + return &cs_hash_sha1; +#if CRYPTO_SHA2 + } else if (type == CS_HASHTYPE_SHA256) { + return &cs_hash_sha256; + } else if (type == CS_HASHTYPE_SHA256_TRUNCATED) { + return &cs_hash_sha256_truncate; +#endif + } + return NULL; +} + +union cs_hash_union { + SHA1_CTX sha1ctxt; + SHA256_CTX sha256ctx; +}; + + /* * Locate the CodeDirectory from an embedded signature blob */ const CS_CodeDirectory *findCodeDirectory( const CS_SuperBlob *embedded, - char *lower_bound, - char *upper_bound) + const char *lower_bound, + const char *upper_bound) { const CS_CodeDirectory *cd = NULL; @@ -209,9 +275,10 @@ CS_CodeDirectory *findCodeDirectory( static const unsigned char * hashes( const CS_CodeDirectory *cd, - unsigned page, - char *lower_bound, - char *upper_bound) + uint32_t page, + size_t hash_len, + const char *lower_bound, + const 
char *upper_bound) { const unsigned char *base, *top, *hash; uint32_t nCodeSlots = ntohl(cd->nCodeSlots); @@ -260,9 +327,9 @@ hashes( /* base = address of first hash covered by scatter */ base = (const unsigned char *)cd + ntohl(cd->hashOffset) + - hashindex * SHA1_RESULTLEN; + hashindex * hash_len; /* top = address of first hash after this scatter */ - top = base + scount * SHA1_RESULTLEN; + top = base + scount * hash_len; if (!cs_valid_range(base, top, lower_bound, upper_bound) || hashindex > nCodeSlots) { @@ -278,20 +345,20 @@ hashes( scatter++; } while(1); - hash = base + (page - sbase) * SHA1_RESULTLEN; + hash = base + (page - sbase) * hash_len; } else { base = (const unsigned char *)cd + ntohl(cd->hashOffset); - top = base + nCodeSlots * SHA1_RESULTLEN; + top = base + nCodeSlots * hash_len; if (!cs_valid_range(base, top, lower_bound, upper_bound) || page > nCodeSlots) { return NULL; } assert(page < nCodeSlots); - hash = base + page * SHA1_RESULTLEN; + hash = base + page * hash_len; } - if (!cs_valid_range(hash, hash + SHA1_RESULTLEN, + if (!cs_valid_range(hash, hash + hash_len, lower_bound, upper_bound)) { hash = NULL; } @@ -315,27 +382,31 @@ hashes( static int cs_validate_codedirectory(const CS_CodeDirectory *cd, size_t length) { + struct cs_hash *hashtype; if (length < sizeof(*cd)) return EBADEXEC; if (ntohl(cd->magic) != CSMAGIC_CODEDIRECTORY) return EBADEXEC; - if (cd->hashSize != SHA1_RESULTLEN) - return EBADEXEC; if (cd->pageSize != PAGE_SHIFT_4K) return EBADEXEC; - if (cd->hashType != CS_HASHTYPE_SHA1) + hashtype = cs_find_md(cd->hashType); + if (hashtype == NULL) return EBADEXEC; + if (cd->hashSize != hashtype->cs_cd_size) + return EBADEXEC; + + if (length < ntohl(cd->hashOffset)) return EBADEXEC; /* check that nSpecialSlots fits in the buffer in front of hashOffset */ - if (ntohl(cd->hashOffset) / SHA1_RESULTLEN < ntohl(cd->nSpecialSlots)) + if (ntohl(cd->hashOffset) / hashtype->cs_size < ntohl(cd->nSpecialSlots)) return EBADEXEC; /* check that 
codeslots fits in the buffer */ - if ((length - ntohl(cd->hashOffset)) / SHA1_RESULTLEN < ntohl(cd->nCodeSlots)) + if ((length - ntohl(cd->hashOffset)) / hashtype->cs_size < ntohl(cd->nCodeSlots)) return EBADEXEC; if (ntohl(cd->version) >= CS_SUPPORTSSCATTER && cd->scatterOffset) { @@ -343,8 +414,8 @@ cs_validate_codedirectory(const CS_CodeDirectory *cd, size_t length) if (length < ntohl(cd->scatterOffset)) return EBADEXEC; - SC_Scatter *scatter = (SC_Scatter *) - (((uint8_t *)cd) + ntohl(cd->scatterOffset)); + const SC_Scatter *scatter = (const SC_Scatter *) + (((const uint8_t *)cd) + ntohl(cd->scatterOffset)); uint32_t nPages = 0; /* @@ -378,7 +449,7 @@ cs_validate_codedirectory(const CS_CodeDirectory *cd, size_t length) /* identifier is NUL terminated string */ if (cd->identOffset) { - uint8_t *ptr = (uint8_t *)cd + ntohl(cd->identOffset); + const uint8_t *ptr = (const uint8_t *)cd + ntohl(cd->identOffset); if (memchr(ptr, 0, length - ntohl(cd->identOffset)) == NULL) return EBADEXEC; } @@ -388,7 +459,7 @@ cs_validate_codedirectory(const CS_CodeDirectory *cd, size_t length) if (length < ntohl(cd->teamOffset)) return EBADEXEC; - uint8_t *ptr = (uint8_t *)cd + ntohl(cd->teamOffset); + const uint8_t *ptr = (const uint8_t *)cd + ntohl(cd->teamOffset); if (memchr(ptr, 0, length - ntohl(cd->teamOffset)) == NULL) return EBADEXEC; } @@ -429,7 +500,7 @@ static int cs_validate_csblob(const uint8_t *addr, size_t length, const CS_CodeDirectory **rcd) { - const CS_GenericBlob *blob = (const CS_GenericBlob *)(void *)addr; + const CS_GenericBlob *blob = (const CS_GenericBlob *)(const void *)addr; int error; *rcd = NULL; @@ -458,7 +529,7 @@ cs_validate_csblob(const uint8_t *addr, size_t length, return EBADEXEC; const CS_GenericBlob *subBlob = - (const CS_GenericBlob *)(void *)(addr + ntohl(blobIndex->offset)); + (const CS_GenericBlob *)(const void *)(addr + ntohl(blobIndex->offset)); size_t subLength = length - ntohl(blobIndex->offset); @@ -477,7 +548,7 @@ 
cs_validate_csblob(const uint8_t *addr, size_t length, } else if (ntohl(blob->magic) == CSMAGIC_CODEDIRECTORY) { - if ((error = cs_validate_codedirectory((const CS_CodeDirectory *)(void *)addr, length)) != 0) + if ((error = cs_validate_codedirectory((const CS_CodeDirectory *)(const void *)addr, length)) != 0) return error; *rcd = (const CS_CodeDirectory *)blob; } else { @@ -495,7 +566,7 @@ cs_validate_csblob(const uint8_t *addr, size_t length, * * Find an blob from the superblob/code directory. The blob must have * been been validated by cs_validate_csblob() before calling - * this. Use cs_find_blob() instead. + * this. Use csblob_find_blob() instead. * * Will also find a "raw" code directory if its stored as well as * searching the superblob. @@ -509,10 +580,10 @@ cs_validate_csblob(const uint8_t *addr, size_t length, * NULL Buffer not found */ -static const CS_GenericBlob * -cs_find_blob_bytes(const uint8_t *addr, size_t length, uint32_t type, uint32_t magic) +const CS_GenericBlob * +csblob_find_blob_bytes(const uint8_t *addr, size_t length, uint32_t type, uint32_t magic) { - const CS_GenericBlob *blob = (const CS_GenericBlob *)(void *)addr; + const CS_GenericBlob *blob = (const CS_GenericBlob *)(const void *)addr; if (ntohl(blob->magic) == CSMAGIC_EMBEDDED_SIGNATURE) { const CS_SuperBlob *sb = (const CS_SuperBlob *)blob; @@ -524,7 +595,7 @@ cs_find_blob_bytes(const uint8_t *addr, size_t length, uint32_t type, uint32_t m uint32_t offset = ntohl(sb->index[n].offset); if (length - sizeof(const CS_GenericBlob) < offset) return NULL; - blob = (const CS_GenericBlob *)(void *)(addr + offset); + blob = (const CS_GenericBlob *)(const void *)(addr + offset); if (ntohl(blob->magic) != magic) continue; return blob; @@ -538,167 +609,70 @@ cs_find_blob_bytes(const uint8_t *addr, size_t length, uint32_t type, uint32_t m const CS_GenericBlob * -cs_find_blob(struct cs_blob *csblob, uint32_t type, uint32_t magic) +csblob_find_blob(struct cs_blob *csblob, uint32_t type, uint32_t 
magic) { if ((csblob->csb_flags & CS_VALID) == 0) return NULL; - return cs_find_blob_bytes((const uint8_t *)csblob->csb_mem_kaddr, csblob->csb_mem_size, type, magic); + return csblob_find_blob_bytes((const uint8_t *)csblob->csb_mem_kaddr, csblob->csb_mem_size, type, magic); } static const uint8_t * -cs_find_special_slot(const CS_CodeDirectory *cd, uint32_t slot) +find_special_slot(const CS_CodeDirectory *cd, size_t slotsize, uint32_t slot) { /* there is no zero special slot since that is the first code slot */ if (ntohl(cd->nSpecialSlots) < slot || slot == 0) return NULL; - return ((const uint8_t *)cd + ntohl(cd->hashOffset) - (SHA1_RESULTLEN * slot)); + return ((const uint8_t *)cd + ntohl(cd->hashOffset) - (slotsize * slot)); } -/* - * CODESIGNING - * End of routines to navigate code signing data structures in the kernel. - */ - -/* - * ENTITLEMENTS - * Routines to navigate entitlements in the kernel. - */ - -/* Retrieve the entitlements blob for a process. - * Returns: - * EINVAL no text vnode associated with the process - * EBADEXEC invalid code signing data - * 0 no error occurred - * - * On success, out_start and out_length will point to the - * entitlements blob if found; or will be set to NULL/zero - * if there were no entitlements. 
- */ - -static uint8_t sha1_zero[SHA1_RESULTLEN] = { 0 }; +static uint8_t cshash_zero[CS_HASH_MAX_SIZE] = { 0 }; int -cs_entitlements_blob_get(proc_t p, void **out_start, size_t *out_length) +csblob_get_entitlements(struct cs_blob *csblob, void **out_start, size_t *out_length) { - uint8_t computed_hash[SHA1_RESULTLEN]; + uint8_t computed_hash[CS_HASH_MAX_SIZE]; const CS_GenericBlob *entitlements; const CS_CodeDirectory *code_dir; - struct cs_blob *csblob; const uint8_t *embedded_hash; - SHA1_CTX context; + union cs_hash_union context; *out_start = NULL; *out_length = 0; - if (NULL == p->p_textvp) - return EINVAL; + if (csblob->csb_hashtype == NULL || csblob->csb_hashtype->cs_digest_size > sizeof(computed_hash)) + return EBADEXEC; - if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL) + if ((code_dir = (const CS_CodeDirectory *)csblob_find_blob(csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL) return 0; - if ((code_dir = (const CS_CodeDirectory *)cs_find_blob(csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL) - return 0; - - entitlements = cs_find_blob(csblob, CSSLOT_ENTITLEMENTS, CSMAGIC_EMBEDDED_ENTITLEMENTS); - embedded_hash = cs_find_special_slot(code_dir, CSSLOT_ENTITLEMENTS); + entitlements = csblob_find_blob(csblob, CSSLOT_ENTITLEMENTS, CSMAGIC_EMBEDDED_ENTITLEMENTS); + embedded_hash = find_special_slot(code_dir, csblob->csb_hashtype->cs_size, CSSLOT_ENTITLEMENTS); if (embedded_hash == NULL) { if (entitlements) return EBADEXEC; return 0; - } else if (entitlements == NULL && memcmp(embedded_hash, sha1_zero, SHA1_RESULTLEN) != 0) { + } else if (entitlements == NULL && memcmp(embedded_hash, cshash_zero, csblob->csb_hashtype->cs_size) != 0) { return EBADEXEC; } - SHA1Init(&context); - SHA1Update(&context, entitlements, ntohl(entitlements->length)); - SHA1Final(computed_hash, &context); - if (memcmp(computed_hash, embedded_hash, SHA1_RESULTLEN) != 0) + csblob->csb_hashtype->cs_init(&context); + 
csblob->csb_hashtype->cs_update(&context, entitlements, ntohl(entitlements->length)); + csblob->csb_hashtype->cs_final(computed_hash, &context); + + if (memcmp(computed_hash, embedded_hash, csblob->csb_hashtype->cs_size) != 0) return EBADEXEC; - *out_start = (void *)entitlements; + *out_start = __DECONST(void *, entitlements); *out_length = ntohl(entitlements->length); return 0; } -/* Retrieve the codesign identity for a process. - * Returns: - * NULL an error occured - * string the cs_identity - */ - -const char * -cs_identity_get(proc_t p) -{ - const CS_CodeDirectory *code_dir; - struct cs_blob *csblob; - - if (NULL == p->p_textvp) - return NULL; - - if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL) - return NULL; - - if ((code_dir = (const CS_CodeDirectory *)cs_find_blob(csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL) - return NULL; - - if (code_dir->identOffset == 0) - return NULL; - - return ((const char *)code_dir) + ntohl(code_dir->identOffset); -} - - - -/* Retrieve the codesign blob for a process. - * Returns: - * EINVAL no text vnode associated with the process - * 0 no error occurred - * - * On success, out_start and out_length will point to the - * cms blob if found; or will be set to NULL/zero - * if there were no blob. - */ - -int -cs_blob_get(proc_t p, void **out_start, size_t *out_length) -{ - struct cs_blob *csblob; - - *out_start = NULL; - *out_length = 0; - - if (NULL == p->p_textvp) - return EINVAL; - - if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL) - return 0; - - *out_start = (void *)csblob->csb_mem_kaddr; - *out_length = csblob->csb_mem_size; - - return 0; -} - -uint8_t * -cs_get_cdhash(struct proc *p) -{ - struct cs_blob *csblob; - - if (NULL == p->p_textvp) - return NULL; - - if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL) - return NULL; - - return csblob->csb_sha1; -} - /* - * ENTITLEMENTS - * End of routines to navigate entitlements in the kernel. 
+ * CODESIGNING + * End of routines to navigate code signing data structures in the kernel. */ @@ -921,11 +895,7 @@ ubc_info_deallocate(struct ubc_info *uip) ubc_info_free(uip); } -/* - * This should be public but currently it is only used below so we - * defer making that change. - */ -static errno_t mach_to_bsd_errno(kern_return_t mach_err) +errno_t mach_to_bsd_errno(kern_return_t mach_err) { switch (mach_err) { case KERN_SUCCESS: @@ -2736,6 +2706,40 @@ SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_b SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_peak, 0, "Peak size of code signature blobs"); SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_max, 0, "Size of biggest code signature blob"); +/* + * Function: csblob_parse_teamid + * + * Description: This function returns a pointer to the team id + stored within the codedirectory of the csblob. + If the codedirectory predates team-ids, it returns + NULL. + This does not copy the name but returns a pointer to + it within the CD. Subsequently, the CD must be + available when this is used. 
+*/ + +static const char * +csblob_parse_teamid(struct cs_blob *csblob) +{ + const CS_CodeDirectory *cd; + + if ((cd = (const CS_CodeDirectory *)csblob_find_blob( + csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL) + return NULL; + + if (ntohl(cd->version) < CS_SUPPORTSTEAMID) + return NULL; + + if (cd->teamOffset == 0) + return NULL; + + const char *name = ((const char *)cd) + ntohl(cd->teamOffset); + if (cs_debug > 1) + printf("found team-id %s in cdblob\n", name); + + return name; +} + kern_return_t ubc_cs_blob_allocate( @@ -2746,9 +2750,9 @@ ubc_cs_blob_allocate( #if CS_BLOB_PAGEABLE *blob_size_p = round_page(*blob_size_p); - kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p); + kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p, VM_KERN_MEMORY_SECURITY); #else /* CS_BLOB_PAGEABLE */ - *blob_addr_p = (vm_offset_t) kalloc(*blob_size_p); + *blob_addr_p = (vm_offset_t) kalloc_tag(*blob_size_p, VM_KERN_MEMORY_SECURITY); if (*blob_addr_p == 0) { kr = KERN_NO_SPACE; } else { @@ -2769,120 +2773,6 @@ ubc_cs_blob_deallocate( kfree((void *) blob_addr, blob_size); #endif /* CS_BLOB_PAGEABLE */ } - -int -ubc_cs_sigpup_add( - struct vnode *vp, - vm_address_t address, - vm_size_t size) -{ - kern_return_t kr; - struct ubc_info *uip; - struct cs_blob *blob; - memory_object_control_t control; - const CS_CodeDirectory *cd; - int error; - - control = ubc_getobject(vp, UBC_FLAGS_NONE); - if (control == MEMORY_OBJECT_CONTROL_NULL) - return KERN_INVALID_ARGUMENT; - - if (memory_object_is_signed(control)) - return 0; - - blob = (struct cs_blob *) kalloc(sizeof (struct cs_blob)); - if (blob == NULL) - return ENOMEM; - - /* fill in the new blob */ - blob->csb_cpu_type = CPU_TYPE_ANY; - blob->csb_base_offset = 0; - blob->csb_mem_size = size; - blob->csb_mem_offset = 0; - blob->csb_mem_handle = IPC_PORT_NULL; - blob->csb_mem_kaddr = address; - blob->csb_sigpup = 1; - blob->csb_platform_binary = 0; - blob->csb_teamid = NULL; - - /* - * Validate the blob's contents - */ 
- cd = findCodeDirectory( - (const CS_SuperBlob *) address, - (char *) address, - (char *) address + blob->csb_mem_size); - if (cd == NULL) { - /* no code directory => useless blob ! */ - error = EINVAL; - goto out; - } - - blob->csb_flags = ntohl(cd->flags) | CS_VALID; - blob->csb_end_offset = round_page_4K(ntohl(cd->codeLimit)); - if((ntohl(cd->version) >= CS_SUPPORTSSCATTER) && (ntohl(cd->scatterOffset))) { - const SC_Scatter *scatter = (const SC_Scatter*) - ((const char*)cd + ntohl(cd->scatterOffset)); - blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE_4K; - } else { - blob->csb_start_offset = (blob->csb_end_offset - (ntohl(cd->nCodeSlots) * PAGE_SIZE_4K)); - } - - /* - * We don't need to check with the policy module, since the input data is supposed to be already checked - */ - - vnode_lock(vp); - if (! UBCINFOEXISTS(vp)) { - vnode_unlock(vp); - if (cs_debug) - printf("out ubc object\n"); - error = ENOENT; - goto out; - } - uip = vp->v_ubcinfo; - - /* someone raced us to adding the code directory */ - if (uip->cs_blobs != NULL) { - if (cs_debug) - printf("sigpup: vnode already have CD ?\n"); - vnode_unlock(vp); - error = EEXIST; - goto out; - } - - blob->csb_next = uip->cs_blobs; - uip->cs_blobs = blob; - - OSAddAtomic(+1, &cs_blob_count); - OSAddAtomic((SInt32) +blob->csb_mem_size, &cs_blob_size); - - /* mark this vnode's VM object as having "signed pages" */ - kr = memory_object_signed(uip->ui_control, TRUE); - if (kr != KERN_SUCCESS) { - vnode_unlock(vp); - if (cs_debug) - printf("sigpup: not signable ?\n"); - error = ENOENT; - goto out; - } - - vnode_unlock(vp); - - error = 0; -out: - if (error) { - if (cs_debug) - printf("sigpup: not signable ?\n"); - /* we failed; release what we allocated */ - if (blob) { - kfree(blob, sizeof (*blob)); - blob = NULL; - } - } - - return error; -} int ubc_cs_blob_add( @@ -2891,7 +2781,8 @@ ubc_cs_blob_add( off_t base_offset, vm_address_t addr, vm_size_t size, - __unused int flags) + __unused int flags, + struct 
cs_blob **ret_blob) { kern_return_t kr; struct ubc_info *uip; @@ -2901,12 +2792,14 @@ ubc_cs_blob_add( memory_object_size_t blob_size; const CS_CodeDirectory *cd; off_t blob_start_offset, blob_end_offset; - SHA1_CTX sha1ctxt; + union cs_hash_union mdctx; boolean_t record_mtime; - int is_platform_binary; + int cs_flags; record_mtime = FALSE; - is_platform_binary = 0; + cs_flags = 0; + if (ret_blob) + *ret_blob = NULL; blob_handle = IPC_PORT_NULL; @@ -2943,7 +2836,6 @@ ubc_cs_blob_add( /* fill in the new blob */ blob->csb_cpu_type = cputype; - blob->csb_sigpup = 0; blob->csb_base_offset = base_offset; blob->csb_mem_size = size; blob->csb_mem_offset = 0; @@ -2951,6 +2843,7 @@ ubc_cs_blob_add( blob->csb_mem_kaddr = addr; blob->csb_flags = 0; blob->csb_platform_binary = 0; + blob->csb_platform_path = 0; blob->csb_teamid = NULL; /* @@ -2964,12 +2857,23 @@ ubc_cs_blob_add( blob->csb_flags = 0; blob->csb_start_offset = 0; blob->csb_end_offset = 0; - memset(blob->csb_sha1, 0, SHA1_RESULTLEN); + memset(blob->csb_cdhash, 0, sizeof(blob->csb_cdhash)); /* let the vnode checker determine if the signature is valid or not */ } else { - const unsigned char *sha1_base; - int sha1_size; - + const unsigned char *md_base; + uint8_t hash[CS_HASH_MAX_SIZE]; + int md_size; + + blob->csb_hashtype = cs_find_md(cd->hashType); + if (blob->csb_hashtype == NULL || blob->csb_hashtype->cs_digest_size > sizeof(hash)) + panic("validated CodeDirectory but unsupported type"); + if (blob->csb_hashtype->cs_cd_size < CS_CDHASH_LEN) { + if (cs_debug) + printf("cs_cd_size is too small for a cdhash\n"); + error = EINVAL; + goto out; + } + blob->csb_flags = (ntohl(cd->flags) & CS_ALLOWED_MACHO) | CS_VALID; blob->csb_end_offset = round_page_4K(ntohl(cd->codeLimit)); if((ntohl(cd->version) >= CS_SUPPORTSSCATTER) && (ntohl(cd->scatterOffset))) { @@ -2977,15 +2881,17 @@ ubc_cs_blob_add( ((const char*)cd + ntohl(cd->scatterOffset)); blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE_4K; } else { - 
blob->csb_start_offset = (blob->csb_end_offset - - (ntohl(cd->nCodeSlots) * PAGE_SIZE_4K)); + blob->csb_start_offset = 0; } - /* compute the blob's SHA1 hash */ - sha1_base = (const unsigned char *) cd; - sha1_size = ntohl(cd->length); - SHA1Init(&sha1ctxt); - SHA1Update(&sha1ctxt, sha1_base, sha1_size); - SHA1Final(blob->csb_sha1, &sha1ctxt); + /* compute the blob's cdhash */ + md_base = (const unsigned char *) cd; + md_size = ntohl(cd->length); + + blob->csb_hashtype->cs_init(&mdctx); + blob->csb_hashtype->cs_update(&mdctx, md_base, md_size); + blob->csb_hashtype->cs_final(hash, &mdctx); + + memcpy(blob->csb_cdhash, hash, CS_CDHASH_LEN); } /* @@ -2994,16 +2900,15 @@ ubc_cs_blob_add( #if CONFIG_MACF error = mac_vnode_check_signature(vp, base_offset, - blob->csb_sha1, - (const void*)cd, - size, flags, - &is_platform_binary); + blob->csb_cdhash, + (const void*)addr, size, + flags, &cs_flags); if (error) { if (cs_debug) printf("check_signature[pid: %d], error = %d\n", current_proc()->p_pid, error); goto out; } - if ((flags & MAC_VNODE_CHECK_DYLD_SIM) && !is_platform_binary) { + if ((flags & MAC_VNODE_CHECK_DYLD_SIM) && !(cs_flags & CS_PLATFORM_BINARY)) { if (cs_debug) printf("check_signature[pid: %d], is not apple signed\n", current_proc()->p_pid); error = EPERM; @@ -3011,13 +2916,15 @@ ubc_cs_blob_add( } #endif - if (is_platform_binary) { + if (cs_flags & CS_PLATFORM_BINARY) { if (cs_debug > 1) printf("check_signature[pid: %d]: platform binary\n", current_proc()->p_pid); blob->csb_platform_binary = 1; + blob->csb_platform_path = !!(cs_flags & CS_PLATFORM_PATH); } else { blob->csb_platform_binary = 0; - blob->csb_teamid = csblob_get_teamid(blob); + blob->csb_platform_path = 0; + blob->csb_teamid = csblob_parse_teamid(blob); if (cs_debug > 1) { if (blob->csb_teamid) printf("check_signature[pid: %d]: team-id is %s\n", current_proc()->p_pid, blob->csb_teamid); @@ -3094,9 +3001,9 @@ ubc_cs_blob_add( (blob->csb_cpu_type == CPU_TYPE_ANY || oblob->csb_cpu_type == 
CPU_TYPE_ANY || blob->csb_cpu_type == oblob->csb_cpu_type) && - !bcmp(blob->csb_sha1, - oblob->csb_sha1, - SHA1_RESULTLEN)) { + !bcmp(blob->csb_cdhash, + oblob->csb_cdhash, + CS_CDHASH_LEN)) { /* * We already have this blob: * we'll return success but @@ -3112,6 +3019,8 @@ ubc_cs_blob_add( oblob->csb_cpu_type = cputype; } vnode_unlock(vp); + if (ret_blob) + *ret_blob = oblob; error = EAGAIN; goto out; } else { @@ -3185,6 +3094,9 @@ ubc_cs_blob_add( vnode_mtime(vp, &uip->cs_mtime, vfs_context_current()); } + if (ret_blob) + *ret_blob = blob; + error = 0; /* success ! */ out: @@ -3219,6 +3131,42 @@ ubc_cs_blob_add( return error; } +void +csvnode_print_debug(struct vnode *vp) +{ + const char *name = NULL; + struct ubc_info *uip; + struct cs_blob *blob; + + name = vnode_getname_printable(vp); + if (name) { + printf("csvnode: name: %s\n", name); + vnode_putname_printable(name); + } + + vnode_lock_spin(vp); + + if (! UBCINFOEXISTS(vp)) { + blob = NULL; + goto out; + } + + uip = vp->v_ubcinfo; + for (blob = uip->cs_blobs; blob != NULL; blob = blob->csb_next) { + printf("csvnode: range: %lu -> %lu flags: 0x%08x platform: %s path: %s team: %s\n", + (unsigned long)blob->csb_start_offset, + (unsigned long)blob->csb_end_offset, + blob->csb_flags, + blob->csb_platform_binary ? "yes" : "no", + blob->csb_platform_path ? "yes" : "no", + blob->csb_teamid ? 
blob->csb_teamid : ""); + } + +out: + vnode_unlock(vp); + +} + struct cs_blob * ubc_cs_blob_get( struct vnode *vp, @@ -3253,10 +3201,6 @@ ubc_cs_blob_get( } } - if (cs_debug && blob != NULL && blob->csb_sigpup) { - printf("found sig pup blob\n"); - } - out: vnode_unlock(vp); @@ -3273,7 +3217,7 @@ ubc_cs_free( blob != NULL; blob = next_blob) { next_blob = blob->csb_next; - if (blob->csb_mem_kaddr != 0 && !blob->csb_sigpup) { + if (blob->csb_mem_kaddr != 0) { ubc_cs_blob_deallocate(blob->csb_mem_kaddr, blob->csb_mem_size); blob->csb_mem_kaddr = 0; @@ -3322,7 +3266,7 @@ ubc_cs_blob_revalidate( { int error = 0; #if CONFIG_MACF - int is_platform_binary = 0; + int cs_flags = 0; #endif const CS_CodeDirectory *cd = NULL; @@ -3339,7 +3283,9 @@ ubc_cs_blob_revalidate( /* callout to mac_vnode_check_signature */ #if CONFIG_MACF - error = mac_vnode_check_signature(vp, blob->csb_base_offset, blob->csb_sha1, (const void*)cd, blob->csb_cpu_type, flags, &is_platform_binary); + error = mac_vnode_check_signature(vp, blob->csb_base_offset, blob->csb_cdhash, + (const void*)blob->csb_mem_kaddr, (int)blob->csb_mem_size, + flags, &cs_flags); if (cs_debug && error) { printf("revalidate: check_signature[pid: %d], error = %d\n", current_proc()->p_pid, error); } @@ -3430,8 +3376,9 @@ cs_validate_page( const void *data, unsigned *tainted) { - SHA1_CTX sha1ctxt; - unsigned char actual_hash[SHA1_RESULTLEN]; + union cs_hash_union mdctx; + struct cs_hash *hashtype = NULL; + unsigned char actual_hash[CS_HASH_MAX_SIZE]; unsigned char expected_hash[SHA1_RESULTLEN]; boolean_t found_hash; struct cs_blob *blobs, *blob; @@ -3442,7 +3389,7 @@ cs_validate_page( off_t offset; /* page offset in the file */ size_t size; off_t codeLimit = 0; - char *lower_bound, *upper_bound; + const char *lower_bound, *upper_bound; vm_offset_t kaddr, blob_addr; vm_size_t ksize; kern_return_t kr; @@ -3487,8 +3434,6 @@ cs_validate_page( break; } } - if (blob->csb_sigpup && cs_debug) - printf("checking for a sigpup CD\n"); 
blob_addr = kaddr + blob->csb_mem_offset; @@ -3498,43 +3443,32 @@ cs_validate_page( embedded = (const CS_SuperBlob *) blob_addr; cd = findCodeDirectory(embedded, lower_bound, upper_bound); if (cd != NULL) { - if (cd->pageSize != PAGE_SHIFT_4K || - cd->hashType != CS_HASHTYPE_SHA1 || - cd->hashSize != SHA1_RESULTLEN) { - /* bogus blob ? */ - if (blob->csb_sigpup && cs_debug) - printf("page foo bogus sigpup CD\n"); - continue; - } + /* all CD's that have been injected is already validated */ offset = page_offset - blob->csb_base_offset; if (offset < blob->csb_start_offset || offset >= blob->csb_end_offset) { /* our page is not covered by this blob */ - if (blob->csb_sigpup && cs_debug) - printf("OOB sigpup CD\n"); continue; } + hashtype = blob->csb_hashtype; + if (hashtype == NULL) + panic("unknown hash type ?"); + if (hashtype->cs_digest_size > sizeof(actual_hash)) + panic("hash size too large"); + codeLimit = ntohl(cd->codeLimit); - if (blob->csb_sigpup && cs_debug) - printf("sigpup codesize %d\n", (int)codeLimit); - hash = hashes(cd, (unsigned)(offset>>PAGE_SHIFT_4K), + hash = hashes(cd, (uint32_t)(offset>>PAGE_SHIFT_4K), + hashtype->cs_size, lower_bound, upper_bound); if (hash != NULL) { - bcopy(hash, expected_hash, - sizeof (expected_hash)); + bcopy(hash, expected_hash, sizeof(expected_hash)); found_hash = TRUE; - if (blob->csb_sigpup && cs_debug) - printf("sigpup hash\n"); } break; - } else { - if (blob->csb_sigpup && cs_debug) - printf("sig pup had no valid CD\n"); - } } @@ -3567,15 +3501,15 @@ cs_validate_page( size = (size_t) (codeLimit & PAGE_MASK_4K); *tainted |= CS_VALIDATE_NX; } - /* compute the actual page's SHA1 hash */ - SHA1Init(&sha1ctxt); - SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size); - SHA1Final(actual_hash, &sha1ctxt); + + hashtype->cs_init(&mdctx); + hashtype->cs_update(&mdctx, data, size); + hashtype->cs_final(actual_hash, &mdctx); asha1 = (const uint32_t *) actual_hash; esha1 = (const uint32_t *) expected_hash; - if 
(bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) { + if (bcmp(expected_hash, actual_hash, hashtype->cs_cd_size) != 0) { if (cs_debug) { printf("CODE SIGNING: cs_validate_page: " "mobj %p off 0x%llx size 0x%lx: " @@ -3633,7 +3567,7 @@ ubc_cs_getcdhash( ret = EBADEXEC; /* XXX any better error ? */ } else { /* get the SHA1 hash of that blob */ - bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1)); + bcopy(blob->csb_cdhash, cdhash, sizeof (blob->csb_cdhash)); ret = 0; } diff --git a/bsd/kern/uipc_domain.c b/bsd/kern/uipc_domain.c index 9c5801dde..7fde6ee3e 100644 --- a/bsd/kern/uipc_domain.c +++ b/bsd/kern/uipc_domain.c @@ -104,6 +104,19 @@ decl_lck_mtx_data(static, domain_timeout_mtx); static u_int64_t _net_uptime; +#if (DEVELOPMENT || DEBUG) + +SYSCTL_DECL(_kern_ipc); + +static int sysctl_do_drain_domains SYSCTL_HANDLER_ARGS; + +SYSCTL_PROC(_kern_ipc, OID_AUTO, do_drain_domains, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, + 0, 0, + sysctl_do_drain_domains, "I", "force manual drain domains"); + +#endif /* DEVELOPMENT || DEBUG */ + static void pr_init_old(struct protosw *pp, struct domain *dp) { @@ -1052,3 +1065,24 @@ domain_unguard_release(domain_unguard_t unguard) else lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED); } + +#if (DEVELOPMENT || DEBUG) + +static int +sysctl_do_drain_domains SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error; + int dummy = 0; + + error = sysctl_handle_int(oidp, &dummy, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + return (error); + + net_drain_domains(); + + return (0); +} + +#endif /* DEVELOPMENT || DEBUG */ + \ No newline at end of file diff --git a/bsd/kern/uipc_mbuf.c b/bsd/kern/uipc_mbuf.c index d5f73128e..be9cded69 100644 --- a/bsd/kern/uipc_mbuf.c +++ b/bsd/kern/uipc_mbuf.c @@ -231,7 +231,7 @@ * | | * v | * [freelist] ----------->>------------+ - * (objects never get purged to VM) + * (objects get purged to VM only on demand) * * b. 
Composite object: * @@ -293,7 +293,7 @@ * * The mclaudit[] array is allocated at initialization time, but its contents * get populated when the corresponding cluster is created. Because a page - * can be turned into NMBPBG number of mbufs, we preserve enough space for the + * can be turned into NMBPG number of mbufs, we preserve enough space for the * mbufs so that there is a 1-to-1 mapping between them. A page that never * gets (or has not yet) turned into mbufs will use only cl_audit[0] with the * remaining entries unused. For 16KB cluster, only one entry from the first @@ -402,22 +402,22 @@ typedef struct mcl_slab { * whenever a new piece of memory mapped in from the VM crosses the 1MB * boundary. */ -#define NSLABSPMB ((1 << MBSHIFT) >> PGSHIFT) /* 256 slabs/grp */ +#define NSLABSPMB ((1 << MBSHIFT) >> PAGE_SHIFT) typedef struct mcl_slabg { - mcl_slab_t slg_slab[NSLABSPMB]; /* group of slabs */ + mcl_slab_t *slg_slab; /* group of slabs */ } mcl_slabg_t; /* * Number of slabs needed to control a 16KB cluster object. */ -#define NSLABSP16KB (M16KCLBYTES >> PGSHIFT) +#define NSLABSP16KB (M16KCLBYTES >> PAGE_SHIFT) /* * Per-cluster audit structure. 
*/ typedef struct { - mcache_audit_t *cl_audit[NMBPBG]; /* array of audits */ + mcache_audit_t **cl_audit; /* array of audits */ } mcl_audit_t; typedef struct { @@ -476,8 +476,8 @@ static unsigned int slabgrp; /* # of entries in slabs table */ int nclusters; /* # of clusters for non-jumbo (legacy) sizes */ int njcl; /* # of clusters for jumbo sizes */ int njclbytes; /* size of a jumbo cluster */ -union mbigcluster *mbutl; /* first mapped cluster address */ -union mbigcluster *embutl; /* ending virtual address of mclusters */ +unsigned char *mbutl; /* first mapped cluster address */ +unsigned char *embutl; /* ending virtual address of mclusters */ int _max_linkhdr; /* largest link-level header */ int _max_protohdr; /* largest protocol header */ int max_hdr; /* largest link+protocol header */ @@ -788,7 +788,8 @@ static boolean_t mbuf_report_usage(mbuf_class_t); } #define MBUF_IN_MAP(addr) \ - ((void *)(addr) >= (void *)mbutl && (void *)(addr) < (void *)embutl) + ((unsigned char *)(addr) >= mbutl && \ + (unsigned char *)(addr) < embutl) #define MRANGE(addr) { \ if (!MBUF_IN_MAP(addr)) \ @@ -801,21 +802,28 @@ static boolean_t mbuf_report_usage(mbuf_class_t); #define MTOD(m, t) ((t)((m)->m_data)) /* - * Macros to obtain (4KB) cluster index and base cluster address. + * Macros to obtain page index given a base cluster address */ - -#define MTOBG(x) (((char *)(x) - (char *)mbutl) >> MBIGCLSHIFT) -#define BGTOM(x) ((union mbigcluster *)(mbutl + (x))) +#define MTOPG(x) (((unsigned char *)x - mbutl) >> PAGE_SHIFT) +#define PGTOM(x) (mbutl + (x << PAGE_SHIFT)) /* * Macro to find the mbuf index relative to a base. */ -#define MCLIDX(c, m) (((char *)(m) - (char *)(c)) >> MSIZESHIFT) +#define MBPAGEIDX(c, m) \ + (((unsigned char *)(m) - (unsigned char *)(c)) >> MSIZESHIFT) /* * Same thing for 2KB cluster index. 
*/ -#define CLBGIDX(c, m) (((char *)(m) - (char *)(c)) >> MCLSHIFT) +#define CLPAGEIDX(c, m) \ + (((unsigned char *)(m) - (unsigned char *)(c)) >> MCLSHIFT) + +/* + * Macro to find 4KB cluster index relative to a base + */ +#define BCLPAGEIDX(c, m) \ + (((unsigned char *)(m) - (unsigned char *)(c)) >> MBIGCLSHIFT) /* * Macros used during mbuf and cluster initialization. @@ -1178,7 +1186,7 @@ static void mbuf_table_init(void) { unsigned int b, c, s; - int m; + int m, config_mbuf_jumbo = 0; MALLOC(omb_stat, struct omb_stat *, OMB_STAT_SIZE(NELEM(mbuf_table)), M_TEMP, M_WAITOK | M_ZERO); @@ -1193,38 +1201,44 @@ mbuf_table_init(void) mbuf_table[m].mtbl_stats = &mb_stat->mbs_class[m]; #if CONFIG_MBUF_JUMBO - /* - * Set aside 1/3 of the mbuf cluster map for jumbo clusters; we do - * this only on platforms where jumbo cluster pool is enabled. - */ - njcl = nmbclusters / 3; - njclbytes = M16KCLBYTES; + config_mbuf_jumbo = 1; #endif /* CONFIG_MBUF_JUMBO */ + if (config_mbuf_jumbo == 1 || PAGE_SIZE == M16KCLBYTES) { + /* + * Set aside 1/3 of the mbuf cluster map for jumbo + * clusters; we do this only on platforms where jumbo + * cluster pool is enabled. + */ + njcl = nmbclusters / 3; + njclbytes = M16KCLBYTES; + } + /* * nclusters holds both the 2KB and 4KB pools, so ensure it's * a multiple of 4KB clusters. */ - nclusters = P2ROUNDDOWN(nmbclusters - njcl, NCLPBG); + nclusters = P2ROUNDDOWN(nmbclusters - njcl, NCLPG); if (njcl > 0) { /* * Each jumbo cluster takes 8 2KB clusters, so make * sure that the pool size is evenly divisible by 8; * njcl is in 2KB unit, hence treated as such. */ - njcl = P2ROUNDDOWN(nmbclusters - nclusters, 8); + njcl = P2ROUNDDOWN(nmbclusters - nclusters, NCLPJCL); /* Update nclusters with rounded down value of njcl */ - nclusters = P2ROUNDDOWN(nmbclusters - njcl, NCLPBG); + nclusters = P2ROUNDDOWN(nmbclusters - njcl, NCLPG); } /* - * njcl is valid only on platforms with 16KB jumbo clusters, where - * it is configured to 1/3 of the pool size. 
On these platforms, - * the remaining is used for 2KB and 4KB clusters. On platforms - * without 16KB jumbo clusters, the entire pool is used for both - * 2KB and 4KB clusters. A 4KB cluster can either be splitted into - * 16 mbufs, or into 2 2KB clusters. + * njcl is valid only on platforms with 16KB jumbo clusters or + * with 16KB pages, where it is configured to 1/3 of the pool + * size. On these platforms, the remaining is used for 2KB + * and 4KB clusters. On platforms without 16KB jumbo clusters, + * the entire pool is used for both 2KB and 4KB clusters. A 4KB + * cluster can either be splitted into 16 mbufs, or into 2 2KB + * clusters. * * +---+---+------------ ... -----------+------- ... -------+ * | c | b | s | njcl | @@ -1233,8 +1247,8 @@ mbuf_table_init(void) * 1/32th of the shared region is reserved for pure 2KB and 4KB * clusters (1/64th each.) */ - c = P2ROUNDDOWN((nclusters >> 6), 2); /* in 2KB unit */ - b = P2ROUNDDOWN((nclusters >> (6 + NCLPBGSHIFT)), 2); /* in 4KB unit */ + c = P2ROUNDDOWN((nclusters >> 6), NCLPG); /* in 2KB unit */ + b = P2ROUNDDOWN((nclusters >> (6 + NCLPBGSHIFT)), NBCLPG); /* in 4KB unit */ s = nclusters - (c + (b << NCLPBGSHIFT)); /* in 2KB unit */ /* @@ -1468,7 +1482,7 @@ mbinit(void) * mcl_slab_g_t units, each one representing a MB of memory. */ maxslabgrp = - (P2ROUNDUP(nmbclusters, (MBSIZE >> 11)) << MCLSHIFT) >> MBSHIFT; + (P2ROUNDUP(nmbclusters, (MBSIZE >> MCLSHIFT)) << MCLSHIFT) >> MBSHIFT; MALLOC(slabstbl, mcl_slabg_t **, maxslabgrp * sizeof (mcl_slabg_t *), M_TEMP, M_WAITOK | M_ZERO); VERIFY(slabstbl != NULL); @@ -1476,17 +1490,25 @@ mbinit(void) /* * Allocate audit structures, if needed: * - * maxclaudit = (maxslabgrp * 1024 * 1024) / 4096 + * maxclaudit = (maxslabgrp * 1024 * 1024) / PAGE_SIZE * * This yields mcl_audit_t units, each one representing a page. 
*/ PE_parse_boot_argn("mbuf_debug", &mbuf_debug, sizeof (mbuf_debug)); mbuf_debug |= mcache_getflags(); if (mbuf_debug & MCF_DEBUG) { - maxclaudit = ((maxslabgrp << MBSHIFT) >> PGSHIFT); + int l; + mcl_audit_t *mclad; + maxclaudit = ((maxslabgrp << MBSHIFT) >> PAGE_SHIFT); MALLOC(mclaudit, mcl_audit_t *, maxclaudit * sizeof (*mclaudit), M_TEMP, M_WAITOK | M_ZERO); VERIFY(mclaudit != NULL); + for (l = 0, mclad = mclaudit; l < maxclaudit; l++) { + MALLOC(mclad[l].cl_audit, mcache_audit_t **, + NMBPG * sizeof(mcache_audit_t *), + M_TEMP, M_WAITOK | M_ZERO); + VERIFY(mclad[l].cl_audit != NULL); + } mcl_audit_con_cache = mcache_create("mcl_audit_contents", AUDIT_CONTENTS_SIZE, sizeof (u_int64_t), 0, MCR_SLEEP); @@ -1507,7 +1529,7 @@ mbinit(void) mleak_activate(); /* Calculate the number of pages assigned to the cluster pool */ - mcl_pages = (nmbclusters * MCLBYTES) / CLBYTES; + mcl_pages = (nmbclusters << MCLSHIFT) / PAGE_SIZE; MALLOC(mcl_paddr, ppnum_t *, mcl_pages * sizeof (ppnum_t), M_TEMP, M_WAITOK); VERIFY(mcl_paddr != NULL); @@ -1516,9 +1538,8 @@ mbinit(void) mcl_paddr_base = IOMapperIOVMAlloc(mcl_pages); bzero((char *)mcl_paddr, mcl_pages * sizeof (ppnum_t)); - embutl = (union mbigcluster *) - ((void *)((unsigned char *)mbutl + (nmbclusters * MCLBYTES))); - VERIFY((((char *)embutl - (char *)mbutl) % MBIGCLBYTES) == 0); + embutl = (mbutl + (nmbclusters * MCLBYTES)); + VERIFY(((embutl - mbutl) % MBIGCLBYTES) == 0); /* Prime up the freelist */ PE_parse_boot_argn("initmcl", &initmcl, sizeof (initmcl)); @@ -1659,8 +1680,6 @@ slab_alloc(mbuf_class_t class, int wait) lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED); - VERIFY(class != MC_16KCL || njcl > 0); - /* This should always be NULL for us */ VERIFY(m_cobjlist(class) == NULL); @@ -1671,7 +1690,8 @@ slab_alloc(mbuf_class_t class, int wait) * more than one buffer chunks (e.g. mbuf slabs). For other * slabs, this probably doesn't make much of a difference. 
*/ - if ((class == MC_MBUF || class == MC_CL) && (wait & MCR_COMP)) + if ((class == MC_MBUF || class == MC_CL || class == MC_BIGCL) + && (wait & MCR_COMP)) sp = (mcl_slab_t *)TAILQ_LAST(&m_slablist(class), mcl_slhead); else sp = (mcl_slab_t *)TAILQ_FIRST(&m_slablist(class)); @@ -1688,16 +1708,12 @@ slab_alloc(mbuf_class_t class, int wait) (sp->sl_flags & (SLF_MAPPED | SLF_PARTIAL)) == SLF_MAPPED); buf = sp->sl_head; VERIFY(slab_inrange(sp, buf) && sp == slab_get(buf)); + sp->sl_head = buf->obj_next; + /* Increment slab reference */ + sp->sl_refcnt++; + + VERIFY(sp->sl_head != NULL || sp->sl_refcnt == sp->sl_chunks); - if (class == MC_MBUF) { - sp->sl_head = buf->obj_next; - VERIFY(sp->sl_head != NULL || sp->sl_refcnt == (NMBPBG - 1)); - } else if (class == MC_CL) { - sp->sl_head = buf->obj_next; - VERIFY(sp->sl_head != NULL || sp->sl_refcnt == (NCLPBG - 1)); - } else { - sp->sl_head = NULL; - } if (sp->sl_head != NULL && !slab_inrange(sp, sp->sl_head)) { slab_nextptr_panic(sp, sp->sl_head); /* In case sl_head is in the map but not in the slab */ @@ -1705,9 +1721,6 @@ slab_alloc(mbuf_class_t class, int wait) /* NOTREACHED */ } - /* Increment slab reference */ - sp->sl_refcnt++; - if (mclaudit != NULL) { mcache_audit_t *mca = mcl_audit_buf2mca(class, buf); mca->mca_uflags = 0; @@ -1719,20 +1732,20 @@ slab_alloc(mbuf_class_t class, int wait) if (class == MC_CL) { mbstat.m_clfree = (--m_infree(MC_CL)) + m_infree(MC_MBUF_CL); /* - * A 2K cluster slab can have at most NCLPBG references. + * A 2K cluster slab can have at most NCLPG references. 
*/ - VERIFY(sp->sl_refcnt >= 1 && sp->sl_refcnt <= NCLPBG && - sp->sl_chunks == NCLPBG && - sp->sl_len == m_maxsize(MC_BIGCL)); - VERIFY(sp->sl_refcnt < NCLPBG || sp->sl_head == NULL); + VERIFY(sp->sl_refcnt >= 1 && sp->sl_refcnt <= NCLPG && + sp->sl_chunks == NCLPG && sp->sl_len == PAGE_SIZE); + VERIFY(sp->sl_refcnt < NCLPG || sp->sl_head == NULL); } else if (class == MC_BIGCL) { mbstat.m_bigclfree = (--m_infree(MC_BIGCL)) + m_infree(MC_MBUF_BIGCL); /* - * A 4K cluster slab can have at most 1 reference. + * A 4K cluster slab can have NBCLPG references. */ - VERIFY(sp->sl_refcnt == 1 && sp->sl_chunks == 1 && - sp->sl_len == m_maxsize(class) && sp->sl_head == NULL); + VERIFY(sp->sl_refcnt >= 1 && sp->sl_chunks == NBCLPG && + sp->sl_len == PAGE_SIZE && + (sp->sl_refcnt < NBCLPG || sp->sl_head == NULL)); } else if (class == MC_16KCL) { mcl_slab_t *nsp; int k; @@ -1770,18 +1783,19 @@ slab_alloc(mbuf_class_t class, int wait) * Since we have incremented the reference count above, * an mbuf slab (formerly a 4KB cluster slab that was cut * up into mbufs) must have a reference count between 1 - * and NMBPBG at this point. + * and NMBPG at this point. 
*/ - VERIFY(sp->sl_refcnt >= 1 && sp->sl_refcnt <= NMBPBG && - sp->sl_chunks == NMBPBG && - sp->sl_len == m_maxsize(MC_BIGCL)); - VERIFY(sp->sl_refcnt < NMBPBG || sp->sl_head == NULL); + VERIFY(sp->sl_refcnt >= 1 && sp->sl_refcnt <= NMBPG && + sp->sl_chunks == NMBPG && + sp->sl_len == PAGE_SIZE); + VERIFY(sp->sl_refcnt < NMBPG || sp->sl_head == NULL); } /* If empty, remove this slab from the class's freelist */ if (sp->sl_head == NULL) { - VERIFY(class != MC_MBUF || sp->sl_refcnt == NMBPBG); - VERIFY(class != MC_CL || sp->sl_refcnt == NCLPBG); + VERIFY(class != MC_MBUF || sp->sl_refcnt == NMBPG); + VERIFY(class != MC_CL || sp->sl_refcnt == NCLPG); + VERIFY(class != MC_BIGCL || sp->sl_refcnt == NBCLPG); slab_remove(sp, class); } @@ -1795,11 +1809,14 @@ static void slab_free(mbuf_class_t class, mcache_obj_t *buf) { mcl_slab_t *sp; + boolean_t reinit_supercl = false; + mbuf_class_t super_class; lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED); VERIFY(class != MC_16KCL || njcl > 0); VERIFY(buf->obj_next == NULL); + sp = slab_get(buf); VERIFY(sp->sl_class == class && slab_inrange(sp, buf) && (sp->sl_flags & (SLF_MAPPED | SLF_PARTIAL)) == SLF_MAPPED); @@ -1813,20 +1830,17 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf) * A slab that has been splitted for 2KB clusters can have * at most 1 outstanding reference at this point. */ - VERIFY(sp->sl_refcnt >= 0 && sp->sl_refcnt <= (NCLPBG - 1) && - sp->sl_chunks == NCLPBG && - sp->sl_len == m_maxsize(MC_BIGCL)); - VERIFY(sp->sl_refcnt < (NCLPBG - 1) || + VERIFY(sp->sl_refcnt >= 0 && sp->sl_refcnt <= (NCLPG - 1) && + sp->sl_chunks == NCLPG && sp->sl_len == PAGE_SIZE); + VERIFY(sp->sl_refcnt < (NCLPG - 1) || (slab_is_detached(sp) && sp->sl_head == NULL)); } else if (class == MC_BIGCL) { - VERIFY(IS_P2ALIGNED(buf, MCLBYTES)); - /* - * A 4KB cluster slab can have at most 1 reference - * which must be 0 at this point. 
- */ - VERIFY(sp->sl_refcnt == 0 && sp->sl_chunks == 1 && - sp->sl_len == m_maxsize(class) && sp->sl_head == NULL); - VERIFY(slab_is_detached(sp)); + VERIFY(IS_P2ALIGNED(buf, MBIGCLBYTES)); + + /* A 4KB cluster slab can have NBCLPG references at most */ + VERIFY(sp->sl_refcnt >= 0 && sp->sl_chunks == NBCLPG); + VERIFY(sp->sl_refcnt < (NBCLPG - 1) || + (slab_is_detached(sp) && sp->sl_head == NULL)); } else if (class == MC_16KCL) { mcl_slab_t *nsp; int k; @@ -1834,7 +1848,7 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf) * A 16KB cluster takes NSLABSP16KB slabs, all must * now have 0 reference. */ - VERIFY(IS_P2ALIGNED(buf, MBIGCLBYTES)); + VERIFY(IS_P2ALIGNED(buf, PAGE_SIZE)); VERIFY(sp->sl_refcnt == 0 && sp->sl_chunks == 1 && sp->sl_len == m_maxsize(class) && sp->sl_head == NULL); VERIFY(slab_is_detached(sp)); @@ -1852,15 +1866,17 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf) } } else { /* - * A slab that has been splitted for mbufs has at most NMBPBG - * reference counts. Since we have decremented one reference - * above, it must now be between 0 and NMBPBG-1. + * A slab that has been splitted for mbufs has at most + * NMBPG reference counts. Since we have decremented + * one reference above, it must now be between 0 and + * NMBPG-1. 
*/ VERIFY(class == MC_MBUF); - VERIFY(sp->sl_refcnt >= 0 && sp->sl_refcnt <= (NMBPBG - 1) && - sp->sl_chunks == NMBPBG && - sp->sl_len == m_maxsize(MC_BIGCL)); - VERIFY(sp->sl_refcnt < (NMBPBG - 1) || + VERIFY(sp->sl_refcnt >= 0 && + sp->sl_refcnt <= (NMBPG - 1) && + sp->sl_chunks == NMBPG && + sp->sl_len == PAGE_SIZE); + VERIFY(sp->sl_refcnt < (NMBPG - 1) || (slab_is_detached(sp) && sp->sl_head == NULL)); } @@ -1872,7 +1888,8 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf) if (mclaudit != NULL) { mcache_audit_t *mca = mcl_audit_buf2mca(class, buf); if (mclverify) { - mcache_audit_free_verify(mca, buf, 0, m_maxsize(class)); + mcache_audit_free_verify(mca, buf, 0, + m_maxsize(class)); } mca->mca_uflags &= ~MB_SCVALID; } @@ -1883,6 +1900,7 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf) } else if (class == MC_BIGCL) { mbstat.m_bigclfree = (++m_infree(MC_BIGCL)) + m_infree(MC_MBUF_BIGCL); + buf->obj_next = sp->sl_head; } else if (class == MC_16KCL) { ++m_infree(MC_16KCL); } else { @@ -1892,24 +1910,25 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf) sp->sl_head = buf; /* - * If a slab has been splitted to either one which holds 2KB clusters, - * or one which holds mbufs, turn it back to one which holds a 4KB - * cluster. + * If a slab has been split to either one which holds 2KB clusters, + * or one which holds mbufs, turn it back to one which holds a + * 4 or 16 KB cluster depending on the page size. 
*/ + if (m_maxsize(MC_BIGCL) == PAGE_SIZE) { + super_class = MC_BIGCL; + } else { + VERIFY(PAGE_SIZE == m_maxsize(MC_16KCL)); + super_class = MC_16KCL; + } if (class == MC_MBUF && sp->sl_refcnt == 0 && - m_total(class) > m_minlimit(class) && - m_total(MC_BIGCL) < m_maxlimit(MC_BIGCL)) { - int i = NMBPBG; + m_total(class) >= (m_minlimit(class) + NMBPG) && + m_total(super_class) < m_maxlimit(super_class)) { + int i = NMBPG; - m_total(MC_BIGCL)++; - mbstat.m_bigclusters = m_total(MC_BIGCL); - m_total(MC_MBUF) -= NMBPBG; + m_total(MC_MBUF) -= NMBPG; mbstat.m_mbufs = m_total(MC_MBUF); - m_infree(MC_MBUF) -= NMBPBG; - mtype_stat_add(MT_FREE, -((unsigned)NMBPBG)); - - VERIFY(m_total(MC_BIGCL) <= m_maxlimit(MC_BIGCL)); - VERIFY(m_total(MC_MBUF) >= m_minlimit(MC_MBUF)); + m_infree(MC_MBUF) -= NMBPG; + mtype_stat_add(MT_FREE, -((unsigned)NMBPG)); while (i--) { struct mbuf *m = sp->sl_head; @@ -1917,37 +1936,15 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf) sp->sl_head = m->m_next; m->m_next = NULL; } - VERIFY(sp->sl_head == NULL); - - /* Remove the slab from the mbuf class's slab list */ - slab_remove(sp, class); - - /* Reinitialize it as a 4KB cluster slab */ - slab_init(sp, MC_BIGCL, sp->sl_flags, sp->sl_base, sp->sl_base, - sp->sl_len, 0, 1); - - if (mclverify) { - mcache_set_pattern(MCACHE_FREE_PATTERN, - (caddr_t)sp->sl_head, m_maxsize(MC_BIGCL)); - } - mbstat.m_bigclfree = (++m_infree(MC_BIGCL)) + - m_infree(MC_MBUF_BIGCL); - - VERIFY(slab_is_detached(sp)); - /* And finally switch class */ - class = MC_BIGCL; + reinit_supercl = true; } else if (class == MC_CL && sp->sl_refcnt == 0 && - m_total(class) > m_minlimit(class) && - m_total(MC_BIGCL) < m_maxlimit(MC_BIGCL)) { - int i = NCLPBG; + m_total(class) >= (m_minlimit(class) + NCLPG) && + m_total(super_class) < m_maxlimit(super_class)) { + int i = NCLPG; - m_total(MC_BIGCL)++; - mbstat.m_bigclusters = m_total(MC_BIGCL); - m_total(MC_CL) -= NCLPBG; + m_total(MC_CL) -= NCLPG; mbstat.m_clusters = m_total(MC_CL); - 
m_infree(MC_CL) -= NCLPBG; - VERIFY(m_total(MC_BIGCL) <= m_maxlimit(MC_BIGCL)); - VERIFY(m_total(MC_CL) >= m_minlimit(MC_CL)); + m_infree(MC_CL) -= NCLPG; while (i--) { union mcluster *c = sp->sl_head; @@ -1955,25 +1952,56 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf) sp->sl_head = c->mcl_next; c->mcl_next = NULL; } - VERIFY(sp->sl_head == NULL); + reinit_supercl = true; + } else if (class == MC_BIGCL && super_class != MC_BIGCL && + sp->sl_refcnt == 0 && + m_total(class) >= (m_minlimit(class) + NBCLPG) && + m_total(super_class) < m_maxlimit(super_class)) { + int i = NBCLPG; + + VERIFY(super_class == MC_16KCL); + m_total(MC_BIGCL) -= NBCLPG; + mbstat.m_bigclusters = m_total(MC_BIGCL); + m_infree(MC_BIGCL) -= NBCLPG; - /* Remove the slab from the 2KB cluster class's slab list */ + while (i--) { + union mbigcluster *bc = sp->sl_head; + VERIFY(bc != NULL); + sp->sl_head = bc->mbc_next; + bc->mbc_next = NULL; + } + reinit_supercl = true; + } + + if (reinit_supercl) { + VERIFY(sp->sl_head == NULL); + VERIFY(m_total(class) >= m_minlimit(class)); slab_remove(sp, class); - /* Reinitialize it as a 4KB cluster slab */ - slab_init(sp, MC_BIGCL, sp->sl_flags, sp->sl_base, sp->sl_base, - sp->sl_len, 0, 1); + /* Reinitialize it as a cluster for the super class */ + m_total(super_class)++; + m_infree(super_class)++; + VERIFY(sp->sl_flags == (SLF_MAPPED | SLF_DETACHED) && + sp->sl_len == PAGE_SIZE && sp->sl_refcnt == 0); - if (mclverify) { + slab_init(sp, super_class, SLF_MAPPED, sp->sl_base, + sp->sl_base, PAGE_SIZE, 0, 1); + if (mclverify) mcache_set_pattern(MCACHE_FREE_PATTERN, - (caddr_t)sp->sl_head, m_maxsize(MC_BIGCL)); + (caddr_t)sp->sl_base, sp->sl_len); + ((mcache_obj_t *)(sp->sl_base))->obj_next = NULL; + + if (super_class == MC_BIGCL) { + mbstat.m_bigclusters = m_total(MC_BIGCL); + mbstat.m_bigclfree = m_infree(MC_BIGCL) + + m_infree(MC_MBUF_BIGCL); } - mbstat.m_bigclfree = (++m_infree(MC_BIGCL)) + - m_infree(MC_MBUF_BIGCL); VERIFY(slab_is_detached(sp)); + 
VERIFY(m_total(super_class) <= m_maxlimit(super_class)); + /* And finally switch class */ - class = MC_BIGCL; + class = super_class; } /* Reinsert the slab to the class's slab list */ @@ -2013,7 +2041,7 @@ mbuf_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait) * it later when we run out of elements. */ if (!mbuf_cached_above(class, wait) && - m_infree(class) < m_total(class) >> 5) { + m_infree(class) < (m_total(class) >> 5)) { (void) freelist_populate(class, 1, M_DONTWAIT); } @@ -2203,9 +2231,10 @@ cslab_alloc(mbuf_class_t class, mcache_obj_t ***plist, unsigned int num) if (class == MC_MBUF_CL) { VERIFY(clsp->sl_refcnt >= 1 && - clsp->sl_refcnt <= NCLPBG); + clsp->sl_refcnt <= NCLPG); } else { - VERIFY(clsp->sl_refcnt == 1); + VERIFY(clsp->sl_refcnt >= 1 && + clsp->sl_refcnt <= NBCLPG); } if (class == MC_MBUF_16KCL) { @@ -2290,9 +2319,10 @@ cslab_free(mbuf_class_t class, mcache_obj_t *list, int purged) VERIFY(MEXT_RFA(ms) != NULL && MBUF_IS_COMPOSITE(ms)); if (cl_class == MC_CL) { VERIFY(clsp->sl_refcnt >= 1 && - clsp->sl_refcnt <= NCLPBG); + clsp->sl_refcnt <= NCLPG); } else { - VERIFY(clsp->sl_refcnt == 1); + VERIFY(clsp->sl_refcnt >= 1 && + clsp->sl_refcnt <= NBCLPG); } if (cl_class == MC_16KCL) { int k; @@ -2486,7 +2516,8 @@ mbuf_cslab_alloc(void *arg, mcache_obj_t ***plist, unsigned int needed, lck_mtx_lock(mbuf_mlock); mca = mcl_audit_buf2mca(MC_MBUF, (mcache_obj_t *)m); ms = MCA_SAVED_MBUF_PTR(mca); - cl_mca = mcl_audit_buf2mca(MC_CL, (mcache_obj_t *)cl); + cl_mca = mcl_audit_buf2mca(cl_class, + (mcache_obj_t *)cl); /* * Pair them up. 
Note that this is done at the time @@ -2601,14 +2632,21 @@ mbuf_cslab_free(void *arg, mcache_obj_t *list, int purged) static void mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc) { - mbuf_class_t class = (mbuf_class_t)arg; + mbuf_class_t class = (mbuf_class_t)arg, cl_class; mcache_audit_t *mca; struct mbuf *m, *ms; mcl_slab_t *clsp, *nsp; - size_t size; + size_t cl_size; void *cl; ASSERT(MBUF_CLASS_VALID(class) && MBUF_CLASS_COMPOSITE(class)); + if (class == MC_MBUF_CL) + cl_class = MC_CL; + else if (class == MC_MBUF_BIGCL) + cl_class = MC_BIGCL; + else + cl_class = MC_16KCL; + cl_size = m_maxsize(cl_class); while ((m = ms = (struct mbuf *)list) != NULL) { lck_mtx_lock(mbuf_mlock); @@ -2638,9 +2676,10 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc) VERIFY(MEXT_RFA(ms) != NULL && MBUF_IS_COMPOSITE(ms)); if (class == MC_MBUF_CL) VERIFY(clsp->sl_refcnt >= 1 && - clsp->sl_refcnt <= NCLPBG); + clsp->sl_refcnt <= NCLPG); else - VERIFY(clsp->sl_refcnt == 1); + VERIFY(clsp->sl_refcnt >= 1 && + clsp->sl_refcnt <= NBCLPG); if (class == MC_MBUF_16KCL) { int k; @@ -2652,14 +2691,9 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc) } } - mca = mcl_audit_buf2mca(MC_CL, cl); - if (class == MC_MBUF_CL) - size = m_maxsize(MC_CL); - else if (class == MC_MBUF_BIGCL) - size = m_maxsize(MC_BIGCL); - else - size = m_maxsize(MC_16KCL); - mcl_audit_cluster(mca, cl, size, alloc, FALSE); + + mca = mcl_audit_buf2mca(cl_class, cl); + mcl_audit_cluster(mca, cl, cl_size, alloc, FALSE); if (mcltrace) mcache_buffer_log(mca, cl, m_cache(class), &mb_start); @@ -2679,17 +2713,29 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc) static int m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) { - int i; + int i, count = 0; vm_size_t size = 0; - int numpages = 0, large_buffer = (bufsize == m_maxsize(MC_16KCL)); + int numpages = 0, large_buffer; vm_offset_t page = 0; mcache_audit_t *mca_list = NULL; mcache_obj_t 
*con_list = NULL; mcl_slab_t *sp; + mbuf_class_t class; + /* Set if a buffer allocation needs allocation of multiple pages */ + large_buffer = ((bufsize == m_maxsize(MC_16KCL)) && + PAGE_SIZE < M16KCLBYTES); VERIFY(bufsize == m_maxsize(MC_BIGCL) || bufsize == m_maxsize(MC_16KCL)); + VERIFY((bufsize == PAGE_SIZE) || + (bufsize > PAGE_SIZE && bufsize == m_maxsize(MC_16KCL))); + + if (bufsize == m_size(MC_BIGCL)) + class = MC_BIGCL; + else + class = MC_16KCL; + lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED); /* @@ -2733,8 +2779,8 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) if (page == 0) { if (bufsize == m_maxsize(MC_BIGCL)) { - /* Try for 1 page if failed, only 4KB request */ - size = NBPG; + /* Try for 1 page if failed */ + size = PAGE_SIZE; page = kmem_mb_alloc(mb_map, size, 0); } @@ -2744,8 +2790,8 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) } } - VERIFY(IS_P2ALIGNED(page, NBPG)); - numpages = size / NBPG; + VERIFY(IS_P2ALIGNED(page, PAGE_SIZE)); + numpages = size / PAGE_SIZE; /* If auditing is enabled, allocate the audit structures now */ if (mclaudit != NULL) { @@ -2754,19 +2800,23 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) /* * Yes, I realize this is a waste of memory for clusters * that never get transformed into mbufs, as we may end - * up with NMBPBG-1 unused audit structures per cluster. + * up with NMBPG-1 unused audit structures per cluster. * But doing so tremendously simplifies the allocation * strategy, since at this point we are not holding the * mbuf lock and the caller is okay to be blocked. 
*/ - if (bufsize == m_maxsize(MC_BIGCL)) { - needed = numpages * NMBPBG; + if (bufsize == PAGE_SIZE) { + needed = numpages * NMBPG; i = mcache_alloc_ext(mcl_audit_con_cache, &con_list, needed, MCR_SLEEP); VERIFY(con_list != NULL && i == needed); } else { + /* + * if multiple 4K pages are being used for a + * 16K cluster + */ needed = numpages / NSLABSP16KB; } @@ -2778,19 +2828,19 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) lck_mtx_lock(mbuf_mlock); - for (i = 0; i < numpages; i++, page += NBPG) { - ppnum_t offset = ((char *)page - (char *)mbutl) / NBPG; + for (i = 0; i < numpages; i++, page += PAGE_SIZE) { + ppnum_t offset = + ((unsigned char *)page - mbutl) >> PAGE_SHIFT; ppnum_t new_page = pmap_find_phys(kernel_pmap, page); - mbuf_class_t class = MC_BIGCL; /* - * If there is a mapper the appropriate I/O page is returned; - * zero out the page to discard its past contents to prevent - * exposing leftover kernel memory. + * If there is a mapper the appropriate I/O page is + * returned; zero out the page to discard its past + * contents to prevent exposing leftover kernel memory. 
*/ VERIFY(offset < mcl_pages); if (mcl_paddr_base != 0) { - bzero((void *)(uintptr_t) page, page_size); + bzero((void *)(uintptr_t) page, PAGE_SIZE); new_page = IOMapperInsertPage(mcl_paddr_base, offset, new_page); } @@ -2799,36 +2849,42 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) /* Pattern-fill this fresh page */ if (mclverify) { mcache_set_pattern(MCACHE_FREE_PATTERN, - (caddr_t)page, NBPG); + (caddr_t)page, PAGE_SIZE); } - if (bufsize == m_maxsize(MC_BIGCL)) { - union mbigcluster *mbc = (union mbigcluster *)page; - + if (bufsize == PAGE_SIZE) { + mcache_obj_t *buf; /* One for the entire page */ - sp = slab_get(mbc); + sp = slab_get((void *)page); if (mclaudit != NULL) { - mcl_audit_init(mbc, &mca_list, &con_list, - AUDIT_CONTENTS_SIZE, NMBPBG); + mcl_audit_init((void *)page, + &mca_list, &con_list, + AUDIT_CONTENTS_SIZE, NMBPG); } VERIFY(sp->sl_refcnt == 0 && sp->sl_flags == 0); - slab_init(sp, MC_BIGCL, SLF_MAPPED, - mbc, mbc, bufsize, 0, 1); + slab_init(sp, class, SLF_MAPPED, (void *)page, + (void *)page, PAGE_SIZE, 0, 1); + buf = (mcache_obj_t *)page; + buf->obj_next = NULL; /* Insert this slab */ - slab_insert(sp, MC_BIGCL); - - /* Update stats now since slab_get() drops the lock */ - mbstat.m_bigclfree = ++m_infree(MC_BIGCL) + - m_infree(MC_MBUF_BIGCL); - mbstat.m_bigclusters = ++m_total(MC_BIGCL); - VERIFY(m_total(MC_BIGCL) <= m_maxlimit(MC_BIGCL)); - class = MC_BIGCL; - } else if ((i % NSLABSP16KB) == 0) { + slab_insert(sp, class); + + /* Update stats now since slab_get drops the lock */ + ++m_infree(class); + ++m_total(class); + VERIFY(m_total(class) <= m_maxlimit(class)); + if (class == MC_BIGCL) { + mbstat.m_bigclfree = m_infree(MC_BIGCL) + + m_infree(MC_MBUF_BIGCL); + mbstat.m_bigclusters = m_total(MC_BIGCL); + } + ++count; + } else if ((bufsize > PAGE_SIZE) && + (i % NSLABSP16KB) == 0) { union m16kcluster *m16kcl = (union m16kcluster *)page; mcl_slab_t *nsp; int k; - - VERIFY(njcl > 0); + /* One for the entire 16KB */ 
sp = slab_get(m16kcl); if (mclaudit != NULL) @@ -2837,6 +2893,7 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) VERIFY(sp->sl_refcnt == 0 && sp->sl_flags == 0); slab_init(sp, MC_16KCL, SLF_MAPPED, m16kcl, m16kcl, bufsize, 0, 1); + m16kcl->m16kcl_next = NULL; /* * 2nd-Nth page's slab is part of the first one, @@ -2850,21 +2907,21 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) SLF_MAPPED | SLF_PARTIAL, m16kcl, NULL, 0, 0, 0); } - /* Insert this slab */ slab_insert(sp, MC_16KCL); - /* Update stats now since slab_get() drops the lock */ - m_infree(MC_16KCL)++; - m_total(MC_16KCL)++; + /* Update stats now since slab_get drops the lock */ + ++m_infree(MC_16KCL); + ++m_total(MC_16KCL); VERIFY(m_total(MC_16KCL) <= m_maxlimit(MC_16KCL)); - class = MC_16KCL; + ++count; } - if (!mb_peak_newreport && mbuf_report_usage(class)) - mb_peak_newreport = TRUE; } VERIFY(mca_list == NULL && con_list == NULL); + if (!mb_peak_newreport && mbuf_report_usage(class)) + mb_peak_newreport = TRUE; + /* We're done; let others enter */ mb_clalloc_busy = FALSE; if (mb_clalloc_waiters > 0) { @@ -2872,12 +2929,7 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) wakeup(mb_clalloc_waitchan); } - if (bufsize == m_maxsize(MC_BIGCL)) - return (numpages); - - VERIFY(bufsize == m_maxsize(MC_16KCL)); - return (numpages / NSLABSP16KB); - + return (count); out: lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED); @@ -2892,7 +2944,7 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) * When non-blocking we kick a thread if we have to grow the * pool or if the number of free clusters is less than requested. 
*/ - if (bufsize == m_maxsize(MC_BIGCL)) { + if (class == MC_BIGCL) { if (i > 0) { /* * Remember total number of 4KB clusters needed @@ -2936,94 +2988,98 @@ freelist_populate(mbuf_class_t class, unsigned int num, int wait) { mcache_obj_t *o = NULL; int i, numpages = 0, count; + mbuf_class_t super_class; VERIFY(class == MC_MBUF || class == MC_CL || class == MC_BIGCL || class == MC_16KCL); lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED); - switch (class) { - case MC_MBUF: - case MC_CL: - case MC_BIGCL: - numpages = (num * m_size(class) + NBPG - 1) / NBPG; - i = m_clalloc(numpages, wait, m_maxsize(MC_BIGCL)); + VERIFY(PAGE_SIZE == m_maxsize(MC_BIGCL) || + PAGE_SIZE == m_maxsize(MC_16KCL)); - /* Respect the 4KB clusters minimum limit */ - if (m_total(MC_BIGCL) == m_maxlimit(MC_BIGCL) && - m_infree(MC_BIGCL) <= m_minlimit(MC_BIGCL)) { - if (class != MC_BIGCL || (wait & MCR_COMP)) - return (0); - } - if (class == MC_BIGCL) - return (i != 0); - break; + if (m_maxsize(class) >= PAGE_SIZE) + return(m_clalloc(num, wait, m_maxsize(class)) != 0); - case MC_16KCL: - return (m_clalloc(num, wait, m_maxsize(class)) != 0); - /* NOTREACHED */ + /* + * The rest of the function will allocate pages and will slice + * them up into the right size + */ - default: - VERIFY(0); - /* NOTREACHED */ - } + numpages = (num * m_size(class) + PAGE_SIZE - 1) / PAGE_SIZE; + + /* Currently assume that pages are 4K or 16K */ + if (PAGE_SIZE == m_maxsize(MC_BIGCL)) + super_class = MC_BIGCL; + else + super_class = MC_16KCL; - VERIFY(class == MC_MBUF || class == MC_CL); + i = m_clalloc(numpages, wait, m_maxsize(super_class)); + + /* Respect the minimum limit of super class */ + if (m_total(super_class) == m_maxlimit(super_class) && + m_infree(super_class) <= m_minlimit(super_class)) + if (wait & MCR_COMP) + return (0); /* how many objects will we cut the page into? */ - int numobj = (class == MC_MBUF ? 
NMBPBG : NCLPBG); + int numobj = PAGE_SIZE / m_maxsize(class); for (count = 0; count < numpages; count++) { - /* respect totals, minlimit, maxlimit */ - if (m_total(MC_BIGCL) <= m_minlimit(MC_BIGCL) || + if (m_total(super_class) <= m_minlimit(super_class) || m_total(class) >= m_maxlimit(class)) break; - if ((o = slab_alloc(MC_BIGCL, wait)) == NULL) + if ((o = slab_alloc(super_class, wait)) == NULL) break; struct mbuf *m = (struct mbuf *)o; union mcluster *c = (union mcluster *)o; + union mbigcluster *mbc = (union mbigcluster *)o; mcl_slab_t *sp = slab_get(o); mcache_audit_t *mca = NULL; - VERIFY(slab_is_detached(sp) && - (sp->sl_flags & (SLF_MAPPED | SLF_PARTIAL)) == SLF_MAPPED); - + /* + * since one full page will be converted to MC_MBUF or + * MC_CL, verify that the reference count will match that + * assumption + */ + VERIFY(sp->sl_refcnt == 1 && slab_is_detached(sp)); + VERIFY((sp->sl_flags & (SLF_MAPPED | SLF_PARTIAL)) == SLF_MAPPED); /* * Make sure that the cluster is unmolested * while in freelist */ if (mclverify) { - mca = mcl_audit_buf2mca(MC_BIGCL, o); - mcache_audit_free_verify(mca, o, 0, - m_maxsize(MC_BIGCL)); + mca = mcl_audit_buf2mca(super_class, + (mcache_obj_t *)o); + mcache_audit_free_verify(mca, + (mcache_obj_t *)o, 0, m_maxsize(super_class)); } - /* Reinitialize it as an mbuf or 2K slab */ + /* Reinitialize it as an mbuf or 2K or 4K slab */ slab_init(sp, class, sp->sl_flags, - sp->sl_base, NULL, sp->sl_len, 0, numobj); + sp->sl_base, NULL, PAGE_SIZE, 0, numobj); - VERIFY(o == (mcache_obj_t *)sp->sl_base); VERIFY(sp->sl_head == NULL); - VERIFY(m_total(MC_BIGCL) > 0); - m_total(MC_BIGCL)--; - mbstat.m_bigclusters = m_total(MC_BIGCL); + VERIFY(m_total(super_class) >= 1); + m_total(super_class)--; + + if (super_class == MC_BIGCL) + mbstat.m_bigclusters = m_total(MC_BIGCL); m_total(class) += numobj; m_infree(class) += numobj; - VERIFY(m_total(MC_BIGCL) >= m_minlimit(MC_BIGCL)); - VERIFY(m_total(class) <= m_maxlimit(class)); if (!mb_peak_newreport && 
mbuf_report_usage(class)) mb_peak_newreport = TRUE; i = numobj; if (class == MC_MBUF) { mbstat.m_mbufs = m_total(MC_MBUF); - mtype_stat_add(MT_FREE, NMBPBG); + mtype_stat_add(MT_FREE, NMBPG); while (i--) { /* * If auditing is enabled, construct the @@ -3045,7 +3101,7 @@ freelist_populate(mbuf_class_t class, unsigned int num, int wait) m->m_next = sp->sl_head; sp->sl_head = (void *)m++; } - } else { /* MC_CL */ + } else if (class == MC_CL) { /* MC_CL */ mbstat.m_clfree = m_infree(MC_CL) + m_infree(MC_MBUF_CL); mbstat.m_clusters = m_total(MC_CL); @@ -3053,9 +3109,18 @@ freelist_populate(mbuf_class_t class, unsigned int num, int wait) c->mcl_next = sp->sl_head; sp->sl_head = (void *)c++; } + } else { + VERIFY(class == MC_BIGCL); + mbstat.m_bigclusters = m_total(MC_BIGCL); + mbstat.m_bigclfree = m_infree(MC_BIGCL) + + m_infree(MC_MBUF_BIGCL); + while (i--) { + mbc->mbc_next = sp->sl_head; + sp->sl_head = (void *)mbc++; + } } - /* Insert into the mbuf or 2k slab list */ + /* Insert into the mbuf or 2k or 4k slab list */ slab_insert(sp, class); if ((i = mb_waiters) > 0) @@ -3737,6 +3802,7 @@ m_classifier_init(struct mbuf *m, uint32_t pktf_mask) #if MEASURE_BW m->m_pkthdr.pkt_bwseq = 0; #endif /* MEASURE_BW */ + m->m_pkthdr.pkt_enqueue_ts = 0; } void @@ -3935,9 +4001,9 @@ m_allocpacket_internal(unsigned int *numlist, size_t packetlen, } } else if (bufsize == m_maxsize(MC_16KCL)) { VERIFY(njcl > 0); - nsegs = ((packetlen - 1) >> (PGSHIFT + 2)) + 1; + nsegs = ((packetlen - 1) >> M16KCLSHIFT) + 1; } else if (bufsize == m_maxsize(MC_BIGCL)) { - nsegs = ((packetlen - 1) >> PGSHIFT) + 1; + nsegs = ((packetlen - 1) >> MBIGCLSHIFT) + 1; } else { nsegs = ((packetlen - 1) >> MCLSHIFT) + 1; } @@ -4498,8 +4564,13 @@ m_prepend(struct mbuf *m, int len, int how) } mn->m_next = m; m = mn; - if (len < MHLEN) + if (m->m_flags & M_PKTHDR) { + VERIFY(len <= MHLEN); MH_ALIGN(m, len); + } else { + VERIFY(len <= MLEN); + M_ALIGN(m, len); + } m->m_len = len; return (m); } @@ -4509,9 +4580,10 @@ 
m_prepend(struct mbuf *m, int len, int how) * chain, copy junk along, and adjust length. */ struct mbuf * -m_prepend_2(struct mbuf *m, int len, int how) +m_prepend_2(struct mbuf *m, int len, int how, int align) { - if (M_LEADINGSPACE(m) >= len) { + if (M_LEADINGSPACE(m) >= len && + (!align || IS_P2ALIGNED((m->m_data - len), sizeof(u_int32_t)))) { m->m_data -= len; m->m_len += len; } else { @@ -5279,12 +5351,6 @@ m_howmany(int num, size_t bufsize) } else { /* 16K CL */ VERIFY(njcl > 0); - /* Under minimum */ - if (m_16kclusters < MIN16KCL) - return (MIN16KCL - m_16kclusters); - if (m_16kclfree >= M16KCL_LOWAT) - return (0); - /* Ensure at least num clusters are available */ if (num >= m_16kclfree) i = num - m_16kclfree; @@ -5717,9 +5783,10 @@ m_dup(struct mbuf *m, int how) #define MBUF_MULTIPAGES(m) \ (((m)->m_flags & M_EXT) && \ - ((IS_P2ALIGNED((m)->m_data, NBPG) && (m)->m_len > NBPG) || \ - (!IS_P2ALIGNED((m)->m_data, NBPG) && \ - P2ROUNDUP((m)->m_data, NBPG) < ((uintptr_t)(m)->m_data + (m)->m_len)))) + ((IS_P2ALIGNED((m)->m_data, PAGE_SIZE) \ + && (m)->m_len > PAGE_SIZE) || \ + (!IS_P2ALIGNED((m)->m_data, PAGE_SIZE) && \ + P2ROUNDUP((m)->m_data, PAGE_SIZE) < ((uintptr_t)(m)->m_data + (m)->m_len)))) static struct mbuf * m_expand(struct mbuf *m, struct mbuf **last) @@ -5739,11 +5806,11 @@ m_expand(struct mbuf *m, struct mbuf **last) struct mbuf *n; data = data0; - if (IS_P2ALIGNED(data, NBPG) && len0 > NBPG) - len = NBPG; - else if (!IS_P2ALIGNED(data, NBPG) && - P2ROUNDUP(data, NBPG) < (data + len0)) - len = P2ROUNDUP(data, NBPG) - data; + if (IS_P2ALIGNED(data, PAGE_SIZE) && len0 > PAGE_SIZE) + len = PAGE_SIZE; + else if (!IS_P2ALIGNED(data, PAGE_SIZE) && + P2ROUNDUP(data, PAGE_SIZE) < (data + len0)) + len = P2ROUNDUP(data, PAGE_SIZE) - data; else len = len0; @@ -6260,7 +6327,7 @@ slab_get(void *buf) lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED); VERIFY(MBUF_IN_MAP(buf)); - ix = ((char *)buf - (char *)mbutl) >> MBSHIFT; + ix = ((unsigned char *)buf - mbutl) 
>> MBSHIFT; VERIFY(ix < maxslabgrp); if ((slg = slabstbl[ix]) == NULL) { @@ -6283,7 +6350,9 @@ slab_get(void *buf) /* This is a new buffer; create the slabs group for it */ MALLOC(slg, mcl_slabg_t *, sizeof (*slg), M_TEMP, M_WAITOK | M_ZERO); - VERIFY(slg != NULL); + MALLOC(slg->slg_slab, mcl_slab_t *, sizeof(mcl_slab_t) * NSLABSPMB, + M_TEMP, M_WAITOK | M_ZERO); + VERIFY(slg != NULL && slg->slg_slab != NULL); lck_mtx_lock(mbuf_mlock); /* @@ -6308,7 +6377,7 @@ slab_get(void *buf) } } - ix = MTOBG(buf) % NSLABSPMB; + ix = MTOPG(buf) % NSLABSPMB; VERIFY(ix < NSLABSPMB); return (&slg->slg_slab[ix]); @@ -6335,13 +6404,17 @@ slab_insert(mcl_slab_t *sp, mbuf_class_t class) m_slab_cnt(class)++; TAILQ_INSERT_TAIL(&m_slablist(class), sp, sl_link); sp->sl_flags &= ~SLF_DETACHED; + + /* + * If a buffer spans multiple contiguous pages then mark them as + * detached too + */ if (class == MC_16KCL) { int k; for (k = 1; k < NSLABSP16KB; k++) { sp = sp->sl_next; /* Next slab must already be present */ - VERIFY(sp != NULL); - VERIFY(slab_is_detached(sp)); + VERIFY(sp != NULL && slab_is_detached(sp)); sp->sl_flags &= ~SLF_DETACHED; } } @@ -6350,13 +6423,13 @@ slab_insert(mcl_slab_t *sp, mbuf_class_t class) static void slab_remove(mcl_slab_t *sp, mbuf_class_t class) { + int k; VERIFY(!slab_is_detached(sp)); VERIFY(m_slab_cnt(class) > 0); m_slab_cnt(class)--; TAILQ_REMOVE(&m_slablist(class), sp, sl_link); slab_detach(sp); if (class == MC_16KCL) { - int k; for (k = 1; k < NSLABSP16KB; k++) { sp = sp->sl_next; /* Next slab must already be present */ @@ -6429,14 +6502,14 @@ mcl_audit_init(void *buf, mcache_audit_t **mca_list, boolean_t save_contents = (con_list != NULL); unsigned int i, ix; - ASSERT(num <= NMBPBG); + ASSERT(num <= NMBPG); ASSERT(con_list == NULL || con_size != 0); - ix = MTOBG(buf); + ix = MTOPG(buf); VERIFY(ix < maxclaudit); /* Make sure we haven't been here before */ - for (i = 0; i < NMBPBG; i++) + for (i = 0; i < NMBPG; i++) VERIFY(mclaudit[ix].cl_audit[i] == NULL); 
mca = mca_tail = *mca_list; @@ -6482,7 +6555,7 @@ mcl_audit_free(void *buf, unsigned int num) unsigned int i, ix; mcache_audit_t *mca, *mca_list; - ix = MTOBG(buf); + ix = MTOPG(buf); VERIFY(ix < maxclaudit); if (mclaudit[ix].cl_audit[0] != NULL) { @@ -6504,13 +6577,16 @@ mcl_audit_free(void *buf, unsigned int num) * the corresponding audit structure for that buffer. */ static mcache_audit_t * -mcl_audit_buf2mca(mbuf_class_t class, mcache_obj_t *o) +mcl_audit_buf2mca(mbuf_class_t class, mcache_obj_t *mobj) { mcache_audit_t *mca = NULL; - int ix = MTOBG(o); + int ix = MTOPG(mobj), m_idx = 0; + unsigned char *page_addr; VERIFY(ix < maxclaudit); - VERIFY(IS_P2ALIGNED(o, MIN(m_maxsize(class), NBPG))); + VERIFY(IS_P2ALIGNED(mobj, MIN(m_maxsize(class), PAGE_SIZE))); + + page_addr = PGTOM(ix); switch (class) { case MC_MBUF: @@ -6521,19 +6597,25 @@ mcl_audit_buf2mca(mbuf_class_t class, mcache_obj_t *o) * mbuf index relative to the page base and use * it to locate the audit structure. */ - VERIFY(MCLIDX(BGTOM(ix), o) < (int)NMBPBG); - mca = mclaudit[ix].cl_audit[MCLIDX(BGTOM(ix), o)]; + m_idx = MBPAGEIDX(page_addr, mobj); + VERIFY(m_idx < (int)NMBPG); + mca = mclaudit[ix].cl_audit[m_idx]; break; case MC_CL: /* * Same thing as above, but for 2KB clusters in a page. */ - VERIFY(CLBGIDX(BGTOM(ix), o) < (int)NCLPBG); - mca = mclaudit[ix].cl_audit[CLBGIDX(BGTOM(ix), o)]; + m_idx = CLPAGEIDX(page_addr, mobj); + VERIFY(m_idx < (int)NCLPG); + mca = mclaudit[ix].cl_audit[m_idx]; break; case MC_BIGCL: + m_idx = BCLPAGEIDX(page_addr, mobj); + VERIFY(m_idx < (int)NBCLPG); + mca = mclaudit[ix].cl_audit[m_idx]; + break; case MC_16KCL: /* * Same as above, but only return the first element. 
@@ -7344,6 +7426,7 @@ mbuf_report_peak_usage(void) for (i = 0; i < NELEM(mbuf_table); i++) { m_peak(m_class(i)) = m_total(m_class(i)); memreleased += m_release_cnt(i); + m_release_cnt(i) = 0; } mb_peak_newreport = FALSE; lck_mtx_unlock(mbuf_mlock); @@ -7353,6 +7436,7 @@ mbuf_report_peak_usage(void) ns_data.u.mb_stats.total_256b = m_peak(MC_MBUF); ns_data.u.mb_stats.total_2kb = m_peak(MC_CL); ns_data.u.mb_stats.total_4kb = m_peak(MC_BIGCL); + ns_data.u.mb_stats.total_16kb = m_peak(MC_16KCL); ns_data.u.mb_stats.sbmb_total = total_sbmb_cnt_peak; ns_data.u.mb_stats.sb_atmbuflimit = sbmb_limreached; ns_data.u.mb_stats.draincnt = mbstat.m_drain; @@ -7478,23 +7562,25 @@ m_drain(void) slab_remove(sp, mc); switch (mc) { case MC_MBUF: - m_infree(mc) -= NMBPBG; - m_total(mc) -= NMBPBG; + m_infree(mc) -= NMBPG; + m_total(mc) -= NMBPG; if (mclaudit != NULL) - mcl_audit_free(sp->sl_base, NMBPBG); + mcl_audit_free(sp->sl_base, NMBPG); break; case MC_CL: - m_infree(mc) -= NCLPBG; - m_total(mc) -= NCLPBG; + m_infree(mc) -= NCLPG; + m_total(mc) -= NCLPG; if (mclaudit != NULL) - mcl_audit_free(sp->sl_base, NMBPBG); + mcl_audit_free(sp->sl_base, NMBPG); break; case MC_BIGCL: - m_infree(mc)--; - m_total(mc)--; + { + m_infree(mc) -= NBCLPG; + m_total(mc) -= NBCLPG; if (mclaudit != NULL) - mcl_audit_free(sp->sl_base, NMBPBG); + mcl_audit_free(sp->sl_base, NMBPG); break; + } case MC_16KCL: m_infree(mc)--; m_total(mc)--; @@ -7520,7 +7606,9 @@ m_drain(void) } m_release_cnt(mc) += m_size(mc); released += m_size(mc); - offset = ((char *)sp->sl_base - (char *)mbutl) / NBPG; + VERIFY(sp->sl_base != NULL && + sp->sl_len >= PAGE_SIZE); + offset = MTOPG(sp->sl_base); /* * Make sure the IOMapper points to a valid, but * bogus, address. 
This should prevent further DMA diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c index 2dc33d759..d73d61a4b 100644 --- a/bsd/kern/uipc_socket.c +++ b/bsd/kern/uipc_socket.c @@ -126,6 +126,14 @@ #include #endif /* MULTIPATH */ +#define ROUNDUP(a, b) (((a) + ((b) - 1)) & (~((b) - 1))) + +#if DEBUG || DEVELOPMENT +#define DEBUG_KERNEL_ADDRPERM(_v) (_v) +#else +#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v) +#endif + /* TODO: this should be in a header file somewhere */ extern char *proc_name_address(void *p); @@ -152,6 +160,8 @@ static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); static void filt_sockdetach(struct knote *kn); static int filt_sockev(struct knote *kn, long hint); +static void filt_socktouch(struct knote *kn, struct kevent_internal_s *kev, + long type); static int sooptcopyin_timeval(struct sockopt *, struct timeval *); static int sooptcopyout_timeval(struct sockopt *, const struct timeval *); @@ -172,6 +182,7 @@ static struct filterops sock_filtops = { .f_isfd = 1, .f_detach = filt_sockdetach, .f_event = filt_sockev, + .f_touch = filt_socktouch, }; SYSCTL_DECL(_kern_ipc); @@ -236,6 +247,15 @@ int sosendjcl_ignore_capab = 0; SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab, CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, ""); +/* + * Set this to ignore SOF1_IF_2KCL and use big clusters for large + * writes on the socket for all protocols on any network interfaces. + * Be extra careful when setting this to 1, because sending down packets with + * clusters larger that 2 KB might lead to system panics or data corruption. + * When set to 0, the system will respect SOF1_IF_2KCL, which is set + * on the outgoing interface + * Set this to 1 for testing/debugging purposes only. 
+ */ int sosendbigcl_ignore_capab = 0; SYSCTL_INT(_kern_ipc, OID_AUTO, sosendbigcl_ignore_capab, CTLFLAG_RW | CTLFLAG_LOCKED, &sosendbigcl_ignore_capab, 0, ""); @@ -256,6 +276,10 @@ int sorestrictsend = 1; SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictsend, CTLFLAG_RW | CTLFLAG_LOCKED, &sorestrictsend, 0, "Enable outbound interface restrictions"); +int soreserveheadroom = 1; +SYSCTL_INT(_kern_ipc, OID_AUTO, soreserveheadroom, CTLFLAG_RW | CTLFLAG_LOCKED, + &soreserveheadroom, 0, "To allocate contiguous datagram buffers"); + extern struct inpcbinfo tcbinfo; /* TODO: these should be in header file */ @@ -270,10 +294,39 @@ static struct zone *se_zone; /* zone for sockaddr_entry */ vm_size_t so_cache_zone_element_size; -static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **, user_ssize_t *); +static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **, + user_ssize_t *); static void cached_sock_alloc(struct socket **, int); static void cached_sock_free(struct socket *); +/* + * Maximum of extended background idle sockets per process + * Set to zero to disable further setting of the option + */ + +#define SO_IDLE_BK_IDLE_MAX_PER_PROC 1 +#define SO_IDLE_BK_IDLE_TIME 600 +#define SO_IDLE_BK_IDLE_RCV_HIWAT 131072 + +struct soextbkidlestat soextbkidlestat; + +SYSCTL_UINT(_kern_ipc, OID_AUTO, maxextbkidleperproc, + CTLFLAG_RW | CTLFLAG_LOCKED, &soextbkidlestat.so_xbkidle_maxperproc, 0, + "Maximum of extended background idle sockets per process"); + +SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidletime, CTLFLAG_RW | CTLFLAG_LOCKED, + &soextbkidlestat.so_xbkidle_time, 0, + "Time in seconds to keep extended background idle sockets"); + +SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidlercvhiwat, CTLFLAG_RW | CTLFLAG_LOCKED, + &soextbkidlestat.so_xbkidle_rcvhiwat, 0, + "High water mark for extended background idle sockets"); + +SYSCTL_STRUCT(_kern_ipc, OID_AUTO, extbkidlestat, CTLFLAG_RD | CTLFLAG_LOCKED, + &soextbkidlestat, soextbkidlestat, ""); + +int 
so_set_extended_bk_idle(struct socket *, int); + /* * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from * setting the DSCP code on the packet based on the service class; see @@ -289,6 +342,22 @@ socketinit(void) _CASSERT(sizeof(so_gencnt) == sizeof(uint64_t)); VERIFY(IS_P2ALIGNED(&so_gencnt, sizeof(uint32_t))); +#ifdef __LP64__ + _CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user64_sa_endpoints)); + _CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user64_sa_endpoints, sae_srcif)); + _CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user64_sa_endpoints, sae_srcaddr)); + _CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user64_sa_endpoints, sae_srcaddrlen)); + _CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user64_sa_endpoints, sae_dstaddr)); + _CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user64_sa_endpoints, sae_dstaddrlen)); +#else + _CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user32_sa_endpoints)); + _CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user32_sa_endpoints, sae_srcif)); + _CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user32_sa_endpoints, sae_srcaddr)); + _CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user32_sa_endpoints, sae_srcaddrlen)); + _CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user32_sa_endpoints, sae_dstaddr)); + _CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user32_sa_endpoints, sae_dstaddrlen)); +#endif + if (socketinit_done) { printf("socketinit: already called...\n"); return; @@ -321,7 +390,7 @@ socketinit(void) so_cache_zone_element_size = (vm_size_t)(sizeof (struct socket) + 4 + get_inpcb_str_size() + 4 + get_tcp_str_size()); - so_cache_zone = zinit(so_cache_zone_element_size, + so_cache_zone = zinit(so_cache_zone_element_size, (120000 * 
so_cache_zone_element_size), 8192, "socache zone"); zone_change(so_cache_zone, Z_CALLERACCT, FALSE); zone_change(so_cache_zone, Z_NOENCRYPT, TRUE); @@ -344,6 +413,10 @@ socketinit(void) zone_change(se_zone, Z_CALLERACCT, FALSE); zone_change(se_zone, Z_EXPAND, TRUE); + bzero(&soextbkidlestat, sizeof(struct soextbkidlestat)); + soextbkidlestat.so_xbkidle_maxperproc = SO_IDLE_BK_IDLE_MAX_PER_PROC; + soextbkidlestat.so_xbkidle_time = SO_IDLE_BK_IDLE_TIME; + soextbkidlestat.so_xbkidle_rcvhiwat = SO_IDLE_BK_IDLE_RCV_HIWAT; in_pcbinit(); sflt_init(); @@ -390,8 +463,8 @@ cached_sock_alloc(struct socket **so, int waitok) bzero((caddr_t)*so, sizeof (struct socket)); /* - * Define offsets for extra structures into our - * single block of memory. Align extra structures + * Define offsets for extra structures into our + * single block of memory. Align extra structures * on longword boundaries. */ @@ -409,7 +482,7 @@ cached_sock_alloc(struct socket **so, int waitok) (caddr_t)offset; } - (*so)->cached_in_sock_layer = true; + OSBitOrAtomic(SOF1_CACHED_IN_SOCK_LAYER, &(*so)->so_flags1); } static void @@ -465,10 +538,12 @@ so_update_policy(struct socket *so) #if NECP static void -so_update_necp_policy(struct socket *so, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr) +so_update_necp_policy(struct socket *so, struct sockaddr *override_local_addr, + struct sockaddr *override_remote_addr) { if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) - inp_update_necp_policy(sotoinpcb(so), override_local_addr, override_remote_addr, 0); + inp_update_necp_policy(sotoinpcb(so), override_local_addr, + override_remote_addr, 0); } #endif /* NECP */ @@ -486,7 +561,7 @@ so_cache_timer(void) while (!STAILQ_EMPTY(&so_cache_head)) { VERIFY(cached_sock_count > 0); p = STAILQ_FIRST(&so_cache_head); - if ((so_cache_time - p->cache_timestamp) < + if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT) break; @@ -657,7 +732,8 @@ socreate_internal(int dom, struct socket 
**aso, int type, int proto, * If this thread or task is marked to create backgrounded sockets, * mark the socket as background. */ - if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_NEW_SOCKETS_BG)) { + if (proc_get_effective_thread_policy(current_thread(), + TASK_POLICY_NEW_SOCKETS_BG)) { socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND); so->so_background_thread = current_thread(); } @@ -768,11 +844,11 @@ sobindlock(struct socket *so, struct sockaddr *nam, int dolock) so_update_last_owner_locked(so, p); so_update_policy(so); - + #if NECP so_update_necp_policy(so, nam, NULL); #endif /* NECP */ - + /* * If this is a bind request on a socket that has been marked * as inactive, reject it now before we go any further. @@ -780,7 +856,7 @@ sobindlock(struct socket *so, struct sockaddr *nam, int dolock) if (so->so_flags & SOF_DEFUNCT) { error = EINVAL; SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n", - __func__, proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), + __func__, proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), error)); goto out; } @@ -825,7 +901,7 @@ sodealloc(struct socket *so) mac_socket_label_destroy(so); #endif /* MAC_SOCKET */ - if (so->cached_in_sock_layer) { + if (so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) { cached_sock_free(so); } else { FREE_ZONE(so, sizeof (*so), so->so_zone); @@ -861,11 +937,11 @@ solisten(struct socket *so, int backlog) so_update_last_owner_locked(so, p); so_update_policy(so); - + #if NECP so_update_necp_policy(so, NULL, NULL); #endif /* NECP */ - + if (so->so_proto == NULL) { error = EINVAL; goto out; @@ -887,7 +963,7 @@ solisten(struct socket *so, int backlog) if (so->so_flags & SOF_DEFUNCT) { SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] " "(%d)\n", __func__, proc_pid(p), - (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), error)); } goto out; @@ -1052,6 +1128,11 @@ soclose_locked(struct socket *so) } #endif /* 
CONTENT_FILTER */ + if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) { + soresume(current_proc(), so, 1); + so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED; + } + if ((so->so_options & SO_ACCEPTCONN)) { struct socket *sp, *sonext; int socklock = 0; @@ -1377,7 +1458,7 @@ soconnectlock(struct socket *so, struct sockaddr *nam, int dolock) if (so->so_flags & SOF_DEFUNCT) { SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] " "(%d)\n", __func__, proc_pid(p), - (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), error)); } if (dolock) @@ -1454,14 +1535,14 @@ soconnect2(struct socket *so1, struct socket *so2) int soconnectxlocked(struct socket *so, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid, uint32_t flags, void *arg, - uint32_t arglen) + sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen, uio_t auio, user_ssize_t *bytes_written) { int error; so_update_last_owner_locked(so, p); so_update_policy(so); - + /* * If this is a listening socket or if this is a previously-accepted * socket that has been marked as inactive, reject the connect request. 
@@ -1471,7 +1552,7 @@ soconnectxlocked(struct socket *so, struct sockaddr_list **src_sl, if (so->so_flags & SOF_DEFUNCT) { SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] " "(%d)\n", __func__, proc_pid(p), - (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), error)); } return (error); @@ -1503,7 +1584,7 @@ soconnectxlocked(struct socket *so, struct sockaddr_list **src_sl, } else { error = (*so->so_proto->pr_usrreqs->pru_connectx) (so, src_sl, dst_sl, p, ifscope, aid, pcid, - flags, arg, arglen); + flags, arg, arglen, auio, bytes_written); } } @@ -1545,7 +1626,7 @@ sodisconnect(struct socket *so) } int -sodisconnectxlocked(struct socket *so, associd_t aid, connid_t cid) +sodisconnectxlocked(struct socket *so, sae_associd_t aid, sae_connid_t cid) { int error; @@ -1566,7 +1647,7 @@ sodisconnectxlocked(struct socket *so, associd_t aid, connid_t cid) } int -sodisconnectx(struct socket *so, associd_t aid, connid_t cid) +sodisconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid) { int error; @@ -1577,7 +1658,7 @@ sodisconnectx(struct socket *so, associd_t aid, connid_t cid) } int -sopeelofflocked(struct socket *so, associd_t aid, struct socket **psop) +sopeelofflocked(struct socket *so, sae_associd_t aid, struct socket **psop) { return ((*so->so_proto->pr_usrreqs->pru_peeloff)(so, aid, psop)); } @@ -1636,7 +1717,8 @@ sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid, defunct: error = EPIPE; SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n", - __func__, proc_selfpid(), (uint64_t)VM_KERNEL_ADDRPERM(so), + __func__, proc_selfpid(), + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), error)); return (error); } @@ -1651,7 +1733,7 @@ sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid, cfil_sock_data_pending(&so->so_snd) != 0) CFIL_LOG(LOG_INFO, "so %llx ignore SS_CANTSENDMORE", - (uint64_t)VM_KERNEL_ADDRPERM(so)); + 
(uint64_t)DEBUG_KERNEL_ADDRPERM(so)); else #endif /* CONTENT_FILTER */ return (EPIPE); @@ -1665,16 +1747,17 @@ sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid, if ((so->so_state & SS_ISCONNECTED) == 0) { if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) { if (((so->so_state & SS_ISCONFIRMING) == 0) && - (resid != 0 || clen == 0)) { + (resid != 0 || clen == 0) && + !(so->so_flags1 & SOF1_PRECONNECT_DATA)) { #if MPTCP - /* - * MPTCP Fast Join sends data before the + /* + * MPTCP Fast Join sends data before the * socket is truly connected. */ if ((so->so_flags & (SOF_MP_SUBFLOW | SOF_MPTCP_FASTJOIN)) != (SOF_MP_SUBFLOW | SOF_MPTCP_FASTJOIN)) -#endif /* MPTCP */ +#endif /* MPTCP */ return (ENOTCONN); } } else if (addr == 0 && !(flags&MSG_HOLD)) { @@ -1682,6 +1765,7 @@ sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid, ENOTCONN : EDESTADDRREQ); } } + if (so->so_flags & SOF_ENABLE_MSGS) space = msgq_sbspace(so, control); else @@ -1694,8 +1778,21 @@ sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid, return (EMSGSIZE); if ((space < resid + clen && - (atomic || space < (int32_t)so->so_snd.sb_lowat || space < clen)) || + (atomic || (space < (int32_t)so->so_snd.sb_lowat) || + space < clen)) || (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) { + /* + * don't block the connectx call when there's more data + * than can be copied. 
+ */ + if (so->so_flags1 & SOF1_PRECONNECT_DATA) { + if (space == 0) { + return (EWOULDBLOCK); + } + if (space < (int32_t)so->so_snd.sb_lowat) { + return (0); + } + } if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) { return (EWOULDBLOCK); @@ -1779,12 +1876,14 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, { struct mbuf **mp; struct mbuf *m, *freelist = NULL; - user_ssize_t space, len, resid; + user_ssize_t space, len, resid, orig_resid; int clen = 0, error, dontroute, mlen, sendflags; int atomic = sosendallatonce(so) || top; int sblocked = 0; struct proc *p = current_proc(); struct mbuf *control_copy = NULL; + uint16_t headroom = 0; + boolean_t en_tracing = FALSE; if (uio != NULL) resid = uio_resid(uio); @@ -1796,18 +1895,36 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, socket_lock(so, 1); + /* + * trace if tracing & network (vs. unix) sockets & and + * non-loopback + */ + if (ENTR_SHOULDTRACE && + (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) { + struct inpcb *inp = sotoinpcb(so); + if (inp->inp_last_outifp != NULL && + !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) { + en_tracing = TRUE; + KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START, + VM_KERNEL_ADDRPERM(so), + ((so->so_state & SS_NBIO) ? 
kEnTrFlagNonBlocking : 0), + (int64_t)resid); + orig_resid = resid; + } + } + /* * Re-injection should not affect process accounting */ if ((flags & MSG_SKIPCFIL) == 0) { - so_update_last_owner_locked(so, p); - so_update_policy(so); - + so_update_last_owner_locked(so, p); + so_update_policy(so); + #if NECP - so_update_necp_policy(so, NULL, addr); + so_update_necp_policy(so, NULL, addr); #endif /* NECP */ } - + if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) { error = EOPNOTSUPP; socket_unlock(so, 1); @@ -1842,6 +1959,9 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, if (control != NULL) clen = control->m_len; + if (soreserveheadroom != 0) + headroom = so->so_pktheadroom; + do { error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked, control); @@ -1868,22 +1988,26 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, int bytes_to_copy; boolean_t jumbocl; boolean_t bigcl; + int bytes_to_alloc; bytes_to_copy = imin(resid, space); + bytes_to_alloc = bytes_to_copy; + if (top == NULL) + bytes_to_alloc += headroom; + if (sosendminchain > 0) chainlength = 0; else chainlength = sosendmaxchain; /* - * Use big 4 KB cluster only when outgoing - * interface does not want 2 LB clusters + * Use big 4 KB cluster when the outgoing interface + * does not prefer 2 KB clusters */ - bigcl = - !(so->so_flags1 & SOF1_IF_2KCL) || + bigcl = !(so->so_flags1 & SOF1_IF_2KCL) || sosendbigcl_ignore_capab; - + /* * Attempt to use larger than system page-size * clusters for large writes only if there is @@ -1917,12 +2041,12 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, * haven't yet consumed. 
*/ if (freelist == NULL && - bytes_to_copy > MBIGCLBYTES && + bytes_to_alloc > MBIGCLBYTES && jumbocl) { num_needed = - bytes_to_copy / M16KCLBYTES; + bytes_to_alloc / M16KCLBYTES; - if ((bytes_to_copy - + if ((bytes_to_alloc - (num_needed * M16KCLBYTES)) >= MINCLSIZE) num_needed++; @@ -1939,12 +2063,12 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, } if (freelist == NULL && - bytes_to_copy > MCLBYTES && + bytes_to_alloc > MCLBYTES && bigcl) { num_needed = - bytes_to_copy / MBIGCLBYTES; + bytes_to_alloc / MBIGCLBYTES; - if ((bytes_to_copy - + if ((bytes_to_alloc - (num_needed * MBIGCLBYTES)) >= MINCLSIZE) num_needed++; @@ -1960,12 +2084,34 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, */ } - if (freelist == NULL && - bytes_to_copy > MINCLSIZE) { + /* + * Allocate a cluster as we want to + * avoid to split the data in more + * that one segment and using MINCLSIZE + * would lead us to allocate two mbufs + */ + if (soreserveheadroom != 0 && + freelist == NULL && + ((top == NULL && + bytes_to_alloc > _MHLEN) || + bytes_to_alloc > _MLEN)) { + num_needed = ROUNDUP(bytes_to_alloc, MCLBYTES) / + MCLBYTES; + freelist = + m_getpackets_internal( + (unsigned int *)&num_needed, + hdrs_needed, M_WAIT, 0, + MCLBYTES); + /* + * Fall back to a single mbuf + * if allocation failed + */ + } else if (freelist == NULL && + bytes_to_alloc > MINCLSIZE) { num_needed = - bytes_to_copy / MCLBYTES; + bytes_to_alloc / MCLBYTES; - if ((bytes_to_copy - + if ((bytes_to_alloc - (num_needed * MCLBYTES)) >= MINCLSIZE) num_needed++; @@ -1980,7 +2126,20 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, * if allocation failed */ } - + /* + * For datagram protocols, leave + * headroom for protocol headers + * in the first cluster of the chain + */ + if (freelist != NULL && atomic && + top == NULL && headroom > 0) { + freelist->m_data += headroom; + } + + /* + * Fall back to regular mbufs without + * reserving the socket headroom + */ if 
(freelist == NULL) { if (top == NULL) MGETHDR(freelist, @@ -2010,12 +2169,13 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, m->m_next = NULL; if ((m->m_flags & M_EXT)) - mlen = m->m_ext.ext_size; + mlen = m->m_ext.ext_size - + m_leadingspace(m); else if ((m->m_flags & M_PKTHDR)) mlen = MHLEN - m_leadingspace(m); else - mlen = MLEN; + mlen = MLEN - m_leadingspace(m); len = imin(mlen, bytes_to_copy); chainlength += len; @@ -2074,19 +2234,20 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, if (dontroute) so->so_options |= SO_DONTROUTE; - /* Compute flags here, for pru_send and NKEs */ + /* + * Compute flags here, for pru_send and NKEs + * + * If the user set MSG_EOF, the protocol + * understands this flag and nothing left to + * send then use PRU_SEND_EOF instead of PRU_SEND. + */ sendflags = (flags & MSG_OOB) ? PRUS_OOB : - /* - * If the user set MSG_EOF, the protocol - * understands this flag and nothing left to - * send then use PRU_SEND_EOF instead of PRU_SEND. - */ ((flags & MSG_EOF) && - (so->so_proto->pr_flags & PR_IMPLOPCL) && - (resid <= 0)) ? PRUS_EOF : - /* If there is more to send set PRUS_MORETOCOME */ - (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0; - + (so->so_proto->pr_flags & PR_IMPLOPCL) && + (resid <= 0)) ? PRUS_EOF : + /* If there is more to send set PRUS_MORETOCOME */ + (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0; + if ((flags & MSG_SKIPCFIL) == 0) { /* * Socket filter processing @@ -2108,7 +2269,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, * Content filter processing */ error = cfil_sock_data_out(so, addr, top, - control, (sendflags & MSG_OOB) ? + control, (sendflags & MSG_OOB) ? 
sock_data_filt_flag_oob : 0); if (error) { if (error == EJUSTRETURN) { @@ -2163,24 +2324,44 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, if (control_copy != NULL) m_freem(control_copy); - KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid, so->so_snd.sb_cc, - space, error); + /* + * One write has been done. This was enough. Get back to "normal" + * behavior. + */ + if (so->so_flags1 & SOF1_PRECONNECT_DATA) + so->so_flags1 &= ~SOF1_PRECONNECT_DATA; + + if (en_tracing) { + /* resid passed here is the bytes left in uio */ + KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END, + VM_KERNEL_ADDRPERM(so), + ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0), + (int64_t)(orig_resid - resid)); + } + KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid, + so->so_snd.sb_cc, space, error); return (error); } +/* + * Supported only connected sockets (no address) without ancillary data + * (control mbuf) for atomic protocols + */ int -sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray, - u_int uiocnt, struct mbuf *top, struct mbuf *control, int flags) +sosend_list(struct socket *so, struct uio **uioarray, u_int uiocnt, int flags) { struct mbuf *m, *freelist = NULL; user_ssize_t len, resid; - int clen = 0, error, dontroute, mlen; - int atomic = sosendallatonce(so) || top; + int error, dontroute, mlen; + int atomic = sosendallatonce(so); int sblocked = 0; struct proc *p = current_proc(); u_int uiofirst = 0; u_int uiolast = 0; + struct mbuf *top = NULL; + uint16_t headroom = 0; + boolean_t bigcl; KERNEL_DEBUG((DBG_FNC_SOSEND_LIST | DBG_FUNC_START), so, uiocnt, so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat); @@ -2201,10 +2382,7 @@ sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray, error = EINVAL; goto out; } - if (uioarray != NULL) - resid = uio_array_resid(uioarray, uiocnt); - else - resid = mbuf_pkt_list_len(top); + resid = uio_array_resid(uioarray, uiocnt); /* * In theory resid should be 
unsigned. @@ -2220,166 +2398,186 @@ sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray, error = EINVAL; goto out; } - /* - * Disallow functionality not currently supported - * Note: Will need to treat arrays of addresses and controls - */ - if (addr != NULL) { - printf("%s addr not supported\n", __func__); - error = EOPNOTSUPP; - goto out; - } - if (control != NULL) { - printf("%s control not supported\n", __func__); - error = EOPNOTSUPP; - goto out; - } socket_lock(so, 1); so_update_last_owner_locked(so, p); so_update_policy(so); - + #if NECP - so_update_necp_policy(so, NULL, addr); + so_update_necp_policy(so, NULL, NULL); #endif /* NECP */ - + dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd); - if (control != NULL) - clen = control->m_len; - - error = sosendcheck(so, addr, resid, clen, atomic, flags, - &sblocked, control); + error = sosendcheck(so, NULL, resid, 0, atomic, flags, + &sblocked, NULL); if (error) goto release; + /* + * Use big 4 KB clusters when the outgoing interface does not prefer + * 2 KB clusters + */ + bigcl = !(so->so_flags1 & SOF1_IF_2KCL) || sosendbigcl_ignore_capab; + + if (soreserveheadroom != 0) + headroom = so->so_pktheadroom; + do { int i; + int num_needed = 0; + int chainlength; + size_t maxpktlen = 0; + int bytes_to_alloc; - if (uioarray == NULL) { - /* - * Data is prepackaged in "top". 
- */ - resid = 0; - } else { - int num_needed = 0; - int chainlength; - size_t maxpktlen = 0; + if (sosendminchain > 0) + chainlength = 0; + else + chainlength = sosendmaxchain; - if (sosendminchain > 0) - chainlength = 0; - else - chainlength = sosendmaxchain; + socket_unlock(so, 0); - socket_unlock(so, 0); + /* + * Find a set of uio that fit in a reasonable number + * of mbuf packets + */ + for (i = uiofirst; i < uiocnt; i++) { + struct uio *auio = uioarray[i]; - /* - * Find a set of uio that fit in a reasonable number - * of mbuf packets - */ - for (i = uiofirst; i < uiocnt; i++) { - struct uio *auio = uioarray[i]; + len = uio_resid(auio); - len = uio_resid(auio); + /* Do nothing for empty messages */ + if (len == 0) + continue; - /* Do nothing for empty messages */ - if (len == 0) - continue; + num_needed += 1; + uiolast += 1; - num_needed += 1; - uiolast += 1; - - if (len > maxpktlen) - maxpktlen = len; + if (len > maxpktlen) + maxpktlen = len; - chainlength += len; - if (chainlength > sosendmaxchain) - break; - } - /* - * Nothing left to send - */ - if (num_needed == 0) { - socket_lock(so, 0); + chainlength += len; + if (chainlength > sosendmaxchain) break; - } - /* - * Allocate the mbuf packets at once - */ + } + /* + * Nothing left to send + */ + if (num_needed == 0) { + socket_lock(so, 0); + break; + } + /* + * Allocate buffer large enough to include headroom space for + * network and link header + * + */ + bytes_to_alloc = maxpktlen + headroom; + + /* + * Allocate a single contiguous buffer of the smallest available + * size when possible + */ + if (bytes_to_alloc > MCLBYTES && + bytes_to_alloc <= MBIGCLBYTES && bigcl) { + freelist = m_getpackets_internal( + (unsigned int *)&num_needed, + num_needed, M_WAIT, 1, + MBIGCLBYTES); + } else if (bytes_to_alloc > _MHLEN && + bytes_to_alloc <= MCLBYTES) { + freelist = m_getpackets_internal( + (unsigned int *)&num_needed, + num_needed, M_WAIT, 1, + MCLBYTES); + } else { freelist = m_allocpacket_internal( (unsigned 
int *)&num_needed, - maxpktlen, NULL, M_WAIT, 1, 0); + bytes_to_alloc, NULL, M_WAIT, 1, 0); + } + + if (freelist == NULL) { + socket_lock(so, 0); + error = ENOMEM; + goto release; + } + /* + * Copy each uio of the set into its own mbuf packet + */ + for (i = uiofirst, m = freelist; + i < uiolast && m != NULL; + i++) { + int bytes_to_copy; + struct mbuf *n; + struct uio *auio = uioarray[i]; - if (freelist == NULL) { - socket_lock(so, 0); - error = ENOMEM; - goto release; - } + bytes_to_copy = uio_resid(auio); + + /* Do nothing for empty messages */ + if (bytes_to_copy == 0) + continue; /* - * Copy each uio of the set into its own mbuf packet + * Leave headroom for protocol headers + * in the first mbuf of the chain */ - for (i = uiofirst, m = freelist; - i < uiolast && m != NULL; - i++) { - int bytes_to_copy; - struct mbuf *n; - struct uio *auio = uioarray[i]; - - bytes_to_copy = uio_resid(auio); - - /* Do nothing for empty messages */ - if (bytes_to_copy == 0) - continue; - - for (n = m; n != NULL; n = n->m_next) { - mlen = mbuf_maxlen(n); - - len = imin(mlen, bytes_to_copy); - - /* - * Note: uiomove() decrements the iovec - * length - */ - error = uiomove(mtod(n, caddr_t), - len, auio); - if (error != 0) - break; - n->m_len = len; - m->m_pkthdr.len += len; + m->m_data += headroom; + + for (n = m; n != NULL; n = n->m_next) { + if ((m->m_flags & M_EXT)) + mlen = m->m_ext.ext_size - + m_leadingspace(m); + else if ((m->m_flags & M_PKTHDR)) + mlen = + MHLEN - m_leadingspace(m); + else + mlen = MLEN - m_leadingspace(m); + len = imin(mlen, bytes_to_copy); - VERIFY(m->m_pkthdr.len <= maxpktlen); - - bytes_to_copy -= len; - resid -= len; - } - if (m->m_pkthdr.len == 0) { - printf("%s so %llx pkt %llx len null\n", - __func__, - (uint64_t)VM_KERNEL_ADDRPERM(so), - (uint64_t)VM_KERNEL_ADDRPERM(m)); - } + /* + * Note: uiomove() decrements the iovec + * length + */ + error = uiomove(mtod(n, caddr_t), + len, auio); if (error != 0) break; - m = m->m_nextpkt; - } + n->m_len = len; 
+ m->m_pkthdr.len += len; - socket_lock(so, 0); + VERIFY(m->m_pkthdr.len <= maxpktlen); - if (error) - goto release; - top = freelist; - freelist = NULL; + bytes_to_copy -= len; + resid -= len; + } + if (m->m_pkthdr.len == 0) { + printf( + "%s:%d so %llx pkt %llx type %u len null\n", + __func__, __LINE__, + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), + (uint64_t)DEBUG_KERNEL_ADDRPERM(m), + m->m_type); + } + if (error != 0) + break; + m = m->m_nextpkt; } + socket_lock(so, 0); + + if (error) + goto release; + top = freelist; + freelist = NULL; + if (dontroute) so->so_options |= SO_DONTROUTE; if ((flags & MSG_SKIPCFIL) == 0) { struct mbuf **prevnextp = NULL; - + for (i = uiofirst, m = top; i < uiolast && m != NULL; i++) { @@ -2388,18 +2586,18 @@ sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray, /* * Socket filter processing */ - error = sflt_data_out(so, addr, &m, - &control, 0); + error = sflt_data_out(so, NULL, &m, + NULL, 0); if (error != 0 && error != EJUSTRETURN) goto release; - + #if CONTENT_FILTER if (error == 0) { /* * Content filter processing */ - error = cfil_sock_data_out(so, addr, m, - control, 0); + error = cfil_sock_data_out(so, NULL, m, + NULL, 0); if (error != 0 && error != EJUSTRETURN) goto release; } @@ -2414,8 +2612,8 @@ sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray, *prevnextp = nextpkt; else top = nextpkt; - } - + } + m = nextpkt; if (m != NULL) prevnextp = &m->m_nextpkt; @@ -2423,12 +2621,11 @@ sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray, } if (top != NULL) error = (*so->so_proto->pr_usrreqs->pru_send_list) - (so, 0, top, addr, control, p); + (so, 0, top, NULL, NULL, p); if (dontroute) so->so_options &= ~SO_DONTROUTE; - clen = 0; top = NULL; uiofirst = uiolast; } while (resid > 0 && error == 0); @@ -2440,8 +2637,6 @@ sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray, out: if (top != NULL) m_freem(top); - if (control != NULL) - 
m_freem(control); if (freelist != NULL) m_freem_list(freelist); @@ -2451,6 +2646,256 @@ sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray, return (error); } +/* + * May return ERESTART when packet is dropped by MAC policy check + */ +static int +soreceive_addr(struct proc *p, struct socket *so, struct sockaddr **psa, + int flags, struct mbuf **mp, struct mbuf **nextrecordp, int canwait) +{ + int error = 0; + struct mbuf *m = *mp; + struct mbuf *nextrecord = *nextrecordp; + + KASSERT(m->m_type == MT_SONAME, ("receive 1a")); +#if CONFIG_MACF_SOCKET_SUBSET + /* + * Call the MAC framework for policy checking if we're in + * the user process context and the socket isn't connected. + */ + if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) { + struct mbuf *m0 = m; + /* + * Dequeue this record (temporarily) from the receive + * list since we're about to drop the socket's lock + * where a new record may arrive and be appended to + * the list. Upon MAC policy failure, the record + * will be freed. Otherwise, we'll add it back to + * the head of the list. We cannot rely on SB_LOCK + * because append operation uses the socket's lock. + */ + do { + m->m_nextpkt = NULL; + sbfree(&so->so_rcv, m); + m = m->m_next; + } while (m != NULL); + m = m0; + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a"); + socket_unlock(so, 0); + + if (mac_socket_check_received(proc_ucred(p), so, + mtod(m, struct sockaddr *)) != 0) { + /* + * MAC policy failure; free this record and + * process the next record (or block until + * one is available). We have adjusted sb_cc + * and sb_mbcnt above so there is no need to + * call sbfree() again. + */ + m_freem(m); + /* + * Clear SB_LOCK but don't unlock the socket. + * Process the next record or wait for one. 
+ */ + socket_lock(so, 0); + sbunlock(&so->so_rcv, TRUE); /* stay locked */ + error = ERESTART; + goto done; + } + socket_lock(so, 0); + /* + * If the socket has been defunct'd, drop it. + */ + if (so->so_flags & SOF_DEFUNCT) { + m_freem(m); + error = ENOTCONN; + goto done; + } + /* + * Re-adjust the socket receive list and re-enqueue + * the record in front of any packets which may have + * been appended while we dropped the lock. + */ + for (m = m0; m->m_next != NULL; m = m->m_next) + sballoc(&so->so_rcv, m); + sballoc(&so->so_rcv, m); + if (so->so_rcv.sb_mb == NULL) { + so->so_rcv.sb_lastrecord = m0; + so->so_rcv.sb_mbtail = m; + } + m = m0; + nextrecord = m->m_nextpkt = so->so_rcv.sb_mb; + so->so_rcv.sb_mb = m; + SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b"); + } +#endif /* CONFIG_MACF_SOCKET_SUBSET */ + if (psa != NULL) { + *psa = dup_sockaddr(mtod(m, struct sockaddr *), canwait); + if ((*psa == NULL) && (flags & MSG_NEEDSA)) { + error = EWOULDBLOCK; + goto done; + } + } + if (flags & MSG_PEEK) { + m = m->m_next; + } else { + sbfree(&so->so_rcv, m); + if (m->m_next == NULL && so->so_rcv.sb_cc != 0) { + panic("%s: about to create invalid socketbuf", + __func__); + /* NOTREACHED */ + } + MFREE(m, so->so_rcv.sb_mb); + m = so->so_rcv.sb_mb; + if (m != NULL) { + m->m_nextpkt = nextrecord; + } else { + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } + } +done: + *mp = m; + *nextrecordp = nextrecord; + + return (error); +} + +/* + * Process one or more MT_CONTROL mbufs present before any data mbufs + * in the first mbuf chain on the socket buffer. If MSG_PEEK, we + * just copy the data; if !MSG_PEEK, we call into the protocol to + * perform externalization. 
+ */ +static int +soreceive_ctl(struct socket *so, struct mbuf **controlp, int flags, + struct mbuf **mp, struct mbuf **nextrecordp) +{ + int error = 0; + struct mbuf *cm = NULL, *cmn; + struct mbuf **cme = &cm; + struct sockbuf *sb_rcv = &so->so_rcv; + struct mbuf **msgpcm = NULL; + struct mbuf *m = *mp; + struct mbuf *nextrecord = *nextrecordp; + struct protosw *pr = so->so_proto; + + /* + * Externalizing the control messages would require us to + * drop the socket's lock below. Once we re-acquire the + * lock, the mbuf chain might change. In order to preserve + * consistency, we unlink all control messages from the + * first mbuf chain in one shot and link them separately + * onto a different chain. + */ + do { + if (flags & MSG_PEEK) { + if (controlp != NULL) { + if (*controlp == NULL) { + msgpcm = controlp; + } + *controlp = m_copy(m, 0, m->m_len); + + /* + * If we failed to allocate an mbuf, + * release any previously allocated + * mbufs for control data. Return + * an error. Keep the mbufs in the + * socket as this is using + * MSG_PEEK flag. 
+ */ + if (*controlp == NULL) { + m_freem(*msgpcm); + error = ENOBUFS; + goto done; + } + controlp = &(*controlp)->m_next; + } + m = m->m_next; + } else { + m->m_nextpkt = NULL; + sbfree(sb_rcv, m); + sb_rcv->sb_mb = m->m_next; + m->m_next = NULL; + *cme = m; + cme = &(*cme)->m_next; + m = sb_rcv->sb_mb; + } + } while (m != NULL && m->m_type == MT_CONTROL); + + if (!(flags & MSG_PEEK)) { + if (sb_rcv->sb_mb != NULL) { + sb_rcv->sb_mb->m_nextpkt = nextrecord; + } else { + sb_rcv->sb_mb = nextrecord; + SB_EMPTY_FIXUP(sb_rcv); + } + if (nextrecord == NULL) + sb_rcv->sb_lastrecord = m; + } + + SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl"); + + while (cm != NULL) { + int cmsg_type; + + cmn = cm->m_next; + cm->m_next = NULL; + cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type; + + /* + * Call the protocol to externalize SCM_RIGHTS message + * and return the modified message to the caller upon + * success. Otherwise, all other control messages are + * returned unmodified to the caller. Note that we + * only get into this loop if MSG_PEEK is not set. + */ + if (pr->pr_domain->dom_externalize != NULL && + cmsg_type == SCM_RIGHTS) { + /* + * Release socket lock: see 3903171. This + * would also allow more records to be appended + * to the socket buffer. We still have SB_LOCK + * set on it, so we can be sure that the head + * of the mbuf chain won't change. + */ + socket_unlock(so, 0); + error = (*pr->pr_domain->dom_externalize)(cm); + socket_lock(so, 0); + } else { + error = 0; + } + + if (controlp != NULL && error == 0) { + *controlp = cm; + controlp = &(*controlp)->m_next; + } else { + (void) m_free(cm); + } + cm = cmn; + } + /* + * Update the value of nextrecord in case we received new + * records when the socket was unlocked above for + * externalizing SCM_RIGHTS. 
+ */ + if (m != NULL) + nextrecord = sb_rcv->sb_mb->m_nextpkt; + else + nextrecord = sb_rcv->sb_mb; + +done: + *mp = m; + *nextrecordp = nextrecord; + + return (error); +} + /* * Implement receive operations on a socket. * We depend on the way that records are added to the sockbuf @@ -2497,15 +2942,13 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, int flags, error, offset; user_ssize_t len; struct protosw *pr = so->so_proto; - int moff, type =0; + int moff, type = 0; user_ssize_t orig_resid = uio_resid(uio); user_ssize_t delayed_copy_len; int can_delay; int need_event; struct proc *p = current_proc(); - - KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so, uio_resid(uio), - so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat); + boolean_t en_tracing = FALSE; /* * Sanity check on the length passed by caller as we are making 'int' @@ -2514,6 +2957,10 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, if (orig_resid < 0 || orig_resid > INT_MAX) return (EINVAL); + KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so, + uio_resid(uio), so->so_rcv.sb_cc, so->so_rcv.sb_lowat, + so->so_rcv.sb_hiwat); + socket_lock(so, 1); so_update_last_owner_locked(so, p); so_update_policy(so); @@ -2544,7 +2991,7 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, error = ENOTCONN; SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n", - __func__, proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), + __func__, proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), error)); /* * This socket should have been disconnected and flushed @@ -2557,6 +3004,40 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, return (error); } + if ((so->so_flags1 & SOF1_PRECONNECT_DATA) && + pr->pr_usrreqs->pru_preconnect) { + /* + * A user may set the CONNECT_RESUME_ON_READ_WRITE-flag but not + * calling write() right after this. 
*If* the app calls a read + * we do not want to block this read indefinetely. Thus, + * we trigger a connect so that the session gets initiated. + */ + error = (*pr->pr_usrreqs->pru_preconnect)(so); + + if (error) { + socket_unlock(so, 1); + return (error); + } + } + + if (ENTR_SHOULDTRACE && + (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) { + /* + * enable energy tracing for inet sockets that go over + * non-loopback interfaces only. + */ + struct inpcb *inp = sotoinpcb(so); + if (inp->inp_last_outifp != NULL && + !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) { + en_tracing = TRUE; + KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_START, + VM_KERNEL_ADDRPERM(so), + ((so->so_state & SS_NBIO) ? + kEnTrFlagNonBlocking : 0), + (int64_t)orig_resid); + } + } + /* * When SO_WANTOOBFLAG is set we try to get out-of-band data * regardless of the flags argument. Here is the case were @@ -2602,6 +3083,11 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, } } socket_unlock(so, 1); + if (en_tracing) { + KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END, + VM_KERNEL_ADDRPERM(so), 0, + (int64_t)(orig_resid - uio_resid(uio))); + } KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error, 0, 0, 0, 0); @@ -2621,7 +3107,7 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, #ifdef MORE_LOCKING_DEBUG if (so->so_usecount <= 1) printf("soreceive: sblock so=0x%llx ref=%d on socket\n", - (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_usecount); + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount); #endif /* * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE) @@ -2650,6 +3136,11 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, socket_unlock(so, 1); KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error, 0, 0, 0, 0); + if (en_tracing) { + KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END, + VM_KERNEL_ADDRPERM(so), 0, + (int64_t)(orig_resid - uio_resid(uio))); + } return (error); } @@ -2696,8 
+3187,8 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, cfil_sock_data_pending(&so->so_rcv) != 0) CFIL_LOG(LOG_INFO, "so %llx ignore SS_CANTRCVMORE", - (uint64_t)VM_KERNEL_ADDRPERM(so)); - else + (uint64_t)DEBUG_KERNEL_ADDRPERM(so)); + else #endif /* CONTENT_FILTER */ if (m != NULL) goto dontblock; @@ -2716,6 +3207,7 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, } if (uio_resid(uio) == 0) goto release; + if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { error = EWOULDBLOCK; @@ -2743,6 +3235,11 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, socket_unlock(so, 1); KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error, 0, 0, 0, 0); + if (en_tracing) { + KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END, + VM_KERNEL_ADDRPERM(so), 0, + (int64_t)(orig_resid - uio_resid(uio))); + } return (error); } goto restart; @@ -2752,111 +3249,15 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); nextrecord = m->m_nextpkt; - if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) { - KASSERT(m->m_type == MT_SONAME, ("receive 1a")); -#if CONFIG_MACF_SOCKET_SUBSET - /* - * Call the MAC framework for policy checking if we're in - * the user process context and the socket isn't connected. - */ - if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) { - struct mbuf *m0 = m; - /* - * Dequeue this record (temporarily) from the receive - * list since we're about to drop the socket's lock - * where a new record may arrive and be appended to - * the list. Upon MAC policy failure, the record - * will be freed. Otherwise, we'll add it back to - * the head of the list. We cannot rely on SB_LOCK - * because append operation uses the socket's lock. 
- */ - do { - m->m_nextpkt = NULL; - sbfree(&so->so_rcv, m); - m = m->m_next; - } while (m != NULL); - m = m0; - so->so_rcv.sb_mb = nextrecord; - SB_EMPTY_FIXUP(&so->so_rcv); - SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a"); - SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a"); - socket_unlock(so, 0); - if (mac_socket_check_received(proc_ucred(p), so, - mtod(m, struct sockaddr *)) != 0) { - /* - * MAC policy failure; free this record and - * process the next record (or block until - * one is available). We have adjusted sb_cc - * and sb_mbcnt above so there is no need to - * call sbfree() again. - */ - do { - m = m_free(m); - } while (m != NULL); - /* - * Clear SB_LOCK but don't unlock the socket. - * Process the next record or wait for one. - */ - socket_lock(so, 0); - sbunlock(&so->so_rcv, TRUE); /* stay locked */ - goto restart; - } - socket_lock(so, 0); - /* - * If the socket has been defunct'd, drop it. - */ - if (so->so_flags & SOF_DEFUNCT) { - m_freem(m); - error = ENOTCONN; - goto release; - } - /* - * Re-adjust the socket receive list and re-enqueue - * the record in front of any packets which may have - * been appended while we dropped the lock. 
- */ - for (m = m0; m->m_next != NULL; m = m->m_next) - sballoc(&so->so_rcv, m); - sballoc(&so->so_rcv, m); - if (so->so_rcv.sb_mb == NULL) { - so->so_rcv.sb_lastrecord = m0; - so->so_rcv.sb_mbtail = m; - } - m = m0; - nextrecord = m->m_nextpkt = so->so_rcv.sb_mb; - so->so_rcv.sb_mb = m; - SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b"); - SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b"); - } -#endif /* CONFIG_MACF_SOCKET_SUBSET */ + if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) { + error = soreceive_addr(p, so, psa, flags, &m, &nextrecord, + mp0 == NULL); + if (error == ERESTART) + goto restart; + else if (error != 0) + goto release; orig_resid = 0; - if (psa != NULL) { - *psa = dup_sockaddr(mtod(m, struct sockaddr *), - mp0 == NULL); - if ((*psa == NULL) && (flags & MSG_NEEDSA)) { - error = EWOULDBLOCK; - goto release; - } - } - if (flags & MSG_PEEK) { - m = m->m_next; - } else { - sbfree(&so->so_rcv, m); - if (m->m_next == NULL && so->so_rcv.sb_cc != 0) { - panic("%s: about to create invalid socketbuf", - __func__); - /* NOTREACHED */ - } - MFREE(m, so->so_rcv.sb_mb); - m = so->so_rcv.sb_mb; - if (m != NULL) { - m->m_nextpkt = nextrecord; - } else { - so->so_rcv.sb_mb = nextrecord; - SB_EMPTY_FIXUP(&so->so_rcv); - } - } } /* @@ -2866,116 +3267,9 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, * perform externalization. */ if (m != NULL && m->m_type == MT_CONTROL) { - struct mbuf *cm = NULL, *cmn; - struct mbuf **cme = &cm; - struct sockbuf *sb_rcv = &so->so_rcv; - struct mbuf **msgpcm = NULL; - - /* - * Externalizing the control messages would require us to - * drop the socket's lock below. Once we re-acquire the - * lock, the mbuf chain might change. In order to preserve - * consistency, we unlink all control messages from the - * first mbuf chain in one shot and link them separately - * onto a different chain. 
- */ - do { - if (flags & MSG_PEEK) { - if (controlp != NULL) { - if (*controlp == NULL) { - msgpcm = controlp; - } - *controlp = m_copy(m, 0, m->m_len); - - /* - * If we failed to allocate an mbuf, - * release any previously allocated - * mbufs for control data. Return - * an error. Keep the mbufs in the - * socket as this is using - * MSG_PEEK flag. - */ - if (*controlp == NULL) { - m_freem(*msgpcm); - error = ENOBUFS; - goto release; - } - controlp = &(*controlp)->m_next; - } - m = m->m_next; - } else { - m->m_nextpkt = NULL; - sbfree(sb_rcv, m); - sb_rcv->sb_mb = m->m_next; - m->m_next = NULL; - *cme = m; - cme = &(*cme)->m_next; - m = sb_rcv->sb_mb; - } - } while (m != NULL && m->m_type == MT_CONTROL); - - if (!(flags & MSG_PEEK)) { - if (sb_rcv->sb_mb != NULL) { - sb_rcv->sb_mb->m_nextpkt = nextrecord; - } else { - sb_rcv->sb_mb = nextrecord; - SB_EMPTY_FIXUP(sb_rcv); - } - if (nextrecord == NULL) - sb_rcv->sb_lastrecord = m; - } - - SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl"); - SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl"); - - while (cm != NULL) { - int cmsg_type; - - cmn = cm->m_next; - cm->m_next = NULL; - cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type; - - /* - * Call the protocol to externalize SCM_RIGHTS message - * and return the modified message to the caller upon - * success. Otherwise, all other control messages are - * returned unmodified to the caller. Note that we - * only get into this loop if MSG_PEEK is not set. - */ - if (pr->pr_domain->dom_externalize != NULL && - cmsg_type == SCM_RIGHTS) { - /* - * Release socket lock: see 3903171. This - * would also allow more records to be appended - * to the socket buffer. We still have SB_LOCK - * set on it, so we can be sure that the head - * of the mbuf chain won't change. 
- */ - socket_unlock(so, 0); - error = (*pr->pr_domain->dom_externalize)(cm); - socket_lock(so, 0); - } else { - error = 0; - } - - if (controlp != NULL && error == 0) { - *controlp = cm; - controlp = &(*controlp)->m_next; - orig_resid = 0; - } else { - (void) m_free(cm); - } - cm = cmn; - } - /* - * Update the value of nextrecord in case we received new - * records when the socket was unlocked above for - * externalizing SCM_RIGHTS. - */ - if (m != NULL) - nextrecord = sb_rcv->sb_mb->m_nextpkt; - else - nextrecord = sb_rcv->sb_mb; + error = soreceive_ctl(so, controlp, flags, &m, &nextrecord); + if (error != 0) + goto release; orig_resid = 0; } @@ -3390,6 +3684,12 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, sbunlock(&so->so_rcv, FALSE); /* will unlock socket */ + if (en_tracing) { + KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END, + VM_KERNEL_ADDRPERM(so), + ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0), + (int64_t)(orig_resid - uio_resid(uio))); + } KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio), so->so_rcv.sb_cc, 0, error); @@ -3425,54 +3725,59 @@ sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, return (error); } +static int +sodelayed_copy_list(struct socket *so, struct recv_msg_elem *msgarray, + u_int uiocnt, struct mbuf **free_list, user_ssize_t *resid) +{ +#pragma unused(so) + int error = 0; + struct mbuf *ml, *m; + int i = 0; + struct uio *auio; + + for (ml = *free_list, i = 0; ml != NULL && i < uiocnt; + ml = ml->m_nextpkt, i++) { + auio = msgarray[i].uio; + for (m = ml; m != NULL; m = m->m_next) { + error = uiomove(mtod(m, caddr_t), m->m_len, auio); + if (error != 0) + goto out; + } + } +out: + m_freem_list(*free_list); + + *free_list = NULL; + *resid = 0; + + return (error); +} + int -soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, - u_int uiocnt, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) +soreceive_list(struct socket *so, struct 
recv_msg_elem *msgarray, u_int uiocnt, + int *flagsp) { - struct mbuf *m, **mp; + struct mbuf *m; struct mbuf *nextrecord; - struct mbuf *ml = NULL, *free_list = NULL; - int flags, error, offset; - user_ssize_t len; + struct mbuf *ml = NULL, *free_list = NULL, *free_tail = NULL; + int error; + user_ssize_t len, pktlen, delayed_copy_len = 0; struct protosw *pr = so->so_proto; - user_ssize_t orig_resid, resid; + user_ssize_t resid; struct proc *p = current_proc(); struct uio *auio = NULL; - int i = 0; + int npkts = 0; int sblocked = 0; + struct sockaddr **psa = NULL; + struct mbuf **controlp = NULL; + int can_delay; + int flags; + struct mbuf *free_others = NULL; KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_START, so, uiocnt, so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat); - mp = mp0; - if (psa != NULL) - *psa = NULL; - if (controlp != NULL) - *controlp = NULL; - if (flagsp != NULL) - flags = *flagsp &~ MSG_EOR; - else - flags = 0; - /* - * Disallow functionality not currently supported - */ - if (mp0 != NULL) { - printf("%s mp0 not supported\n", __func__); - error = EOPNOTSUPP; - goto out; - } - if (psa != NULL) { - printf("%s sockaddr not supported\n", __func__); - error = EOPNOTSUPP; - goto out; - } - if (controlp != NULL) { - printf("%s control not supported\n", __func__); - error = EOPNOTSUPP; - goto out; - } - /* * Sanity checks: * - Only supports don't wait flags @@ -3481,9 +3786,14 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, * - Protocol must support packet chains * - The uio array is NULL (should we panic?) 
*/ - if (flags & ~(MSG_DONTWAIT | MSG_NBIO)) { - printf("%s flags not supported\n", __func__); - error = EOPNOTSUPP; + if (flagsp != NULL) + flags = *flagsp; + else + flags = 0; + if (flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA | + MSG_NBIO)) { + printf("%s invalid flags 0x%x\n", __func__, flags); + error = EINVAL; goto out; } if (so->so_type != SOCK_DGRAM) { @@ -3498,7 +3808,7 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, error = EPROTONOSUPPORT; goto out; } - if (uioarray == NULL) { + if (msgarray == NULL) { printf("%s uioarray is NULL\n", __func__); error = EINVAL; goto out; @@ -3512,12 +3822,17 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, * Sanity check on the length passed by caller as we are making 'int' * comparisons */ - resid = orig_resid = uio_array_resid(uioarray, uiocnt); - if (orig_resid < 0 || orig_resid > INT_MAX) { + resid = recv_msg_array_resid(msgarray, uiocnt); + if (resid < 0 || resid > INT_MAX) { error = EINVAL; goto out; } + if (!(flags & MSG_PEEK) && sorecvmincopy > 0) + can_delay = 1; + else + can_delay = 0; + socket_lock(so, 1); so_update_last_owner_locked(so, p); so_update_policy(so); @@ -3525,7 +3840,7 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, #if NECP so_update_necp_policy(so, NULL, NULL); #endif /* NECP */ - + /* * If a recv attempt is made on a previously-accepted socket * that has been marked as inactive (disconnected), reject @@ -3536,7 +3851,7 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, error = ENOTCONN; SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n", - __func__, proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), + __func__, proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), error)); /* * This socket should have been disconnected and flushed @@ -3547,8 +3862,15 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio 
**uioarray, sb_empty_assert(sb, __func__); goto release; } - if (mp != NULL) - *mp = NULL; + +next: + /* + * The uio may be empty + */ + if (npkts >= uiocnt) { + error = 0; + goto release; + } restart: /* * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE) @@ -3574,23 +3896,13 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, } sblocked = 1; - /* - * Skip empty uio - */ - auio = uioarray[i]; - while (uio_resid(auio) == 0) { - i++; - if (i >= uiocnt) { - error = 0; - goto release; - } - } - m = so->so_rcv.sb_mb; /* * Block awaiting more datagram if needed */ - if (m == NULL) { + if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && + (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || + ((flags & MSG_WAITALL) && npkts < uiocnt))))) { /* * Panic if we notice inconsistencies in the socket's * receive list; both sb_mb and sb_cc should correctly @@ -3602,6 +3914,8 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, if (so->so_error) { error = so->so_error; + if ((flags & MSG_PEEK) == 0) + so->so_error = 0; goto release; } if (so->so_state & SS_CANTRCVMORE) { @@ -3619,14 +3933,12 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, } /* * Do not block if we got some data - * Note: We could use MSG_WAITALL to wait */ - resid = uio_array_resid(uioarray, uiocnt); - if (resid != orig_resid) { + if (free_list != NULL) { error = 0; goto release; } - + SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); @@ -3640,13 +3952,6 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, goto restart; } - if (m->m_pkthdr.len == 0) { - printf("%s so %llx pkt %llx len is null\n", - __func__, - (uint64_t)VM_KERNEL_ADDRPERM(so), - (uint64_t)VM_KERNEL_ADDRPERM(m)); - goto restart; - } OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv); SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); @@ 
-3654,97 +3959,106 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, /* * Consume the current uio index as we have a datagram */ - i += 1; + auio = msgarray[npkts].uio; + resid = uio_resid(auio); + msgarray[npkts].which |= SOCK_MSG_DATA; + psa = (msgarray[npkts].which & SOCK_MSG_SA) ? + &msgarray[npkts].psa : NULL; + controlp = (msgarray[npkts].which & SOCK_MSG_CONTROL) ? + &msgarray[npkts].controlp : NULL; + npkts += 1; nextrecord = m->m_nextpkt; -#if SO_RECEIVE_LIST_SOCKADDR_NOT_YET if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) { - /* - * to be adapted from soreceive() - */ + error = soreceive_addr(p, so, psa, flags, &m, &nextrecord, 1); + if (error == ERESTART) + goto restart; + else if (error != 0) + goto release; } -#endif /* SO_RECEIVE_LIST_SOCKADDR_NOT_YET */ -#if SO_RECEIVE_LIST_CONTROL_NOT_YET - /* - * Process one or more MT_CONTROL mbufs present before any data mbufs - * in the first mbuf chain on the socket buffer. If MSG_PEEK, we - * just copy the data; if !MSG_PEEK, we call into the protocol to - * perform externalization. 
- */ if (m != NULL && m->m_type == MT_CONTROL) { - /* - * to be adapted from soreceive() - */ + error = soreceive_ctl(so, controlp, flags, &m, &nextrecord); + if (error != 0) + goto release; } -#endif /* SO_RECEIVE_LIST_CONTROL_NOT_YET */ - offset = 0; + if (m->m_pkthdr.len == 0) { + printf("%s:%d so %llx pkt %llx type %u pktlen null\n", + __func__, __LINE__, + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), + (uint64_t)DEBUG_KERNEL_ADDRPERM(m), + m->m_type); + } /* - * Loop to copy out the mbufs of the current record + * Loop to copy the mbufs of the current record + * Support zero length packets */ - while (m != NULL && uio_resid(auio) > 0 && error == 0) { - len = uio_resid(auio); - + ml = NULL; + pktlen = 0; + while (m != NULL && (len = resid - pktlen) >= 0 && error == 0) { if (m->m_len == 0) - printf("%s: so %llx m %llx m_len is 0\n", - __func__, - (uint64_t)VM_KERNEL_ADDRPERM(so), - (uint64_t)VM_KERNEL_ADDRPERM(m)); - + panic("%p m_len zero", m); + if (m->m_type == 0) + panic("%p m_type zero", m); /* * Clip to the residual length */ if (len > m->m_len) len = m->m_len; + pktlen += len; /* - * If mp is set, just pass back the mbufs. - * Otherwise copy them out via the uio, then free. + * Copy the mbufs via the uio or delay the copy * Sockbuf must be consistent here (points to current mbuf, * it points to next record) when we drop priority; * we must note any additions to the sockbuf when we * block interrupts again. 
*/ - if (mp != NULL) { - uio_setresid(auio, (uio_resid(auio) - len)); - } else { - SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); - SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); - + if (len > 0 && can_delay == 0) { socket_unlock(so, 0); error = uiomove(mtod(m, caddr_t), (int)len, auio); socket_lock(so, 0); - if (error) goto release; + } else { + delayed_copy_len += len; } + if (len == m->m_len) { /* - * m was entirely copied + * m was entirely copied */ - nextrecord = m->m_nextpkt; sbfree(&so->so_rcv, m); + nextrecord = m->m_nextpkt; m->m_nextpkt = NULL; /* - * Move to m_next + * Set the first packet to the head of the free list */ - if (mp != NULL) { - *mp = m; - mp = &m->m_next; - so->so_rcv.sb_mb = m = m->m_next; - *mp = NULL; - } else { - if (free_list == NULL) - free_list = m; - else - ml->m_next = m; - ml = m; - so->so_rcv.sb_mb = m = m->m_next; - ml->m_next = NULL; - ml->m_nextpkt = NULL; + if (free_list == NULL) + free_list = m; + /* + * Link current packet to tail of free list + */ + if (ml == NULL) { + if (free_tail != NULL) + free_tail->m_nextpkt = m; + free_tail = m; } + /* + * Link current mbuf to last mbuf of current packet + */ + if (ml != NULL) + ml->m_next = m; + ml = m; + + /* + * Move next buf to head of socket buffer + */ + so->so_rcv.sb_mb = m = ml->m_next; + ml->m_next = NULL; + if (m != NULL) { m->m_nextpkt = nextrecord; if (nextrecord == NULL) @@ -3759,27 +4073,6 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, /* * Stop the loop on partial copy */ - if (mp != NULL) { - int copy_flag; - - if (flags & MSG_DONTWAIT) - copy_flag = M_DONTWAIT; - else - copy_flag = M_WAIT; - *mp = m_copym(m, 0, len, copy_flag); - /* - * Failed to allocate an mbuf? - * Adjust uio_resid back, it was - * adjusted down by len bytes which - * we didn't copy over. 
- */ - if (*mp == NULL) { - uio_setresid(auio, - (uio_resid(auio) + len)); - error = ENOMEM; - break; - } - } break; } } @@ -3787,7 +4080,7 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, if (so->so_usecount <= 1) { panic("%s: after big while so=%llx ref=%d on socket\n", __func__, - (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_usecount); + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount); /* NOTREACHED */ } #endif @@ -3796,6 +4089,22 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, */ if (m != NULL) { if (so->so_options & SO_DONTTRUNC) { + /* + * Copyout first the freelist then the partial mbuf + */ + socket_unlock(so, 0); + if (delayed_copy_len) + error = sodelayed_copy_list(so, msgarray, + uiocnt, &free_list, &delayed_copy_len); + + if (error == 0) { + error = uiomove(mtod(m, caddr_t), (int)len, + auio); + } + socket_lock(so, 0); + if (error) + goto release; + m->m_data += len; m->m_len -= len; so->so_rcv.sb_cc -= len; @@ -3832,21 +4141,24 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, * - There was no error * - A packet was not truncated * - We can still receive more data - */ - if (i < uiocnt && error == 0 && - (flags & (MSG_RCVMORE | MSG_TRUNC)) == 0 - && (so->so_state & SS_CANTRCVMORE) == 0) { + */ + if (npkts < uiocnt && error == 0 && + (flags & (MSG_RCVMORE | MSG_TRUNC)) == 0 && + (so->so_state & SS_CANTRCVMORE) == 0) { sbunlock(&so->so_rcv, TRUE); /* keep socket locked */ sblocked = 0; - goto restart; + goto next; } + if (flagsp != NULL) + *flagsp |= flags; release: /* * pru_rcvd may cause more data to be received if the socket lock * is dropped so we set MSG_HAVEMORE now based on what we know. - * That way the caller won't be surprised if it receives less data than requested. + * That way the caller won't be surprised if it receives less data + * than requested. 
*/ if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) flags |= MSG_HAVEMORE; @@ -3854,18 +4166,22 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreqs->pru_rcvd)(so, flags); - if (flagsp != NULL) - *flagsp |= flags; if (sblocked) sbunlock(&so->so_rcv, FALSE); /* will unlock socket */ else socket_unlock(so, 1); + + if (delayed_copy_len) + error = sodelayed_copy_list(so, msgarray, uiocnt, + &free_list, &delayed_copy_len); out: /* - * Amortize the cost + * Amortize the cost of freeing the mbufs */ if (free_list != NULL) m_freem_list(free_list); + if (free_others != NULL) + m_freem_list(free_others); KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_END, error, 0, 0, 0, 0); @@ -3971,7 +4287,7 @@ soshutdownlock(struct socket *so, int how) } } #endif /* CONTENT_FILTER */ - + error = soshutdownlock_final(so, how); done: @@ -4369,7 +4685,8 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) if (so->so_snd.sb_flags & SB_UNIX) { struct unpcb *unp = (struct unpcb *)(so->so_pcb); - if (unp != NULL && unp->unp_conn != NULL) { + if (unp != NULL && + unp->unp_conn != NULL) { hiwat += unp->unp_conn->unp_cc; } } @@ -4382,13 +4699,13 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) sowwakeup(so); } break; - } + } case SO_RCVLOWAT: { int64_t data_len; so->so_rcv.sb_lowat = (optval > so->so_rcv.sb_hiwat) ? so->so_rcv.sb_hiwat : optval; - data_len = so->so_rcv.sb_cc + data_len = so->so_rcv.sb_cc - so->so_rcv.sb_ctl; if (data_len >= so->so_rcv.sb_lowat) sorwakeup(so); @@ -4656,7 +4973,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) SODEFUNCTLOG(("%s[%d]: so 0x%llx [%s %s:%d -> " "%s:%d] is now marked as %seligible for " "defunct\n", __func__, proc_selfpid(), - (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), (SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP", inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ? 
@@ -4674,7 +4991,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) SODEFUNCTLOG(("%s[%d]: so 0x%llx [%d,%d] is " "now marked as %seligible for defunct\n", __func__, proc_selfpid(), - (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), (so->so_flags & SOF_NODEFUNCT) ? "not " : "")); @@ -4736,7 +5053,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) error = so_set_effective_uuid(so, euuid, sopt->sopt_p); break; } - + #if NECP case SO_NECP_ATTRIBUTES: error = necp_set_socket_attributes(so, sopt); @@ -4763,6 +5080,13 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) break; #endif /* MPTCP */ + case SO_EXTENDED_BK_IDLE: + error = sooptcopyin(sopt, &optval, sizeof (optval), + sizeof (optval)); + if (error == 0) + error = so_set_extended_bk_idle(so, optval); + break; + default: error = ENOPROTOOPT; break; @@ -5027,7 +5351,7 @@ sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock) goto integer; case SO_AWDL_UNRESTRICTED: - if (SOCK_DOM(so) == PF_INET || + if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) { optval = inp_get_awdl_unrestricted( sotoinpcb(so)); @@ -5140,7 +5464,7 @@ sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock) error = flow_divert_token_get(so, sopt); break; #endif /* FLOW_DIVERT */ - + #if NECP case SO_NECP_ATTRIBUTES: error = necp_get_socket_attributes(so, sopt); @@ -5153,7 +5477,7 @@ sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock) sock_id = cfil_sock_id_from_socket(so); - error = sooptcopyout(sopt, &sock_id, + error = sooptcopyout(sopt, &sock_id, sizeof(cfil_sock_id_t)); break; } @@ -5168,9 +5492,14 @@ sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock) break; } optval = (so->so_flags & SOF_MPTCP_FASTJOIN); - break; + /* Fixed along with rdar://19391339 */ + goto integer; #endif /* MPTCP */ + case SO_EXTENDED_BK_IDLE: + optval = (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED); + 
goto integer; + default: error = ENOPROTOOPT; break; @@ -5395,7 +5724,7 @@ soo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx) /* * If the caller explicitly asked for OOB results (e.g. poll()), * save that off in the hookid field and reserve the kn_flags - * EV_OOBAND bit for output only). + * EV_OOBAND bit for output only. */ if (kn->kn_flags & EV_OOBAND) { kn->kn_flags &= ~EV_OOBAND; @@ -5412,6 +5741,8 @@ soo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx) case EVFILT_SOCK: kn->kn_fop = &sock_filtops; skl = &so->so_klist; + kn->kn_hookid = 0; + kn->kn_status |= KN_TOUCH; break; default: socket_unlock(so, 1); @@ -5478,14 +5809,19 @@ filt_soread(struct knote *kn, long hint) } /* socket isn't a listener */ + /* + * NOTE_LOWAT specifies new low water mark in data, i.e. + * the bytes of protocol data. We therefore exclude any + * control bytes. + */ kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl; + /* * Clear out EV_OOBAND that filt_soread may have set in the * past. */ kn->kn_flags &= ~EV_OOBAND; - - if ((so->so_oobmark) || (so->so_state & SS_RCVATMARK)){ + if ((so->so_oobmark) || (so->so_state & SS_RCVATMARK)) { kn->kn_flags |= EV_OOBAND; /* * If caller registered explicit interest in OOB data, @@ -5503,7 +5839,7 @@ filt_soread(struct knote *kn, long hint) return (1); } } - + if ((so->so_state & SS_CANTRCVMORE) #if CONTENT_FILTER && cfil_sock_data_pending(&so->so_rcv) == 0 @@ -5523,6 +5859,11 @@ filt_soread(struct knote *kn, long hint) } int64_t lowwat = so->so_rcv.sb_lowat; + /* + * Ensure that when NOTE_LOWAT is used, the derived + * low water mark is bounded by socket's rcv buf's + * high and low water mark values. + */ if (kn->kn_sfflags & NOTE_LOWAT) { if (kn->kn_sdata > so->so_rcv.sb_hiwat) lowwat = so->so_rcv.sb_hiwat; @@ -5533,7 +5874,15 @@ filt_soread(struct knote *kn, long hint) if ((hint & SO_FILT_HINT_LOCKED) == 0) socket_unlock(so, 1); - return (kn->kn_data >= lowwat); + /* + * The order below is important. 
Since NOTE_LOWAT + * overrides sb_lowat, check for NOTE_LOWAT case + * first. + */ + if (kn->kn_sfflags & NOTE_LOWAT) + return (kn->kn_data >= lowwat); + + return (so->so_rcv.sb_cc >= lowwat); } static void @@ -5581,11 +5930,14 @@ filt_sowrite(struct knote *kn, long hint) ret = 1; goto out; } - if (((so->so_state & SS_ISCONNECTED) == 0) && - (so->so_proto->pr_flags & PR_CONNREQUIRED)) { + if (!socanwrite(so)) { ret = 0; goto out; } + if (so->so_flags1 & SOF1_PRECONNECT_DATA) { + ret = 1; + goto out; + } int64_t lowwat = so->so_snd.sb_lowat; if (kn->kn_sfflags & NOTE_LOWAT) { if (kn->kn_sdata > so->so_snd.sb_hiwat) @@ -5607,7 +5959,8 @@ filt_sowrite(struct knote *kn, long hint) } #endif else { - return (1); + ret = 1; + goto out; } } else { ret = 1; @@ -5639,6 +5992,7 @@ filt_sockev(struct knote *kn, long hint) int ret = 0, locked = 0; struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; long ev_hint = (hint & SO_FILT_HINT_EV); + uint32_t level_trigger = 0; if ((hint & SO_FILT_HINT_LOCKED) == 0) { socket_lock(so, 1); @@ -5646,72 +6000,76 @@ filt_sockev(struct knote *kn, long hint) } if (ev_hint & SO_FILT_HINT_CONNRESET) { - if (kn->kn_sfflags & NOTE_CONNRESET) - kn->kn_fflags |= NOTE_CONNRESET; + kn->kn_fflags |= NOTE_CONNRESET; } if (ev_hint & SO_FILT_HINT_TIMEOUT) { - if (kn->kn_sfflags & NOTE_TIMEOUT) - kn->kn_fflags |= NOTE_TIMEOUT; + kn->kn_fflags |= NOTE_TIMEOUT; } if (ev_hint & SO_FILT_HINT_NOSRCADDR) { - if (kn->kn_sfflags & NOTE_NOSRCADDR) - kn->kn_fflags |= NOTE_NOSRCADDR; + kn->kn_fflags |= NOTE_NOSRCADDR; } if (ev_hint & SO_FILT_HINT_IFDENIED) { - if ((kn->kn_sfflags & NOTE_IFDENIED)) - kn->kn_fflags |= NOTE_IFDENIED; + kn->kn_fflags |= NOTE_IFDENIED; } if (ev_hint & SO_FILT_HINT_KEEPALIVE) { - if (kn->kn_sfflags & NOTE_KEEPALIVE) - kn->kn_fflags |= NOTE_KEEPALIVE; + kn->kn_fflags |= NOTE_KEEPALIVE; } if (ev_hint & SO_FILT_HINT_ADAPTIVE_WTIMO) { - if (kn->kn_sfflags & NOTE_ADAPTIVE_WTIMO) - kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO; + 
kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO; } if (ev_hint & SO_FILT_HINT_ADAPTIVE_RTIMO) { - if (kn->kn_sfflags & NOTE_ADAPTIVE_RTIMO) - kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO; + kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO; } - if (ev_hint & SO_FILT_HINT_CONNECTED) { - if (kn->kn_sfflags & NOTE_CONNECTED) - kn->kn_fflags |= NOTE_CONNECTED; + if ((ev_hint & SO_FILT_HINT_CONNECTED) || + (so->so_state & SS_ISCONNECTED)) { + kn->kn_fflags |= NOTE_CONNECTED; + level_trigger |= NOTE_CONNECTED; } - if (ev_hint & SO_FILT_HINT_DISCONNECTED) { - if (kn->kn_sfflags & NOTE_DISCONNECTED) - kn->kn_fflags |= NOTE_DISCONNECTED; + if ((ev_hint & SO_FILT_HINT_DISCONNECTED) || + (so->so_state & SS_ISDISCONNECTED)) { + kn->kn_fflags |= NOTE_DISCONNECTED; + level_trigger |= NOTE_DISCONNECTED; } if (ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) { if (so->so_proto != NULL && - (so->so_proto->pr_flags & PR_EVCONNINFO) && - (kn->kn_sfflags & NOTE_CONNINFO_UPDATED)) + (so->so_proto->pr_flags & PR_EVCONNINFO)) kn->kn_fflags |= NOTE_CONNINFO_UPDATED; } - if ((kn->kn_sfflags & NOTE_READCLOSED) && - (so->so_state & SS_CANTRCVMORE) + if ((so->so_state & SS_CANTRCVMORE) #if CONTENT_FILTER - && cfil_sock_data_pending(&so->so_rcv) == 0 + && cfil_sock_data_pending(&so->so_rcv) == 0 #endif /* CONTENT_FILTER */ - ) + ) { kn->kn_fflags |= NOTE_READCLOSED; + level_trigger |= NOTE_READCLOSED; + } - if ((kn->kn_sfflags & NOTE_WRITECLOSED) && - (so->so_state & SS_CANTSENDMORE)) + if (so->so_state & SS_CANTSENDMORE) { kn->kn_fflags |= NOTE_WRITECLOSED; + level_trigger |= NOTE_WRITECLOSED; + } - if ((kn->kn_sfflags & NOTE_SUSPEND) && - ((ev_hint & SO_FILT_HINT_SUSPEND) || - (so->so_flags & SOF_SUSPENDED))) { + if ((ev_hint & SO_FILT_HINT_SUSPEND) || + (so->so_flags & SOF_SUSPENDED)) { kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME); + + /* If resume event was delivered before, reset it */ + kn->kn_hookid &= ~NOTE_RESUME; + kn->kn_fflags |= NOTE_SUSPEND; + level_trigger |= NOTE_SUSPEND; } - if ((kn->kn_sfflags & NOTE_RESUME) && - 
((ev_hint & SO_FILT_HINT_RESUME) || - (so->so_flags & SOF_SUSPENDED) == 0)) { + if ((ev_hint & SO_FILT_HINT_RESUME) || + (so->so_flags & SOF_SUSPENDED) == 0) { kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME); + + /* If suspend event was delivered before, reset it */ + kn->kn_hookid &= ~NOTE_SUSPEND; + kn->kn_fflags |= NOTE_RESUME; + level_trigger |= NOTE_RESUME; } if (so->so_error != 0) { @@ -5722,7 +6080,16 @@ filt_sockev(struct knote *kn, long hint) get_sockev_state(so, (u_int32_t *)&(kn->kn_data)); } - if (kn->kn_fflags != 0) + /* Reset any events that are not requested on this knote */ + kn->kn_fflags &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK); + level_trigger &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK); + + /* Find the level triggerred events that are already delivered */ + level_trigger &= kn->kn_hookid; + level_trigger &= EVFILT_SOCK_LEVEL_TRIGGER_MASK; + + /* Do not deliver level triggerred events more than once */ + if ((kn->kn_fflags & ~level_trigger) != 0) ret = 1; if (locked) @@ -5731,6 +6098,51 @@ filt_sockev(struct knote *kn, long hint) return (ret); } +static void +filt_socktouch(struct knote *kn, struct kevent_internal_s *kev, long type) +{ +#pragma unused(kev) + switch (type) { + case EVENT_REGISTER: + { + uint32_t changed_flags; + changed_flags = (kn->kn_sfflags ^ kn->kn_hookid); + + /* + * Since we keep track of events that are already + * delivered, if any of those events are not requested + * anymore the state related to them can be reset + */ + kn->kn_hookid &= + ~(changed_flags & EVFILT_SOCK_LEVEL_TRIGGER_MASK); + break; + } + case EVENT_PROCESS: + /* + * Store the state of the events being delivered. This + * state can be used to deliver level triggered events + * ateast once and still avoid waking up the application + * multiple times as long as the event is active. 
+ */ + if (kn->kn_fflags != 0) + kn->kn_hookid |= (kn->kn_fflags & + EVFILT_SOCK_LEVEL_TRIGGER_MASK); + + /* + * NOTE_RESUME and NOTE_SUSPEND are an exception, deliver + * only one of them and remember the last one that was + * delivered last + */ + if (kn->kn_fflags & NOTE_SUSPEND) + kn->kn_hookid &= ~NOTE_RESUME; + if (kn->kn_fflags & NOTE_RESUME) + kn->kn_hookid &= ~NOTE_SUSPEND; + break; + default: + break; + } +} + void get_sockev_state(struct socket *so, u_int32_t *statep) { @@ -5921,15 +6333,45 @@ sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce) SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) " "so 0x%llx [%d,%d] is not eligible for defunct " "(%d)\n", __func__, proc_selfpid(), proc_pid(p), - level, (uint64_t)VM_KERNEL_ADDRPERM(so), + level, (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), err)); return (err); } so->so_flags &= ~SOF_NODEFUNCT; SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx " "[%d,%d] defunct by force\n", __func__, proc_selfpid(), - proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so), + proc_pid(p), level, (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so))); + } else if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) { + struct inpcb *inp = (struct inpcb *)so->so_pcb; + struct ifnet *ifp = inp->inp_last_outifp; + + if (ifp && IFNET_IS_CELLULAR(ifp)) { + OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nocell); + } else if (so->so_flags & SOF_DELEGATED) { + OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd); + } else if (soextbkidlestat.so_xbkidle_time == 0) { + OSIncrementAtomic(&soextbkidlestat.so_xbkidle_notime); + } else if (noforce) { + OSIncrementAtomic(&soextbkidlestat.so_xbkidle_active); + + so->so_flags1 |= SOF1_EXTEND_BK_IDLE_INPROG; + so->so_extended_bk_start = net_uptime(); + OSBitOrAtomic(P_LXBKIDLEINPROG, &p->p_ladvflag); + + inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY); + + err = EOPNOTSUPP; + SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) " + 
"extend bk idle " + "so 0x%llx rcv hw %d cc %d\n", + __func__, proc_selfpid(), proc_pid(p), + level, (uint64_t)DEBUG_KERNEL_ADDRPERM(so), + so->so_rcv.sb_hiwat, so->so_rcv.sb_cc)); + return (err); + } else { + OSIncrementAtomic(&soextbkidlestat.so_xbkidle_forced); + } } so->so_flags |= SOF_DEFUNCT; @@ -5952,9 +6394,10 @@ sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce) done: SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx [%d,%d] %s " - "defunct\n", __func__, proc_selfpid(), proc_pid(p), level, - (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), - defunct ? "is already" : "marked as")); + "defunct%s\n", __func__, proc_selfpid(), proc_pid(p), level, + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), + defunct ? "is already" : "marked as", + (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ? " extbkidle" : "")); return (err); } @@ -5982,7 +6425,7 @@ sodefunct(struct proc *p, struct socket *so, int level) SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx [%s " "%s:%d -> %s:%d] is now defunct [rcv_si 0x%x, snd_si 0x%x, " "rcv_fl 0x%x, snd_fl 0x%x]\n", __func__, proc_selfpid(), - proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so), + proc_pid(p), level, (uint64_t)DEBUG_KERNEL_ADDRPERM(so), (SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP", inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ? 
(void *)&inp->inp_laddr.s_addr : (void *)&inp->in6p_laddr), @@ -5997,7 +6440,7 @@ sodefunct(struct proc *p, struct socket *so, int level) SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx " "[%d,%d] is now defunct [rcv_si 0x%x, snd_si 0x%x, " "rcv_fl 0x%x, snd_fl 0x%x]\n", __func__, proc_selfpid(), - proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so), + proc_pid(p), level, (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), (uint32_t)rcv->sb_sel.si_flags, (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags, snd->sb_flags)); @@ -6051,6 +6494,192 @@ sodefunct(struct proc *p, struct socket *so, int level) return (0); } +int +soresume(struct proc *p, struct socket *so, int locked) +{ + if (locked == 0) + socket_lock(so, 1); + + if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) { + SODEFUNCTLOG(("%s[%d]: )target pid %d) so 0x%llx [%d,%d] " + "resumed from bk idle\n", + __func__, proc_selfpid(), proc_pid(p), + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so))); + + so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG; + so->so_extended_bk_start = 0; + OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag); + + OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resumed); + OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active); + VERIFY(soextbkidlestat.so_xbkidle_active >= 0); + } + if (locked == 0) + socket_unlock(so, 1); + + return (0); +} + +/* + * Does not attempt to account for sockets that are delegated from + * the current process + */ +int +so_set_extended_bk_idle(struct socket *so, int optval) +{ + int error = 0; + + if ((SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) || + SOCK_PROTO(so) != IPPROTO_TCP) { + OSDecrementAtomic(&soextbkidlestat.so_xbkidle_notsupp); + error = EOPNOTSUPP; + } else if (optval == 0) { + so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED; + + soresume(current_proc(), so, 1); + } else { + struct proc *p = current_proc(); + int i; + struct filedesc *fdp; + int count = 0; + + proc_fdlock(p); + + fdp = p->p_fd; + for (i = 0; i 
< fdp->fd_nfiles; i++) { + struct fileproc *fp = fdp->fd_ofiles[i]; + struct socket *so2; + + if (fp == NULL || + (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 || + FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) + continue; + + so2 = (struct socket *)fp->f_fglob->fg_data; + if (so != so2 && + so2->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) + count++; + if (count >= soextbkidlestat.so_xbkidle_maxperproc) + break; + } + if (count >= soextbkidlestat.so_xbkidle_maxperproc) { + OSIncrementAtomic(&soextbkidlestat.so_xbkidle_toomany); + error = EBUSY; + } else if (so->so_flags & SOF_DELEGATED) { + OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd); + error = EBUSY; + } else { + so->so_flags1 |= SOF1_EXTEND_BK_IDLE_WANTED; + OSIncrementAtomic(&soextbkidlestat.so_xbkidle_wantok); + } + SODEFUNCTLOG(("%s[%d]: so 0x%llx [%d,%d] " + "%s marked for extended bk idle\n", + __func__, proc_selfpid(), + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), + (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ? 
+ "is" : "not")); + + proc_fdunlock(p); + } + + return (error); +} + +static void +so_stop_extended_bk_idle(struct socket *so) +{ + so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG; + so->so_extended_bk_start = 0; + + OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active); + VERIFY(soextbkidlestat.so_xbkidle_active >= 0); + /* + * Force defunct + */ + sosetdefunct(current_proc(), so, + SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE); + if (so->so_flags & SOF_DEFUNCT) { + sodefunct(current_proc(), so, + SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL); + } +} + +void +so_drain_extended_bk_idle(struct socket *so) +{ + if (so && (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) { + /* + * Only penalize sockets that have outstanding data + */ + if (so->so_rcv.sb_cc || so->so_snd.sb_cc) { + so_stop_extended_bk_idle(so); + + OSIncrementAtomic(&soextbkidlestat.so_xbkidle_drained); + } + } +} + +/* + * Return values tells if socket is still in extended background idle + */ +int +so_check_extended_bk_idle_time(struct socket *so) +{ + int ret = 1; + + if ((so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) { + SODEFUNCTLOG(("%s[%d]: so 0x%llx [%d,%d]\n", + __func__, proc_selfpid(), + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so))); + if (net_uptime() - so->so_extended_bk_start > + soextbkidlestat.so_xbkidle_time) { + so_stop_extended_bk_idle(so); + + OSIncrementAtomic(&soextbkidlestat.so_xbkidle_expired); + + ret = 0; + } else { + struct inpcb *inp = (struct inpcb *)so->so_pcb; + + inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY); + OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resched); + } + } + + return (ret); +} + +void +resume_proc_sockets(proc_t p) +{ + if (p->p_ladvflag & P_LXBKIDLEINPROG) { + struct filedesc *fdp; + int i; + + proc_fdlock(p); + fdp = p->p_fd; + for (i = 0; i < fdp->fd_nfiles; i++) { + struct fileproc *fp; + struct socket *so; + + fp = fdp->fd_ofiles[i]; + if (fp == NULL || + (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 || + 
FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) + continue; + + so = (struct socket *)fp->f_fglob->fg_data; + (void) soresume(p, so, 0); + } + proc_fdunlock(p); + + OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag); + } +} + __private_extern__ int so_set_recv_anyif(struct socket *so, int optval) { @@ -6108,7 +6737,7 @@ so_set_restrictions(struct socket *so, uint32_t vals) nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR); noexpensive_old = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE); so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN | - SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR | + SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE)); nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR); noexpensive_new = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE); @@ -6123,7 +6752,10 @@ so_set_restrictions(struct socket *so, uint32_t vals) if (SOCK_DOM(so) == PF_INET) { #endif /* !INET6 */ if (nocell_new - nocell_old != 0) { - /* if deny cellular is now set, do what's needed for INPCB */ + /* + * if deny cellular is now set, do what's needed + * for INPCB + */ inp_set_nocellular(sotoinpcb(so)); } if (noexpensive_new - noexpensive_old != 0) { @@ -6138,7 +6770,7 @@ uint32_t so_get_restrictions(struct socket *so) { return (so->so_restrictions & (SO_RESTRICT_DENY_IN | - SO_RESTRICT_DENY_OUT | + SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE)); } @@ -6331,14 +6963,16 @@ so_set_effective_pid(struct socket *so, int epid, struct proc *p) uuid_unparse(so->e_uuid, buf); log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) " "euuid %s%s\n", __func__, proc_name_address(p), - proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), - SOCK_TYPE(so), so->e_pid, proc_name_address(ep), buf, + proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), + so->e_pid, proc_name_address(ep), buf, ((so->so_flags & SOF_DELEGATED) ? 
" [delegated]" : "")); } else if (error != 0 && net_io_policy_log) { log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) " "ERROR (%d)\n", __func__, proc_name_address(p), - proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), - SOCK_TYPE(so), epid, (ep == PROC_NULL) ? "PROC_NULL" : + proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), + epid, (ep == PROC_NULL) ? "PROC_NULL" : proc_name_address(ep), error); } @@ -6367,7 +7001,7 @@ so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p) /* UUID must not be all-zeroes (reserved for kernel) */ if (uuid_is_null(euuid)) { error = EINVAL; - goto done;; + goto done; } /* @@ -6431,14 +7065,14 @@ so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p) uuid_unparse(so->e_uuid, buf); log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d " "euuid %s%s\n", __func__, proc_name_address(p), proc_pid(p), - (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), so->e_pid, buf, ((so->so_flags & SOF_DELEGATED) ? 
" [delegated]" : "")); } else if (error != 0 && net_io_policy_log) { uuid_unparse(euuid, buf); log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s " "ERROR (%d)\n", __func__, proc_name_address(p), proc_pid(p), - (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), + (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), buf, error); } @@ -6480,7 +7114,7 @@ netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data, } void -socket_post_kev_msg(uint32_t ev_code, +socket_post_kev_msg(uint32_t ev_code, struct kev_socket_event_data *ev_data, uint32_t ev_datalen) { @@ -6517,7 +7151,7 @@ socket_post_kev_msg_closed(struct socket *so) min(peersa->sa_len, sizeof (ev.ev_data.kev_peername))); socket_post_kev_msg(KEV_SOCKET_CLOSED, - &ev.ev_data, sizeof (ev)); + &ev.ev_data, sizeof (ev)); } } if (socksa != NULL) diff --git a/bsd/kern/uipc_socket2.c b/bsd/kern/uipc_socket2.c index 5cbf06334..f8b94b904 100644 --- a/bsd/kern/uipc_socket2.c +++ b/bsd/kern/uipc_socket2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2014 Apple Inc. All rights reserved. + * Copyright (c) 1998-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -105,6 +105,12 @@ #define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4) #define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5) +SYSCTL_DECL(_kern_ipc); + +__private_extern__ u_int32_t net_io_policy_throttle_best_effort = 0; +SYSCTL_INT(_kern_ipc, OID_AUTO, throttle_best_effort, + CTLFLAG_RW | CTLFLAG_LOCKED, &net_io_policy_throttle_best_effort, 0, ""); + static inline void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *); static struct socket *sonewconn_internal(struct socket *, int); static int sbappendaddr_internal(struct sockbuf *, struct sockaddr *, @@ -131,6 +137,7 @@ u_int32_t high_sb_max = SB_MAX; static u_int32_t sb_efficiency = 8; /* parameter for sbreserve() */ int32_t total_sbmb_cnt __attribute__((aligned(8))) = 0; int32_t total_sbmb_cnt_peak __attribute__((aligned(8))) = 0; +int32_t total_snd_byte_count __attribute__((aligned(8))) = 0; int64_t sbmb_limreached __attribute__((aligned(8))) = 0; /* Control whether to throttle sockets eligible to be throttled */ @@ -189,6 +196,8 @@ soisconnected(struct socket *so) so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; + soreserve_preconnect(so, 0); + sflt_notify(so, sock_evt_connected, NULL); if (head && (so->so_state & SS_INCOMP)) { @@ -218,6 +227,15 @@ soisconnected(struct socket *so) } } +boolean_t +socanwrite(struct socket *so) +{ + return ((so->so_state & SS_ISCONNECTED) || + !(so->so_proto->pr_flags & PR_CONNREQUIRED) || + (so->so_flags1 & SOF1_PRECONNECT_DATA)); + +} + void soisdisconnecting(struct socket *so) { @@ -671,6 +689,14 @@ soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc) return (ENOBUFS); } +void +soreserve_preconnect(struct socket *so, unsigned int pre_cc) +{ + /* As of now, same bytes for both preconnect read and write */ + so->so_snd.sb_preconn_hiwat = pre_cc; + so->so_rcv.sb_preconn_hiwat = pre_cc; +} + /* * Allot mbufs to a sockbuf. 
* Attempt to scale mbmax so that mbcnt doesn't become limiting @@ -898,7 +924,8 @@ sblastmbufchk(struct sockbuf *sb, const char *where) for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { printf("\t"); for (n = m; n != NULL; n = n->m_next) - printf("0x%llx ", (uint64_t)VM_KERNEL_ADDRPERM(n)); + printf("0x%llx ", + (uint64_t)VM_KERNEL_ADDRPERM(n)); printf("\n"); } panic("sblastmbufchk from %s", where); @@ -1147,7 +1174,8 @@ sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, #if CONTENT_FILTER if (error == 0) - error = cfil_sock_data_in(sb->sb_so, asa, m0, control, 0); + error = cfil_sock_data_in(sb->sb_so, asa, m0, control, + 0); #endif /* CONTENT_FILTER */ if (error) { @@ -1249,7 +1277,8 @@ sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, #if CONTENT_FILTER if (error == 0) - error = cfil_sock_data_in(sb->sb_so, NULL, m0, control, 0); + error = cfil_sock_data_in(sb->sb_so, NULL, m0, control, + 0); #endif /* CONTENT_FILTER */ if (error) { @@ -2033,10 +2062,10 @@ pru_connect2_notsupp(struct socket *so1, struct socket *so2) int pru_connectx_notsupp(struct socket *so, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid, uint32_t flags, void *arg, - uint32_t arglen) + sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written) { -#pragma unused(so, src_sl, dst_sl, p, ifscope, aid, pcid, flags, arg, arglen) +#pragma unused(so, src_sl, dst_sl, p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written) return (EOPNOTSUPP); } @@ -2063,7 +2092,7 @@ pru_disconnect_notsupp(struct socket *so) } int -pru_disconnectx_notsupp(struct socket *so, associd_t aid, connid_t cid) +pru_disconnectx_notsupp(struct socket *so, sae_associd_t aid, sae_connid_t cid) { #pragma unused(so, aid, cid) return (EOPNOTSUPP); @@ -2077,7 +2106,7 @@ pru_listen_notsupp(struct socket *so, struct proc *p) } int 
-pru_peeloff_notsupp(struct socket *so, associd_t aid, struct socket **psop) +pru_peeloff_notsupp(struct socket *so, sae_associd_t aid, struct socket **psop) { #pragma unused(so, aid, psop) return (EOPNOTSUPP); @@ -2152,10 +2181,10 @@ pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, } int -pru_sosend_list_notsupp(struct socket *so, struct sockaddr *addr, struct uio **uio, - u_int uiocnt, struct mbuf *top, struct mbuf *control, int flags) +pru_sosend_list_notsupp(struct socket *so, struct uio **uio, + u_int uiocnt, int flags) { -#pragma unused(so, addr, uio, uiocnt, top, control, flags) +#pragma unused(so, uio, uiocnt, flags) return (EOPNOTSUPP); } @@ -2168,10 +2197,10 @@ pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, } int -pru_soreceive_list_notsupp(struct socket *so, struct sockaddr **paddr, - struct uio **uio, u_int uiocnt, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) +pru_soreceive_list_notsupp(struct socket *so, + struct recv_msg_elem *recv_msg_array, u_int uiocnt, int *flagsp) { -#pragma unused(so, paddr, uio, uiocnt, mp0, controlp, flagsp) +#pragma unused(so, recv_msg_array, uiocnt, flagsp) return (EOPNOTSUPP); } @@ -2206,6 +2235,13 @@ pru_socheckopt_null(struct socket *so, struct sockopt *sopt) return (0); } +static int +pru_preconnect_null(struct socket *so) +{ +#pragma unused(so) + return (0); +} + void pru_sanitize(struct pr_usrreqs *pru) { @@ -2237,6 +2273,7 @@ pru_sanitize(struct pr_usrreqs *pru) DEFAULT(pru->pru_sosend, pru_sosend_notsupp); DEFAULT(pru->pru_sosend_list, pru_sosend_list_notsupp); DEFAULT(pru->pru_socheckopt, pru_socheckopt_null); + DEFAULT(pru->pru_preconnect, pru_preconnect_null); #undef DEFAULT } @@ -2267,6 +2304,10 @@ sbspace(struct sockbuf *sb) int pending = 0; int space = imin((int)(sb->sb_hiwat - sb->sb_cc), (int)(sb->sb_mbmax - sb->sb_mbcnt)); + + if (sb->sb_preconn_hiwat != 0) + space = imin((int)(sb->sb_preconn_hiwat - sb->sb_cc), space); + if (space < 0) space = 0; @@ 
-2291,7 +2332,7 @@ msgq_sbspace(struct socket *so, struct mbuf *control) { int space = 0, error; u_int32_t msgpri; - VERIFY(so->so_type == SOCK_STREAM && + VERIFY(so->so_type == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP); if (control != NULL) { error = tcp_get_msg_priority(control, &msgpri); @@ -2323,7 +2364,7 @@ soreadable(struct socket *so) #if CONTENT_FILTER && cfil_sock_data_pending(&so->so_rcv) == 0 #endif /* CONTENT_FILTER */ - ) || + ) || so->so_comp.tqh_first || so->so_error); } @@ -2335,15 +2376,16 @@ sowriteable(struct socket *so) if ((so->so_state & SS_CANTSENDMORE) || so->so_error > 0) return (1); + if (so_wait_for_if_feedback(so) || !socanwrite(so)) + return (0); + if (so->so_flags1 & SOF1_PRECONNECT_DATA) + return(1); - if (!so_wait_for_if_feedback(so) && - sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && - ((so->so_state & SS_ISCONNECTED) || - !(so->so_proto->pr_flags & PR_CONNREQUIRED))) { + if (sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat) { if (so->so_flags & SOF_NOTSENT_LOWAT) { - if ((SOCK_DOM(so) == PF_INET6 - || SOCK_DOM(so) == PF_INET) - && so->so_type == SOCK_STREAM) { + if ((SOCK_DOM(so) == PF_INET6 || + SOCK_DOM(so) == PF_INET) && + so->so_type == SOCK_STREAM) { return (tcp_notsent_lowat_check(so)); } #if MPTCP @@ -2382,6 +2424,13 @@ sballoc(struct sockbuf *sb, struct mbuf *m) VERIFY(total_sbmb_cnt > 0); if (total_sbmb_cnt > total_sbmb_cnt_peak) total_sbmb_cnt_peak = total_sbmb_cnt; + + /* + * If data is being appended to the send socket buffer, + * update the send byte count + */ + if (!(sb->sb_flags & SB_RECV)) + OSAddAtomic(cnt, &total_snd_byte_count); } /* adjust counters in sb reflecting freeing of m */ @@ -2401,6 +2450,14 @@ sbfree(struct sockbuf *sb, struct mbuf *m) } OSAddAtomic(cnt, &total_sbmb_cnt); VERIFY(total_sbmb_cnt >= 0); + + /* + * If data is being removed from the send socket buffer, + * update the send byte count + */ + if (!(sb->sb_flags & SB_RECV)) { + OSAddAtomic(cnt, &total_snd_byte_count); + } } /* @@ 
-2442,7 +2499,7 @@ sblock(struct sockbuf *sb, uint32_t flags) */ if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) panic("%s: SB_LOCK not held for %p\n", - __func__, sb); + __func__, sb); /* Keep the sockbuf locked */ return (0); @@ -2537,25 +2594,25 @@ sbunlock(struct sockbuf *sb, boolean_t keeplocked) * been cleared by sodefunct() */ if (!(so->so_flags & SOF_DEFUNCT) && - !(sb->sb_flags & SB_LOCK) && + !(sb->sb_flags & SB_LOCK) && !(so->so_state & SS_DEFUNCT) && !(so->so_flags1 & SOF1_DEFUNCTINPROG)) { panic("%s: SB_LOCK not held for %p\n", - __func__, sb); + __func__, sb); } - /* Keep the sockbuf locked and proceed*/ + /* Keep the sockbuf locked and proceed */ } else { VERIFY((sb->sb_flags & SB_LOCK) || - (so->so_state & SS_DEFUNCT) || - (so->so_flags1 & SOF1_DEFUNCTINPROG)); + (so->so_state & SS_DEFUNCT) || + (so->so_flags1 & SOF1_DEFUNCTINPROG)); sb->sb_flags &= ~SB_LOCK; if (sb->sb_wantlock > 0) { /* - * We may get here from sorflush(), in which case "sb" may not - * point to the real socket buffer. Use the actual socket - * buffer address from the socket instead. + * We may get here from sorflush(), in which case "sb" + * may not point to the real socket buffer. Use the + * actual socket buffer address from the socket instead. */ wakeup((sb->sb_flags & SB_RECV) ? 
&so->so_rcv.sb_flags : &so->so_snd.sb_flags); @@ -2606,7 +2663,7 @@ soevent(struct socket *so, long hint) * Don't post an event if this a subflow socket or * the app has opted out of using cellular interface */ - if ((hint & SO_FILT_HINT_IFDENIED) && + if ((hint & SO_FILT_HINT_IFDENIED) && !(so->so_flags & SOF_MP_SUBFLOW) && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR) && !(so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) @@ -2618,12 +2675,17 @@ soevupcall(struct socket *so, u_int32_t hint) { if (so->so_event != NULL) { caddr_t so_eventarg = so->so_eventarg; + int locked = hint & SO_FILT_HINT_LOCKED; hint &= so->so_eventmask; if (hint != 0) { - socket_unlock(so, 0); + if (locked) + socket_unlock(so, 0); + so->so_event(so, so_eventarg, hint); - socket_lock(so, 0); + + if (locked) + socket_lock(so, 0); } } } @@ -2793,7 +2855,7 @@ sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) * Based on the policy set by an all knowing decison maker, throttle sockets * that either have been marked as belonging to "background" process. */ -int +inline int soisthrottled(struct socket *so) { /* @@ -2804,23 +2866,32 @@ soisthrottled(struct socket *so) (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND)); } -int +inline int soisprivilegedtraffic(struct socket *so) { return ((so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS) ? 
1 : 0); } -int +inline int soissrcbackground(struct socket *so) { return ((so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND) || IS_SO_TC_BACKGROUND(so->so_traffic_class)); } -int +inline int soissrcrealtime(struct socket *so) { - return (so->so_traffic_class >= SO_TC_AV); + return (so->so_traffic_class >= SO_TC_AV && + so->so_traffic_class <= SO_TC_VO); +} + +inline int +soissrcbesteffort(struct socket *so) +{ + return (so->so_traffic_class == SO_TC_BE || + so->so_traffic_class == SO_TC_RD || + so->so_traffic_class == SO_TC_OAM); } void diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c index 106e11dc2..f44291282 100644 --- a/bsd/kern/uipc_syscalls.c +++ b/bsd/kern/uipc_syscalls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -89,6 +89,7 @@ #include #include #include +#include #include @@ -127,12 +128,19 @@ #define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8)) #define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8)) +#if DEBUG || DEVELOPMENT +#define DEBUG_KERNEL_ADDRPERM(_v) (_v) +#define DBG_PRINTF(...) printf(__VA_ARGS__) +#else +#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v) +#define DBG_PRINTF(...) 
do { } while (0) +#endif /* TODO: should be in header file */ int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int); -static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int, - int32_t *); +static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t, + int, int32_t *); static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t, int32_t *); static int connectit(struct socket *, struct sockaddr *); @@ -148,19 +156,36 @@ static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **, #endif /* SENDFILE */ static int connectx_nocancel(struct proc *, struct connectx_args *, int *); static int connectitx(struct socket *, struct sockaddr_list **, - struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *); + struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t, + sae_connid_t *, uio_t, unsigned int, user_ssize_t *); static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *); static int disconnectx_nocancel(struct proc *, struct disconnectx_args *, int *); static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int); static int internalize_user_msghdr_array(const void *, int, int, u_int, - struct user_msghdr_x *, struct uio **); + struct user_msghdr_x *, struct uio **); static u_int externalize_user_msghdr_array(void *, int, int, u_int, - const struct user_msghdr_x *, struct uio **); + const struct user_msghdr_x *, struct uio **); static void free_uio_array(struct uio **, u_int); static int uio_array_is_valid(struct uio **, u_int); +static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int); +static int internalize_recv_msghdr_array(const void *, int, int, + u_int, struct user_msghdr_x *, struct recv_msg_elem *); +static u_int externalize_recv_msghdr_array(void *, int, int, u_int, + const struct user_msghdr_x *, struct recv_msg_elem *); +static struct recv_msg_elem *alloc_recv_msg_array(u_int count); +static void 
free_recv_msg_array(struct recv_msg_elem *, u_int); + +SYSCTL_DECL(_kern_ipc); + +static u_int somaxsendmsgx = 100; +SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx, + CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, ""); +static u_int somaxrecvmsgx = 100; +SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx, + CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, ""); /* * System call interface to the socket abstraction. @@ -250,6 +275,10 @@ socket_common(struct proc *p, proc_fdunlock(p); *retval = fd; + if (ENTR_SHOULDTRACE) { + KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START, + fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so)); + } } return (error); } @@ -601,6 +630,11 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, out: file_drop(fd); + + if (error == 0 && ENTR_SHOULDTRACE) { + KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START, + newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so)); + } return (error); } @@ -608,7 +642,8 @@ int accept(struct proc *p, struct accept_args *uap, int32_t *retval) { __pthread_testcancel(1); - return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval)); + return (accept_nocancel(p, (struct accept_nocancel_args *)uap, + retval)); } /* @@ -638,7 +673,8 @@ int connect(struct proc *p, struct connect_args *uap, int32_t *retval) { __pthread_testcancel(1); - return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval)); + return (connect_nocancel(p, (struct connect_nocancel_args *)uap, + retval)); } int @@ -695,11 +731,17 @@ connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval) #pragma unused(p, retval) struct sockaddr_list *src_sl = NULL, *dst_sl = NULL; struct socket *so; - int error, fd = uap->s; + int error, error1, fd = uap->socket; boolean_t dgram; - connid_t cid = CONNID_ANY; + sae_connid_t cid = SAE_CONNID_ANY; + struct user32_sa_endpoints ep32; + struct user64_sa_endpoints ep64; + struct user_sa_endpoints ep; + user_ssize_t bytes_written = 0; + struct user_iovec *iovp; + uio_t auio = NULL; - 
AUDIT_ARG(fd, uap->s); + AUDIT_ARG(fd, uap->socket); error = file_socket(fd, &so); if (error != 0) return (error); @@ -708,11 +750,32 @@ connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval) goto out; } - /* - * XXX Workaround to ensure connectx does not fail because - * of unreaped so_error. - */ - so->so_error = 0; + if (uap->endpoints == USER_ADDR_NULL) { + error = EINVAL; + goto out; + } + + if (IS_64BIT_PROCESS(p)) { + error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64)); + if (error != 0) + goto out; + + ep.sae_srcif = ep64.sae_srcif; + ep.sae_srcaddr = ep64.sae_srcaddr; + ep.sae_srcaddrlen = ep64.sae_srcaddrlen; + ep.sae_dstaddr = ep64.sae_dstaddr; + ep.sae_dstaddrlen = ep64.sae_dstaddrlen; + } else { + error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32)); + if (error != 0) + goto out; + + ep.sae_srcif = ep32.sae_srcif; + ep.sae_srcaddr = ep32.sae_srcaddr; + ep.sae_srcaddrlen = ep32.sae_srcaddrlen; + ep.sae_dstaddr = ep32.sae_dstaddr; + ep.sae_dstaddrlen = ep32.sae_dstaddrlen; + } /* * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET @@ -725,27 +788,87 @@ connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval) * sockaddr_list for src address for convenience, if present, * even though it won't hold more than one. 
*/ - if (uap->src != USER_ADDR_NULL && (error = getsockaddrlist(so, - &src_sl, uap->src, uap->srclen, dgram)) != 0) + if (ep.sae_srcaddr != USER_ADDR_NULL && (error = getsockaddrlist(so, + &src_sl, (user_addr_t)(caddr_t)ep.sae_srcaddr, ep.sae_srcaddrlen, + dgram)) != 0) goto out; - error = getsockaddrlist(so, &dst_sl, uap->dsts, uap->dstlen, dgram); + if (ep.sae_dstaddr == USER_ADDR_NULL) { + error = EINVAL; + goto out; + } + + error = getsockaddrlist(so, &dst_sl, (user_addr_t)(caddr_t)ep.sae_dstaddr, + ep.sae_dstaddrlen, dgram); if (error != 0) goto out; VERIFY(dst_sl != NULL && !TAILQ_EMPTY(&dst_sl->sl_head) && dst_sl->sl_cnt > 0); - error = connectitx(so, &src_sl, &dst_sl, p, uap->ifscope, - uap->aid, &cid); + if (uap->iov != USER_ADDR_NULL) { + /* Verify range before calling uio_create() */ + if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) + return (EINVAL); + + if (uap->len == USER_ADDR_NULL) + return (EINVAL); + + /* allocate a uio to hold the number of iovecs passed */ + auio = uio_create(uap->iovcnt, 0, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + UIO_WRITE); + + if (auio == NULL) { + error = ENOMEM; + goto out; + } + + /* + * get location of iovecs within the uio. + * then copyin the iovecs from user space. + */ + iovp = uio_iovsaddr(auio); + if (iovp == NULL) { + error = ENOMEM; + goto out; + } + error = copyin_user_iovec_array(uap->iov, + IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32, + uap->iovcnt, iovp); + if (error != 0) + goto out; + + /* finish setup of uio_t */ + error = uio_calculateresid(auio); + if (error != 0) { + goto out; + } + } + + error = connectitx(so, &src_sl, &dst_sl, p, ep.sae_srcif, uap->associd, + &cid, auio, uap->flags, &bytes_written); if (error == ERESTART) error = EINTR; - if (uap->cid != USER_ADDR_NULL) - (void) copyout(&cid, uap->cid, sizeof (cid)); + if (uap->len != USER_ADDR_NULL) { + error1 = copyout(&bytes_written, uap->len, sizeof (uap->len)); + /* give precedence to connectitx errors */ + if ((error1 != 0) && (error == 0)) + error = error1; + } + if (uap->connid != USER_ADDR_NULL) { + error1 = copyout(&cid, uap->connid, sizeof (cid)); + /* give precedence to connectitx errors */ + if ((error1 != 0) && (error == 0)) + error = error1; + } out: file_drop(fd); + if (auio != NULL) { + uio_free(auio); + } if (src_sl != NULL) sockaddrlist_free(src_sl); if (dst_sl != NULL) @@ -816,10 +939,12 @@ connectit(struct socket *so, struct sockaddr *sa) static int connectitx(struct socket *so, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid) + sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags, + user_ssize_t *bytes_written) { struct sockaddr_entry *se; int error; +#pragma unused (flags) VERIFY(dst_sl != NULL && *dst_sl != NULL); @@ -839,12 +964,50 @@ connectitx(struct socket *so, struct sockaddr_list **src_sl, error = EALREADY; goto out; } + + if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) && + (flags & CONNECT_DATA_IDEMPOTENT)) + so->so_flags1 |= SOF1_DATA_IDEMPOTENT; + + /* + * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data. + * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error) + * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data + * Case 3 allows user to combine write with connect even if they have + * no use for TFO (such as regular TCP, and UDP). 
+ * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case) + */ + if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) && + ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) + so->so_flags1 |= SOF1_PRECONNECT_DATA; + + /* + * If a user sets data idempotent and does not pass an uio, or + * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset + * SOF1_DATA_IDEMPOTENT. + */ + if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) && + (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) { + /* We should return EINVAL instead perhaps. */ + so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT; + } + error = soconnectxlocked(so, src_sl, dst_sl, p, ifscope, - aid, pcid, 0, NULL, 0); + aid, pcid, 0, NULL, 0, auio, bytes_written); if (error != 0) { so->so_state &= ~SS_ISCONNECTING; goto out; } + /* + * If, after the call to soconnectxlocked the flag is still set (in case + * data has been queued and the connect() has actually been triggered, + * it will have been unset by the transport), we exit immediately. There + * is no reason to wait on any event. + */ + if (so->so_flags1 & SOF1_PRECONNECT_DATA) { + error = 0; + goto out; + } if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { error = EINPROGRESS; goto out; @@ -1113,7 +1276,7 @@ socketpair(struct proc *p, struct socketpair_args *uap, * sockargs:??? 
*/ static int -sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, +sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop, int flags, int32_t *retval) { struct mbuf *control = NULL; @@ -1121,20 +1284,10 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, struct sockaddr *to = NULL; boolean_t want_free = TRUE; int error; - struct socket *so; user_ssize_t len; KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0); - error = file_socket(s, &so); - if (error) { - KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0); - return (error); - } - if (so == NULL) { - error = EBADF; - goto out; - } if (mp->msg_name != USER_ADDR_NULL) { if (mp->msg_namelen > sizeof (ss)) { error = getsockaddr(so, &to, mp->msg_name, @@ -1166,9 +1319,9 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, /* * We check the state without holding the socket lock; * if a race condition occurs, it would simply result - * in an extra call to the MAC check function. + * in an extra call to the MAC check function. */ - if ( to != NULL && + if (to != NULL && !(so->so_state & SS_DEFUNCT) && (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) goto bad; @@ -1192,7 +1345,7 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, FREE(to, M_SONAME); out: KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0); - file_drop(s); + return (error); } @@ -1217,6 +1370,7 @@ sendto_nocancel(struct proc *p, struct user_msghdr msg; int error; uio_t auio = NULL; + struct socket *so; KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0); AUDIT_ARG(fd, uap->s); @@ -1225,7 +1379,8 @@ sendto_nocancel(struct proc *p, (IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32), UIO_WRITE); if (auio == NULL) { - return (ENOMEM); + error = ENOMEM; + goto done; } uio_addiov(auio, uap->buf, uap->len); @@ -1237,12 +1392,21 @@ sendto_nocancel(struct proc *p, msg.msg_control = 0; msg.msg_flags = 0; - error = sendit(p, uap->s, &msg, auio, uap->flags, retval); + error = file_socket(uap->s, &so); + if (error) + goto done; - if (auio != NULL) { - uio_free(auio); + if (so == NULL) { + error = EBADF; + } else { + error = sendit(p, so, &msg, auio, uap->flags, retval); } + file_drop(uap->s); +done: + if (auio != NULL) + uio_free(auio); + KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0); return (error); @@ -1258,11 +1422,13 @@ int sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval) { __pthread_testcancel(1); - return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval)); + return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, + retval)); } int -sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *retval) +sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, + int32_t *retval) { struct user32_msghdr msg32; struct user64_msghdr msg64; @@ -1272,6 +1438,7 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *ret int error; uio_t auio = NULL; struct user_iovec *iovp; + struct socket *so; KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0); AUDIT_ARG(fd, uap->s); @@ -1350,7 +1517,16 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *ret /* msg_flags is ignored for send */ user_msg.msg_flags = 0; - error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval); + error = file_socket(uap->s, &so); + if (error) { + goto done; + } + if (so == NULL) { + error = EBADF; + } else { + error = sendit(p, so, &user_msg, auio, uap->flags, retval); + } + file_drop(uap->s); done: if (auio != NULL) { uio_free(auio); @@ -1364,17 +1540,17 @@ int sendmsg_x(struct proc *p, struct 
sendmsg_x_args *uap, user_ssize_t *retval) { int error = 0; - struct user_msghdr_x *user_msg = NULL; + struct user_msghdr_x *user_msg_x = NULL; struct uio **uiop = NULL; struct socket *so; u_int i; struct sockaddr *to = NULL; - struct mbuf *control = NULL; user_ssize_t len_before = 0, len_after; int need_drop = 0; size_t size_of_msghdr; void *umsgp = NULL; u_int uiocnt; + int has_addr_or_ctl = 0; KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0); @@ -1387,11 +1563,6 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) error = EBADF; goto out; } - if (so->so_proto->pr_usrreqs->pru_sosend_list == NULL) { - printf("%s no pru_sosend_list\n", __func__); - error = EOPNOTSUPP; - goto out; - } /* * Input parameter range check @@ -1400,17 +1571,23 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) error = EINVAL; goto out; } - user_msg = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), + /* + * Clip to max currently allowed + */ + if (uap->cnt > somaxsendmsgx) + uap->cnt = somaxsendmsgx; + + user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), M_TEMP, M_WAITOK | M_ZERO); - if (user_msg == NULL) { - printf("%s _MALLOC() user_msg failed\n", __func__); + if (user_msg_x == NULL) { + DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__); error = ENOMEM; goto out; } uiop = _MALLOC(uap->cnt * sizeof(struct uio *), M_TEMP, M_WAITOK | M_ZERO); if (uiop == NULL) { - printf("%s _MALLOC() uiop failed\n", __func__); + DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__); error = ENOMEM; goto out; } @@ -1418,23 +1595,23 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) size_of_msghdr = IS_64BIT_PROCESS(p) ? 
sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x); - umsgp = _MALLOC(uap->cnt * size_of_msghdr, + umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO); if (umsgp == NULL) { - printf("%s _MALLOC() user_msg failed\n", __func__); + printf("%s _MALLOC() user_msg_x failed\n", __func__); error = ENOMEM; goto out; } error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr); if (error) { - printf("%s copyin() failed\n", __func__); + DBG_PRINTF("%s copyin() failed\n", __func__); goto out; } error = internalize_user_msghdr_array(umsgp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, - UIO_WRITE, uap->cnt, user_msg, uiop); + UIO_WRITE, uap->cnt, user_msg_x, uiop); if (error) { - printf("%s copyin_user_msghdr_array() failed\n", __func__); + DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__); goto out; } /* @@ -1450,7 +1627,7 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) * Sanity check on passed arguments */ for (i = 0; i < uap->cnt; i++) { - struct user_msghdr_x *mp = &user_msg[i]; + struct user_msghdr_x *mp = user_msg_x + i; /* * No flags on send message @@ -1462,43 +1639,72 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) /* * No support for address or ancillary data (yet) */ - if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) { - error = EINVAL; - goto out; - } + if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) + has_addr_or_ctl = 1; + if (mp->msg_control != USER_ADDR_NULL || - mp->msg_controllen != 0) { - error = EINVAL; - goto out; - } + mp->msg_controllen != 0) + has_addr_or_ctl = 1; + #if CONFIG_MACF_SOCKET_SUBSET /* * We check the state without holding the socket lock; * if a race condition occurs, it would simply result - * in an extra call to the MAC check function. + * in an extra call to the MAC check function. 
* * Note: The following check is never true taken with the * current limitation that we do not accept to pass an address, - * this is effectively placeholder code. If we add support for addresses, - * we will have to check every address. + * this is effectively placeholder code. If we add support for + * addresses, we will have to check every address. */ - if ( to != NULL && + if (to != NULL && !(so->so_state & SS_DEFUNCT) && - (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) + (error = mac_socket_check_send(kauth_cred_get(), so, to)) + != 0) goto out; #endif /* MAC_SOCKET_SUBSET */ } len_before = uio_array_resid(uiop, uap->cnt); - error = so->so_proto->pr_usrreqs->pru_sosend_list(so, to, uiop, - uap->cnt, 0, control, uap->flags); - + /* + * Feed list of packets at once only for connected socket without + * control message + */ + if (so->so_proto->pr_usrreqs->pru_sosend_list != + pru_sosend_list_notsupp && + has_addr_or_ctl == 0 && somaxsendmsgx == 0) { + error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop, + uap->cnt, uap->flags); + } else { + for (i = 0; i < uap->cnt; i++) { + struct user_msghdr_x *mp = user_msg_x + i; + struct user_msghdr user_msg; + uio_t auio = uiop[i]; + int32_t tmpval; + + user_msg.msg_flags = mp->msg_flags; + user_msg.msg_controllen = mp->msg_controllen; + user_msg.msg_control = mp->msg_control; + user_msg.msg_iovlen = mp->msg_iovlen; + user_msg.msg_iov = mp->msg_iov; + user_msg.msg_namelen = mp->msg_namelen; + user_msg.msg_name = mp->msg_name; + + error = sendit(p, so, &user_msg, auio, uap->flags, + &tmpval); + if (error != 0) + break; + } + } len_after = uio_array_resid(uiop, uap->cnt); + VERIFY(len_after <= len_before); + if (error != 0) { if (len_after != len_before && (error == ERESTART || - error == EINTR || error == EWOULDBLOCK)) + error == EINTR || error == EWOULDBLOCK || + error == ENOBUFS)) error = 0; /* Generation of SIGPIPE can be controlled per socket */ if (error == EPIPE && !(so->so_flags & 
SOF_NOSIGPIPE)) @@ -1507,7 +1713,7 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) if (error == 0) { uiocnt = externalize_user_msghdr_array(umsgp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, - UIO_WRITE, uap->cnt, user_msg, uiop); + UIO_WRITE, uap->cnt, user_msg_x, uiop); *retval = (int)(uiocnt); } @@ -1520,14 +1726,130 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) free_uio_array(uiop, uap->cnt); _FREE(uiop, M_TEMP); } - if (user_msg != NULL) - _FREE(user_msg, M_TEMP); + if (user_msg_x != NULL) + _FREE(user_msg_x, M_TEMP); KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0); return (error); } + +static int +copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen) +{ + int error = 0; + socklen_t sa_len = 0; + ssize_t len; + + len = *namelen; + if (len <= 0 || fromsa == 0) { + len = 0; + } else { +#ifndef MIN +#define MIN(a, b) ((a) > (b) ? (b) : (a)) +#endif + sa_len = fromsa->sa_len; + len = MIN((unsigned int)len, sa_len); + error = copyout(fromsa, name, (unsigned)len); + if (error) + goto out; + } + *namelen = sa_len; +out: + return (0); +} + +static int +copyout_control(struct proc *p, struct mbuf *m, user_addr_t control, + socklen_t *controllen, int *flags) +{ + int error = 0; + ssize_t len; + user_addr_t ctlbuf; + + len = *controllen; + *controllen = 0; + ctlbuf = control; + + while (m && len > 0) { + unsigned int tocopy; + struct cmsghdr *cp = mtod(m, struct cmsghdr *); + int cp_size = CMSG_ALIGN(cp->cmsg_len); + int buflen = m->m_len; + + while (buflen > 0 && len > 0) { + /* + * SCM_TIMESTAMP hack because struct timeval has a + * different size for 32 bits and 64 bits processes + */ + if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) { + unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))]; + struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer; + int tmp_space; + struct timeval *tv = (struct timeval *)(void 
*)CMSG_DATA(cp); + + tmp_cp->cmsg_level = SOL_SOCKET; + tmp_cp->cmsg_type = SCM_TIMESTAMP; + + if (proc_is64bit(p)) { + struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp); + + tv64->tv_sec = tv->tv_sec; + tv64->tv_usec = tv->tv_usec; + + tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval)); + tmp_space = CMSG_SPACE(sizeof(struct user64_timeval)); + } else { + struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp); + + tv32->tv_sec = tv->tv_sec; + tv32->tv_usec = tv->tv_usec; + + tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval)); + tmp_space = CMSG_SPACE(sizeof(struct user32_timeval)); + } + if (len >= tmp_space) { + tocopy = tmp_space; + } else { + *flags |= MSG_CTRUNC; + tocopy = len; + } + error = copyout(tmp_buffer, ctlbuf, tocopy); + if (error) + goto out; + } else { + if (cp_size > buflen) { + panic("cp_size > buflen, something" + "wrong with alignment!"); + } + if (len >= cp_size) { + tocopy = cp_size; + } else { + *flags |= MSG_CTRUNC; + tocopy = len; + } + error = copyout((caddr_t) cp, ctlbuf, tocopy); + if (error) + goto out; + } + + ctlbuf += tocopy; + len -= tocopy; + + buflen -= cp_size; + cp = (struct cmsghdr *)(void *) + ((unsigned char *) cp + cp_size); + cp_size = CMSG_ALIGN(cp->cmsg_len); + } + + m = m->m_next; + } + *controllen = ctlbuf - control; +out: + return (error); +} + /* * Returns: 0 Success * ENOTSOCK @@ -1556,8 +1878,7 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, { ssize_t len; int error; - struct mbuf *m, *control = 0; - user_addr_t ctlbuf; + struct mbuf *control = 0; struct socket *so; struct sockaddr *fromsa = 0; struct fileproc *fp; @@ -1614,120 +1935,26 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, error == EINTR || error == EWOULDBLOCK)) error = 0; } - if (error) goto out; *retval = len - uio_resid(uiop); - if (mp->msg_name) { - socklen_t sa_len = 0; - len = mp->msg_namelen; - if (len <= 0 || fromsa == 0) { - len = 
0; - } else { -#ifndef MIN -#define MIN(a, b) ((a) > (b) ? (b) : (a)) -#endif - sa_len = fromsa->sa_len; - len = MIN((unsigned int)len, sa_len); - error = copyout(fromsa, mp->msg_name, (unsigned)len); - if (error) - goto out; - } - mp->msg_namelen = sa_len; + if (mp->msg_name) { + error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen); + if (error) + goto out; /* return the actual, untruncated address length */ if (namelenp && - (error = copyout((caddr_t)&sa_len, namelenp, + (error = copyout((caddr_t)&mp->msg_namelen, namelenp, sizeof (int)))) { goto out; } } - if (mp->msg_control) { - len = mp->msg_controllen; - m = control; - mp->msg_controllen = 0; - ctlbuf = mp->msg_control; - - while (m && len > 0) { - unsigned int tocopy; - struct cmsghdr *cp = mtod(m, struct cmsghdr *); - int cp_size = CMSG_ALIGN(cp->cmsg_len); - int buflen = m->m_len; - - while (buflen > 0 && len > 0) { - - /* - SCM_TIMESTAMP hack because struct timeval has a - * different size for 32 bits and 64 bits processes - */ - if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) { - unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))]; - struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer; - int tmp_space; - struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp); - - tmp_cp->cmsg_level = SOL_SOCKET; - tmp_cp->cmsg_type = SCM_TIMESTAMP; - - if (proc_is64bit(p)) { - struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp); - - tv64->tv_sec = tv->tv_sec; - tv64->tv_usec = tv->tv_usec; - - tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval)); - tmp_space = CMSG_SPACE(sizeof(struct user64_timeval)); - } else { - struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp); - - tv32->tv_sec = tv->tv_sec; - tv32->tv_usec = tv->tv_usec; - - tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval)); - tmp_space = CMSG_SPACE(sizeof(struct user32_timeval)); - } - if (len >= tmp_space) { - tocopy = tmp_space; - 
} else { - mp->msg_flags |= MSG_CTRUNC; - tocopy = len; - } - error = copyout(tmp_buffer, ctlbuf, tocopy); - if (error) - goto out; - - } else { - - if (cp_size > buflen) { - panic("cp_size > buflen, something wrong with alignment!"); - } - - if (len >= cp_size) { - tocopy = cp_size; - } else { - mp->msg_flags |= MSG_CTRUNC; - tocopy = len; - } - - error = copyout((caddr_t) cp, ctlbuf, - tocopy); - if (error) - goto out; - } - - - ctlbuf += tocopy; - len -= tocopy; - - buflen -= cp_size; - cp = (struct cmsghdr *)(void *)((unsigned char *) cp + cp_size); - cp_size = CMSG_ALIGN(cp->cmsg_len); - } - m = m->m_next; - } - mp->msg_controllen = ctlbuf - mp->msg_control; + if (mp->msg_control) { + error = copyout_control(p, control, mp->msg_control, + &mp->msg_controllen, &mp->msg_flags); } out: if (fromsa) @@ -1759,11 +1986,13 @@ int recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval) { __pthread_testcancel(1); - return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval)); + return (recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, + retval)); } int -recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *retval) +recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, + int32_t *retval) { struct user_msghdr msg; int error; @@ -1820,11 +2049,13 @@ int recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval) { __pthread_testcancel(1); - return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval)); + return (recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, + retval)); } int -recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *retval) +recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, + int32_t *retval) { struct user32_msghdr msg32; struct user64_msghdr msg64; @@ -1944,8 +2175,8 @@ int recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval) { int error = EOPNOTSUPP; - struct user_msghdr_x *user_msg = NULL; 
- struct uio **uiop = NULL; + struct user_msghdr_x *user_msg_x = NULL; + struct recv_msg_elem *recv_msg_array = NULL; struct socket *so; user_ssize_t len_before = 0, len_after; int need_drop = 0; @@ -1965,12 +2196,6 @@ recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval) error = EBADF; goto out; } - if (so->so_proto->pr_usrreqs->pru_soreceive_list == NULL) { - printf("%s no pru_soreceive_list\n", __func__); - error = EOPNOTSUPP; - goto out; - } - /* * Input parameter range check */ @@ -1978,73 +2203,61 @@ recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval) error = EINVAL; goto out; } - user_msg = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), + if (uap->cnt > somaxrecvmsgx) + uap->cnt = somaxrecvmsgx; + + user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), M_TEMP, M_WAITOK | M_ZERO); - if (user_msg == NULL) { - printf("%s _MALLOC() user_msg failed\n", __func__); + if (user_msg_x == NULL) { + DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__); error = ENOMEM; goto out; } - uiop = _MALLOC(uap->cnt * sizeof(struct uio *), - M_TEMP, M_WAITOK | M_ZERO); - if (uiop == NULL) { - printf("%s _MALLOC() uiop failed\n", __func__); + recv_msg_array = alloc_recv_msg_array(uap->cnt); + if (recv_msg_array == NULL) { + DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__); error = ENOMEM; goto out; } - size_of_msghdr = IS_64BIT_PROCESS(p) ? 
sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x); umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO); if (umsgp == NULL) { - printf("%s _MALLOC() user_msg failed\n", __func__); + DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__); error = ENOMEM; goto out; } error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr); if (error) { - printf("%s copyin() failed\n", __func__); + DBG_PRINTF("%s copyin() failed\n", __func__); goto out; } - error = internalize_user_msghdr_array(umsgp, + error = internalize_recv_msghdr_array(umsgp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, - UIO_READ, uap->cnt, user_msg, uiop); + UIO_READ, uap->cnt, user_msg_x, recv_msg_array); if (error) { - printf("%s copyin_user_msghdr_array() failed\n", __func__); + DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__); goto out; } /* * Make sure the size of each message iovec and * the aggregate size of all the iovec is valid */ - if (uio_array_is_valid(uiop, uap->cnt) == 0) { + if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) { error = EINVAL; goto out; } - /* * Sanity check on passed arguments */ for (i = 0; i < uap->cnt; i++) { - struct user_msghdr_x *mp = &user_msg[i]; + struct user_msghdr_x *mp = user_msg_x + i; if (mp->msg_flags != 0) { error = EINVAL; goto out; } - /* - * No support for address or ancillary data (yet) - */ - if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) { - error = EINVAL; - goto out; - } - if (mp->msg_control != USER_ADDR_NULL || - mp->msg_controllen != 0) { - error = EINVAL; - goto out; - } } #if CONFIG_MACF_SOCKET_SUBSET /* @@ -2059,44 +2272,107 @@ recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval) goto out; #endif /* MAC_SOCKET_SUBSET */ - len_before = uio_array_resid(uiop, uap->cnt); + len_before = recv_msg_array_resid(recv_msg_array, uap->cnt); - error = so->so_proto->pr_usrreqs->pru_soreceive_list(so, NULL, uiop, - uap->cnt, (struct mbuf **)0, NULL, NULL); + if 
(so->so_proto->pr_usrreqs->pru_soreceive_list != + pru_soreceive_list_notsupp && + somaxrecvmsgx == 0) { + error = so->so_proto->pr_usrreqs->pru_soreceive_list(so, + recv_msg_array, uap->cnt, &uap->flags); + } else { + int flags = uap->flags; - len_after = uio_array_resid(uiop, uap->cnt); + for (i = 0; i < uap->cnt; i++) { + struct recv_msg_elem *recv_msg_elem; + uio_t auio; + struct sockaddr **psa; + struct mbuf **controlp; + + recv_msg_elem = recv_msg_array + i; + auio = recv_msg_elem->uio; + + /* + * Do not block if we got at least one packet + */ + if (i > 0) + flags |= MSG_DONTWAIT; + + psa = (recv_msg_elem->which & SOCK_MSG_SA) ? + &recv_msg_elem->psa : NULL; + controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ? + &recv_msg_elem->controlp : NULL; + + error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa, + auio, (struct mbuf **)0, controlp, &flags); + if (error) + break; + /* + * We have some data + */ + recv_msg_elem->which |= SOCK_MSG_DATA; + /* + * Stop on partial copy + */ + if (flags & (MSG_RCVMORE | MSG_TRUNC)) + break; + } + if ((uap->flags & MSG_DONTWAIT) == 0) + flags &= ~MSG_DONTWAIT; + uap->flags = flags; + } + + len_after = recv_msg_array_resid(recv_msg_array, uap->cnt); if (error) { if (len_after != len_before && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; + else + goto out; } - if (error == 0) { - uiocnt = externalize_user_msghdr_array(umsgp, - IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, - UIO_READ, uap->cnt, user_msg, uiop); - error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr); - if (error) { - printf("%s copyout() failed\n", __func__); - goto out; + uiocnt = externalize_recv_msghdr_array(umsgp, + IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32, + UIO_READ, uap->cnt, user_msg_x, recv_msg_array); + + error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr); + if (error) { + DBG_PRINTF("%s copyout() failed\n", __func__); + goto out; + } + *retval = (int)(uiocnt); + + for (i = 0; i < uap->cnt; i++) { + struct user_msghdr_x *mp = user_msg_x + i; + struct recv_msg_elem *recv_msg_elem = recv_msg_array + i; + struct sockaddr *fromsa = recv_msg_elem->psa; + + if (mp->msg_name) { + error = copyout_sa(fromsa, mp->msg_name, + &mp->msg_namelen); + if (error) + goto out; + } + if (mp->msg_control) { + error = copyout_control(p, recv_msg_elem->controlp, + mp->msg_control, &mp->msg_controllen, + &mp->msg_flags); + if (error) + goto out; } - *retval = (int)(uiocnt); } out: if (need_drop) file_drop(uap->s); if (umsgp != NULL) _FREE(umsgp, M_TEMP); - if (uiop != NULL) { - free_uio_array(uiop, uap->cnt); - _FREE(uiop, M_TEMP); - } - if (user_msg != NULL) - _FREE(user_msg, M_TEMP); - + if (recv_msg_array != NULL) + free_recv_msg_array(recv_msg_array, uap->cnt); + if (user_msg_x != NULL) + _FREE(user_msg_x, M_TEMP); + KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0); - + return (error); } @@ -2419,16 +2695,20 @@ sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type) size_t alloc_buflen = (size_t)buflen; - if(alloc_buflen > INT_MAX/2) + if (alloc_buflen > INT_MAX/2) return (EINVAL); #ifdef __LP64__ - /* The fd's in the buffer must expand to be pointers, thus we need twice as much space */ - if(type == MT_CONTROL) - alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr); + /* + * The fd's in the buffer must expand to be pointers, thus we need twice + * as much space + */ + if (type == MT_CONTROL) + alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + + sizeof(struct cmsghdr); #endif if (alloc_buflen > MLEN) { if (type == MT_SONAME && alloc_buflen <= 112) - alloc_buflen = MLEN; /* unix domain compat. 
hack */ + alloc_buflen = MLEN; /* unix domain compat. hack */ else if (alloc_buflen > MCLBYTES) return (EINVAL); } @@ -2442,8 +2722,10 @@ sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type) return (ENOBUFS); } } - /* K64: We still copyin the original buflen because it gets expanded later - * and we lie about the size of the mbuf because it only affects unp_* functions + /* + * K64: We still copyin the original buflen because it gets expanded + * later and we lie about the size of the mbuf because it only affects + * unp_* functions */ m->m_len = buflen; error = copyin(data, mtod(m, caddr_t), (u_int)buflen); @@ -2623,20 +2905,22 @@ getsockaddrlist(struct socket *so, struct sockaddr_list **slp, int internalize_user_msghdr_array(const void *src, int spacetype, int direction, - u_int count, struct user_msghdr_x *dst, struct uio **uiop) + u_int count, struct user_msghdr_x *dst, struct uio **uiop) { int error = 0; u_int i; + u_int namecnt = 0; + u_int ctlcnt = 0; for (i = 0; i < count; i++) { uio_t auio; struct user_iovec *iovp; - struct user_msghdr_x *user_msg = &dst[i]; + struct user_msghdr_x *user_msg = dst + i; if (spacetype == UIO_USERSPACE64) { - struct user64_msghdr_x *msghdr64; + const struct user64_msghdr_x *msghdr64; - msghdr64 = ((struct user64_msghdr_x *)src) + i; + msghdr64 = ((const struct user64_msghdr_x *)src) + i; user_msg->msg_name = msghdr64->msg_name; user_msg->msg_namelen = msghdr64->msg_namelen; @@ -2647,9 +2931,9 @@ internalize_user_msghdr_array(const void *src, int spacetype, int direction, user_msg->msg_flags = msghdr64->msg_flags; user_msg->msg_datalen = msghdr64->msg_datalen; } else { - struct user32_msghdr_x *msghdr32; + const struct user32_msghdr_x *msghdr32; - msghdr32 = ((struct user32_msghdr_x *)src) + i; + msghdr32 = ((const struct user32_msghdr_x *)src) + i; user_msg->msg_name = msghdr32->msg_name; user_msg->msg_namelen = msghdr32->msg_namelen; @@ -2660,45 +2944,128 @@ internalize_user_msghdr_array(const void *src, int 
spacetype, int direction, user_msg->msg_flags = msghdr32->msg_flags; user_msg->msg_datalen = msghdr32->msg_datalen; } - - if (user_msg->msg_iovlen <= 0 || user_msg->msg_iovlen > UIO_MAXIOV) { + + if (user_msg->msg_iovlen <= 0 || + user_msg->msg_iovlen > UIO_MAXIOV) { error = EMSGSIZE; goto done; } - auio = uio_create(user_msg->msg_iovlen, 0, spacetype, direction); + auio = uio_create(user_msg->msg_iovlen, 0, spacetype, + direction); if (auio == NULL) { error = ENOMEM; goto done; } uiop[i] = auio; - if (user_msg->msg_iovlen) { - iovp = uio_iovsaddr(auio); - if (iovp == NULL) { - error = ENOMEM; - goto done; - } - error = copyin_user_iovec_array(user_msg->msg_iov, - spacetype, user_msg->msg_iovlen, iovp); - if (error) - goto done; - user_msg->msg_iov = CAST_USER_ADDR_T(iovp); + iovp = uio_iovsaddr(auio); + if (iovp == NULL) { + error = ENOMEM; + goto done; + } + error = copyin_user_iovec_array(user_msg->msg_iov, + spacetype, user_msg->msg_iovlen, iovp); + if (error) + goto done; + user_msg->msg_iov = CAST_USER_ADDR_T(iovp); - error = uio_calculateresid(auio); - if (error) - goto done; - user_msg->msg_datalen = uio_resid(auio); + error = uio_calculateresid(auio); + if (error) + goto done; + user_msg->msg_datalen = uio_resid(auio); + + if (user_msg->msg_name && user_msg->msg_namelen) + namecnt++; + if (user_msg->msg_control && user_msg->msg_controllen) + ctlcnt++; + } +done: + + return (error); +} + +int +internalize_recv_msghdr_array(const void *src, int spacetype, int direction, + u_int count, struct user_msghdr_x *dst, + struct recv_msg_elem *recv_msg_array) +{ + int error = 0; + u_int i; + + for (i = 0; i < count; i++) { + struct user_iovec *iovp; + struct user_msghdr_x *user_msg = dst + i; + struct recv_msg_elem *recv_msg_elem = recv_msg_array + i; + + if (spacetype == UIO_USERSPACE64) { + const struct user64_msghdr_x *msghdr64; + + msghdr64 = ((const struct user64_msghdr_x *)src) + i; + + user_msg->msg_name = msghdr64->msg_name; + user_msg->msg_namelen = 
msghdr64->msg_namelen; + user_msg->msg_iov = msghdr64->msg_iov; + user_msg->msg_iovlen = msghdr64->msg_iovlen; + user_msg->msg_control = msghdr64->msg_control; + user_msg->msg_controllen = msghdr64->msg_controllen; + user_msg->msg_flags = msghdr64->msg_flags; + user_msg->msg_datalen = msghdr64->msg_datalen; } else { - user_msg->msg_datalen = 0; + const struct user32_msghdr_x *msghdr32; + + msghdr32 = ((const struct user32_msghdr_x *)src) + i; + + user_msg->msg_name = msghdr32->msg_name; + user_msg->msg_namelen = msghdr32->msg_namelen; + user_msg->msg_iov = msghdr32->msg_iov; + user_msg->msg_iovlen = msghdr32->msg_iovlen; + user_msg->msg_control = msghdr32->msg_control; + user_msg->msg_controllen = msghdr32->msg_controllen; + user_msg->msg_flags = msghdr32->msg_flags; + user_msg->msg_datalen = msghdr32->msg_datalen; } + + if (user_msg->msg_iovlen <= 0 || + user_msg->msg_iovlen > UIO_MAXIOV) { + error = EMSGSIZE; + goto done; + } + recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0, + spacetype, direction); + if (recv_msg_elem->uio == NULL) { + error = ENOMEM; + goto done; + } + + iovp = uio_iovsaddr(recv_msg_elem->uio); + if (iovp == NULL) { + error = ENOMEM; + goto done; + } + error = copyin_user_iovec_array(user_msg->msg_iov, + spacetype, user_msg->msg_iovlen, iovp); + if (error) + goto done; + user_msg->msg_iov = CAST_USER_ADDR_T(iovp); + + error = uio_calculateresid(recv_msg_elem->uio); + if (error) + goto done; + user_msg->msg_datalen = uio_resid(recv_msg_elem->uio); + + if (user_msg->msg_name && user_msg->msg_namelen) + recv_msg_elem->which |= SOCK_MSG_SA; + if (user_msg->msg_control && user_msg->msg_controllen) + recv_msg_elem->which |= SOCK_MSG_CONTROL; } done: + return (error); } u_int externalize_user_msghdr_array(void *dst, int spacetype, int direction, - u_int count, const struct user_msghdr_x *src, struct uio **uiop) + u_int count, const struct user_msghdr_x *src, struct uio **uiop) { #pragma unused(direction) u_int i; @@ -2706,13 +3073,60 @@ 
externalize_user_msghdr_array(void *dst, int spacetype, int direction, u_int retcnt = 0; for (i = 0; i < count; i++) { - const struct user_msghdr_x *user_msg = &src[i]; + const struct user_msghdr_x *user_msg = src + i; uio_t auio = uiop[i]; user_ssize_t len = user_msg->msg_datalen - uio_resid(auio); if (user_msg->msg_datalen != 0 && len == 0) seenlast = 1; - + + if (seenlast == 0) + retcnt ++; + + if (spacetype == UIO_USERSPACE64) { + struct user64_msghdr_x *msghdr64; + + msghdr64 = ((struct user64_msghdr_x *)dst) + i; + + msghdr64->msg_flags = user_msg->msg_flags; + msghdr64->msg_datalen = len; + + } else { + struct user32_msghdr_x *msghdr32; + + msghdr32 = ((struct user32_msghdr_x *)dst) + i; + + msghdr32->msg_flags = user_msg->msg_flags; + msghdr32->msg_datalen = len; + } + } + return (retcnt); +} + +u_int +externalize_recv_msghdr_array(void *dst, int spacetype, int direction, + u_int count, const struct user_msghdr_x *src, + struct recv_msg_elem *recv_msg_array) +{ + u_int i; + int seenlast = 0; + u_int retcnt = 0; + + for (i = 0; i < count; i++) { + const struct user_msghdr_x *user_msg = src + i; + struct recv_msg_elem *recv_msg_elem = recv_msg_array + i; + user_ssize_t len; + + len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio); + + if (direction == UIO_READ) { + if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0) + seenlast = 1; + } else { + if (user_msg->msg_datalen != 0 && len == 0) + seenlast = 1; + } + if (seenlast == 0) retcnt ++; @@ -2723,7 +3137,7 @@ externalize_user_msghdr_array(void *dst, int spacetype, int direction, msghdr64->msg_flags = user_msg->msg_flags; msghdr64->msg_datalen = len; - + } else { struct user32_msghdr_x *msghdr32; @@ -2756,7 +3170,7 @@ uio_array_resid(struct uio **uiop, u_int count) for (i = 0; i < count; i++) { struct uio *auio = uiop[i]; - if (auio!= NULL) + if (auio != NULL) len += uio_resid(auio); } return (len); @@ -2770,17 +3184,90 @@ uio_array_is_valid(struct uio **uiop, u_int count) for (i = 0; i < count; i++) { 
struct uio *auio = uiop[i]; - + if (auio != NULL) { user_ssize_t resid = uio_resid(auio); - + /* * Sanity check on the validity of the iovec: * no point of going over sb_max */ if (resid < 0 || (u_int32_t)resid > sb_max) return (0); - + + len += resid; + if (len < 0 || (u_int32_t)len > sb_max) + return (0); + } + } + return (1); +} + + +struct recv_msg_elem * +alloc_recv_msg_array(u_int count) +{ + struct recv_msg_elem *recv_msg_array; + + recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem), + M_TEMP, M_WAITOK | M_ZERO); + + return (recv_msg_array); +} + +void +free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count) +{ + u_int i; + + for (i = 0; i < count; i++) { + struct recv_msg_elem *recv_msg_elem = recv_msg_array + i; + + if (recv_msg_elem->uio != NULL) + uio_free(recv_msg_elem->uio); + if (recv_msg_elem->psa != NULL) + _FREE(recv_msg_elem->psa, M_TEMP); + if (recv_msg_elem->controlp != NULL) + m_freem(recv_msg_elem->controlp); + } + _FREE(recv_msg_array, M_TEMP); +} + + +__private_extern__ user_ssize_t +recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count) +{ + user_ssize_t len = 0; + u_int i; + + for (i = 0; i < count; i++) { + struct recv_msg_elem *recv_msg_elem = recv_msg_array + i; + + if (recv_msg_elem->uio != NULL) + len += uio_resid(recv_msg_elem->uio); + } + return (len); +} + +int +recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count) +{ + user_ssize_t len = 0; + u_int i; + + for (i = 0; i < count; i++) { + struct recv_msg_elem *recv_msg_elem = recv_msg_array + i; + + if (recv_msg_elem->uio != NULL) { + user_ssize_t resid = uio_resid(recv_msg_elem->uio); + + /* + * Sanity check on the validity of the iovec: + * no point of going over sb_max + */ + if (resid < 0 || (u_int32_t)resid > sb_max) + return (0); + len += resid; if (len < 0 || (u_int32_t)len > sb_max) return (0); @@ -2794,11 +3281,11 @@ uio_array_is_valid(struct uio **uiop, u_int count) #define SFUIOBUFS 64 /* Macros to compute 
the number of mbufs needed depending on cluster size */ -#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1) -#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1) +#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1) +#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1) /* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */ -#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT) +#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT) /* Upper send limit in the number of mbuf clusters */ #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES) @@ -2871,13 +3358,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) size_t sizeof_hdtr; off_t file_size; struct vfs_context context = *vfs_context_current(); -#define ENXIO_10146739_DBG(err_str) { \ - if (error == ENXIO) { \ - printf(err_str, \ - __func__, \ - "File a radar related to rdar://10146739 \n"); \ - } \ -} + KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s, 0, 0, 0, 0); @@ -2889,7 +3370,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) * type and connected socket out, positive offset. */ if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) { - ENXIO_10146739_DBG("%s: fp_getfvp error. %s"); goto done; } if ((fp->f_flag & FREAD) == 0) { @@ -2902,7 +3382,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) } error = file_socket(uap->s, &so); if (error) { - ENXIO_10146739_DBG("%s: file_socket error. %s"); goto done1; } if (so == NULL) { @@ -2986,7 +3465,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) nuap.iovcnt = user_hdtr.hdr_cnt; error = writev_nocancel(p, &nuap, &writev_retval); if (error) { - ENXIO_10146739_DBG("%s: writev_nocancel error. %s"); goto done2; } sbytes += writev_retval; @@ -2999,7 +3477,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) * 2. 
We don't want to read past the end of file */ if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) { - ENXIO_10146739_DBG("%s: vnode_size error. %s"); goto done2; } @@ -3113,7 +3590,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) error == EINTR || error == EWOULDBLOCK)) { error = 0; } else { - ENXIO_10146739_DBG("%s: fo_read error. %s"); mbuf_freem(m0); goto done3; } @@ -3124,7 +3600,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) (unsigned int)(xfsize & 0x0ffffffff), 0, 0); if (xfsize == 0) { - //printf("sendfile: fo_read 0 bytes, EOF\n"); + // printf("sendfile: fo_read 0 bytes, EOF\n"); break; } if (xfsize + off > file_size) @@ -3163,7 +3639,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) so->so_error = 0; } m_freem(m0); - ENXIO_10146739_DBG("%s: Unexpected socket error. %s"); goto done3; } /* @@ -3206,7 +3681,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) error = 0; continue; } - ENXIO_10146739_DBG("%s: sflt_data_out error. %s"); goto done3; } /* @@ -3220,7 +3694,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START), uap->s, 0, 0, 0, 0); if (error) { - ENXIO_10146739_DBG("%s: pru_send error. %s"); goto done3; } } @@ -3236,7 +3709,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) nuap.iovcnt = user_hdtr.trl_cnt; error = writev_nocancel(p, &nuap, &writev_retval); if (error) { - ENXIO_10146739_DBG("%s: writev_nocancel error. 
%s"); goto done2; } sbytes += writev_retval; diff --git a/bsd/kern/uipc_usrreq.c b/bsd/kern/uipc_usrreq.c index 71c4fce53..cfe63ef28 100644 --- a/bsd/kern/uipc_usrreq.c +++ b/bsd/kern/uipc_usrreq.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include @@ -99,6 +100,11 @@ #include +/* + * Maximum number of FDs that can be passed in an mbuf + */ +#define UIPC_MAX_CMSG_FD 512 + #define f_msgcount f_fglob->fg_msgcount #define f_cred f_fglob->fg_cred #define f_ops f_fglob->fg_ops @@ -166,10 +172,9 @@ static void unp_disconnect(struct unpcb *); static void unp_shutdown(struct unpcb *); static void unp_drop(struct unpcb *, int); __private_extern__ void unp_gc(void); -static void unp_scan(struct mbuf *, void (*)(struct fileglob *)); -static void unp_mark(struct fileglob *); -static void unp_discard(struct fileglob *); -static void unp_discard_fdlocked(struct fileglob *, proc_t); +static void unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg); +static void unp_mark(struct fileglob *, __unused void *); +static void unp_discard(struct fileglob *, void *); static int unp_internalize(struct mbuf *, proc_t); static int unp_listen(struct unpcb *, proc_t); static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *); @@ -1870,9 +1875,16 @@ unp_externalize(struct mbuf *rights) struct fileglob **rp = (struct fileglob **)(cm + 1); int *fds = (int *)(cm + 1); struct fileproc *fp; - struct fileglob *fg; + struct fileglob **fgl; int newfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); - int f; + int f, error = 0; + + MALLOC(fgl, struct fileglob **, newfds * sizeof (struct fileglob *), + M_TEMP, M_WAITOK); + if (fgl == NULL) { + error = ENOMEM; + goto discard; + } proc_fdlock(p); @@ -1880,14 +1892,9 @@ unp_externalize(struct mbuf *rights) * if the new FD's will not fit, then we free them all */ if (!fdavail(p, newfds)) { - for (i = 0; i < newfds; i++) { - fg = *rp; - unp_discard_fdlocked(fg, p); - *rp++ = NULL; - } proc_fdunlock(p); - - 
return (EMSGSIZE); + error = EMSGSIZE; + goto discard; } /* * now change each pointer to an fd in the global table to @@ -1903,34 +1910,55 @@ unp_externalize(struct mbuf *rights) * If receive access is denied, don't pass along * and error message, just discard the descriptor. */ - if (mac_file_check_receive(kauth_cred_get(), *rp)) { - fg = *rp; - *rp++ = 0; - unp_discard_fdlocked(fg, p); + if (mac_file_check_receive(kauth_cred_get(), rp[i])) { + proc_fdunlock(p); + unp_discard(rp[i], p); + fds[i] = 0; + proc_fdlock(p); continue; } #endif if (fdalloc(p, 0, &f)) panic("unp_externalize:fdalloc"); - fg = rp[i]; fp = fileproc_alloc_init(NULL); if (fp == NULL) panic("unp_externalize: MALLOC_ZONE"); fp->f_iocount = 0; - fp->f_fglob = fg; - fg_removeuipc(fg); + fp->f_fglob = rp[i]; + if (fg_removeuipc_mark(rp[i])) + fgl[i] = rp[i]; + else + fgl[i] = NULL; procfdtbl_releasefd(p, f, fp); - (void) OSAddAtomic(-1, &unp_rights); fds[i] = f; } proc_fdunlock(p); - return (0); + for (i = 0; i < newfds; i++) { + if (fgl[i] != NULL) { + VERIFY(fgl[i]->fg_lflags & FG_RMMSGQ); + fg_removeuipc(fgl[i]); + } + if (fds[i]) + (void) OSAddAtomic(-1, &unp_rights); + } + +discard: + if (fgl) + FREE(fgl, M_TEMP); + if (error) { + for (i = 0; i < newfds; i++) { + unp_discard(*rp, p); + *rp++ = NULL; + } + } + return (error); } void unp_init(void) { + _CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int))); unp_zone = zinit(sizeof (struct unpcb), (nmbclusters * sizeof (struct unpcb)), 4096, "unpzone"); @@ -1979,6 +2007,7 @@ unp_internalize(struct mbuf *control, proc_t p) struct fileproc *fp; int i, error; int oldfds; + uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8]; /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */ if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || @@ -1986,6 +2015,7 @@ unp_internalize(struct mbuf *control, proc_t p) return (EINVAL); } oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); + bzero(fg_ins, sizeof(fg_ins)); proc_fdlock(p); fds = (int *)(cm + 1); @@ -1995,7 
+2025,7 @@ unp_internalize(struct mbuf *control, proc_t p) if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) { proc_fdunlock(p); return (error); - } else if (!filetype_issendable(FILEGLOB_DTYPE(tmpfp->f_fglob))) { + } else if (!file_issendable(p, tmpfp)) { proc_fdunlock(p); return (EINVAL); } else if (FP_ISGUARDED(tmpfp, GUARD_SOCKET_IPC)) { @@ -2012,12 +2042,20 @@ unp_internalize(struct mbuf *control, proc_t p) */ for (i = (oldfds - 1); i >= 0; i--) { (void) fdgetf_noref(p, fds[i], &fp); - fg_insertuipc(fp->f_fglob); + if (fg_insertuipc_mark(fp->f_fglob)) + fg_ins[i / 8] |= 0x80 >> (i % 8); rp[i] = fp->f_fglob; - (void) OSAddAtomic(1, &unp_rights); } proc_fdunlock(p); + for (i = 0; i < oldfds; i++) { + if (fg_ins[i / 8] & (0x80 >> (i % 8))) { + VERIFY(rp[i]->fg_lflags & FG_INSMSGQ); + fg_insertuipc(rp[i]); + } + (void) OSAddAtomic(1, &unp_rights); + } + return (0); } @@ -2152,7 +2190,7 @@ unp_gc(void) */ lck_mtx_unlock(&fg->fg_lock); - unp_scan(so->so_rcv.sb_mb, unp_mark); + unp_scan(so->so_rcv.sb_mb, unp_mark, 0); } } while (unp_defer); /* @@ -2265,7 +2303,7 @@ void unp_dispose(struct mbuf *m) { if (m) { - unp_scan(m, unp_discard); + unp_scan(m, unp_discard, NULL); } } @@ -2283,7 +2321,7 @@ unp_listen(struct unpcb *unp, proc_t p) } static void -unp_scan(struct mbuf *m0, void (*op)(struct fileglob *)) +unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg) { struct mbuf *m; struct fileglob **rp; @@ -2303,7 +2341,7 @@ unp_scan(struct mbuf *m0, void (*op)(struct fileglob *)) sizeof (int); rp = (struct fileglob **)(cm + 1); for (i = 0; i < qfds; i++) - (*op)(*rp++); + (*op)(*rp++, arg); break; /* XXX, but saves time */ } m0 = m0->m_act; @@ -2311,7 +2349,7 @@ unp_scan(struct mbuf *m0, void (*op)(struct fileglob *)) } static void -unp_mark(struct fileglob *fg) +unp_mark(struct fileglob *fg, __unused void *arg) { lck_mtx_lock(&fg->fg_lock); @@ -2327,23 +2365,21 @@ unp_mark(struct fileglob *fg) } static void -unp_discard(struct fileglob *fg) 
+unp_discard(struct fileglob *fg, void *p) { - proc_t p = current_proc(); /* XXX */ + if (p == NULL) + p = current_proc(); /* XXX */ (void) OSAddAtomic(1, &unp_disposed); + if (fg_removeuipc_mark(fg)) { + VERIFY(fg->fg_lflags & FG_RMMSGQ); + fg_removeuipc(fg); + } + (void) OSAddAtomic(-1, &unp_rights); proc_fdlock(p); - unp_discard_fdlocked(fg, p); - proc_fdunlock(p); -} -static void -unp_discard_fdlocked(struct fileglob *fg, proc_t p) -{ - fg_removeuipc(fg); - - (void) OSAddAtomic(-1, &unp_rights); (void) closef_locked((struct fileproc *)0, fg, p); + proc_fdunlock(p); } int diff --git a/bsd/kern/vm_pressure.c b/bsd/kern/vm_pressure.c index 27c1aed10..028411c9a 100644 --- a/bsd/kern/vm_pressure.c +++ b/bsd/kern/vm_pressure.c @@ -360,7 +360,7 @@ void vm_find_pressure_candidate(void) goto exit; } - VM_DEBUG_EVENT(vm_pageout_scan, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0); + VM_DEBUG_CONSTANT_EVENT(vm_pressure_event, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0); VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max); KNOTE_DETACH(&vm_pressure_klist, kn_max); @@ -475,6 +475,7 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int struct task* t = TASK_NULL; int curr_task_importance = 0; boolean_t consider_knote = FALSE; + boolean_t privileged_listener = FALSE; p = kn->kn_kq->kq_p; proc_list_lock(); @@ -528,7 +529,42 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int curr_task_importance = task_importance_estimate(t); - /* + /* + * Privileged listeners are only considered in the multi-level pressure scheme + * AND only if the pressure is increasing. + */ + if (level > 0) { + + if (task_has_been_notified(t, level) == FALSE) { + + /* + * Is this a privileged listener? 
+ */ + if (task_low_mem_privileged_listener(t, FALSE, &privileged_listener) == 0) { + + if (privileged_listener) { + kn_max = kn; + proc_rele(p); + goto done_scanning; + } + } + } else { + proc_rele(p); + continue; + } + } else if (level == 0) { + + /* + * Task wasn't notified when the pressure was increasing and so + * no need to notify it that the pressure is decreasing. + */ + if ((task_has_been_notified(t, kVMPressureWarning) == FALSE) && (task_has_been_notified(t, kVMPressureCritical) == FALSE)) { + proc_rele(p); + continue; + } + } + + /* * We don't want a small process to block large processes from * being notified again. */ @@ -551,9 +587,7 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int * b) has importance equal to that of the current selected process but is larger */ - if (task_has_been_notified(t, level) == FALSE) { - consider_knote = TRUE; - } + consider_knote = TRUE; } } else { if ((curr_task_importance > selected_task_importance) || @@ -566,9 +600,7 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int * b) has importance equal to that of the current selected process but is larger */ - if (task_has_been_notified(t, level) == FALSE) { - consider_knote = TRUE; - } + consider_knote = TRUE; } } } else if (level == 0) { @@ -578,9 +610,7 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int if ((curr_task_importance > selected_task_importance) || ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) { - if ((task_has_been_notified(t, kVMPressureWarning) == TRUE) || (task_has_been_notified(t, kVMPressureCritical) == TRUE)) { - consider_knote = TRUE; - } + consider_knote = TRUE; } } else if (level == -1) { @@ -606,8 +636,10 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int proc_rele(p); } +done_scanning: if (kn_max) { - VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u 
resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max); + VM_DEBUG_CONSTANT_EVENT(vm_pressure_event, VM_PRESSURE_EVENT, DBG_FUNC_NONE, kn_max->kn_kq->kq_p->p_pid, resident_max, 0, 0); + VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max); } return kn_max; diff --git a/bsd/libkern/Makefile b/bsd/libkern/Makefile index 825806a41..65e206405 100644 --- a/bsd/libkern/Makefile +++ b/bsd/libkern/Makefile @@ -8,11 +8,11 @@ include $(MakeInc_cmd) include $(MakeInc_def) -DATAFILES = \ +KERNELFILES = \ libkern.h -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = ${KERNELFILES} EXPORT_MI_DIR = libkern diff --git a/bsd/libkern/memchr.c b/bsd/libkern/memchr.c index fced31c67..28b335d46 100644 --- a/bsd/libkern/memchr.c +++ b/bsd/libkern/memchr.c @@ -36,7 +36,7 @@ memchr(const void *bigptr, int ch, size_t length) size_t n; for (n = 0; n < length; n++) if (big[n] == ch) - return (void *)&big[n]; + return __DECONST(void *, &big[n]); return NULL; } diff --git a/bsd/machine/Makefile b/bsd/machine/Makefile index b7c7225dc..556aeb506 100644 --- a/bsd/machine/Makefile +++ b/bsd/machine/Makefile @@ -15,6 +15,9 @@ DATAFILES = \ vmparam.h _types.h _limits.h _param.h \ _mcontext.h +PRIVATE_DATAFILES = \ + disklabel.h + KERNELFILES = \ disklabel.h \ byte_order.h endian.h \ @@ -25,7 +28,7 @@ KERNELFILES = \ INSTALL_MI_LIST = ${DATAFILES} -INSTALL_MI_LCL_LIST = ${DATAFILES} disklabel.h +INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} INSTALL_MI_DIR = machine diff --git a/bsd/man/man2/Makefile b/bsd/man/man2/Makefile index 5c906991d..075ebeb16 100644 --- a/bsd/man/man2/Makefile +++ b/bsd/man/man2/Makefile @@ -35,6 +35,8 @@ DATAFILES = \ chroot.2 \ close.2 \ connect.2 \ + connectx.2 \ + disconnectx.2 \ dup.2 \ dup2.2 \ execve.2 \ @@ -81,7 +83,6 @@ DATAFILES = \ getgid.2 \ getgroups.2 \ getitimer.2 \ - getlcid.2 \ getlogin.2 \ getpeername.2 \ getpgrp.2 \ @@ -105,6 +106,7 @@ DATAFILES = \ kill.2 \ kevent.2 \ kevent64.2 \ + 
kevent_qos.2 \ kqueue.2 \ lchown.2 \ link.2 \ @@ -178,7 +180,6 @@ DATAFILES = \ setgid.2 \ setgroups.2 \ setitimer.2 \ - setlcid.2 \ setlogin.2 \ setpgid.2 \ setpgrp.2 \ diff --git a/bsd/man/man2/accept.2 b/bsd/man/man2/accept.2 index cbe526799..63f183812 100644 --- a/bsd/man/man2/accept.2 +++ b/bsd/man/man2/accept.2 @@ -33,7 +33,7 @@ .\" .\" @(#)accept.2 8.2 (Berkeley) 12/11/93 .\" -.Dd December 11, 1993 +.Dd March 18, 2015 .Dt ACCEPT 2 .Os BSD 4.2 .Sh NAME @@ -201,6 +201,7 @@ is necessary. .Sh SEE ALSO .Xr bind 2 , .Xr connect 2 , +.Xr connectx 2 , .Xr listen 2 , .Xr select 2 , .Xr socket 2 , diff --git a/bsd/man/man2/bind.2 b/bsd/man/man2/bind.2 index 1e968edaa..8374320da 100644 --- a/bsd/man/man2/bind.2 +++ b/bsd/man/man2/bind.2 @@ -33,7 +33,7 @@ .\" .\" @(#)bind.2 8.1 (Berkeley) 6/4/93 .\" -.Dd June 4, 1993 +.Dd March 18, 2015 .Dt BIND 2 .Os BSD 4.2 .Sh NAME @@ -48,14 +48,14 @@ .Fa "socklen_t address_len" .Fc .Sh DESCRIPTION -.Fn Bind +.Fn bind assigns a name to an unnamed socket. When a socket is created with .Xr socket 2 it exists in a name space (address family) but has no name assigned. -.Fn Bind +.Fn bind requests that .Fa address be assigned to the socket. @@ -172,6 +172,7 @@ The include file is necessary. .Sh SEE ALSO .Xr connect 2 , +.Xr connectx 2 , .Xr getsockname 2 , .Xr listen 2 , .Xr socket 2 , diff --git a/bsd/man/man2/chflags.2 b/bsd/man/man2/chflags.2 index 0d16cab19..8df343407 100644 --- a/bsd/man/man2/chflags.2 +++ b/bsd/man/man2/chflags.2 @@ -96,7 +96,7 @@ flags may only be set or unset by the super-user. They may be set at any time, but normally may only be unset when the system is in single-user mode. (See -.Xr init 8 +.Xr launchd 8 for details.) .Sh RETURN VALUES Upon successful completion, a value of 0 is returned. @@ -162,7 +162,7 @@ The operation isn't supported by the filesystem. 
.Xr fflagstostr 3 , .Xr lchflags 3 , .Xr strtofflags 3 , -.Xr init 8 +.Xr launchd 8 .Sh HISTORY The .Fn chflags diff --git a/bsd/man/man2/connect.2 b/bsd/man/man2/connect.2 index 186b01c20..c5a06d674 100644 --- a/bsd/man/man2/connect.2 +++ b/bsd/man/man2/connect.2 @@ -33,7 +33,7 @@ .\" .\" @(#)connect.2 8.1 (Berkeley) 6/4/93 .\" -.Dd June 4, 1993 +.Dd March 18, 2015 .Dt CONNECT 2 .Os BSD 4.2 .Sh NAME @@ -74,7 +74,9 @@ only once; datagram sockets may use .Fn connect multiple times to change their association. Datagram sockets may dissolve the association -by connecting to an invalid address, such as a null address +by calling +.Xr disconnectx 2 , +or by connecting to an invalid address, such as a null address or an address with the address family set to .Dv AF_UNSPEC @@ -220,6 +222,8 @@ The include file is necessary. .Sh SEE ALSO .Xr accept 2 , +.Xr connectx 2 , +.Xr disconnectx 2 , .Xr getsockname 2 , .Xr select 2 , .Xr socket 2 , diff --git a/bsd/man/man2/connectx.2 b/bsd/man/man2/connectx.2 index 96e85c7ab..7fbca576e 100644 --- a/bsd/man/man2/connectx.2 +++ b/bsd/man/man2/connectx.2 @@ -1,5 +1,5 @@ .\" -.\" Copyright (c) 2012 Apple Inc. All rights reserved. +.\" Copyright (c) 2015 Apple Inc. All rights reserved. 
.\" .\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@ .\" @@ -25,149 +25,250 @@ .\" .\" @APPLE_OSREFERENCE_LICENSE_HEADER_END@ .\" -.Dd November 14, 2012 +.Dd March 26, 2015 .Dt CONNECTX 2 .Os Darwin .Sh NAME .Nm connectx -.Nd initiate one or more connections on a socket +.Nd initiate a connection on a socket .Sh SYNOPSIS .Fd #include .Ft int .Fo connectx .Fa "int socket" -.Fa "const struct sockaddr *saddress" -.Fa "socklen_t saddress_len" -.Fa "const struct sockaddr *daddress" -.Fa "socklen_t daddress_len" -.Fa "unsigned int ifscope" -.Fa "associd_t associd" -.Fa "connid_t *connid" +.Fa "const sa_endpoints_t *endpoints" +.Fa "sae_associd_t associd" +.Fa "unsigned int flags" +.Fa "const struct iovec *iov" +.Fa "unsigned int iovcnt" +.Fa "size_t *len" +.Fa "sae_connid_t *connid" .Fc .Sh DESCRIPTION The parameter .Fa socket -is a socket. The communication domain of the socket determines the -availability and behavior of -.Fn connectx . +is a socket. In general, .Fn connectx may be used as a substitute for cases when .Xr bind 2 and .Xr connect 2 -are issued in succession. +are issued in succession, as well as a mechanism to transmit data +at connection establishment time. .Pp -When the source address -.Fa saddress +The +.Fn connectx +system call uses a +.Fa sa_endpoints +structure to minimize the number of directly supplied arguments. 
This structure +has the following form, as defined in +.In sys/socket.h : +.Pp +.Bd -literal +typedef struct sa_endpoints { + unsigned int sae_srcif; /* optional source interface */ + struct sockaddr *sae_srcaddr; /* optional source address */ + socklen_t sae_srcaddrlen; /* size of source address */ + struct sockaddr *sae_dstaddr; /* destination address */ + socklen_t sae_dstaddrlen; /* size of destination address */ +}sa_endpoints_t; +.Ed +.Pp +When the optional source address +.Fa sae_srcaddr parameter is specified, .Fn connectx -binds the connection to one of the addresses, as if +binds the connection to the address, as if .Xr bind 2 is used. The length of -.Fa saddress +.Fa sae_srcaddr buffer is specified by -.Fa saddress_len . -This buffer may hold more than one addresses, where each successive address -immediately follows the previous one. The parameter -.Fa ifscope -may also be specified instead of -.Fa saddress , -in order to bind the connection to the interface whose interface index -equals to -.Fa ifscope . -Both -.Fa saddress +.Fa sae_srcaddrlen . +.\" This buffer may hold more than one addresses, where each successive address +.\" immediately follows the previous one. +The source address can be obtained by calling +.Xr getifaddrs 3 . +.Pp +The optional parameter +.Fa sae_srcif +may also be specified, in order to force the connection to use the interface +whose interface index equals to +.Fa sae_srcif . +The value for +.Fa sae_srcif +may be obtained by issuing a call to +.Xr if_nametoindex 3 . +If only +.Fa sae_srcif +is specified, the communication domain will choose a source address on that +interface for communicating to the peer socket. Both +.Fa sae_srcaddr and -.Fa ifscope -parameters may be specified in order to add more constraints to the connection. +.Fa sae_srcif +parameters may also be specified in order to add more constraints to the connection, and +.Fn connectx +will fail unless the address is currently assigned to that interface. 
.Pp -At least one destination address must be specified in the -.Fa daddress +A destination address must be specified in the +.Fa sae_dstaddr parameter. The -.Fa daddress_len -specifies the length of that buffer. When more than one addresses -is specified, each successive address immediately follows the previous one. +.Fa sae_dstaddrlen +specifies the length of that buffer. +.\" When more than one addresses +.\" is specified, each successive address immediately follows the previous one. +.\" Each communication domain interprets the +.\" .Fa sae_srcaddr +.\" and +.\" .Fa sae_dstaddr +.\" parameters in its own way. +.\" When multiple addresses are specified, one of the addresses will be chosen. +.\" The rules used in selecting the eligible addresses as well as their address family requirements vary between communication domains. +.\" .Pp +.\" Changes related to the connection state may be monitored by registering for the +.\" .Dv NOTE_CONNINFO_UPDATED +.\" .Xr kqueue 2 +.\" event, using the predefined system filter +.\" .Dv EVFILT_SOCK . +.\" Details regarding the event may be retrieved by calling +.\" .Xr getconninfo 3 . +.\" .Sh MULTIPATH +.\" On a multipath socket, +.\" .Fn connectx +.\" may be used multiple times, in order to establish the initial session +.\" association with the peer socket upon the first connection, and to further +.\" establish additional connections related to that association on subsequent +.\" ones. +.\" .Pp +.\" The parameter +.\" .Fa associd +.\" specifies the association identifier. When +.\" .Fn connectx +.\" is initially called to establish an associtation, the association identifier +.\" is not yet known, and +.\" .Dv ASSOCID_ANY +.\" must be specified. After the initial connection is established, the +.\" association identifier may be retrieved using +.\" .Xr getassocids 3 , +.\" and the value may then be used on subsequent +.\" .Fn connectx +.\" calls. 
+.\" .Pp +.\" If the initial connection is established without any protocol-level +.\" multipath association, the error +.\" .Er EPROTO +.\" will be returned, and the connection can be extracted to a new socket with +.\" the same properties of +.\" .Fa socket , +.\" by calling +.\" .Xr peeloff 2 . +.\" .Pp +.\" An association representing one or more connections, or a single connection +.\" may be dissolved by calling +.\" .Xr disconnectx 2 . +.\" .Sh NON-MULTIPATH +.\" On non-multipath socket, +.\" .Fn connectx +.\" behaves much like a combination of +.\" .Xr bind 2 +.\" and +.\" .Xr connect 2 . .Pp -Each communications domain interprets the -.Fa saddress +Data to be transmitted may optionally be defined via the +.Fa iovcnt +buffers specified by members of the +.Fa iov +array, along with a non-NULL +.Fa len +parameter, which upon success, indicates the number of bytes enqueued for +transmission. +.Pp +When the +.Fa iov and -.Fa daddress -parameters in its own way. When multiple addresses are specified, one -of the addresses will be chosen. The rules used in selecting the -address vary between communicaton domains. +.Fa len +parameters are non-NULL, the communication domain will copy the data to the +socket send buffer. The communication domain may impose a limit on the amount of data allowed to be buffered before connection establishment. +.Pp +When the flags parameter is set to CONNECT_RESUME_ON_READ_WRITE and an +.Fa iov +is not passed in, the communication domain will trigger the actual connection +establishment upon the first read or write following the +.Xr connectx 2 +system call. This flag is ignored if the iov is specified in the +.Xr connectx 2 +call itself. .Pp -Changes related to the connection state may be monitored by registering for the -.Dv NOTE_CONNINFO_UPDATED -.Xr kqueue 2 -event, using the predefined system filter -.Dv EVFILT_SOCK . -Details regarding the event may be retrieved by calling -.Xr getconninfo 3 . 
-.Sh MULTIPATH -On a multipath socket, +The flags parameter may also be set to CONNECT_DATA_IDEMPOTENT to indicate to +the communication domain that the data is idempotent. For example, this will +trigger TCP Fast Open (RFC 7413) with SOCK_STREAM type. The data must be passed in the +.Fa iov +parameter in +.Xr connectx 2 +, or passed in with the first write call such as with the +.Xr writev 2 +or similar system call if the CONNECT_RESUME_ON_READ_WRITE is also set. +.Pp +In general, the communication domain makes the final decision on the amount of +data that may get transmitted at connection establishment time. If the socket +requires the data be sent atomically and the data size makes this impossible, +EMSGSIZE will be returned and the state of the socket is left unchanged as if .Fn connectx -may be used multiple times, in order to establish the initial session -association with the peer socket upon the first connection, and to further -establish additional connections related to that assocication on subsequent -ones. +was not called. .Pp The parameter .Fa associd -specifies the association identifier. When -.Fn connectx -is initially called to establish an associtation, the association identifier -is not yet known, and -.Dv ASSOCID_ANY -must be specified. After the initial connection is established, the -association identifier may be retrieved using -.Xr getassocids 3 , -and the value may then be used on subsequent +is reserved for future use, and must always be set to +.Dv SAE_ASSOCID_ANY . +The parameter +.Fa connid +is also reserved for future use and should be set to NULL. +.Sh NOTES .Fn connectx -calls. +is currently supported only on AF_INET and AF_INET6 sockets of type SOCK_DGRAM +and SOCK_STREAM. .Pp -If the initial connection is established without any protocol-level -multipath association, the error -.Er EPROTO -will be returned, and the connection can be extracted to a new socket with -the same properties of -.Fa socket , -by calling -.Xr peeloff 2 . 
-.Pp -An association representing one or more connections, or a single connection -may be dissolved by calling -.Xr disconnectx 2 . -.Sh NON-MULTIPATH -On non-multipath socket, +Generally, +.\" non-multipath +connection-oriented sockets may successfully .Fn connectx -behaves much like a combination of -.Xr bind 2 -and -.Xr connect 2 . -The parameter -.Fa associd -must always be set to -.Dv ASSOCID_ANY . -.Pp -Generally, non-multipath stream sockets may successfully +only once. Connectionless sockets may use .Fn connectx -only once; datagram sockets may use +to create an association to the peer socket, and it may call +.Xr disconnectx 2 +to dissolve any existing association. Unlike connection-oriented sockets, +connectionless sockets may call +.Fn connectx +again afterwards to associate to another peer socket. +.Pp +If CONNECT_RESUME_ON_READ_WRITE is set without data +supplied, .Fn connectx -multiple times to change their association, after first dissolving the -existing association by calling -.Xr disconnectx 2 . +will immediately return success, assuming the rest of the parameters are valid. +.Xr select 2 +will indicate that the socket is ready for writing, and the actual connection +establishment is attempted once the initial data is written to the socket via +.Xr writev 2 +or similar. Subsequent attempts to write more data will fail until the existing +connection establishment attempt is successful. The error status of the socket +may be retrieved via the SO_ERROR option using +.Xr getsockopt 2 . .Sh RETURN VALUES -Upon successful completion, a value of 0 is returned and the connection -identifier is returned through the -.Fa connid -parameter. If the initial connection establishes an association with -a peer socket, the association identifier may be retrieved by calling -.Xr getassocids 2 . -Both of these identifiers are unique -on a per -.Fa socket -basis. 
Upon failure, a value of -1 is returned and the global integer +Upon successful completion, a value of 0 is returned. +.\" and an opaque value may be returned through the +.\" .Fa connid +.\" parameter. +The number of bytes from +.Fa iov +array which were enqueued for transmission is returned via +.Fa len . +.\" If the initial connection establishes an association with a peer socket, the association identifier may be retrieved by calling +.\" .Xr getassocids 2 . +.\" Both of these identifiers are unique +.\" on a per +.\" .Fa socket +.\" basis. +Upon failure, a value of -1 is returned and the global integer variable .Va errno is set to indicate the error. @@ -177,25 +278,25 @@ The system call will fail if: .Bl -tag -width Er .\" ========== -.It Bq Er EACCES -The destination address is a broadcast address and the -socket option -.Dv SO_BROADCAST -is not set. -.\" ========== .It Bq Er EADDRINUSE -The address is already in use. +The address specified in +.Fa sae_srcaddr +parameter is already in use. .\" ========== .It Bq Er EADDRNOTAVAIL -The specified address is not available on this machine. +The address specified in +.Fa sae_srcaddr +parameter is not available on this machine, or is not assigned to the interface specified by +.Fa sae_srcif . .\" ========== .It Bq Er EAFNOSUPPORT -Addresses in the specified address family cannot be used with this socket. +The +.Fa socket +cannot find any usable addresses of a specific address family +as required by the communication domain. .\" ========== .It Bq Er EALREADY -The socket is non-blocking -and a previous connection attempt -has not yet been completed. +A previous connection attempt has not yet been completed. .\" ========== .It Bq Er EBADF .Fa socket @@ -207,31 +308,39 @@ The attempt to connect was ignored or explicitly rejected. .\" ========== .It Bq Er EFAULT -The -.Fa address -parameter specifies an area outside -the process address space.
+Part of +.Fa iov +or data to be written to +.Fa socket +points outside the process's allocated address space. .\" ========== .It Bq Er EHOSTUNREACH The target host cannot be reached (e.g., down, disconnected). .\" ========== .It Bq Er EINPROGRESS -The socket is non-blocking -and the connection cannot -be completed immediately. +The connection cannot be completed immediately. It is possible to .Xr select 2 -for completion by selecting the socket for writing. +for completion by selecting the +.Fa socket +for writing. .\" ========== .It Bq Er EINTR Its execution was interrupted by a signal. .\" ========== +.It Bq Er EMSGSIZE +The size of the message exceeds the available send buffer space in the +.Fa socket . +.\" ========== .It Bq Er EINVAL An invalid argument was detected (e.g., -.Fa address_len -is not valid for the address family, -the specified address family is invalid). +.Fa sae_dstaddrlen +is not valid, the contents of +.Fa sae_srcaddr +or +.Fa sae_dstaddr, +buffer is invalid, etc.) .\" ========== .It Bq Er EISCONN The socket is already connected. @@ -254,35 +363,38 @@ Because .Fa socket is listening, no connection is allowed. .\" ========== -.It Bq Er EPROTO -The connection was successfully established without any protocol-level -association. The connection can be extracted to a new socket using -.Xr peeloff 2 . +.\".It Bq Er EPROTO +.\"The connection was successfully established without any protocol-level +.\"association. The connection can be extracted to a new socket using +.\".Xr peeloff 2 . .\" ========== -.It Bq Er EPROTOTYPE -.Fa address -has a different type than the socket -that is bound to the specified peer address. +.\".It Bq Er EPROTOTYPE +.\".Fa address +.\"has a different type than the socket +.\"that is bound to the specified peer address. .\" ========== .It Bq Er ETIMEDOUT Connection establishment timed out without establishing a connection. .\" ========== -.It Bq Er ECONNRESET -Remote host reset the connection request. 
+.El .Sh SEE ALSO -.Xr accept 2 , +.\".Xr accept 2 , .Xr bind 2 , .Xr connect 2 , .Xr disconnectx 2 , -.Xr kqueue 2 , -.Xr peeloff 2 , +.Xr disconnectx 2 , +.Xr getsockopt 2 , +.\".Xr kqueue 2 , +.\".Xr peeloff 2 , +.\".Xr shutdown 2 , .Xr select 2 , .Xr socket 2 , -.Xr getassocids 3 , -.Xr getconnids 3 , -.Xr getconninfo 3 , +.\".Xr getassocids 3 , +.\".Xr getconnids 3 , +.\".Xr getconninfo 3 , +.Xr writev 2 , .Xr compat 5 .Sh HISTORY The .Fn connectx -function call appeared in Darwin 13.0.0 +function call appeared in Darwin 15.0.0 diff --git a/bsd/man/man2/disconnectx.2 b/bsd/man/man2/disconnectx.2 index eed45a0de..d9990242b 100644 --- a/bsd/man/man2/disconnectx.2 +++ b/bsd/man/man2/disconnectx.2 @@ -1,5 +1,5 @@ .\" -.\" Copyright (c) 2012 Apple Inc. All rights reserved. +.\" Copyright (c) 2015 Apple Inc. All rights reserved. .\" .\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@ .\" @@ -25,7 +25,7 @@ .\" .\" @APPLE_OSREFERENCE_LICENSE_HEADER_END@ .\" -.Dd November 14, 2012 +.Dd April 10, 2015 .Dt DISCONNECTX 2 .Os Darwin .Sh NAME @@ -36,8 +36,8 @@ .Ft int .Fo disconnectx .Fa "int socket" -.Fa "associd_t associd" -.Fa "connid_t connid" +.Fa "sae_associd_t associd" +.Fa "sae_connid_t connid" .Fc .Sh DESCRIPTION The parameter @@ -45,43 +45,50 @@ The parameter is a socket. The communication domain of the socket determines the availability and behavior of .Fn disconnectx . -In general, +For connection-oriented socket, .Fn disconnectx is analogous to .Xr shutdown 2 .Dv with SHUT_RDWR -issued on the connection identified by -.Fa connid , -or on all connections associated with the -.Fa associd -association. +issued on the socket. For connectionless socket, it disassociates any existing +association to the peer socket. +.\" identified by +.\" .Fa connid , +.\" or on all connections associated with the +.\" .Fa associd +.\" association. .Pp The parameter .Fa associd -specifies the association identifier. 
It may be set to -.Dv ASSOCID_ANY -when there is only one association present; -.Dv ASSOCID_ALL -to specify all existing associations; or one of the identifiers returned from -.Xr getassocids 3 . +specifies the association identifier. It should be set to +.Dv SAE_ASSOCID_ANY . +.\"when there is only one association present; +.\".Dv SAE_ASSOCID_ALL +.\"to specify all existing associations; or one of the identifiers returned from +.\".Xr getassocids 3 . .Pp The parameter .Fa connid -specifies the connection identifier. It may be set to -.Dv CONNID_ANY -or -.Dv CONNID_ALL , -in which case the association represented by -.Fa associd -will be dissolved; or the value returned from -.Xr connectx 2 -or -.Xr getconnids 3 , -which indicates that the disconnection occurs only on that connection -while keeping the session association intact. For the latter, the connection -associated with -.Fa connid -will no longer be valid upon success. +should be set to +.\" specifies the connection identifier. It may be set to +.Dv SAE_CONNID_ANY . +.\" or +.\".Dv SAE_CONNID_ALL , +.\" in which case the association represented by +.\" .Fa associd +.\" will be dissolved; or the value returned from +.\" .Xr connectx 2 +.\" or +.\" .Xr getconnids 3 , +.\" which indicates that the disconnection occurs only on that connection +.\" while keeping the session association intact. For the latter, the connection +.\" associated with +.\" .Fa connid +.\" will no longer be valid upon success. +.Sh NOTES +.Fn disconnectx +is currently supported only on AF_INET and AF_INET6 sockets of type SOCK_DGRAM +and SOCK_STREAM. .Sh RETURN VALUES The .Fn disconnectx @@ -96,8 +103,9 @@ system call succeeds unless: .Bl -tag -width Er .\" =========== .It Bq Er EALREADY -Operation already in progress for the session association represented by -.Fa associd . +Operation already in progress. +.\" for the session association represented by +.\" .Fa associd . 
.\" =========== .It Bq Er EBADF .Fa Socket @@ -112,8 +120,11 @@ argument is invalid or the underlying protocol is no longer attached to .Fa socket . .\" =========== .It Bq Er ENOTCONN -The session association repreresented by -.Fa associd +.\" The session association repreresented by +.\" .Fa associd +.\" is not connected. +The +.Fa socket is not connected. .\" =========== .It Bq Er ENOTSOCK @@ -121,13 +132,14 @@ is not connected. is a file, not a socket. .El .Sh SEE ALSO +.Xr connect 2 , .Xr connectx 2 , .Xr socket 2 , -.Xr getassocids 3 , -.Xr getconnids 3 , -.Xr getconninfo 3 , +.\" .Xr getassocids 3 , +.\" .Xr getconnids 3 , +.\" .Xr getconninfo 3 , .Xr compat 5 .Sh HISTORY The .Fn disconnectx -function call appeared in Darwin 13.0.0 +function call appeared in Darwin 15.0.0 diff --git a/bsd/man/man2/getattrlistbulk.2 b/bsd/man/man2/getattrlistbulk.2 index 346402ef7..ee1b5a769 100644 --- a/bsd/man/man2/getattrlistbulk.2 +++ b/bsd/man/man2/getattrlistbulk.2 @@ -151,6 +151,9 @@ It is typical to ask for a combination of common, file, and directory attributes and then use the value of the .Dv ATTR_CMN_OBJTYPE attribute to parse the resulting attribute buffer. +.Pp +A directory which is a mount point for a file system will have a value of "DIR_MNTSTATUS_MNTPOINT" set for its +ATTR_DIR_MOUNTSTATUS attribute entry. However, the attributes for the mount point will be those from the (underlying) file system. The only way to get the attributes of the mounted root directory is to call getattrlist(2) on the mount point. . .Sh RETURN VALUES Upon successful completion the numbers of entries successfully read diff --git a/bsd/man/man2/getlcid.2 b/bsd/man/man2/getlcid.2 deleted file mode 100644 index 6625da578..000000000 --- a/bsd/man/man2/getlcid.2 +++ /dev/null @@ -1,87 +0,0 @@ -.\" Copyright (c) 2005 SPARTA, Inc. -.\" All rights reserved.
-.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" $FreeBSD$ -.\" -.\" Note: The date here should be updated whenever a non-trivial -.\" change is made to the manual page. -.Dd May 5, 2005 -.Dt GETLCID 2 -.Os -.Sh NAME -.Nm getlcid -.Nd "get login context" -.Sh SYNOPSIS -.In sys/lctx.h -.Ft pid_t -.Fn getlcid "pid_t pid" -.Sh DESCRIPTION -The login context of the process identified by -.Fa pid -is returned by -.Fn getlcid . -If -.Fa pid -is zero, -.Fn getlcid -returns the login context of the current process. -.Pp -Login contexts are used for tracking processes that originated from a users -login session. A login context is inherited via -.Fn fork -or by explicit creation using the -.Fn setlcid -call. 
-.Sh RETURN VALUES -If successful, -.Fn getlcid -returns a non-negative integer, the Login Context ID; otherwise the -value \-1 is returned and the global variable -.Fa errno -is set to indicate -the error. -.Sh ERRORS -The -.Fn getlcid -function will fail if: -.Bl -tag -width Er -.It Bq Er EPERM -Operation not permitted. -.It Bq Er ESRCH -No such process. -.It Bq Er ENOATTR -Attribute not found. -.El -.Sh SEE ALSO -.Xr setlcid 2 -.Sh HISTORY -The -.Nm -manual page -first appeared in -.Fx 6.0 . -.Sh AUTHORS -This -manual page was written by -.An Matthew N. Dodd Aq mdodd@FreeBSD.org . diff --git a/bsd/man/man2/getsockname.2 b/bsd/man/man2/getsockname.2 index 4ca0f72e0..58950e7df 100644 --- a/bsd/man/man2/getsockname.2 +++ b/bsd/man/man2/getsockname.2 @@ -50,7 +50,7 @@ .Sh DESCRIPTION The .Fn getsockname -fynction returns the current +function returns the current .Fa address for the specified socket. .Pp diff --git a/bsd/man/man2/gettimeofday.2 b/bsd/man/man2/gettimeofday.2 index a9b300555..179e9bc0c 100644 --- a/bsd/man/man2/gettimeofday.2 +++ b/bsd/man/man2/gettimeofday.2 @@ -123,7 +123,7 @@ the appropriate part of the year. .Pp Only the super-user may set the time of day or time zone. If the system securelevel is greater than 1 (see -.Xr init 8 ), +.Xr launchd 8 ), the time may only be advanced. This limitation is imposed to prevent a malicious super-user from setting arbitrary time stamps on files. diff --git a/bsd/man/man2/intro.2 b/bsd/man/man2/intro.2 index 1f6ba1495..de40a734d 100644 --- a/bsd/man/man2/intro.2 +++ b/bsd/man/man2/intro.2 @@ -33,7 +33,7 @@ .\" .\" @(#)intro.2 8.3 (Berkeley) 12/11/93 .\" -.Dd December 11, 1993 +.Dd March 18, 2015 .Dt INTRO 2 .Os BSD 4 .Sh NAME @@ -240,7 +240,8 @@ same routine may complete normally. .It Er 36 EINPROGRESS Em "Operation now in progress" . An operation that takes a long time to complete (such as a -.Xr connect 2 ) +.Xr connect 2 or +.Xr connectx 2 ) was attempted on a non-blocking object (see .Xr fcntl 2 ) . 
.It Er 37 EALREADY Em "Operation already in progress" . @@ -308,6 +309,8 @@ the system lacked sufficient buffer space or because a queue was full. .It Er 56 EISCONN Em "Socket is already connected" . A .Xr connect +or +.Xr connectx request was made on an already connected socket; or, a .Xr sendto @@ -326,7 +329,8 @@ had already been shut down with a previous call. .It Er 60 ETIMEDOUT Em "Operation timed out" . A -.Xr connect +.Xr connect , +.Xr connectx or .Xr send request failed because the connected party did not @@ -476,7 +480,7 @@ A new process is created by a currently active process; (see The parent process ID of a process is initially the process ID of its creator. If the creating process exits, the parent process ID of each child is set to the ID of a system process, -.Xr init . +.Xr launchd 8 . .It Process Group Each active process is a member of a process group that is identified by a non-negative integer called the process group ID. This is the process @@ -523,7 +527,7 @@ as the group, but is in a different process group. Note that when a process exits, the parent process for its children is changed to be -.Xr init , +.Xr launchd 8 , which is in a separate session. Not all members of an orphaned process group are necessarily orphaned processes (those whose creating process has exited). @@ -582,7 +586,7 @@ process and is granted special privileges if its effective user ID is 0. .It Special Processes The processes with process IDs of 0, 1, and 2 are special. Process 0 is the scheduler. Process 1 is the initialization process -.Xr init , +.Xr launchd 8 , and is the ancestor of every other process in the system. It is used to control the process structure. Process 2 is the paging daemon. 
diff --git a/bsd/man/man2/kevent_qos.2 b/bsd/man/man2/kevent_qos.2 new file mode 100644 index 000000000..9f491e699 --- /dev/null +++ b/bsd/man/man2/kevent_qos.2 @@ -0,0 +1 @@ +.so man2/kqueue.2 diff --git a/bsd/man/man2/kqueue.2 b/bsd/man/man2/kqueue.2 index c3e668072..5e8702457 100644 --- a/bsd/man/man2/kqueue.2 +++ b/bsd/man/man2/kqueue.2 @@ -53,8 +53,9 @@ .Sh NAME .Nm kqueue , .Nm kevent , -and .Nm kevent64 +and +.Nm kevent_qos .Nd kernel event notification mechanism .Sh LIBRARY .Lb libc @@ -68,21 +69,28 @@ and .Fn kevent "int kq" "const struct kevent *changelist" "int nchanges" "struct kevent *eventlist" "int nevents" "const struct timespec *timeout" .Ft int .Fn kevent64 "int kq" "const struct kevent64_s *changelist" "int nchanges" "struct kevent64_s *eventlist" "int nevents" "unsigned int flags" "const struct timespec *timeout" +.Ft int +.Fn kevent_qos "int kq" "const struct kevent_qos_s *changelist" "int nchanges" "struct kevent_qos_s *eventlist" "int nevents" "void *data_out" "size_t *data_available" "unsigned int flags" .Fn EV_SET "&kev" ident filter flags fflags data udata .Fn EV_SET64 "&kev" ident filter flags fflags data udata "ext[0]" "ext[1]" +.Fn EV_SET_QOS "&kev" ident filter flags qos udata fflags xflags data "ext[0]" "ext[1]" "ext[2]" "ext[3]" .Sh DESCRIPTION The .Fn kqueue -system call +system call allocates a kqueue file descriptor. This file descriptor provides a generic method of notifying the user when a kernel event (kevent) happens or a condition holds, based on the results of small pieces of kernel code termed filters. -A kevent is identified by an (ident, filter) pair and specifies -the interesting conditions to be notified about for that pair. -An (ident, filter) pair can only appear once in a given kqueue. -Subsequent attempts to register the same pair for a given kqueue +.Pp +A kevent is identified by an (ident, filter, and optional udata value) +tuple. It specifies the interesting conditions to be notified about +for that tuple. 
An (ident, filter, and optional udata value) tuple can +only appear once in a given kqueue. +Subsequent attempts to register the same tuple for a given kqueue will result in the replacement of the conditions being watched, not an addition. +Whether the udata value is considered as part of the tuple is controlled +by the EV_UDATA_SPECIFIC flag on the kevent. .Pp The filter identified in a kevent is executed upon the initial registration of that event in order to detect whether a preexisting @@ -112,9 +120,10 @@ The queue is not inherited by a child created with .Xr fork 2 . .Pp The -.Fn kevent -and +.Fn kevent, .Fn kevent64 +and +.Fn kevent_qos system calls are used to register events with the queue, and return any pending events to the user. @@ -122,9 +131,10 @@ The .Fa changelist argument is a pointer to an array of -.Va kevent -or +.Va kevent, .Va kevent64_s +or +.Va kevent_qos_s structures, as defined in .Aq Pa sys/event.h . All changes contained in the @@ -138,16 +148,28 @@ gives the size of The .Fa eventlist argument -is a pointer to an array of -.Va kevent -or +is a pointer to an array of out +.Va kevent, .Va kevent64_s +or +.Va kevent_qos_s structures. The .Fa nevents -argument -determines the size of +argument determines the size of .Fa eventlist . +If the KEVENT_FLAG_STACK_EVENTS flag is provided on the system call, +the eventlist array is filled in stack order (starting at the +highest available index) instead of typical array order. +The +.Fa data_out +argument provides space for extra out data provided by specific filters. +The +.Fa data_available +argument's contents specify the space available in the data pool on input, +and contain the amount still remaining on output. +If the KEVENT_FLAG_STACK_DATA flag is specified on the system call, +the data is allocated from the pool in stack order instead of typical heap order.
If .Fa timeout is a non-NULL pointer, it specifies a maximum interval to wait @@ -158,6 +180,13 @@ is a NULL pointer, both and .Fn kevent64 wait indefinitely. To effect a poll, the +.Fa flags +argument to +.Fn kevent64 +or +.Fn kevent_qos +can include the KEVENT_FLAG_IMMEDIATE value to indicate an +immediate timeout. Alternatively, the .Fa timeout argument should be non-NULL, pointing to a zero-valued .Va timespec @@ -174,12 +203,17 @@ structure. Similarly, .Fn EV_SET64 initializes a .Va kevent64_s +structure and +.Fn EV_SET_QOS +initializes a +.Va kevent_qos_s structure. .Pp The -.Va kevent -and +.Va kevent, .Va kevent64_s +and +.Va kevent_qos_s structures are defined as: .Bd -literal struct kevent { @@ -191,7 +225,6 @@ struct kevent { void *udata; /* opaque user data identifier */ }; - struct kevent64_s { uint64_t ident; /* identifier for this event */ int16_t filter; /* filter for event */ @@ -201,18 +234,31 @@ struct kevent64_s { uint64_t udata; /* opaque user data identifier */ uint64_t ext[2]; /* filter-specific extensions */ }; + +struct kevent_qos_s { + uint64_t ident; /* identifier for this event */ + int16_t filter; /* filter for event */ + uint16_t flags; /* general flags */ + uint32_t qos; /* quality of service when servicing event */ + uint64_t udata; /* opaque user data identifier */ + uint32_t fflags; /* filter-specific flags */ + uint32_t xflags; /* extra filter-specific flags */ + int64_t data; /* filter-specific data */ + uint64_t ext[4]; /* filter-specific extensions */ +}; .Ed .Pp ---- .Pp The fields of -.Fa struct kevent -and +.Fa struct kevent, .Fa struct kevent64_s +and +.Fa struct kevent_qos_s are: .Bl -tag -width XXXfilter .It ident -Value used to identify this event. +Value used to identify the source of the event. The exact interpretation is determined by the attached filter, but often is a file descriptor. .It filter @@ -225,7 +271,8 @@ Filter-specific flags. .It data Filter-specific data value. 
.It udata -Opaque user-defined value passed through the kernel unchanged. +Opaque user-defined value passed through the kernel unchanged. It can +optionally be part of the uniquing decision of the kevent system .El .Pp In addition, @@ -237,6 +284,16 @@ This field stores extensions for the event's filter. What type of extension depe what type of filter is being used. .El .Pp +In addition, +.Fa struct kevent_qos_s +contains: +.Bl -tag -width XXXfilter +.It xflags +Extra filter-specific flags. +.It ext[4] +The QoS variant provides twice as many extension values for filter-specific uses. +.El +.Pp ---- .Pp The @@ -250,15 +307,17 @@ in a duplicate entry. Adding an event automatically enables it, unless overridden by the EV_DISABLE flag. .It EV_ENABLE Permit -.Fn kevent -and +.Fn kevent, .Fn kevent64 +and +.Fn kevent_qos to return the event if it is triggered. .It EV_DISABLE Disable the event so -.Fn kevent -and +.Fn kevent, .Fn kevent64 +and +.Fn kevent_qos will not return it. The filter itself is not disabled. .It EV_DELETE Removes the event from the kqueue. Events which are attached to @@ -294,13 +353,15 @@ below. .Pp The predefined system filters are listed below. Arguments may be passed to and from the filter via the +.Va data, .Va fflags -and -.Va data +and optionally +.Va xflags fields in the -.Va kevent -or +.Va kevent, .Va kevent64_s +or +.Va kevent_qos_s structure. .Bl -tag -width EVFILT_MACHPORT .It EVFILT_READ @@ -328,6 +389,8 @@ flag in .Va fflags , and specifying the new low water mark in .Va data . +The derived per filter low water mark value is, however, bounded +by socket receive buffer's high and low water mark values. On return, .Va data contains the number of bytes of protocol data available to read. @@ -351,7 +414,7 @@ Returns when the file pointer is not at the end of file. contains the offset from current position to end of file, and may be negative. 
.It "Fifos, Pipes" -Returns when the there is data to read; +Returns when there is data to read; .Va data contains the number of bytes available. .Pp @@ -360,6 +423,17 @@ When the last writer disconnects, the filter will set EV_EOF in This may be cleared by passing in EV_CLEAR, at which point the filter will resume waiting for data to become available before returning. +.It "Device nodes" +Returns when there is data to read from the device; +.Va data +contains the number of bytes available. If the device does +not support returning number of bytes, it will not allow the +filter to be attached. However, if the NOTE_LOWAT flag is +specified and the +.Va data +field contains 1 on input, those devices will attach - but +cannot be relied upon to provide an accurate count of bytes +to be read on output. .El .It EVFILT_WRITE Takes a file descriptor as the identifier, and returns whenever @@ -431,7 +505,8 @@ or the underlying fileystem was unmounted. .Pp On return, .Va fflags -contains the events which triggered the filter. +contains the filter-specific flags which are associated with +the triggered events seen by this filter. .It EVFILT_PROC Takes the process ID to monitor as the identifier and the events to watch for in @@ -484,12 +559,22 @@ This filter automatically sets the EV_CLEAR flag internally. .It EVFILT_MACHPORT Takes the name of a mach port, or port set, in .Va ident -and waits until a message is received on the port or port set. When a message -is recieved, the size of the message is returned in -.Va data -and if +and waits until a message is enqueued on the port or port set. When a message +is detected, but not directly received by the kevent call, the name of the +specific port where the message is enqueued is returned in +.Va data . +If .Va fflags -is set to MACH_RCV_MSG, a pointer to the message is returned in ext[0]. 
+contains MACH_RCV_MSG, the ext[0] and ext[1] flags are assumed to contain +a pointer to the buffer where the message is to be received and the size +of the receive buffer, respectively. If MACH_RCV_MSG is specified, yet the +buffer size in ext[1] is zero, the space for the buffer may be carved out +of the +.Va +data_out +area provided to +.Fn kevent_qos +if there is enough space remaining there. .It EVFILT_TIMER Establishes an interval timer with the data timer identified by @@ -520,9 +605,11 @@ If fflags is not set, the default is milliseconds. The timer will be periodic un On return, .Va data contains the number of times the timeout has expired since the last call to -.Fn kevent +.Fn kevent , +.Fn kevent64 or -.Fn kevent64 . +.Fn kevent_qos . + This filter automatically sets the EV_CLEAR flag internally. .El .Pp @@ -554,9 +641,10 @@ If there was an error creating the kernel event queue, a value of -1 is returned and errno set. .Pp The -.Fn kevent -and +.Fn kevent , .Fn kevent64 +and +.Fn kevent_qos system calls return the number of events placed in the .Fa eventlist , @@ -580,9 +668,10 @@ will be returned, and .Dv errno will be set to indicate the error condition. If the time limit expires, then -.Fn kevent -and +.Fn kevent , .Fn kevent64 +and +.Fn kevent_qos return 0. .Sh ERRORS The diff --git a/bsd/man/man2/listen.2 b/bsd/man/man2/listen.2 index f62a62c1e..3949b41a5 100644 --- a/bsd/man/man2/listen.2 +++ b/bsd/man/man2/listen.2 @@ -34,7 +34,7 @@ .\" .\" @(#)listen.2 8.2 (Berkeley) 12/11/93 .\" -.Dd December 11, 1993 +.Dd March 18, 2015 .Dt LISTEN 2 .Os BSD 4.2 .Sh NAME @@ -60,9 +60,9 @@ accepted with The .Fn listen call applies only to sockets of type -.Dv SOCK_STREAM -or -.Dv SOCK_SEQPACKET. +.Dv SOCK_STREAM. +.\"or +.\".Dv SOCK_SEQPACKET.
.Pp The .Fa backlog @@ -109,6 +109,7 @@ The socket is not of a type that supports the operation .Sh SEE ALSO .Xr accept 2 , .Xr connect 2 , +.Xr connectx 2 , .Xr socket 2 .Sh BUGS The diff --git a/bsd/man/man2/mmap.2 b/bsd/man/man2/mmap.2 index b55d054e1..5707d6c2d 100644 --- a/bsd/man/man2/mmap.2 +++ b/bsd/man/man2/mmap.2 @@ -126,6 +126,9 @@ argument by .Em or Ns 'ing the following values: .Bl -tag -width MAP_HASSEMAPHORE +.It Dv MAP_ANONYMOUS +Synonym for +.Dv MAP_ANON. .It Dv MAP_ANON Map anonymous memory not associated with any specific file. The @@ -265,7 +268,9 @@ does not include either MAP_PRIVATE or MAP_SHARED. The .Fa len argument -was negative. +was negative or zero. Historically, the system call would not return an error if the argument was zero. +See other potential additional restrictions in the +COMPATIBILITY section below. .It Bq Er EINVAL The .Fa offset @@ -314,7 +319,7 @@ The parameter must specify either MAP_PRIVATE or MAP_SHARED. .It The -.Fa size +.Fa len parameter must not be 0. .It The diff --git a/bsd/man/man2/mount.2 b/bsd/man/man2/mount.2 index dc70c857a..a40dc3112 100644 --- a/bsd/man/man2/mount.2 +++ b/bsd/man/man2/mount.2 @@ -92,6 +92,8 @@ Do not interpret special files on the filesystem. Union with underlying filesystem instead of obscuring it. .It Dv MNT_SYNCHRONOUS All I/O to the filesystem should be done synchronously. +.It Dv MNT_CPROTECT +Enable data protection on the filesystem if the filesystem is configured for it. .El .Pp The flag @@ -120,7 +122,7 @@ The format for these argument structures is described in the manual page for each filesystem. .Pp The -.Fn umount +.Fn unmount function call disassociates the filesystem from the specified mount point .Fa dir . @@ -142,8 +144,8 @@ and the variable .Va errno is set to indicate the error. 
.Pp -.Nm Umount -returns the value 0 if the umount succeeded; otherwise -1 is returned +.Nm unmount +returns the value 0 if the unmount succeeded; otherwise -1 is returned and the variable .Va errno is set to indicate the error. @@ -183,7 +185,7 @@ Another process currently holds a reference to points outside the process's allocated address space. .El .Pp -.Nm Umount +.Nm unmount may fail with one of the following errors: .Bl -tag -width [ENAMETOOLONG] .It Bq Er EPERM @@ -215,12 +217,12 @@ points outside the process's allocated address space. .El .Sh SEE ALSO .Xr mount 8 , -.Xr umount 8 +.Xr unmount 8 .Sh BUGS Some of the error codes need translation to more obvious messages. .Sh HISTORY .Fn Mount and -.Fn umount +.Fn unmount function calls appeared in .At v6 . diff --git a/bsd/man/man2/peeloff.2 b/bsd/man/man2/peeloff.2 index 11e795ccc..3ba0acb18 100644 --- a/bsd/man/man2/peeloff.2 +++ b/bsd/man/man2/peeloff.2 @@ -36,7 +36,7 @@ .Ft int .Fo peeloff .Fa "int socket" -.Fa "associd_t associd" +.Fa "sae_associd_t associd" .Fc .Sh DESCRIPTION The parameter @@ -53,7 +53,7 @@ into its own separate socket. The parameter .Fa associd specifies the association identifier. It may be set to -.Dv ASSOCID_ANY +.Dv SAE_ASSOCID_ANY when there is only one association present; or one of the identifiers returned from .Xr getassocids 3 . diff --git a/bsd/man/man2/poll.2 b/bsd/man/man2/poll.2 index c0da72110..c0fa79ea2 100644 --- a/bsd/man/man2/poll.2 +++ b/bsd/man/man2/poll.2 @@ -48,7 +48,7 @@ .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" -.Dd February 27, 2005 +.Dd March 18, 2015 .Dt POLL 2 .Os .Sh NAME @@ -63,7 +63,7 @@ .Fa "int timeout" .Fc .Sh DESCRIPTION -.Fn Poll +.Fn poll examines a set of file descriptors to see if some of them are ready for I/O or if certain events have occurred on them. @@ -167,7 +167,7 @@ will return without blocking. 
If the value of .Fa timeout is -1, the poll blocks indefinitely. .Sh RETURN VALUES -.Fn Poll +.Fn poll returns the number of descriptors that are ready for I/O, or -1 if an error occurred. If the time limit expires, @@ -183,7 +183,7 @@ array will be unmodified and the global variable .Va errno will be set to indicate the error. .Sh ERRORS -.Fn Poll +.Fn poll will fail if: .Bl -tag -width Er .\" =========== @@ -213,6 +213,7 @@ system call currently does not support devices. .Sh SEE ALSO .Xr accept 2 , .Xr connect 2 , +.Xr connectx 2 , .Xr kevent 2 , .Xr read 2 , .Xr recv 2 , diff --git a/bsd/man/man2/ptrace.2 b/bsd/man/man2/ptrace.2 index 5df4371b2..2415874d6 100644 --- a/bsd/man/man2/ptrace.2 +++ b/bsd/man/man2/ptrace.2 @@ -2,7 +2,7 @@ .\" $NetBSD: ptrace.2,v 1.3 1996/02/23 01:39:41 jtc Exp $ .\" .\" This file is in the public domain. -.Dd November 7, 1994 +.Dd March 25, 2015 .Dt PTRACE 2 .Os .Sh NAME @@ -94,6 +94,8 @@ had been used with .Dv SIGKILL given as the signal to be delivered. .It Dv PT_ATTACH +This call has been replaced with PT_ATTACHEXC. +.It Dv PT_ATTACHEXC This request allows a process to gain control of an otherwise unrelated process and begin tracing it. It does not need any cooperation from the to-be-traced process. In this case, @@ -104,7 +106,10 @@ must have the same real UID as the tracing process, and that it must not be executing a setuid or setgid executable. (If the tracing process is running as root, these restrictions do not apply.) The tracing process will see the newly-traced process stop and may then -control it as if it had been traced all along. +control it as if it had been traced all along. Note that this call differs +from the prior call ( +.Dv PT_ATTACH ) +in that signals from the child are delivered to the parent as Mach exceptions (see EXC_SOFT_SIGNAL). 
.It Dv PT_DETACH This request is like PT_CONTINUE, except that it does not allow specifying an alternate place to continue execution, and after it @@ -128,7 +133,7 @@ No process having the specified process ID exists. .Bl -bullet -compact .It A process attempted to use -.Dv PT_ATTACH +.Dv PT_ATTACHEXC on itself. .It The @@ -152,27 +157,27 @@ normally true only of system processes.) .It Bq Er EBUSY .Bl -bullet -compact .It -.Dv PT_ATTACH +.Dv PT_ATTACHEXC was attempted on a process that was already being traced. .It A request attempted to manipulate a process that was being traced by some process other than the one making the request. .It A request (other than -.Dv PT_ATTACH ) +.Dv PT_ATTACHEXC ) specified a process that wasn't stopped. .El .It Bq Er EPERM .Bl -bullet -compact .It A request (other than -.Dv PT_ATTACH ) +.Dv PT_ATTACHEXC ) attempted to manipulate a process that wasn't being traced at all. .It An attempt was made to use -.Dv PT_ATTACH +.Dv PT_ATTACHEXC on a process in violation of the requirements listed under -.Dv PT_ATTACH +.Dv PT_ATTACHEXC above. .El .El diff --git a/bsd/man/man2/reboot.2 b/bsd/man/man2/reboot.2 index 4edf12252..38791c085 100644 --- a/bsd/man/man2/reboot.2 +++ b/bsd/man/man2/reboot.2 @@ -90,9 +90,9 @@ the processor is simply halted; no reboot takes place. This option should be used with caution. .It Dv RB_INITNAME An option allowing the specification of an init program (see -.Xr init 8 ) +.Xr launchd 8 ) other than -.Pa /sbin/init +.Pa /sbin/launchd to be run when the system reboots. This switch is not currently available. .It Dv RB_KDB @@ -124,7 +124,7 @@ prevents this, booting the system with a single-user shell on the console. .Dv RB_SINGLE is actually interpreted by the -.Xr init 8 +.Xr launchd 8 program in the newly booted system. 
.Pp When no options are given (i.e., diff --git a/bsd/man/man2/recv.2 b/bsd/man/man2/recv.2 index b777106b3..251fe51c6 100644 --- a/bsd/man/man2/recv.2 +++ b/bsd/man/man2/recv.2 @@ -31,7 +31,7 @@ .\" .\" @(#)recv.2 8.3 (Berkeley) 2/21/94 .\" -.Dd May 15, 2006 +.Dd March 18, 2015 .Dt RECV 2 .Os .Sh NAME @@ -94,7 +94,9 @@ The function is normally used only on a .Em connected socket (see -.Xr connect 2 ) +.Xr connect 2 +or +.Xr connectx 2 ) and is identical to .Fn recvfrom with a @@ -258,8 +260,9 @@ The field is set on return according to the message received. .Dv MSG_EOR indicates end-of-record; -the data returned completed a record (generally used with sockets of type -.Dv SOCK_SEQPACKET ) . +the data returned completed a record. +.\" (generally used with sockets of type +.\".Dv SOCK_SEQPACKET ) . .Dv MSG_TRUNC indicates that the trailing portion of a datagram was discarded @@ -311,7 +314,8 @@ An attempt to allocate a memory buffer fails. .It Bq Er ENOTCONN The socket is associated with a connection-oriented protocol and has not been connected (see -.Xr connect 2 +.Xr connect 2, +.Xr connectx 2, and .Xr accept 2 ) . .\" =========== diff --git a/bsd/man/man2/searchfs.2 b/bsd/man/man2/searchfs.2 index 456b8dc7b..f141a5437 100644 --- a/bsd/man/man2/searchfs.2 +++ b/bsd/man/man2/searchfs.2 @@ -580,7 +580,9 @@ The parameter contains an invalid flag or sizeofsearchparams1/2 is greater than SEARCHFS_MAX_SEARCHPARMS (see attr.h). Additionally, filesystems that do not support SRCHFS_SKIPLINKS may return EINVAL if this search option -is requested. +is requested. EINVAL may also be returned if you request attributes for either +searching or to be returned for matched entries if the filesystem does not support +vending that particular attribute. . .It Bq Er EAGAIN The search terminated with partial results, either because @@ -609,17 +611,23 @@ An I/O error occurred while reading from or writing to the file system. . 
.Sh CAVEATS -The list of attributes that are valid as search criteria currently includes the -following list of attributes. Note that this list is substantially smaller than -what is available via +The list of attributes valid for searching and returning to the caller may +be substantially smaller than that of the .Xr getattrlist 2 -for a particular filesystem object. In general, a filesystem that supports +system call. See the following lists for the currently available search criteria. +In general, a filesystem that supports .Fn searchfs will typically supply per-item attributes for matched objects that are also supported by the .Xr getdirentries 2 system call. This varies from filesystem to filesystem. + +.Sh SEARCH ATTRIBUTES + +The list of attributes that are valid as search criteria currently includes the +following list of attributes for a particular filesystem object. + .Pp . .Bl -item -compact @@ -669,6 +677,97 @@ ATTR_FILE_RSRCLENGTH ATTR_FILE_RSRCALLOCSIZE .El . + +.Sh RETURN ATTRIBUTES + +As mentioned above, the list of attributes that are available to be returned to the caller +vary by filesystem, but should include the following attributes, in the following order. +The buffer should be assumed to be packed similar to the output buffer of the +.Xr getattrlist 2 +system call. Note that again, this list may be substantially smaller than what is available via +.Xr getattrlist 2 + +.Pp +. +.Bl -item -compact +.It +ATTR_CMN_NAME +.It +ATTR_CMN_DEVID +.It +ATTR_CMN_FSID +.It +ATTR_CMN_OBJTYPE +.It +ATTR_CMN_OBJTAG +.It +ATTR_CMN_OBJID +.It +ATTR_CMN_OBJPERMANENTID +.It +ATTR_CMN_PAROBJID +.It +ATTR_CMN_SCRIPT +.It +ATTR_CMN_CRTIME +.It +ATTR_CMN_MODTIME +.It +ATTR_CMN_CHGTIME +.It +ATTR_CMN_ACCTIME +.It +ATTR_CMN_BKUPTIME +.It +ATTR_CMN_FNDRINFO +.It +ATTR_CMN_OWNERID +.It +ATTR_CMN_GRPID +.It +ATTR_CMN_ACCESSMASK +.It +ATTR_CMN_FLAGS +.It +ATTR_CMN_USERACCESS +.It +ATTR_CMN_FILEID +.It +ATTR_CMN_PARENTID +.Pp +. 
+.It +ATTR_DIR_LINKCOUNT +.It +ATTR_DIR_ENTRYCOUNT +.It +ATTR_DIR_MOUNTSTATUS +.Pp +. +.It +ATTR_FILE_LINKCOUNT +.It +ATTR_FILE_TOTALSIZE +.It +ATTR_FILE_ALLOCSIZE +.It +ATTR_FILE_IOBLOCKSIZE +.It +ATTR_FILE_CLUMPSIZE +.It +ATTR_FILE_DEVTYPE +.It +ATTR_FILE_DATALENGTH +.It +ATTR_FILE_DATAALLOCSIZE +.It +ATTR_FILE_RSRCLENGTH +.It +ATTR_FILE_RSRCALLOCSIZE +.El +. + + .Sh EXAMPLES . The following code searches a volume for files of the specified type and creator. diff --git a/bsd/man/man2/select.2 b/bsd/man/man2/select.2 index eeb8aceb8..fd0833dc1 100644 --- a/bsd/man/man2/select.2 +++ b/bsd/man/man2/select.2 @@ -33,7 +33,7 @@ .\" .\" @(#)select.2 8.2 (Berkeley) 3/25/94 .\" -.Dd March 25, 1994 +.Dd March 18, 2015 .Dt SELECT 2 .Os BSD 4.2 .Sh NAME @@ -80,7 +80,7 @@ .Fa "struct timeval *restrict timeout" .Fc .Sh DESCRIPTION -.Fn Select +.Fn select examines the I/O descriptor sets whose addresses are passed in .Fa readfds , .Fa writefds , @@ -102,7 +102,7 @@ On return, replaces the given descriptor sets with subsets consisting of those descriptors that are ready for the requested operation. -.Fn Select +.Fn select returns the total number of ready descriptors in all the sets. .Pp The descriptor sets are stored as bit fields in arrays of integers. @@ -160,7 +160,7 @@ and .Fa errorfds may be given as nil pointers if no descriptors are of interest. .Sh RETURN VALUES -.Fn Select +.Fn select returns the number of ready descriptors that are contained in the descriptor sets, or -1 if an error occurred. @@ -242,6 +242,7 @@ or compile with -D_DARWIN_UNLIMITED_SELECT. .Sh SEE ALSO .Xr accept 2 , .Xr connect 2 , +.Xr connectx 2 , .Xr getdtablesize 2 , .Xr gettimeofday 2 , .Xr read 2 , @@ -267,7 +268,7 @@ a larger definition of before the inclusion of .Aq Pa sys/types.h . .Pp -.Fn Select +.Fn select should probably have been designed to return the time remaining from the original timeout, if any, by modifying the time value in place. 
However, it is unlikely this semantic will ever be implemented, as the diff --git a/bsd/man/man2/sem_open.2 b/bsd/man/man2/sem_open.2 index 423e98ae4..9f06c7775 100644 --- a/bsd/man/man2/sem_open.2 +++ b/bsd/man/man2/sem_open.2 @@ -142,7 +142,7 @@ descriptors in use. .It Bq Er ENAMETOOLONG .Fa name exceeded -.Dv SEM_NAME_LEN +.Dv PSEMNAMLEN characters. .It Bq Er ENFILE Too many semaphores or file descriptors are open on the system. diff --git a/bsd/man/man2/sem_unlink.2 b/bsd/man/man2/sem_unlink.2 index 7fc7e9c4d..ba0fbbd4f 100644 --- a/bsd/man/man2/sem_unlink.2 +++ b/bsd/man/man2/sem_unlink.2 @@ -58,7 +58,7 @@ Permission is denied to be remove the semaphore. .It Bq Er ENAMETOOLONG .Fa name exceeded -.Dv SEM_NAME_LEN +.Dv PSEMNAMLEN characters. .It Bq Er ENOENT The named semaphore does not exist. diff --git a/bsd/man/man2/setattrlist.2 b/bsd/man/man2/setattrlist.2 index c0e393ee6..68898eb3c 100644 --- a/bsd/man/man2/setattrlist.2 +++ b/bsd/man/man2/setattrlist.2 @@ -85,6 +85,55 @@ The parameter for .Fn fsetattrlist must be a valid file descriptor for the calling process. +. +The list of potentially settable attributes via +.Fn setattrlist +is different than the list of attributes that are accessible via +.Fn getattrlist +In particular, only the following attributes are modifiable via +.Fn setattrlist +and not all of them may be supported on all filesystems. +.Pp +. +.Bl -item -compact +.It +ATTR_CMN_SCRIPT +.It +ATTR_CMN_CRTIME +.It +ATTR_CMN_MODTIME +.It +ATTR_CMN_CHGTIME +.It +ATTR_CMN_ACCTIME +.It +ATTR_CMN_BKUPTIME +.It +ATTR_CMN_FNDRINFO +.It +ATTR_CMN_OWNERID +.It +ATTR_CMN_GRPID +.It +ATTR_CMN_ACCESSMASK +.It +ATTR_CMN_FLAGS +.It +ATTR_CMN_EXTENDED_SECURITY +.It +ATTR_CMN_GRPUUID +.Pp +.It +ATTR_VOL_NAME +.It +ATTR_VOL_INFO +.Pp +.It +ATTR_FILE_DEVTYPE +.El +.Pp +. +. 
You must own the file system object in order to set any of the following attributes: .Pp @@ -101,15 +150,16 @@ ATTR_CMN_CRTIME .It ATTR_CMN_MODTIME .It -ATTR_CMN_CHGTIME -.It ATTR_CMN_ACCTIME +.Pp +ATTR_CMN_CHGTIME +.Fa cannot be set programmatically. Any attempt to set change time is ignored. .El .Pp . You must be root (that is, your process's effective UID must be 0) in order to change the .Dv ATTR_CMN_OWNERID -attribute. +attribute. Setting other attributes requires that you have write access to the object. .Pp . diff --git a/bsd/man/man2/setlcid.2 b/bsd/man/man2/setlcid.2 deleted file mode 100644 index a564bfd3e..000000000 --- a/bsd/man/man2/setlcid.2 +++ /dev/null @@ -1,90 +0,0 @@ -.\" Copyright (c) 2005 SPARTA, Inc. -.\" All rights reserved. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" $FreeBSD$ -.\" -.\" Note: The date here should be updated whenever a non-trivial -.\" change is made to the manual page. -.Dd May 5, 2005 -.Dt SETLCID 3 -.Os -.Sh NAME -.Nm setlcid -.Nd "set login context" -.Sh SYNOPSIS -.In sys/lctx.h -.Ft int -.Fn setlcid "pid_t pid" "pid_t lcid" -.Sh DESCRIPTION -The -.Fn setlcid -system call sets the login context of the specified process -.Fa pid -to the specified -.Fa lcid . -If the -.Fa pid -is zero, then the call applies to the current process. -If the -.Fa lcid -is zero a new login context will be created. -If the -.Fa lcid -is \-1 the process will be removed from the login context -it is currently a member of, if any. -.Pp -Creation of a new login context is only valid for the current process. -A process may not create a new login context if it is currently a member -of one. -.Pp -Superuser privilege is required to add or remove a process from -a login context. -.Sh RETURN VALUES -.Rv -std setlcid -.Sh ERRORS -The -.Fn setlcid -function will fail if: -.Bl -tag -width Er -.It Bq Er EPERM -Operation not permitted. -.It Bq Er ESRCH -No such process. -.It Bq Er ENOMEM -Cannot allocate memory. -.It Bq Er ENOATTR -Attribute not found. -.El -.Sh SEE ALSO -.Xr getlcid 2 -.Sh HISTORY -The -.Nm -manual page -first appeared in -.Fx 6.0 . -.Sh AUTHORS -This -manual page was written by -.An Matthew N. Dodd Aq mdodd@FreeBSD.org . 
diff --git a/bsd/man/man2/setpgid.2 b/bsd/man/man2/setpgid.2 index 36d82683a..63ab9783a 100644 --- a/bsd/man/man2/setpgid.2 +++ b/bsd/man/man2/setpgid.2 @@ -65,11 +65,6 @@ If the invoker is not the super-user, then the affected process must have the same effective user-id as the invoker or be a descendant of the invoking process. .Pp -If the calling process is not already a session leader, -.Fn setpgrp -sets the process group ID of the calling process -to that of the calling process. -Any new session that this creates will have no controlling terminal. .Sh RETURN VALUES .Fn Setpgid returns 0 when the operation was successful. diff --git a/bsd/man/man2/shutdown.2 b/bsd/man/man2/shutdown.2 index 4ac610045..5110848ab 100644 --- a/bsd/man/man2/shutdown.2 +++ b/bsd/man/man2/shutdown.2 @@ -33,7 +33,7 @@ .\" .\" @(#)shutdown.2 8.1 (Berkeley) 6/4/93 .\" -.Dd June 4, 1993 +.Dd March 18, 2015 .Dt SHUTDOWN 2 .Os BSD 4.2 .Sh NAME @@ -92,6 +92,8 @@ is a file, not a socket. .El .Sh SEE ALSO .Xr connect 2 , +.Xr connectx 2 , +.Xr disconnectx 2 , .Xr socket 2 .Sh HISTORY The diff --git a/bsd/man/man2/sigaction.2 b/bsd/man/man2/sigaction.2 index 069197f90..9a0b47079 100644 --- a/bsd/man/man2/sigaction.2 +++ b/bsd/man/man2/sigaction.2 @@ -52,7 +52,7 @@ struct sigaction { union __sigaction_u { void (*__sa_handler)(int); - void (*__sa_sigaction)(int, struct __siginfo *, + void (*__sa_sigaction)(int, siginfo_t *, void *); }; diff --git a/bsd/man/man2/socket.2 b/bsd/man/man2/socket.2 index c19161723..4815d2e94 100644 --- a/bsd/man/man2/socket.2 +++ b/bsd/man/man2/socket.2 @@ -33,7 +33,7 @@ .\" .\" @(#)socket.2 8.1 (Berkeley) 6/4/93 .\" -.Dd June 4, 1993 +.Dd March 18, 2015 .Dt SOCKET 2 .Os .Sh NAME @@ -48,7 +48,7 @@ .Fa "int protocol" .Fc .Sh DESCRIPTION -.Fn Socket +.Fn socket creates an endpoint for communication and returns a descriptor. 
.Pp The @@ -80,8 +80,8 @@ defined types are: SOCK_STREAM SOCK_DGRAM SOCK_RAW -SOCK_SEQPACKET -SOCK_RDM +.\"SOCK_SEQPACKET +.\"SOCK_RDM .Ed .Pp A @@ -94,23 +94,24 @@ A socket supports datagrams (connectionless, unreliable messages of a fixed (typically small) maximum length). -A -.Dv SOCK_SEQPACKET -socket may provide a sequenced, reliable, -two-way connection-based data transmission path for datagrams -of fixed maximum length; a consumer may be required to read -an entire packet with each read system call. -This facility is protocol specific, and presently implemented -only for -.Dv PF_NS . +.\"A +.\".Dv SOCK_SEQPACKET +.\"socket may provide a sequenced, reliable, +.\"two-way connection-based data transmission path for datagrams +.\"of fixed maximum length; a consumer may be required to read +.\"an entire packet with each read system call. +.\"This facility is protocol specific, and presently implemented +.\"only for +.\".Dv PF_NS . .Dv SOCK_RAW sockets provide access to internal network protocols and interfaces. -The types +The type .Dv SOCK_RAW , -which is available only to the super-user, and -.Dv SOCK_RDM , -which is planned, -but not yet implemented, are not described here. +which is available only to the super-user. +.\" , and +.\" .Dv SOCK_RDM , +.\" which is planned, +.\" but not yet implemented, are not described here. .Pp The .Fa protocol @@ -131,6 +132,8 @@ to pipes. A stream socket must be in a state before any data may be sent or received on it. A connection to another socket is created with a .Xr connect 2 +or +.Xr connectx 2 call. Once connected, data may be transferred using .Xr read 2 and @@ -172,16 +175,16 @@ A signal is raised if a process sends on a broken stream; this causes naive processes, which do not handle the signal, to exit. -.Pp -.Dv SOCK_SEQPACKET -sockets employ the same system calls -as -.Dv SOCK_STREAM -sockets. 
The only difference -is that -.Xr read 2 -calls will return only the amount of data requested, -and any remaining in the arriving packet will be discarded. +.\" .Pp +.\" .Dv SOCK_SEQPACKET +.\" sockets employ the same system calls +.\" as +.\" .Dv SOCK_STREAM +.\" sockets. The only difference +.\" is that +.\" .Xr read 2 +.\" calls will return only the amount of data requested, +.\" and any remaining in the arriving packet will be discarded. .Pp .Dv SOCK_DGRAM and @@ -266,6 +269,8 @@ is necessary. .Xr accept 2 , .Xr bind 2 , .Xr connect 2 , +.Xr connectx 2 , +.Xr disconnectx 2 , .Xr getsockname 2 , .Xr getsockopt 2 , .Xr ioctl 2 , diff --git a/bsd/man/man2/stat.2 b/bsd/man/man2/stat.2 index e9acf874f..7282de5a9 100644 --- a/bsd/man/man2/stat.2 +++ b/bsd/man/man2/stat.2 @@ -105,10 +105,7 @@ returns information about the link, while .Fn stat returns information about the file the link references. -Unlike other filesystem objects, -symbolic links may not have an owner, group, access mode, times, etc. -Instead, these attributes may be taken from the directory that -contains the link. +The attributes cannot be relied on in case of symbolic links. In this case, the only attributes returned from an .Fn lstat that refer to the symbolic link itself are the file type (S_IFLNK), @@ -259,9 +256,7 @@ system calls. .It st_birthtime Time of file creation. Only set once when the file is created. This field is only available in the 64 bit inode variants. On filesystems where birthtime is -not available, this field holds the -.Fa ctime -instead. +not available, this field is set to 0 (i.e. epoch). 
.El .Pp The size-related fields of the structures are as follows: diff --git a/bsd/man/man2/write.2 b/bsd/man/man2/write.2 index fe41a2e53..5d5ebcd8b 100644 --- a/bsd/man/man2/write.2 +++ b/bsd/man/man2/write.2 @@ -32,7 +32,7 @@ .\" @(#)write.2 8.5 (Berkeley) 4/2/94 .\" $FreeBSD: src/lib/libc/sys/write.2,v 1.12.2.7 2001/12/14 18:34:02 ru Exp $ .\" -.Dd April 2, 1994 +.Dd March 18, 2015 .Dt WRITE 2 .Os .Sh NAME @@ -65,21 +65,21 @@ .Fa "int iovcnt" .Fc .Sh DESCRIPTION -.Fn Write +.Fn write attempts to write .Fa nbyte of data to the object referenced by the descriptor .Fa fildes from the buffer pointed to by .Fa buf . -.Fn Writev +.Fn writev performs the same action, but gathers the output data from the .Fa iovcnt buffers specified by the members of the .Fa iov array: iov[0], iov[1], ..., iov[iovcnt\|-\|1]. -.Fn Pwrite +.Fn pwrite performs the same function, but writes to the specified position in the file without modifying the file pointer. .Pp @@ -100,7 +100,7 @@ Each .Fa iovec entry specifies the base address and length of an area in memory from which data should be written. -.Fn Writev +.Fn writev will always write a complete area before proceeding to the next. .Pp @@ -255,6 +255,8 @@ The destination is no longer available when writing to a .Ux domain datagram socket on which .Xr connect 2 +or +.Xr connectx 2 had been used to set a destination address. .\" =========== .It Bq Er EINVAL diff --git a/bsd/man/man4/inet.4 b/bsd/man/man4/inet.4 index 52ba9239b..9ea32c94d 100644 --- a/bsd/man/man4/inet.4 +++ b/bsd/man/man4/inet.4 @@ -33,7 +33,7 @@ .\" .\" @(#)inet.4 8.1 (Berkeley) 6/5/93 .\" -.Dd June 5, 1993 +.Dd March 18, 2015 .Dt INET 4 .Os BSD 4.2 .Sh NAME @@ -84,7 +84,8 @@ to effect .Dq wildcard matching on incoming messages. 
The address in a -.Xr connect 2 +.Xr connect 2 , +.Xr connectx 2 or .Xr sendto 2 call may be given as diff --git a/bsd/man/man4/route.4 b/bsd/man/man4/route.4 index 34088e51a..e4f456335 100644 --- a/bsd/man/man4/route.4 +++ b/bsd/man/man4/route.4 @@ -185,7 +185,6 @@ Messages include: #define RTM_DELETE 0x2 /* Delete Route */ #define RTM_CHANGE 0x3 /* Change Metrics, Flags, or Gateway */ #define RTM_GET 0x4 /* Report Information */ -#define RTM_LOOSING 0x5 /* Kernel Suspects Partitioning */ #define RTM_REDIRECT 0x6 /* Told to use different route */ #define RTM_MISS 0x7 /* Lookup failed on this address */ #define RTM_RESOLVE 0xb /* request to resolve dst to LL addr */ diff --git a/bsd/man/man4/tcp.4 b/bsd/man/man4/tcp.4 index 05584e255..9f011e793 100644 --- a/bsd/man/man4/tcp.4 +++ b/bsd/man/man4/tcp.4 @@ -33,7 +33,7 @@ .\" .\" @(#)tcp.4 8.1 (Berkeley) 6/5/93 .\" -.Dd April 16, 2014 +.Dd March 18, 2015 .Dt TCP 4 .Os BSD 4.2 .Sh NAME @@ -87,6 +87,8 @@ Only passive sockets may use the call to accept incoming connections. Only active sockets may use the .Xr connect 2 +or +.Xr connectx 2 call to initiate connections. .Pp Passive sockets may @@ -200,7 +202,7 @@ acknowledgement for every other data packet. .It Dv TCP_ENABLE_ECN Using Explicit Congestion Notification (ECN) on .Tn TCP -allows end-to-end notification of congestion without dropping packets. Conventionally TCP/IP networks signal congestion by dropping packets. When ECN is successfully negotiated, an ECN-aware router may set a mark in the IP header instead of dropping a packet in order to signal impending congestion. The +allows bi-directional end-to-end notification of congestion without dropping packets. Conventionally TCP/IP networks signal congestion by dropping packets. When ECN is successfully negotiated, an ECN-aware router may set a mark in the IP header instead of dropping a packet in order to signal impending congestion. 
The .Tn TCP receiver of the packet echoes congestion indication to the .Tn TCP @@ -209,6 +211,15 @@ sender, which reduces it's transmission rate as if it detected a dropped packet. The send socket buffer of a .Tn TCP sender has unsent and unacknowledged data. This option allows a .Tn TCP sender to control the amount of unsent data kept in the send socket buffer. The value of the option should be the maximum amount of unsent data in bytes. Kevent, poll and select will generate a write notification when the unsent data falls below the amount given by this option. This will allow an application to generate just-in-time fresh updates for real-time communication. +.It Dv TCP_FASTOPEN +The TCP listener can set this option to use TCP Fast Open feature. After +setting this option, an +.Xr accept 2 +may return a socket that is in SYN_RECEIVED state but is readable and writable. +.It Dv TCP_CONNECTION_INFO +This socket option can be used to obtain TCP connection level statistics. The +"struct tcp_connection_info" defined in is copied to the +user buffer. .El .Pp The option level for the @@ -236,6 +247,8 @@ When a .Tn TCP socket is set non-blocking, and the connection cannot be established immediately, .Xr connect 2 +or +.Xr connectx 2 returns with the error .Dv EINPROGRESS , and the connection is established asynchronously. @@ -255,6 +268,8 @@ can be retrieved via the socket option Note that even if the socket is non-blocking, it is possible for the connection to be established immediately. In that case .Xr connect 2 +or +.Xr connectx 2 does not return with .Dv EINPROGRESS . .Sh DIAGNOSTICS @@ -290,16 +305,29 @@ address; .It Bq Er EINPROGRESS returned by .Xr connect 2 +or +.Xr connectx 2 when the socket is set nonblocking, and the connection cannot be immediately established; .It Bq Er EALREADY returned by .Xr connect 2 +or +.Xr connectx 2 when connection request is already in progress for the specified socket. 
+.It Bq Er ENODATA +returned by +.Xr recv 2 +or +.Xr send 2 +in case a connection is experiencing a data-stall (probably due to a middlebox issue). +It is advised that the current connection gets closed by the application and a +new attempt is being made. . .El .Sh SEE ALSO .Xr connect 2 , +.Xr connectx 2 , .Xr getsockopt 2 , .Xr kqueue 2 , .Xr poll 2 , diff --git a/bsd/man/man4/udp.4 b/bsd/man/man4/udp.4 index c104618fe..381cc7795 100644 --- a/bsd/man/man4/udp.4 +++ b/bsd/man/man4/udp.4 @@ -33,7 +33,7 @@ .\" .\" @(#)udp.4 8.1 (Berkeley) 6/5/93 .\" -.Dd June 5, 1993 +.Dd March 18, 2015 .Dt UDP 4 .Os BSD 4.2 .Sh NAME @@ -59,6 +59,8 @@ and .Xr recvfrom calls, though the .Xr connect 2 +or +.Xr connectx 2 call may also be used to fix the destination for future packets (in which case the .Xr recv 2 @@ -125,6 +127,8 @@ socket with a network address for which no network interface exists. .El .Sh SEE ALSO +.Xr connect 2 , +.Xr connectx 2 , .Xr getsockopt 2 , .Xr recv 2 , .Xr send 2 , diff --git a/bsd/miscfs/devfs/Makefile b/bsd/miscfs/devfs/Makefile index 46c7c695b..163dea893 100644 --- a/bsd/miscfs/devfs/Makefile +++ b/bsd/miscfs/devfs/Makefile @@ -10,6 +10,12 @@ include $(MakeInc_def) DATAFILES = \ devfs.h +KERNELFILES = \ + devfs.h \ + fdesc.h \ + devfs_proto.h \ + devfsdefs.h + INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = miscfs/devfs @@ -18,12 +24,10 @@ INSTALL_KF_MI_LIST = ${DATAFILES} INSTALL_KF_MI_LCL_LIST = ${DATAFILES} devfs_proto.h devfsdefs.h -EXPORT_MI_LIST = ${DATAFILES} fdesc.h devfs_proto.h devfsdefs.h +EXPORT_MI_LIST = ${KERNELFILES} EXPORT_MI_DIR = miscfs/devfs -INSTALL_MI_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/miscfs/devfs/devfs_tree.c b/bsd/miscfs/devfs/devfs_tree.c index 45d529e8d..21912549a 100644 --- a/bsd/miscfs/devfs/devfs_tree.c +++ b/bsd/miscfs/devfs/devfs_tree.c @@ -1156,7 +1156,16 @@ devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p) 
DEVFS_UNLOCK(); - error = vnode_getwithvid(vn_p, vid); + /* + * We want to use the drainok variant of vnode_getwithvid + * because we _don't_ want to get an iocount if the vnode is + * blocked in vnode_drain as it can cause infinite + * loops in vn_open_auth. While in-use vnodes are typically + * only reclaimed on forced unmounts, in-use devfs tty vnodes + * can be quite frequently reclaimed by revoke(2) or by the + * exit of a controlling process. + */ + error = vnode_getwithvid_drainok(vn_p, vid); DEVFS_LOCK(); @@ -1180,22 +1189,25 @@ devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p) * vnode. Therefore, ENOENT is a valid result. */ error = ENOENT; + } else if (error == ENODEV) { + /* + * The filesystem is getting unmounted. + */ + error = ENOENT; } else if (error && (nretries < DEV_MAX_VNODE_RETRY)) { /* * If we got an error from vnode_getwithvid, it means * we raced with a recycle and lost i.e. we asked for - * an iocount only after vnode_drain had completed on - * the vnode and returned with an error only after - * devfs_reclaim was called on the vnode. While - * devfs_reclaim sets dn_vn to NULL but while we were - * waiting to reacquire DEVFS_LOCK, another vnode might - * have gotten associated with the dnp. In either case, - * we need to retry otherwise we will end up returning - * an ENOENT for this lookup but the next lookup will - * succeed because it creates a new vnode (or a racing - * lookup created a new vnode already). - * - * We cap the number of retries at 8. + * an iocount only after vnode_drain had been entered + * for the vnode and returned with an error only after + * devfs_reclaim was called on the vnode. devfs_reclaim + * sets dn_vn to NULL but while we were waiting to + * reacquire DEVFS_LOCK, another vnode might have gotten + * associated with the dnp.
In either case, we need to + * retry otherwise we will end up returning an ENOENT + * for this lookup but the next lookup will succeed + * because it creates a new vnode (or a racing lookup + * created a new vnode already). */ error = 0; nretries++; diff --git a/bsd/miscfs/devfs/devfs_vfsops.c b/bsd/miscfs/devfs/devfs_vfsops.c index 6ad899939..f8406a251 100644 --- a/bsd/miscfs/devfs/devfs_vfsops.c +++ b/bsd/miscfs/devfs/devfs_vfsops.c @@ -182,7 +182,7 @@ devfs_mount(struct mount *mp, __unused vnode_t devvp, __unused user_addr_t data, /*- * Fill out some fields */ - mp->mnt_data = (qaddr_t)devfs_mp_p; + __IGNORE_WCASTALIGN(mp->mnt_data = (qaddr_t)devfs_mp_p); mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)(uintptr_t)devfs_mp_p; mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp); mp->mnt_flag |= MNT_LOCAL; diff --git a/bsd/miscfs/fifofs/Makefile b/bsd/miscfs/fifofs/Makefile index 2694ee502..14f1db889 100644 --- a/bsd/miscfs/fifofs/Makefile +++ b/bsd/miscfs/fifofs/Makefile @@ -7,10 +7,10 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -DATAFILES = \ +KERNELFILES = \ fifo.h -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = ${KERNELFILES} EXPORT_MI_DIR = miscfs/fifofs diff --git a/bsd/miscfs/fifofs/fifo_vnops.c b/bsd/miscfs/fifofs/fifo_vnops.c index 23c596618..62cc11fed 100644 --- a/bsd/miscfs/fifofs/fifo_vnops.c +++ b/bsd/miscfs/fifofs/fifo_vnops.c @@ -327,6 +327,8 @@ fifo_read(struct vnop_read_args *ap) if (error != EWOULDBLOCK) { error = soreceive(rso, (struct sockaddr **)0, uio, (struct mbuf **)0, (struct mbuf **)0, &rflags); + if (error == 0 && ap->a_vp->v_knotes.slh_first != NULL) + KNOTE(&ap->a_vp->v_knotes, 0); } else { /* clear EWOULDBLOCK and return EOF (zero) */ @@ -358,6 +360,8 @@ fifo_write(struct vnop_write_args *ap) #endif error = sosend(wso, (struct sockaddr *)0, ap->a_uio, NULL, (struct mbuf *)0, (ap->a_ioflag & IO_NDELAY) ? 
MSG_NBIO : 0); + if (error == 0 && ap->a_vp->v_knotes.slh_first != NULL) + KNOTE(&ap->a_vp->v_knotes, 0); return (error); } diff --git a/bsd/miscfs/specfs/Makefile b/bsd/miscfs/specfs/Makefile index 2394edf46..109c5fc29 100644 --- a/bsd/miscfs/specfs/Makefile +++ b/bsd/miscfs/specfs/Makefile @@ -10,11 +10,14 @@ include $(MakeInc_def) DATAFILES = \ specdev.h +KERNELFILES = \ + ${DATAFILES} + INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = miscfs/specfs -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = ${KERNELFILES} EXPORT_MI_DIR = miscfs/specfs diff --git a/bsd/miscfs/specfs/spec_vnops.c b/bsd/miscfs/specfs/spec_vnops.c index fe762c21f..fd79c99fa 100644 --- a/bsd/miscfs/specfs/spec_vnops.c +++ b/bsd/miscfs/specfs/spec_vnops.c @@ -218,6 +218,7 @@ struct _throttle_io_info_t { int32_t throttle_refcnt; int32_t throttle_alloc; int32_t throttle_disabled; + int32_t throttle_is_fusion_with_priority; }; struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV]; @@ -661,7 +662,7 @@ spec_ioctl(struct vnop_ioctl_args *ap) int retval = 0; KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 0) | DBG_FUNC_START, - (unsigned int)dev, (unsigned int)ap->a_command, (unsigned int)ap->a_fflag, (unsigned int)ap->a_vp->v_type, 0); + dev, ap->a_command, ap->a_fflag, ap->a_vp->v_type, 0); switch (ap->a_vp->v_type) { @@ -681,8 +682,14 @@ spec_ioctl(struct vnop_ioctl_args *ap) extent = unmap->extents; for (i = 0; i < unmap->extentsCount; i++, extent++) { - KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 1) | DBG_FUNC_NONE, dev, extent->offset/ap->a_vp->v_specsize, extent->length, 0, 0); + KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 1) | DBG_FUNC_NONE, dev, + extent->offset/ap->a_vp->v_specsize, extent->length, 0, 0); } + } else if (ap->a_command == DKIOCSYNCHRONIZE) { + dk_synchronize_t *synch; + synch = (dk_synchronize_t *)ap->a_data; + KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 1) | DBG_FUNC_NONE, dev, ap->a_command, + synch->options, 0, 0); } } retval = (*bdevsw[major(dev)].d_ioctl)(dev, 
ap->a_command, ap->a_data, ap->a_fflag, p); @@ -693,7 +700,7 @@ spec_ioctl(struct vnop_ioctl_args *ap) /* NOTREACHED */ } KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 0) | DBG_FUNC_END, - (unsigned int)dev, (unsigned int)ap->a_command, (unsigned int)ap->a_fflag, retval, 0); + dev, ap->a_command, ap->a_fflag, retval, 0); return (retval); } @@ -721,23 +728,20 @@ int spec_kqfilter(vnode_t vp, struct knote *kn) { dev_t dev; - int err = EINVAL; + int err; + + assert(vnode_ischr(vp)); - /* - * For a few special kinds of devices, we can attach knotes. - * Each filter function must check whether the dev type matches it. - */ dev = vnode_specrdev(vp); - if (vnode_istty(vp)) { - /* We can hook into TTYs... */ - err = filt_specattach(kn); - } else { #if NETWORKING - /* Try a bpf device, as defined in bsd/net/bpf.c */ - err = bpfkqfilter(dev, kn); -#endif + /* Try a bpf device, as defined in bsd/net/bpf.c */ + if ((err = bpfkqfilter(dev, kn)) == 0) { + return err; } +#endif + /* Try to attach to other char special devices */ + err = filt_specattach(kn); return err; } @@ -1207,7 +1211,7 @@ throttle_init_throttle_period(struct _throttle_io_info_t *info, boolean_t isssd) */ /* Assign global defaults */ - if (isssd == TRUE) + if ((isssd == TRUE) && (info->throttle_is_fusion_with_priority == 0)) info->throttle_io_periods = &throttle_io_period_ssd_msecs[0]; else info->throttle_io_periods = &throttle_io_period_msecs[0]; @@ -1274,6 +1278,7 @@ throttle_init(void) } info->throttle_next_wake_level = THROTTLE_LEVEL_END; info->throttle_disabled = 0; + info->throttle_is_fusion_with_priority = 0; } #if CONFIG_IOSCHED if (PE_parse_boot_argn("iosched", &iosched, sizeof(iosched))) { @@ -1610,6 +1615,12 @@ throttle_io_will_be_throttled(__unused int lowpri_window_msecs, mount_t mp) else info = mp->mnt_throttle_info; + if (info->throttle_is_fusion_with_priority) { + uthread_t ut = get_bsdthread_info(current_thread()); + if (ut->uu_lowpri_window == 0) + return (THROTTLE_DISENGAGED); + } + if 
(info->throttle_disabled) return (THROTTLE_DISENGAGED); else @@ -1762,6 +1773,9 @@ void throttle_info_reset_window(uthread_t ut) { struct _throttle_io_info_t *info; + if (ut == NULL) + ut = get_bsdthread_info(current_thread()); + if ( (info = ut->uu_throttle_info) ) { throttle_info_rel(info); @@ -1892,7 +1906,7 @@ void throttle_info_update_by_mask(void *throttle_info_handle, int flags) * support I/O scheduling. */ -void throttle_info_disable_throttle(int devno) +void throttle_info_disable_throttle(int devno, boolean_t isfusion) { struct _throttle_io_info_t *info; @@ -1900,7 +1914,13 @@ void throttle_info_disable_throttle(int devno) panic("Illegal devno (%d) passed into throttle_info_disable_throttle()", devno); info = &_throttle_io_info[devno]; - info->throttle_disabled = 1; + // don't disable software throttling on devices that are part of a fusion device + // and override the software throttle periods to use HDD periods + if (isfusion) { + info->throttle_is_fusion_with_priority = isfusion; + throttle_init_throttle_period(info, FALSE); + } + info->throttle_disabled = !info->throttle_is_fusion_with_priority; return; } @@ -2368,15 +2388,20 @@ filt_specattach(struct knote *kn) return ENXIO; } - if ((cdevsw_flags[major(dev)] & CDEVSW_SELECT_KQUEUE) == 0) { + /* + * For a few special kinds of devices, we can attach knotes with + * no restrictions because their "select" vectors return the amount + * of data available. Others require an explicit NOTE_LOWAT with + * data of 1, indicating that the caller doesn't care about actual + * data counts, just an indication that the device has data. 
+ */ + + if ((cdevsw_flags[major(dev)] & CDEVSW_SELECT_KQUEUE) == 0 && + ((kn->kn_sfflags & NOTE_LOWAT) == 0 || kn->kn_sdata != 1)) { return EINVAL; } - /* Resulting wql is safe to unlink even if it has never been linked */ - kn->kn_hook = wait_queue_link_allocate(); - if (kn->kn_hook == NULL) { - return EAGAIN; - } + kn->kn_hook_data = 0; kn->kn_fop = &spec_filtops; kn->kn_hookid = vnode_vid(vp); @@ -2389,21 +2414,24 @@ filt_specattach(struct knote *kn) static void filt_specdetach(struct knote *kn) { - kern_return_t ret; + knote_clearstayqueued(kn); - /* - * Given wait queue link and wait queue set, unlink. This is subtle. - * If the device has been revoked from under us, selclearthread() will - * have removed our link from the kqueue's wait queue set, which - * wait_queue_set_unlink_one() will detect and handle. + /* + * This is potentially tricky: the device's selinfo waitq that was + * tricked into being part of this knote's waitq set may not be a part + * of any other set, and the device itself may have revoked the memory + * in which the waitq was held. We use the knote's kn_hook_data field + * to keep the ID of the waitq's prepost table object. This + * object keeps a pointer back to the waitq, and gives us a safe way + * to decouple the dereferencing of driver allocated memory: if the + * driver goes away (taking the waitq with it) then the prepost table + * object will be invalidated. The waitq details are handled in the + * waitq API invoked here. 
+ */ - ret = wait_queue_set_unlink_one(kn->kn_kq->kq_wqs, kn->kn_hook); - if (ret != KERN_SUCCESS) { - panic("filt_specdetach(): failed to unlink wait queue link."); + if (kn->kn_hook_data) { + waitq_unlink_by_prepost_id(kn->kn_hook_data, kn->kn_kq->kq_wqs); + kn->kn_hook_data = 0; } - knote_clearstayqueued(kn); - (void)wait_queue_link_free(kn->kn_hook); - kn->kn_hook = NULL; } static int @@ -2411,15 +2439,15 @@ filt_spec(struct knote *kn, long hint) { vnode_t vp; uthread_t uth; - wait_queue_set_t old_wqs; + struct waitq_set *old_wqs; vfs_context_t ctx; int selres; int error; int use_offset; dev_t dev; uint64_t flags; - - assert(kn->kn_hook != NULL); + uint64_t rsvd, rsvd_arg; + uint64_t *rlptr = NULL; if (hint != 0) { panic("filt_spec(): nonzero hint?"); @@ -2438,14 +2466,60 @@ filt_spec(struct knote *kn, long hint) dev = vnode_specrdev(vp); flags = cdevsw_flags[major(dev)]; use_offset = ((flags & CDEVSW_USE_OFFSET) != 0); - assert((flags & CDEVSW_SELECT_KQUEUE) != 0); - /* Trick selrecord() into hooking kqueue's wait queue set into device wait queue */ + /* + * This function may be called many times to link or re-link the + * underlying vnode to the kqueue. If we've already linked the two, + * we will have a valid kn_hook_data which ties us to the underlying + * device's waitq via the waitq's prepost table object. However, + * devices can abort any select action by calling selthreadclear(). + * This is OK because the table object will be invalidated by the + * driver (through a call to selthreadclear), so any attempt to access + * the associated waitq will fail because the table object is invalid. + * + * Even if we've already registered, we need to pass a pointer + * to a reserved link structure. Otherwise, selrecord() will + * infer that we're in the second pass of select() and won't + * actually do anything!
+ */ + rsvd = rsvd_arg = waitq_link_reserve(NULL); + rlptr = (void *)&rsvd_arg; + + /* + * Trick selrecord() into hooking kqueue's wait queue set + * into device's selinfo wait queue + */ old_wqs = uth->uu_wqset; uth->uu_wqset = kn->kn_kq->kq_wqs; - selres = VNOP_SELECT(vp, filter_to_seltype(kn->kn_filter), 0, kn->kn_hook, ctx); + selres = VNOP_SELECT(vp, filter_to_seltype(kn->kn_filter), + 0, rlptr, ctx); uth->uu_wqset = old_wqs; + /* + * make sure to cleanup the reserved link - this guards against + * drivers that may not actually call selrecord(). + */ + waitq_link_release(rsvd); + if (rsvd != rsvd_arg) { + /* the driver / handler called selrecord() */ + struct waitq *wq; + memcpy(&wq, rlptr, sizeof(void *)); + + /* + * The waitq_get_prepost_id() function will (potentially) + * allocate a prepost table object for the waitq and return + * the table object's ID to us. It will also set the + * waitq_prepost_id field within the waitq structure. + * + * We can just overwrite kn_hook_data because it's simply a + * table ID used to grab a reference when needed. + * + * We have a reference on the vnode, so we know that the + * device won't go away while we get this ID. + */ + kn->kn_hook_data = waitq_get_prepost_id(wq); + } + if (use_offset) { if (kn->kn_fp->f_fglob->fg_offset >= (uint32_t)selres) { kn->kn_data = 0; @@ -2458,6 +2532,9 @@ filt_spec(struct knote *kn, long hint) vnode_put(vp); + if ((kn->kn_sfflags & NOTE_LOWAT) != 0) + return (kn->kn_data >= kn->kn_sdata); + return (kn->kn_data != 0); } @@ -2466,9 +2543,11 @@ filt_specpeek(struct knote *kn) { vnode_t vp; uthread_t uth; - wait_queue_set_t old_wqs; + struct waitq_set *old_wqs; vfs_context_t ctx; int error, selres; + uint64_t rsvd, rsvd_arg; + uint64_t *rlptr = NULL; uth = get_bsdthread_info(current_thread()); ctx = vfs_context_current(); @@ -2480,13 +2559,45 @@ filt_specpeek(struct knote *kn) } /* - * Why pass the link here? Because we may not have registered in the past...
+ * Even if we've already registered, we need to pass a pointer + * to a reserved link structure. Otherwise, selrecord() will + * infer that we're in the second pass of select() and won't + * actually do anything! */ + rsvd = rsvd_arg = waitq_link_reserve(NULL); + rlptr = (void *)&rsvd_arg; + old_wqs = uth->uu_wqset; uth->uu_wqset = kn->kn_kq->kq_wqs; - selres = VNOP_SELECT(vp, filter_to_seltype(kn->kn_filter), 0, kn->kn_hook, ctx); + selres = VNOP_SELECT(vp, filter_to_seltype(kn->kn_filter), + 0, (void *)rlptr, ctx); uth->uu_wqset = old_wqs; + /* + * make sure to cleanup the reserved link - this guards against + * drivers that may not actually call selrecord() + */ + waitq_link_release(rsvd); + if (rsvd != rsvd_arg) { + /* the driver / handler called selrecord() */ + struct waitq *wq; + memcpy(&wq, rlptr, sizeof(void *)); + + /* + * The waitq_get_prepost_id() function will (potentially) + * allocate a prepost table object for the waitq and return + * the table object's ID to us. It will also set the + * waitq_prepost_id field within the waitq structure. + * + * We can just overwrite kn_hook_data because it's simply a + * table ID used to grab a reference when needed. + * + * We have a reference on the vnode, so we know that the + * device won't go away while we get this ID. 
+ */ + kn->kn_hook_data = waitq_get_prepost_id(wq); + } + vnode_put(vp); return selres; } diff --git a/bsd/miscfs/union/Makefile b/bsd/miscfs/union/Makefile index b0ef42982..773b2cd00 100644 --- a/bsd/miscfs/union/Makefile +++ b/bsd/miscfs/union/Makefile @@ -10,11 +10,14 @@ include $(MakeInc_def) DATAFILES = \ union.h +KERNELFILES = \ + ${DATAFILES} + INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = miscfs/union -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = ${KERNELFILES} EXPORT_MI_DIR = miscfs/union diff --git a/bsd/net/Makefile b/bsd/net/Makefile index bdb100c29..93855776e 100644 --- a/bsd/net/Makefile +++ b/bsd/net/Makefile @@ -11,12 +11,21 @@ INSTINC_SUBDIRS = \ altq classq pktsched DATAFILES= \ - bpf.h dlil.h \ - ethernet.h if.h if_arp.h \ - if_dl.h if_llc.h if_media.h if_mib.h \ - if_types.h if_var.h \ + bpf.h \ + dlil.h \ + ethernet.h \ + if.h \ + if_arp.h \ + if_dl.h \ + if_llc.h \ + if_media.h \ + if_mib.h \ + if_types.h \ + if_var.h \ if_utun.h \ - kext_net.h ndrv.h pfkeyv2.h \ + kext_net.h \ + ndrv.h \ + pfkeyv2.h \ route.h KERNELFILES= \ @@ -24,17 +33,47 @@ KERNELFILES= \ if_ether.h init.h radix.h PRIVATE_DATAFILES = \ - if_vlan_var.h if_ppp.h firewire.h \ - ppp_defs.h radix.h if_bond_var.h if_bond_internal.h lacp.h ndrv_var.h \ - netsrc.h raw_cb.h etherdefs.h if_pflog.h pfvar.h \ - if_bridgevar.h ntstat.h iptap.h if_llreach.h \ - if_utun_crypto.h if_utun_crypto_ipsec.h if_utun_crypto_dtls.h \ - pktap.h if_ipsec.h necp.h content_filter.h packet_mangler.h + bpf.h \ + content_filter.h \ + etherdefs.h \ + firewire.h \ + if.h \ + if_bond_var.h \ + if_bond_internal.h \ + if_bridgevar.h \ + if_ipsec.h \ + if_llreach.h \ + if_media.h \ + if_mib.h \ + if_pflog.h \ + if_ppp.h \ + if_utun.h \ + if_utun_crypto.h \ + if_utun_crypto_ipsec.h \ + if_utun_crypto_dtls.h \ + if_var.h \ + if_vlan_var.h \ + iptap.h \ + lacp.h \ + ndrv_var.h \ + necp.h \ + netsrc.h \ + network_agent.h \ + ntstat.h \ + packet_mangler.h \ + pfkeyv2.h \ + pfvar.h \ + pktap.h \ + ppp_defs.h \ 
+ radix.h \ + raw_cb.h \ + route.h \ + net_perf.h PRIVATE_KERNELFILES = $(filter-out radix.h,${KERNELFILES}) \ bpfdesc.h ppp_comp.h \ zlib.h bpf_compat.h net_osdep.h \ - flowadv.h + flowadv.h net_perf.h INSTALL_MI_LIST = ${DATAFILES} @@ -44,9 +83,9 @@ EXPORT_MI_LIST = ${INSTALL_MI_LIST} ${KERNELFILES} EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} -INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES} +INSTALL_KF_MI_LCL_LIST = $(sort ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES}) include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/net/altq/Makefile b/bsd/net/altq/Makefile index 6a330882f..c20cf04be 100644 --- a/bsd/net/altq/Makefile +++ b/bsd/net/altq/Makefile @@ -23,9 +23,9 @@ EXPORT_MI_LIST = ${INSTALL_MI_LIST} ${KERNELFILES} EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} -INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES} +INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/net/bpf.c b/bsd/net/bpf.c index 1383cbb80..f98100d2b 100644 --- a/bsd/net/bpf.c +++ b/bsd/net/bpf.c @@ -160,6 +160,10 @@ static unsigned int bpf_wantpktap = 0; SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED, &bpf_wantpktap, 0, ""); +static int bpf_debug = 0; +SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED, + &bpf_debug, 0, ""); + /* * bpf_iflist is the list of interfaces; each corresponds to an ifnet * bpf_dtab holds pointer to the descriptors, indexed by minor device # @@ -191,24 +195,25 @@ static mbuf_tag_id_t bpf_mtag_id; static int bpf_allocbufs(struct bpf_d *); static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp); -static void bpf_detachd(struct bpf_d *d); +static int bpf_detachd(struct bpf_d *d, int); 
static void bpf_freed(struct bpf_d *); static void bpf_mcopy(const void *, void *, size_t); static int bpf_movein(struct uio *, int, struct mbuf **, struct sockaddr *, int *); -static int bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt, dev_t); +static int bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt); static void bpf_timed_out(void *, void *); static void bpf_wakeup(struct bpf_d *); static void catchpacket(struct bpf_d *, u_char *, struct mbuf *, u_int, u_int, int, void (*)(const void *, void *, size_t)); static void reset_d(struct bpf_d *); -static int bpf_setf(struct bpf_d *, u_int , user_addr_t , dev_t, u_long); +static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long); static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *); -static int bpf_setdlt(struct bpf_d *, u_int, dev_t); +static int bpf_setdlt(struct bpf_d *, u_int); static int bpf_set_traffic_class(struct bpf_d *, int); static void bpf_set_packet_service_class(struct mbuf *, int); -/*static void *bpf_devfs_token[MAXBPFILTER];*/ +static void bpf_acquire_d(struct bpf_d *); +static void bpf_release_d(struct bpf_d *); static int bpf_devsw_installed; @@ -485,7 +490,13 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp) d->bd_bif = bp; d->bd_next = bp->bif_dlist; bp->bif_dlist = d; - + + /* + * Take a reference on the device even if an error is returned + * because we keep the device in the interface's list of listeners + */ + bpf_acquire_d(d); + if (first) { /* Find the default bpf entry for this ifp */ if (bp->bif_ifp->if_bpf == NULL) { @@ -515,6 +526,11 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp) error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT); } + /* + * Reset the detach flags in case we previously detached an interface + */ + d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED); + if (bp->bif_ifp->if_bpf != NULL && bp->bif_ifp->if_bpf->bif_dlt == DLT_PKTAP) d->bd_flags |= BPF_FINALIZE_PKTAP; @@ -526,17 +542,34 @@ bpf_attachd(struct bpf_d *d, struct 
bpf_if *bp) /* * Detach a file from its interface. + * + * Return 1 if it was closed by some thread, 0 otherwise */ -static void -bpf_detachd(struct bpf_d *d) +static int +bpf_detachd(struct bpf_d *d, int closing) { struct bpf_d **p; struct bpf_if *bp; struct ifnet *ifp; + /* + * Some other thread already detached + */ + if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) + goto done; + /* + * This thread is doing the detach + */ + d->bd_flags |= BPF_DETACHING; + ifp = d->bd_bif->bif_ifp; bp = d->bd_bif; - + + if (bpf_debug != 0) + printf("%s: %llx %s%s\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(d), + if_name(ifp), closing ? " closing" : ""); + /* Remove d from the interface's descriptor list. */ p = &bp->bif_dlist; while (*p != d) { @@ -576,10 +609,37 @@ bpf_detachd(struct bpf_d *d) * take it out. * Most likely the network interface is gone. */ - printf("bpf: ifnet_set_promiscuous failed"); + printf("%s: ifnet_set_promiscuous failed\n", __func__); } lck_mtx_lock(bpf_mlock); } + + /* + * Wake up other threads that are waiting for this thread to finish + * detaching + */ + d->bd_flags &= ~BPF_DETACHING; + d->bd_flags |= BPF_DETACHED; + /* + * Note that we've kept the reference because we may have dropped + * the lock when turning off promiscuous mode + */ + bpf_release_d(d); + +done: + /* + * When closing, make sure no other thread refers to the bpf_d + */ + if (bpf_debug != 0) + printf("%s: %llx done\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(d)); + /* + * Let the caller know the bpf_d is closed + */ + if ((d->bd_flags & BPF_CLOSING)) + return (1); + else + return (0); } @@ -634,7 +694,42 @@ bpf_stop_timer(struct bpf_d *d) return (thread_call_cancel(d->bd_thread_call)); } +void +bpf_acquire_d(struct bpf_d *d) +{ + void *lr_saved = __builtin_return_address(0); + + lck_mtx_assert(bpf_mlock, LCK_MTX_ASSERT_OWNED); + + d->bd_refcnt += 1; + + d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved; + d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST; +} + +void
+bpf_release_d(struct bpf_d *d) +{ + void *lr_saved = __builtin_return_address(0); + + lck_mtx_assert(bpf_mlock, LCK_MTX_ASSERT_OWNED); + + if (d->bd_refcnt <= 0) + panic("%s: %p refcnt <= 0", __func__, d); + + d->bd_refcnt -= 1; + d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved; + d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST; + + if (d->bd_refcnt == 0) { + /* Assert the device is detached */ + if ((d->bd_flags & BPF_DETACHED) == 0) + panic("%s: %p BPF_DETACHED not set", __func__, d); + + _FREE(d, M_DEVBUF); + } +} /* * Open ethernet device. Returns ENXIO for illegal minor device number, @@ -678,7 +773,8 @@ bpfopen(dev_t dev, int flags, __unused int fmt, lck_mtx_unlock(bpf_mlock); return (EBUSY); } - d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF, M_WAIT); + d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF, + M_WAIT | M_ZERO); if (d == NULL) { /* this really is a catastrophic failure */ printf("bpfopen: malloc bpf_d failed\n"); @@ -686,32 +782,28 @@ bpfopen(dev_t dev, int flags, __unused int fmt, lck_mtx_unlock(bpf_mlock); return ENOMEM; } - bzero(d, sizeof(struct bpf_d)); - - /* - * It is not necessary to take the BPF lock here because no other - * thread can access the device until it is marked opened... - */ - + /* Mark "in use" and do most initialization. 
*/ + bpf_acquire_d(d); d->bd_bufsize = bpf_bufsize; d->bd_sig = SIGIO; d->bd_seesent = 1; d->bd_oflags = flags; d->bd_state = BPF_IDLE; - d->bd_thread_call = thread_call_allocate(bpf_timed_out, d); d->bd_traffic_class = SO_TC_BE; + d->bd_flags |= BPF_DETACHED; if (bpf_wantpktap) d->bd_flags |= BPF_WANT_PKTAP; else d->bd_flags &= ~BPF_WANT_PKTAP; - + d->bd_thread_call = thread_call_allocate(bpf_timed_out, d); if (d->bd_thread_call == NULL) { printf("bpfopen: malloc thread call failed\n"); bpf_dtab[minor(dev)] = NULL; + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); - _FREE(d, M_DEVBUF); - return ENOMEM; + + return (ENOMEM); } #if CONFIG_MACF_NET mac_bpfdesc_label_init(d); @@ -741,7 +833,17 @@ bpfclose(dev_t dev, __unused int flags, __unused int fmt, if (d == 0 || d == (void *)1) { lck_mtx_unlock(bpf_mlock); return (ENXIO); - } + } + + /* + * Other threads may call bpf_detachd() if we drop the bpf_mlock + */ + d->bd_flags |= BPF_CLOSING; + + if (bpf_debug != 0) + printf("%s: %llx\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(d)); + bpf_dtab[minor(dev)] = (void *)1; /* Mark closing */ /* @@ -799,7 +901,7 @@ bpfclose(dev_t dev, __unused int flags, __unused int fmt, } if (d->bd_bif) - bpf_detachd(d); + bpf_detachd(d, 1); selthreadclear(&d->bd_sel); #if CONFIG_MACF_NET mac_bpfdesc_label_destroy(d); @@ -813,10 +915,11 @@ bpfclose(dev_t dev, __unused int flags, __unused int fmt, /* Mark free in same context as bpfopen comes to check */ bpf_dtab[minor(dev)] = NULL; /* Mark closed */ + + bpf_release_d(d); + lck_mtx_unlock(bpf_mlock); - - _FREE(d, M_DEVBUF); - + return (0); } @@ -844,8 +947,10 @@ bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo) panic("rotating bpf buffers during read"); \ (d)->bd_hbuf = (d)->bd_sbuf; \ (d)->bd_hlen = (d)->bd_slen; \ + (d)->bd_hcnt = (d)->bd_scnt; \ (d)->bd_sbuf = (d)->bd_fbuf; \ (d)->bd_slen = 0; \ + (d)->bd_scnt = 0; \ (d)->bd_fbuf = NULL; /* * bpfread - read next chunk of packets from buffers @@ -862,16 +967,19 @@ bpfread(dev_t
dev, struct uio *uio, int ioflag) lck_mtx_lock(bpf_mlock); d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) { + if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) { lck_mtx_unlock(bpf_mlock); return (ENXIO); } + bpf_acquire_d(d); + /* * Restrict application to use a buffer the same size as * as kernel buffers. */ if (uio_resid(uio) != d->bd_bufsize) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (EINVAL); } @@ -884,9 +992,9 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) while (d->bd_hbuf_read) msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); - - d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) { + + if ((d->bd_flags & BPF_CLOSING) != 0) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (ENXIO); } @@ -918,10 +1026,12 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) * it before using it again. */ if (d->bd_bif == NULL) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (ENXIO); } if (ioflag & IO_NDELAY) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (EWOULDBLOCK); } @@ -930,8 +1040,8 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) /* * Make sure device is still opened */ - d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) { + if ((d->bd_flags & BPF_CLOSING) != 0) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (ENXIO); } @@ -939,8 +1049,8 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) while (d->bd_hbuf_read) msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); - d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) { + if ((d->bd_flags & BPF_CLOSING) != 0) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (ENXIO); } @@ -965,6 +1075,7 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) ROTATE_BUFFERS(d); break; } + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (error); } @@ -983,6 +1094,7 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) break; if (d->bd_slen == 0) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (0); } @@ -1091,8 +1203,8 
@@ bpfread(dev_t dev, struct uio *uio, int ioflag) /* * Make sure device is still opened */ - d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) { + if ((d->bd_flags & BPF_CLOSING) != 0) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (ENXIO); } @@ -1101,7 +1213,10 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = NULL; d->bd_hlen = 0; + d->bd_hcnt = 0; wakeup((caddr_t)d); + + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (error); @@ -1123,11 +1238,8 @@ bpf_wakeup(struct bpf_d *d) pgsigio(d->bd_sigio, d->bd_sig); selwakeup(&d->bd_sel); - KNOTE(&d->bd_sel.si_note, 1); -#ifndef __APPLE__ - /* XXX */ - d->bd_sel.si_pid = 0; -#endif + if ((d->bd_flags & BPF_KNOTE)) + KNOTE(&d->bd_sel.si_note, 1); } @@ -1178,11 +1290,15 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag) lck_mtx_lock(bpf_mlock); d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) { + if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) { lck_mtx_unlock(bpf_mlock); return (ENXIO); } + + bpf_acquire_d(d); + if (d->bd_bif == 0) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (ENXIO); } @@ -1190,10 +1306,12 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag) ifp = d->bd_bif->bif_ifp; if ((ifp->if_flags & IFF_UP) == 0) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (ENETDOWN); } if (uio_resid(uio) == 0) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (0); } @@ -1213,26 +1331,31 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag) bd_hdrcmplt ? 
NULL : (struct sockaddr *)dst_buf, &datlen); + /* take the lock again */ + lck_mtx_lock(bpf_mlock); if (error) { + bpf_release_d(d); + lck_mtx_unlock(bpf_mlock); return (error); } - /* taking the lock again and verifying whether device is open */ - lck_mtx_lock(bpf_mlock); - d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) { + /* verify the device is still open */ + if ((d->bd_flags & BPF_CLOSING) != 0) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); m_freem(m); return (ENXIO); } if (d->bd_bif == NULL) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); m_free(m); return (ENXIO); } if ((unsigned)datlen > ifp->if_mtu) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); m_freem(m); return (EMSGSIZE); @@ -1247,6 +1370,9 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag) lck_mtx_unlock(bpf_mlock); + /* + * The driver frees the mbuf. + */ if (d->bd_hdrcmplt) { if (d->bd_bif->bif_send) error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m); @@ -1257,9 +1383,10 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag) (struct sockaddr *)dst_buf, 0, NULL); } - /* - * The driver frees the mbuf. - */ + lck_mtx_lock(bpf_mlock); + bpf_release_d(d); + lck_mtx_unlock(bpf_mlock); + return (error); } @@ -1280,6 +1407,8 @@ reset_d(struct bpf_d *d) } d->bd_slen = 0; d->bd_hlen = 0; + d->bd_scnt = 0; + d->bd_hcnt = 0; d->bd_rcount = 0; d->bd_dcount = 0; } @@ -1306,6 +1435,8 @@ reset_d(struct bpf_d *d) * BIOCSETTC Set traffic class. * BIOCGETTC Get traffic class. 
* BIOCSEXTHDR Set "extended header" flag + * BIOCSHEADDROP Drop head of the buffer if user is not reading + * BIOCGHEADDROP Get "head-drop" flag */ /* ARGSUSED */ int @@ -1320,11 +1451,13 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, lck_mtx_lock(bpf_mlock); d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) { + if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) { lck_mtx_unlock(bpf_mlock); return (ENXIO); } + bpf_acquire_d(d); + if (d->bd_state == BPF_WAITING) bpf_stop_timer(d); d->bd_state = BPF_IDLE; @@ -1399,7 +1532,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, bcopy(addr, &prg32, sizeof (prg32)); error = bpf_setf(d, prg32.bf_len, - CAST_USER_ADDR_T(prg32.bf_insns), dev, cmd); + CAST_USER_ADDR_T(prg32.bf_insns), cmd); break; } @@ -1408,7 +1541,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, struct bpf_program64 prg64; bcopy(addr, &prg64, sizeof (prg64)); - error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, dev, cmd); + error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd); break; } @@ -1419,11 +1552,10 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, while (d->bd_hbuf_read) { msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); } - - d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) - return (ENXIO); - + if ((d->bd_flags & BPF_CLOSING) != 0) { + error = ENXIO; + break; + } reset_d(d); break; @@ -1478,7 +1610,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, u_int dlt; bcopy(addr, &dlt, sizeof (dlt)); - error = bpf_setdlt(d, dlt, dev); + error = bpf_setdlt(d, dlt); } break; @@ -1508,7 +1640,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, if (ifp == NULL) error = ENXIO; else - error = bpf_setif(d, ifp, 0, dev); + error = bpf_setif(d, ifp, 0); break; } @@ -1589,7 +1721,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, * Set immediate mode. 
*/ case BIOCIMMEDIATE: /* u_int */ - bcopy(addr, &d->bd_immediate, sizeof (u_int)); + d->bd_immediate = *(u_int *)(void *)addr; break; case BIOCVERSION: { /* struct bpf_version */ @@ -1734,8 +1866,18 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, d->bd_flags &= ~BPF_WANT_PKTAP; break; #endif + + case BIOCSHEADDROP: + bcopy(addr, &int_arg, sizeof (int_arg)); + d->bd_headdrop = int_arg ? 1 : 0; + break; + + case BIOCGHEADDROP: + bcopy(&d->bd_headdrop, addr, sizeof (int)); + break; } + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (error); @@ -1746,7 +1888,8 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, * free it and replace it. Returns EINVAL for bogus requests. */ static int -bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns, dev_t dev, u_long cmd) +bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns, + u_long cmd) { struct bpf_insn *fcode, *old; u_int flen, size; @@ -1754,8 +1897,7 @@ bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns, dev_t dev, u_long while (d->bd_hbuf_read) msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); - d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) + if ((d->bd_flags & BPF_CLOSING) != 0) return (ENXIO); old = d->bd_filter; @@ -1800,7 +1942,7 @@ bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns, dev_t dev, u_long * Return an errno or 0. 
*/ static int -bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt, dev_t dev) +bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt) { struct bpf_if *bp; int error; @@ -1808,8 +1950,7 @@ bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt, dev_t dev) while (d->bd_hbuf_read) msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); - d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) + if ((d->bd_flags & BPF_CLOSING) != 0) return (ENXIO); /* @@ -1839,15 +1980,15 @@ bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt, dev_t dev) return (error); } if (bp != d->bd_bif) { - if (d->bd_bif) /* * Detach if attached to something else. */ - bpf_detachd(d); - - if (bpf_attachd(d, bp) != 0) { - return ENXIO; + if (d->bd_bif) { + if (bpf_detachd(d, 0) != 0) + return (ENXIO); } + if (bpf_attachd(d, bp) != 0) + return (ENXIO); } reset_d(d); return (0); @@ -1912,7 +2053,7 @@ bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p) * Set the data link type of a BPF instance. 
*/ static int -bpf_setdlt(struct bpf_d *d, uint32_t dlt, dev_t dev) +bpf_setdlt(struct bpf_d *d, uint32_t dlt) { int error, opromisc; struct ifnet *ifp; @@ -1924,8 +2065,7 @@ bpf_setdlt(struct bpf_d *d, uint32_t dlt, dev_t dev) while (d->bd_hbuf_read) msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); - d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) + if ((d->bd_flags & BPF_CLOSING) != 0) return (ENXIO); ifp = d->bd_bif->bif_ifp; @@ -1935,7 +2075,8 @@ bpf_setdlt(struct bpf_d *d, uint32_t dlt, dev_t dev) } if (bp != NULL) { opromisc = d->bd_promisc; - bpf_detachd(d); + if (bpf_detachd(d, 0) != 0) + return (ENXIO); error = bpf_attachd(d, bp); if (error) { printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n", @@ -1947,11 +2088,13 @@ bpf_setdlt(struct bpf_d *d, uint32_t dlt, dev_t dev) lck_mtx_unlock(bpf_mlock); error = ifnet_set_promiscuous(bp->bif_ifp, 1); lck_mtx_lock(bpf_mlock); - if (error) - printf("bpf_setdlt: ifpromisc %s%d failed (%d)\n", - ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error); - else + if (error) { + printf("%s: ifpromisc %s%d failed (%d)\n", + __func__, ifnet_name(bp->bif_ifp), + ifnet_unit(bp->bif_ifp), error); + } else { d->bd_promisc = 1; + } } } return (bp == NULL ? 
EINVAL : 0); @@ -1995,21 +2138,24 @@ bpfselect(dev_t dev, int which, void * wql, struct proc *p) lck_mtx_lock(bpf_mlock); d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) { + if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) { lck_mtx_unlock(bpf_mlock); return (ENXIO); } + bpf_acquire_d(d); + if (d->bd_bif == NULL) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (ENXIO); } while (d->bd_hbuf_read) msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); - - d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) { + + if ((d->bd_flags & BPF_CLOSING) != 0) { + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); return (ENXIO); } @@ -2036,7 +2182,9 @@ bpfselect(dev_t dev, int which, void * wql, struct proc *p) break; } + bpf_release_d(d); lck_mtx_unlock(bpf_mlock); + return (ret); } @@ -2074,7 +2222,7 @@ bpfkqfilter(dev_t dev, struct knote *kn) lck_mtx_lock(bpf_mlock); d = bpf_dtab[minor(dev)]; - if (d == 0 || d == (void *)1) { + if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) { lck_mtx_unlock(bpf_mlock); return (ENXIO); } @@ -2087,8 +2235,10 @@ bpfkqfilter(dev_t dev, struct knote *kn) kn->kn_hook = d; kn->kn_fop = &bpfread_filtops; KNOTE_ATTACH(&d->bd_sel.si_note, kn); + d->bd_flags |= BPF_KNOTE; + lck_mtx_unlock(bpf_mlock); - return 0; + return (0); } static void @@ -2097,7 +2247,10 @@ filt_bpfdetach(struct knote *kn) struct bpf_d *d = (struct bpf_d *)kn->kn_hook; lck_mtx_lock(bpf_mlock); - KNOTE_DETACH(&d->bd_sel.si_note, kn); + if (d->bd_flags & BPF_KNOTE) { + KNOTE_DETACH(&d->bd_sel.si_note, kn); + d->bd_flags &= ~BPF_KNOTE; + } lck_mtx_unlock(bpf_mlock); } @@ -2248,7 +2401,7 @@ bpf_tap_imp( hack_hdr.mh_type = m->m_type; hack_hdr.mh_flags = 0; - m = (mbuf_t)&hack_hdr; + __IGNORE_WCASTALIGN(m = (mbuf_t)&hack_hdr); } for (m0 = m; m0 != 0; m0 = m0->m_next) @@ -2348,14 +2501,23 @@ catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen, * pending reads. 
*/ if (d->bd_fbuf == NULL) { + if (d->bd_headdrop == 0) { + /* + * We haven't completed the previous read yet, + * so drop the packet. + */ + ++d->bd_dcount; + return; + } /* - * We haven't completed the previous read yet, - * so drop the packet. + * Drop the hold buffer as it contains older packets */ - ++d->bd_dcount; - return; + d->bd_dcount += d->bd_hcnt; + d->bd_fbuf = d->bd_hbuf; + ROTATE_BUFFERS(d); + } else { + ROTATE_BUFFERS(d); } - ROTATE_BUFFERS(d); do_wakeup = 1; curlen = 0; } @@ -2421,6 +2583,7 @@ catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen, */ (*cpfn)(pkt, payload, caplen); d->bd_slen = curlen + totlen; + d->bd_scnt += 1; if (do_wakeup) bpf_wakeup(d); @@ -2443,6 +2606,8 @@ bpf_allocbufs(struct bpf_d *d) } d->bd_slen = 0; d->bd_hlen = 0; + d->bd_scnt = 0; + d->bd_hcnt = 0; return (0); } @@ -2495,7 +2660,8 @@ bpf_attach( struct bpf_if *bp_temp; struct bpf_if *bp_first = NULL; - bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF, M_WAIT); + bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF, + M_WAIT | M_ZERO); if (bp_new == 0) panic("bpfattach"); @@ -2519,7 +2685,6 @@ bpf_attach( return EEXIST; } - bzero(bp_new, sizeof(*bp_new)); bp_new->bif_ifp = ifp; bp_new->bif_dlt = dlt; bp_new->bif_send = send; @@ -2569,9 +2734,12 @@ void bpfdetach(struct ifnet *ifp) { struct bpf_if *bp, *bp_prev, *bp_next; - struct bpf_if *bp_free_list = NULL; struct bpf_d *d; + if (bpf_debug != 0) + printf("%s: %s\n", + __func__, if_name(ifp)); + lck_mtx_lock(bpf_mlock); /* @@ -2593,32 +2761,22 @@ bpfdetach(struct ifnet *ifp) else bpf_iflist = bp->bif_next; - /* Add to the list to be freed */ - bp->bif_next = bp_free_list; - bp_free_list = bp; - } - - /* - * Detach the bpf devices attached to the interface - * Now we do not care if we lose the bpf_mlock in bpf_detachd - */ - for (bp = bp_free_list; bp != NULL; bp = bp->bif_next) { + /* Detach the devices attached to the interface */ while ((d = bp->bif_dlist) != NULL) { - 
bpf_detachd(d); + /* + * Take an extra reference to prevent the device + * from being freed when bpf_detachd() releases + * the reference for the interface list + */ + bpf_acquire_d(d); + bpf_detachd(d, 0); bpf_wakeup(d); + bpf_release_d(d); } ifnet_release(ifp); } lck_mtx_unlock(bpf_mlock); - - /* - * Free the list - */ - while ((bp = bp_free_list) != NULL) { - bp_free_list = bp->bif_next; - FREE(bp, M_DEVBUF); - } } void diff --git a/bsd/net/bpf.h b/bsd/net/bpf.h index 003f631d1..20293abd9 100644 --- a/bsd/net/bpf.h +++ b/bsd/net/bpf.h @@ -209,6 +209,8 @@ struct bpf_version { #ifdef PRIVATE #define BIOCGWANTPKTAP _IOR('B', 127, u_int) #define BIOCSWANTPKTAP _IOWR('B', 127, u_int) +#define BIOCSHEADDROP _IOW('B', 128, int) +#define BIOCGHEADDROP _IOR('B', 128, int) #endif /* PRIVATE */ /* * Structure prepended to each packet. @@ -605,7 +607,11 @@ struct bpf_mtag { /* * For Apple private usage */ +#define DLT_USER0_APPLE_INTERNAL DLT_USER0 /* rdar://12019509 */ +#define DLT_USER1_APPLE_INTERNAL DLT_USER1 /* rdar://12019509 */ #define DLT_PKTAP DLT_USER2 /* rdar://11779467 */ +#define DLT_USER3_APPLE_INTERNAL DLT_USER3 /* rdar://19614531 */ +#define DLT_USER4_APPLE_INTERNAL DLT_USER4 /* rdar://19614531 */ #endif /* PRIVATE */ /* diff --git a/bsd/net/bpfdesc.h b/bsd/net/bpfdesc.h index 4145bf405..dcb9ac0af 100644 --- a/bsd/net/bpfdesc.h +++ b/bsd/net/bpfdesc.h @@ -97,15 +97,18 @@ struct bpf_d { caddr_t bd_fbuf; /* free slot */ int bd_slen; /* current length of store buffer */ int bd_hlen; /* current length of hold buffer */ + u_int32_t bd_scnt; /* number of packets in store buffer */ + u_int32_t bd_hcnt; /* number of packets in hold buffer */ int bd_bufsize; /* absolute length of buffers */ int bd_hbuf_read; /* reading from hbuf */ + int bd_headdrop; /* Keep newer packets */ struct bpf_if *bd_bif; /* interface descriptor */ - u_int32_t bd_rtout; /* Read timeout in 'ticks' */ + u_int32_t bd_rtout; /* Read timeout in 'ticks' */ struct bpf_insn *bd_filter; /* filter 
code */ - u_int32_t bd_rcount; /* number of packets received */ - u_int32_t bd_dcount; /* number of packets dropped */ + u_int32_t bd_rcount; /* number of packets received */ + u_int32_t bd_dcount; /* number of packets dropped */ u_char bd_promisc; /* true if listening promiscuously */ u_char bd_state; /* idle, waiting, or timed out */ @@ -129,12 +132,19 @@ struct bpf_d { int bd_hdrcmplt; /* false to fill in src lladdr automatically */ int bd_seesent; /* true if bpf should see sent packets */ int bd_oflags; /* device open flags */ - thread_call_t bd_thread_call; /* for BPF timeouts with select */ + thread_call_t bd_thread_call; /* for BPF timeouts with select */ #if CONFIG_MACF_NET struct label * bd_label; /* MAC label for descriptor */ #endif int bd_traffic_class; /* traffic service class */ int bd_flags; /* flags */ + + int bd_refcnt; +#define BPF_REF_HIST 4 /* how many callers to keep around */ + void *bd_ref_lr[BPF_REF_HIST]; + void *bd_unref_lr[BPF_REF_HIST]; + int bd_next_ref_lr; + int bd_next_unref_lr; }; /* Values for bd_state */ @@ -148,11 +158,14 @@ struct bpf_d { (((bd)->bd_immediate || (bd)->bd_state == BPF_TIMED_OUT) && \ (bd)->bd_slen != 0)) - /* Values for bd_flags */ #define BPF_EXTENDED_HDR 0x01 /* process req. the extended header */ -#define BPF_WANT_PKTAP 0x02 /* process knows how to keep DLT_PKTAP private */ +#define BPF_WANT_PKTAP 0x02 /* knows how to handle DLT_PKTAP */ #define BPF_FINALIZE_PKTAP 0x04 /* finalize pktap header on read */ +#define BPF_KNOTE 0x08 /* kernel note attached */ +#define BPF_DETACHING 0x10 /* bpf_d is being detached */ +#define BPF_DETACHED 0x20 /* bpf_d is detached */ +#define BPF_CLOSING 0x40 /* bpf_d is being closed */ /* * Descriptor associated with each attached hardware interface. 
diff --git a/bsd/net/classq/Makefile b/bsd/net/classq/Makefile index 1aa7079e2..a02432ac6 100644 --- a/bsd/net/classq/Makefile +++ b/bsd/net/classq/Makefile @@ -24,9 +24,9 @@ EXPORT_MI_LIST = ${INSTALL_MI_LIST} ${KERNELFILES} EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} -INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES} +INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/net/classq/classq.c b/bsd/net/classq/classq.c index 3d9d324ae..625876773 100644 --- a/bsd/net/classq/classq.c +++ b/bsd/net/classq/classq.c @@ -73,7 +73,7 @@ #include -u_int32_t classq_verbose; /* more noise if greater than 1 */ +u_int32_t classq_verbose = 0; /* more noise if greater than 1 */ SYSCTL_NODE(_net, OID_AUTO, classq, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "classq"); diff --git a/bsd/net/classq/classq_sfb.c b/bsd/net/classq/classq_sfb.c index 4f705e0f1..7d12ba606 100644 --- a/bsd/net/classq/classq_sfb.c +++ b/bsd/net/classq/classq_sfb.c @@ -280,9 +280,9 @@ static u_int64_t sfb_hinterval; SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, hinterval, CTLFLAG_RW|CTLFLAG_LOCKED, &sfb_hinterval, "SFB hash interval in nanoseconds"); -static u_int64_t sfb_target_qdelay; +static u_int64_t sfb_target_qdelay = 0; SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, target_qdelay, CTLFLAG_RW|CTLFLAG_LOCKED, - &sfb_target_qdelay, "SFB target queue delay in milliseconds"); + &sfb_target_qdelay, "SFB target queue delay in nanoseconds"); static u_int64_t sfb_update_interval; SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, update_interval, @@ -458,6 +458,15 @@ sfb_calc_target_qdelay(struct sfb *sp, u_int64_t out_bw) if (target_qdelay == 0) target_qdelay = IFQ_TARGET_DELAY; + /* + * If a delay has been added to ifnet start callback for + * coalescing, we have to add that to the pre-set target delay + * because the packets can be in the queue 
longer. + */ + if ((ifp->if_eflags & IFEF_ENQUEUE_MULTI) && + ifp->if_start_delay_timeout > 0) + target_qdelay += ifp->if_start_delay_timeout; + sp->sfb_target_qdelay = target_qdelay; } @@ -1147,8 +1156,7 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t) u_int16_t pmin; int fc_adv = 0; int ret = CLASSQEQ_SUCCESS; - - nanouptime(&now); + u_int32_t maxqsize = 0; s = sp->sfb_current; VERIFY((s + (s ^ 1)) == 1); @@ -1157,6 +1165,13 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t) VERIFY(!(pkt->pkt_flags & PKTF_PRIV_GUARDED)); pkt->pkt_flags |= PKTF_PRIV_GUARDED; + if (pkt->pkt_enqueue_ts > 0) { + net_nsectimer(&pkt->pkt_enqueue_ts, &now); + } else { + nanouptime(&now); + net_timernsec(&now, &pkt->pkt_enqueue_ts); + } + /* time to swap the bins? */ if (net_timercmp(&now, &sp->sfb_nextreset, >=)) { net_timeradd(&now, &sp->sfb_hinterval, &sp->sfb_nextreset); @@ -1170,6 +1185,13 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t) &sp->sfb_update_time); } + /* + * If getq time is not set because this is the first packet + * or after idle time, set it now so that we can detect a stall. 
+ */ + if (qsize(q) == 0 && !net_timerisset(&sp->sfb_getqtime)) + *(&sp->sfb_getqtime) = *(&now); + pkt->pkt_sfb_flags = 0; pkt->pkt_sfb_hash16[s] = (SFB_HASH(&pkt->pkt_flowid, sizeof (pkt->pkt_flowid), @@ -1218,25 +1240,33 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t) sp->sfb_stats.drop_pbox++; } - /* - * if max queue size is static, make it a forced drop - * when the queue length hits the queue limit - */ - if (!(SFB_QUEUE_DELAYBASED(sp)) && - droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) { - droptype = DTYPE_FORCED; - sp->sfb_stats.drop_queue++; - } + if (SFB_QUEUE_DELAYBASED(sp)) + maxqsize = SFB_QUEUE_DELAYBASED_MAXSIZE; + else + maxqsize = qlimit(q); /* - * delay based queues have a larger maximum size to - * allow for bursts + * When the queue length hits the queue limit, make it a forced + * drop */ - if (SFB_QUEUE_DELAYBASED(sp) && - droptype == DTYPE_NODROP && - qlen(q) >= SFB_QUEUE_DELAYBASED_MAXSIZE) { - droptype = DTYPE_FORCED; - sp->sfb_stats.drop_queue++; + if (droptype == DTYPE_NODROP && qlen(q) >= maxqsize) { + if (pkt->pkt_proto == IPPROTO_TCP && + ((pkt->pkt_flags & PKTF_TCP_REXMT) || + (sp->sfb_flags & SFBF_LAST_PKT_DROPPED))) { + /* + * At some level, dropping packets will make the + * flows backoff and will keep memory requirements + * under control. But we should not cause a tail + * drop because it can take a long time for a + * TCP flow to recover. We should try to drop + * alternate packets instead. 
+ */ + sp->sfb_flags &= ~SFBF_LAST_PKT_DROPPED; + } else { + droptype = DTYPE_FORCED; + sp->sfb_stats.drop_queue++; + sp->sfb_flags |= SFBF_LAST_PKT_DROPPED; + } } if (fc_adv == 1 && droptype != DTYPE_FORCED && @@ -1255,7 +1285,6 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t) } /* if successful enqueue this packet, else drop it */ if (droptype == DTYPE_NODROP) { - net_timernsec(&now, &pkt->pkt_enqueue_ts); _addq(q, m); } else { IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd); @@ -1346,6 +1375,7 @@ sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge) sp->sfb_min_qdelay = 0; } } + pkt->pkt_enqueue_ts = 0; /* * Clearpkts are the ones which were in the queue when the hash @@ -1378,6 +1408,7 @@ sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge) sp->sfb_min_qdelay = 0; sp->sfb_fc_threshold = 0; net_timerclear(&sp->sfb_update_time); + net_timerclear(&sp->sfb_getqtime); } return (m); diff --git a/bsd/net/classq/classq_sfb.h b/bsd/net/classq/classq_sfb.h index f401b0eb5..2a28a7192 100644 --- a/bsd/net/classq/classq_sfb.h +++ b/bsd/net/classq/classq_sfb.h @@ -104,6 +104,7 @@ struct sfb_fcl { #define SFBF_FLOWCTL 0x04 /* enable flow control advisories */ #define SFBF_DELAYBASED 0x08 /* queueing is delay based */ #define SFBF_DELAYHIGH 0x10 /* Estimated delay is greater than target */ +#define SFBF_LAST_PKT_DROPPED 0x20 /* Last packet dropped */ #define SFBF_SUSPENDED 0x1000 /* queue is suspended */ #define SFBF_USERFLAGS \ diff --git a/bsd/net/classq/classq_subr.c b/bsd/net/classq/classq_subr.c index 109cae586..98c007bd9 100644 --- a/bsd/net/classq/classq_subr.c +++ b/bsd/net/classq/classq_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2013 Apple Inc. All rights reserved. + * Copyright (c) 2011-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -95,6 +95,7 @@ ifclassq_setup(struct ifnet *ifp, u_int32_t sflags, boolean_t reuse) VERIFY(IFCQ_IS_EMPTY(ifq)); ifq->ifcq_ifp = ifp; IFCQ_LEN(ifq) = 0; + IFCQ_BYTES(ifq) = 0; bzero(&ifq->ifcq_xmitcnt, sizeof (ifq->ifcq_xmitcnt)); bzero(&ifq->ifcq_dropcnt, sizeof (ifq->ifcq_dropcnt)); @@ -197,6 +198,7 @@ ifclassq_teardown(struct ifnet *ifp) VERIFY(ifq->ifcq_dequeue_sc == NULL); VERIFY(ifq->ifcq_request == NULL); IFCQ_LEN(ifq) = 0; + IFCQ_BYTES(ifq) = 0; IFCQ_MAXLEN(ifq) = 0; bzero(&ifq->ifcq_xmitcnt, sizeof (ifq->ifcq_xmitcnt)); bzero(&ifq->ifcq_dropcnt, sizeof (ifq->ifcq_dropcnt)); @@ -331,7 +333,6 @@ ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc, IFCQ_LOCK_SPIN(ifq); while (i < limit) { - u_int64_t pktlen; #if PF_ALTQ u_int32_t qlen; @@ -383,13 +384,17 @@ ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc, last = *head; l += (*head)->m_pkthdr.len; - pktlen = (*head)->m_pkthdr.len; #if MEASURE_BW (*head)->m_pkthdr.pkt_bwseq = - atomic_add_64_ov(&(ifp->if_bw.cur_seq), pktlen); + atomic_add_64_ov(&(ifp->if_bw.cur_seq), m_pktlen(*head)); #endif /* MEASURE_BW */ - + if (IFNET_IS_CELLULAR(ifp)) { + (*head)->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA; + (*head)->m_pkthdr.pkt_unsent_databytes = + (total_snd_byte_count << MSIZESHIFT) + + ifq->ifcq_bytes; + } head = &(*head)->m_nextpkt; i++; } diff --git a/bsd/net/classq/if_classq.h b/bsd/net/classq/if_classq.h index cb60c5464..bc8f4191e 100644 --- a/bsd/net/classq/if_classq.h +++ b/bsd/net/classq/if_classq.h @@ -120,7 +120,7 @@ struct ifclassq { decl_lck_mtx_data(, ifcq_lock); struct ifnet *ifcq_ifp; /* back pointer to interface */ - u_int32_t ifcq_len; + u_int32_t ifcq_len; /* packet count */ u_int32_t ifcq_maxlen; struct pktcntr ifcq_xmitcnt; struct pktcntr ifcq_dropcnt; @@ -129,6 +129,7 @@ struct ifclassq { u_int32_t ifcq_flags; /* flags */ u_int32_t ifcq_sflags; /* scheduler flags */ u_int32_t ifcq_target_qdelay; /* target queue 
delay */ + u_int32_t ifcq_bytes; /* bytes count */ void *ifcq_disc; /* for scheduler-specific use */ /* * ifcq_disc_slots[] represents the leaf classes configured for the @@ -342,6 +343,9 @@ struct if_ifclassq_stats { #define IFCQ_MAXLEN(_ifcq) ((_ifcq)->ifcq_maxlen) #define IFCQ_SET_MAXLEN(_ifcq, _len) ((_ifcq)->ifcq_maxlen = (_len)) #define IFCQ_TARGET_QDELAY(_ifcq) ((_ifcq)->ifcq_target_qdelay) +#define IFCQ_BYTES(_ifcq) ((_ifcq)->ifcq_bytes) +#define IFCQ_INC_BYTES(_ifcq, _len) (IFCQ_BYTES(_ifcq) + _len) +#define IFCQ_DEC_BYTES(_ifcq, _len) (IFCQ_BYTES(_ifcq) - _len) #define IFCQ_XMIT_ADD(_ifcq, _pkt, _len) do { \ PKTCNTR_ADD(&(_ifcq)->ifcq_xmitcnt, _pkt, _len); \ diff --git a/bsd/net/content_filter.c b/bsd/net/content_filter.c index 58bea9bbb..9975c99dc 100644 --- a/bsd/net/content_filter.c +++ b/bsd/net/content_filter.c @@ -2944,7 +2944,7 @@ cfil_update_data_offsets(struct socket *so, uint32_t kcunit, int outgoing, uint64_t pass_offset, uint64_t peek_offset) { errno_t error = 0; - struct cfil_entry *entry; + struct cfil_entry *entry = NULL; struct cfe_buf *entrybuf; int updated = 0; @@ -3006,7 +3006,7 @@ cfil_update_data_offsets(struct socket *so, uint32_t kcunit, int outgoing, * or when the socket is closed and no more data is waiting * to be delivered to the filter */ - if (so->so_cfil != NULL && + if (entry != NULL && ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET && entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) || ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) && @@ -3196,9 +3196,12 @@ cfil_action_drop(struct socket *so, uint32_t kcunit) p = current_proc(); - /* Force the socket to be marked defunct */ + /* + * Force the socket to be marked defunct + * (forcing fixed along with rdar://19391339) + */ error = sosetdefunct(p, so, - SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, 1); + SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, FALSE); /* Flush the socket buffer and disconnect */ if (error == 0) diff --git a/bsd/net/content_filter.h b/bsd/net/content_filter.h index 
2e4facaef..7291b2fb4 100644 --- a/bsd/net/content_filter.h +++ b/bsd/net/content_filter.h @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -94,14 +95,6 @@ typedef uint64_t cfil_sock_id_t; #define CFIL_SOCK_ID_NONE UINT64_MAX -/* - * Invariant timeval structure definition across architectures - */ -struct timeval64 { - int64_t tv_sec; - int64_t tv_usec; -}; - /* * struct cfil_msg_hdr * diff --git a/bsd/net/devtimer.c b/bsd/net/devtimer.c index d0b55d251..45eb31f47 100644 --- a/bsd/net/devtimer.c +++ b/bsd/net/devtimer.c @@ -176,11 +176,10 @@ devtimer_create(devtimer_process_func process_func, void * arg0) { devtimer_ref timer; - timer = _MALLOC(sizeof(*timer), M_DEVTIMER, M_WAITOK); + timer = _MALLOC(sizeof(*timer), M_DEVTIMER, M_WAITOK | M_ZERO); if (timer == NULL) { return (timer); } - bzero(timer, sizeof(*timer)); devtimer_retain(timer); timer->dt_callout = thread_call_allocate(devtimer_process, timer); if (timer->dt_callout == NULL) { diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c index 0df5c6ea4..5576af7d7 100644 --- a/bsd/net/dlil.c +++ b/bsd/net/dlil.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2014 Apple Inc. All rights reserved. + * Copyright (c) 1999-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -425,6 +425,10 @@ static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS; static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS; static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS; static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS; +static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS; + +struct chain_len_stats tx_chain_len_stats; +static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS; /* The following are protected by dlil_ifnet_lock */ static TAILQ_HEAD(, ifnet) ifnet_detaching_head; @@ -621,6 +625,16 @@ SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg, CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0, "enable hardware cksum debugging"); +u_int32_t ifnet_start_delayed = 0; +SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed, + CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0, + "number of times start was delayed"); + +u_int32_t ifnet_delay_start_disabled = 0; +SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled, + CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0, + "number of times start was delayed"); + #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */ #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */ #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */ @@ -696,6 +710,18 @@ SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx, CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0, "enable receive hardware checksum offload"); +SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats, + CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9, + sysctl_tx_chain_len_stats, "S", ""); + +uint32_t tx_chain_len_count = 0; +SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count, + CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, + ""); + +SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used, + CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, ""); + unsigned int net_rxpoll = 1; 
unsigned int net_affinity = 1; static kern_return_t dlil_affinity_set(struct thread *, u_int32_t); @@ -892,6 +918,26 @@ ifnet_lock_done(struct ifnet *ifp) lck_rw_done(&ifp->if_lock); } +#if INET +__private_extern__ void +if_inetdata_lock_shared(struct ifnet *ifp) +{ + lck_rw_lock_shared(&ifp->if_inetdata_lock); +} + +__private_extern__ void +if_inetdata_lock_exclusive(struct ifnet *ifp) +{ + lck_rw_lock_exclusive(&ifp->if_inetdata_lock); +} + +__private_extern__ void +if_inetdata_lock_done(struct ifnet *ifp) +{ + lck_rw_done(&ifp->if_inetdata_lock); +} +#endif + #if INET6 __private_extern__ void if_inet6data_lock_shared(struct ifnet *ifp) @@ -2476,7 +2522,10 @@ ifnet_start_common(struct ifnet *ifp, int resetfc) return; } ifp->if_start_req++; - if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL) { + if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL && + (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) || + IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen + || ifp->if_start_delayed == 0)) { wakeup_one((caddr_t)&ifp->if_start_thread); } lck_mtx_unlock(&ifp->if_start_lock); @@ -2496,6 +2545,7 @@ ifnet_start_thread_fn(void *v, wait_result_t w) char ifname[IFNAMSIZ + 1]; struct timespec *ts = NULL; struct ifclassq *ifq = &ifp->if_snd; + struct timespec delay_start_ts; /* * Treat the dedicated starter thread for lo0 as equivalent to @@ -2530,8 +2580,9 @@ ifnet_start_thread_fn(void *v, wait_result_t w) lck_mtx_lock_spin(&ifp->if_start_lock); for (;;) { - (void) msleep(&ifp->if_start_thread, &ifp->if_start_lock, - (PZERO - 1) | PSPIN, ifname, ts); + if (ifp->if_start_thread != NULL) + (void) msleep(&ifp->if_start_thread, &ifp->if_start_lock, + (PZERO - 1) | PSPIN, ifname, ts); /* interface is detached? 
*/ if (ifp->if_start_thread == THREAD_NULL) { @@ -2553,20 +2604,51 @@ ifnet_start_thread_fn(void *v, wait_result_t w) } ifp->if_start_active = 1; + for (;;) { u_int32_t req = ifp->if_start_req; - + if (!IFCQ_IS_EMPTY(ifq) && + (ifp->if_eflags & IFEF_ENQUEUE_MULTI) && + ifp->if_start_delayed == 0 && + IFCQ_LEN(ifq) < ifp->if_start_delay_qlen && + (ifp->if_eflags & IFEF_DELAY_START)) { + ifp->if_start_delayed = 1; + ifnet_start_delayed++; + break; + } else { + ifp->if_start_delayed = 0; + } lck_mtx_unlock(&ifp->if_start_lock); + + /* + * If no longer attached, don't call start because ifp + * is being destroyed; else hold an IO refcnt to + * prevent the interface from being detached (will be + * released below.) + */ + if (!ifnet_is_attached(ifp, 1)) { + lck_mtx_lock_spin(&ifp->if_start_lock); + break; + } + /* invoke the driver's start routine */ ((*ifp->if_start)(ifp)); + + /* + * Release the io ref count taken by ifnet_is_attached. + */ + ifnet_decr_iorefcnt(ifp); + lck_mtx_lock_spin(&ifp->if_start_lock); /* if there's no pending request, we're done */ if (req == ifp->if_start_req) break; } + ifp->if_start_req = 0; ifp->if_start_active = 0; + /* * Wakeup N ns from now if rate-controlled by TBR, and if * there are still packets in the send queue which haven't @@ -2576,6 +2658,12 @@ ifnet_start_thread_fn(void *v, wait_result_t w) ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ? 
&ifp->if_start_cycle : NULL); + if (ts == NULL && ifp->if_start_delayed == 1) { + delay_start_ts.tv_sec = 0; + delay_start_ts.tv_nsec = ifp->if_start_delay_timeout; + ts = &delay_start_ts; + } + if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) ts = NULL; } @@ -2922,6 +3010,8 @@ errno_t ifnet_enqueue(struct ifnet *ifp, struct mbuf *m) { int error; + struct timespec now; + u_int64_t now_nsec; if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) || m->m_nextpkt != NULL) { @@ -2938,6 +3028,65 @@ ifnet_enqueue(struct ifnet *ifp, struct mbuf *m) return (ENETDOWN); } + nanouptime(&now); + net_timernsec(&now, &now_nsec); + m->m_pkthdr.pkt_enqueue_ts = now_nsec; + + if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) { + /* + * If the driver chose to delay start callback for + * coalescing multiple packets, Then use the following + * heuristics to make sure that start callback will + * be delayed only when bulk data transfer is detected. + * 1. number of packets enqueued in (delay_win * 2) is + * greater than or equal to the delay qlen. + * 2. If delay_start is enabled it will stay enabled for + * another 10 idle windows. This is to take into account + * variable RTT and burst traffic. + * 3. If the time elapsed since last enqueue is more + * than 200ms we disable delaying start callback. This is + * is to take idle time into account. 
+ */ + u_int64_t dwin = (ifp->if_start_delay_timeout << 1); + if (ifp->if_start_delay_swin > 0) { + if ((ifp->if_start_delay_swin + dwin) > now_nsec) { + ifp->if_start_delay_cnt++; + } else if ((now_nsec - ifp->if_start_delay_swin) + >= (200 * 1000 * 1000)) { + ifp->if_start_delay_swin = now_nsec; + ifp->if_start_delay_cnt = 1; + ifp->if_start_delay_idle = 0; + if (ifp->if_eflags & IFEF_DELAY_START) { + ifp->if_eflags &= + ~(IFEF_DELAY_START); + ifnet_delay_start_disabled++; + } + } else { + if (ifp->if_start_delay_cnt >= + ifp->if_start_delay_qlen) { + ifp->if_eflags |= IFEF_DELAY_START; + ifp->if_start_delay_idle = 0; + } else { + if (ifp->if_start_delay_idle >= 10) { + ifp->if_eflags &= ~(IFEF_DELAY_START); + ifnet_delay_start_disabled++; + } else { + ifp->if_start_delay_idle++; + } + } + ifp->if_start_delay_swin = now_nsec; + ifp->if_start_delay_cnt = 1; + } + } else { + ifp->if_start_delay_swin = now_nsec; + ifp->if_start_delay_cnt = 1; + ifp->if_start_delay_idle = 0; + ifp->if_eflags &= ~(IFEF_DELAY_START); + } + } else { + ifp->if_eflags &= ~(IFEF_DELAY_START); + } + /* enqueue the packet */ error = ifclassq_enqueue(&ifp->if_snd, m); @@ -2946,7 +3095,8 @@ ifnet_enqueue(struct ifnet *ifp, struct mbuf *m) * for the packet is suspended (EQSUSPENDED), as the driver could still * be dequeueing from other unsuspended queues. 
*/ - if (error == 0 || error == EQFULL || error == EQSUSPENDED) + if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) && + (error == 0 || error == EQFULL || error == EQSUSPENDED)) ifnet_start(ifp); return (error); @@ -3486,10 +3636,6 @@ dlil_event_internal(struct ifnet *ifp, struct kev_msg *event) int tmp_ifproto_arr_idx = 0; bool tmp_malloc = false; - /* Get an io ref count if the interface is attached */ - if (!ifnet_is_attached(ifp, 1)) - goto done; - /* * Pass the event to the interface filters */ @@ -3510,6 +3656,10 @@ dlil_event_internal(struct ifnet *ifp, struct kev_msg *event) if_flt_monitor_unbusy(ifp); lck_mtx_unlock(&ifp->if_flt_lock); + /* Get an io ref count if the interface is attached */ + if (!ifnet_is_attached(ifp, 1)) + goto done; + /* * An embedded tmp_list_entry in if_proto may still get * over-written by another thread after giving up ifnet lock, @@ -3716,6 +3866,38 @@ ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m) } } +static void +dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls) +{ + mbuf_t n = m; + int chainlen = 0; + + while (n != NULL) { + chainlen++; + n = n->m_next; + } + switch (chainlen) { + case 0: + break; + case 1: + atomic_add_64(&cls->cls_one, 1); + break; + case 2: + atomic_add_64(&cls->cls_two, 1); + break; + case 3: + atomic_add_64(&cls->cls_three, 1); + break; + case 4: + atomic_add_64(&cls->cls_four, 1); + break; + case 5: + default: + atomic_add_64(&cls->cls_five_or_more, 1); + break; + } +} + /* * dlil_output * @@ -3930,18 +4112,29 @@ dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist, * update the timestamp to indicate recent activity * on a foreground socket. 
*/ - if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND) && - (m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) && - m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) - ifp->if_fg_sendts = net_uptime(); + if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) && + m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) { + if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) + ifp->if_fg_sendts = net_uptime(); + + if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) + ifp->if_rt_sendts = net_uptime(); + } ifp_inc_traffic_class_out(ifp, m); pktap_output(ifp, proto_family, m, pre, post); + /* + * Count the number of elements in the mbuf chain + */ + if (tx_chain_len_count) { + dlil_count_chain_len(m, &tx_chain_len_stats); + } + /* * Finally, call the driver. */ - if (ifp->if_eflags & IFEF_SENDLIST) { + if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) { if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) { flen += (m_pktlen(m) - (pre + post)); m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED; @@ -3989,24 +4182,57 @@ dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist, } while (m != NULL); if (send_head != NULL) { - VERIFY(ifp->if_eflags & IFEF_SENDLIST); KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0, 0, 0, 0, 0); - retval = (*ifp->if_output)(ifp, send_head); - if (retval == EQFULL || retval == EQSUSPENDED) { - if (adv != NULL) { - adv->code = (retval == EQFULL ? - FADV_FLOW_CONTROLLED : FADV_SUSPENDED); + if (ifp->if_eflags & IFEF_SENDLIST) { + retval = (*ifp->if_output)(ifp, send_head); + if (retval == EQFULL || retval == EQSUSPENDED) { + if (adv != NULL) { + adv->code = (retval == EQFULL ? 
+ FADV_FLOW_CONTROLLED : + FADV_SUSPENDED); + } + retval = 0; + } + if (retval == 0 && flen > 0) { + fbytes += flen; + fpkts++; + } + if (retval != 0 && dlil_verbose) { + printf("%s: output error on %s retval = %d\n", + __func__, if_name(ifp), retval); + } + } else { + struct mbuf *send_m; + int enq_cnt = 0; + VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI); + while (send_head != NULL) { + send_m = send_head; + send_head = send_m->m_nextpkt; + send_m->m_nextpkt = NULL; + retval = (*ifp->if_output)(ifp, send_m); + if (retval == EQFULL || retval == EQSUSPENDED) { + if (adv != NULL) { + adv->code = (retval == EQFULL ? + FADV_FLOW_CONTROLLED : + FADV_SUSPENDED); + } + retval = 0; + } + if (retval == 0) { + enq_cnt++; + if (flen > 0) + fpkts++; + } + if (retval != 0 && dlil_verbose) { + printf("%s: output error on %s retval = %d\n", + __func__, if_name(ifp), retval); + } + } + if (enq_cnt > 0) { + fbytes += flen; + ifnet_start(ifp); } - retval = 0; - } - if (retval == 0 && flen > 0) { - fbytes += flen; - fpkts++; - } - if (retval != 0 && dlil_verbose) { - printf("%s: output error on %s retval = %d\n", - __func__, if_name(ifp), retval); } KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0); } @@ -4278,7 +4504,7 @@ __private_extern__ void net_thread_marks_pop(net_thread_marks_t popx) { static const char *const base = (const void*)&net_thread_marks_base; - ptrdiff_t pop = (caddr_t)popx - (caddr_t)base; + const ptrdiff_t pop = (const char *)popx - (const char *)base; if (pop != 0) { static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U; @@ -4294,7 +4520,7 @@ __private_extern__ void net_thread_unmarks_pop(net_thread_marks_t unpopx) { static const char *const base = (const void*)&net_thread_marks_base; - ptrdiff_t unpop = (caddr_t)unpopx - (caddr_t)base; + ptrdiff_t unpop = (const char *)unpopx - (const char *)base; if (unpop != 0) { static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U; @@ -5171,6 +5397,23 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl 
*ll_addr) VERIFY(ifp->if_delegated.subfamily == 0); VERIFY(ifp->if_delegated.expensive == 0); + bzero(&ifp->if_agentids, sizeof(ifp->if_agentids)); + + /* Reset interface state */ + bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state)); + ifp->if_interface_state.valid_bitmask |= + IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID; + ifp->if_interface_state.interface_availability = + IF_INTERFACE_STATE_INTERFACE_AVAILABLE; + + /* Initialize Link Quality Metric (loopback [lo0] is always good) */ + if (ifp == lo_ifp) { + ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD; + ifp->if_interface_state.valid_bitmask |= + IF_INTERFACE_STATE_LQM_STATE_VALID; + } else { + ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN; + } ifnet_lock_done(ifp); ifnet_head_done(); @@ -5223,9 +5466,6 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) */ lck_mtx_lock(rnh_lock); ifnet_lock_exclusive(ifp); - /* Initialize Link Quality Metric (loopback [lo0] is always good) */ - ifp->if_lqm = (ifp == lo_ifp) ? 
IFNET_LQM_THRESH_GOOD : - IFNET_LQM_THRESH_UNKNOWN; lck_mtx_lock_spin(&ifp->if_ref_lock); ifp->if_refflags = IFRF_ATTACHED; lck_mtx_unlock(&ifp->if_ref_lock); @@ -5435,6 +5675,9 @@ ifnet_detach(ifnet_t ifp) ifp->if_link.tqe_prev = NULL; ifindex2ifnet[ifp->if_index] = NULL; + /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */ + ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER); + /* Record detach PC stacktrace */ ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach); @@ -5445,6 +5688,9 @@ ifnet_detach(ifnet_t ifp) delegated_ifp = ifp->if_delegated.ifp; bzero(&ifp->if_delegated, sizeof (ifp->if_delegated)); + /* Reset interface state */ + bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state)); + ifnet_lock_done(ifp); ifnet_head_done(); lck_mtx_unlock(rnh_lock); @@ -5455,7 +5701,7 @@ ifnet_detach(ifnet_t ifp) /* Reset Link Quality Metric (unless loopback [lo0]) */ if (ifp != lo_ifp) - if_lqm_update(ifp, IFNET_LQM_THRESH_OFF); + if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0); /* Reset TCP local statistics */ if (ifp->if_tcp_stat != NULL) @@ -5465,6 +5711,12 @@ ifnet_detach(ifnet_t ifp) if (ifp->if_udp_stat != NULL) bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat)); + /* Release memory held for interface link status report */ + if (ifp->if_link_status != NULL) { + FREE(ifp->if_link_status, M_TEMP); + ifp->if_link_status = NULL; + } + /* Let BPF know we're detaching */ bpfdetach(ifp); @@ -6057,10 +6309,19 @@ int dlil_if_acquire(u_int32_t family, const void *uniqueid, lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group, ifnet_lock_attr); lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr); +#if INET + lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group, + ifnet_lock_attr); + ifp1->if_inetdata = NULL; +#endif #if INET6 - lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group, ifnet_lock_attr); + lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group, + ifnet_lock_attr); ifp1->if_inet6data = NULL; #endif + 
lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group, + ifnet_lock_attr); + ifp1->if_link_status = NULL; /* for send data paths */ lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group, @@ -6280,26 +6541,43 @@ ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6) #endif /* INET6 */ void -if_lqm_update(struct ifnet *ifp, int lqm) +if_lqm_update(struct ifnet *ifp, int lqm, int locked) { struct kev_dl_link_quality_metric_data ev_lqm_data; VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX); /* Normalize to edge */ - if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_BAD) + if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_BAD) lqm = IFNET_LQM_THRESH_BAD; else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR) lqm = IFNET_LQM_THRESH_POOR; else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD) lqm = IFNET_LQM_THRESH_GOOD; - ifnet_lock_exclusive(ifp); - if (lqm == ifp->if_lqm) { - ifnet_lock_done(ifp); + /* + * Take the lock if needed + */ + if (!locked) + ifnet_lock_exclusive(ifp); + + if (lqm == ifp->if_interface_state.lqm_state && + (ifp->if_interface_state.valid_bitmask & + IF_INTERFACE_STATE_LQM_STATE_VALID)) { + /* + * Release the lock if was not held by the caller + */ + if (!locked) + ifnet_lock_done(ifp); return; /* nothing to update */ } - ifp->if_lqm = lqm; + ifp->if_interface_state.valid_bitmask |= + IF_INTERFACE_STATE_LQM_STATE_VALID; + ifp->if_interface_state.lqm_state = lqm; + + /* + * Don't want to hold the lock when issuing kernel events + */ ifnet_lock_done(ifp); bzero(&ev_lqm_data, sizeof (ev_lqm_data)); @@ -6307,6 +6585,157 @@ if_lqm_update(struct ifnet *ifp, int lqm) dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED, (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data)); + + /* + * Reacquire the lock for the caller + */ + if (locked) + ifnet_lock_exclusive(ifp); +} + +static void +if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state) +{ + struct 
kev_dl_rrc_state kev; + + if (rrc_state == ifp->if_interface_state.rrc_state && + (ifp->if_interface_state.valid_bitmask & + IF_INTERFACE_STATE_RRC_STATE_VALID)) + return; + + ifp->if_interface_state.valid_bitmask |= + IF_INTERFACE_STATE_RRC_STATE_VALID; + + ifp->if_interface_state.rrc_state = rrc_state; + + /* + * Don't want to hold the lock when issuing kernel events + */ + ifnet_lock_done(ifp); + + bzero(&kev, sizeof(struct kev_dl_rrc_state)); + kev.rrc_state = rrc_state; + + dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED, + (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state)); + + ifnet_lock_exclusive(ifp); +} + +errno_t +if_state_update(struct ifnet *ifp, + struct if_interface_state* if_interface_state) +{ + u_short if_index_available = 0; + + ifnet_lock_exclusive(ifp); + + if ((ifp->if_type != IFT_CELLULAR) && + (if_interface_state->valid_bitmask & + IF_INTERFACE_STATE_RRC_STATE_VALID)) { + ifnet_lock_done(ifp); + return (ENOTSUP); + } + if ((if_interface_state->valid_bitmask & + IF_INTERFACE_STATE_LQM_STATE_VALID) && + (if_interface_state->lqm_state < IFNET_LQM_MIN || + if_interface_state->lqm_state > IFNET_LQM_MAX)) { + ifnet_lock_done(ifp); + return (EINVAL); + } + if ((if_interface_state->valid_bitmask & + IF_INTERFACE_STATE_RRC_STATE_VALID) && + if_interface_state->rrc_state != + IF_INTERFACE_STATE_RRC_STATE_IDLE && + if_interface_state->rrc_state != + IF_INTERFACE_STATE_RRC_STATE_CONNECTED) { + ifnet_lock_done(ifp); + return (EINVAL); + } + + if (if_interface_state->valid_bitmask & + IF_INTERFACE_STATE_LQM_STATE_VALID) { + if_lqm_update(ifp, if_interface_state->lqm_state, 1); + } + if (if_interface_state->valid_bitmask & + IF_INTERFACE_STATE_RRC_STATE_VALID) { + if_rrc_state_update(ifp, if_interface_state->rrc_state); + } + if (if_interface_state->valid_bitmask & + IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) { + ifp->if_interface_state.valid_bitmask |= + IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID; + 
ifp->if_interface_state.interface_availability = + if_interface_state->interface_availability; + + if (ifp->if_interface_state.interface_availability == + IF_INTERFACE_STATE_INTERFACE_AVAILABLE) { + if_index_available = ifp->if_index; + } + } + ifnet_lock_done(ifp); + + /* + * Check if the TCP connections going on this interface should be + * forced to send probe packets instead of waiting for TCP timers + * to fire. This will be done when there is an explicit + * notification that the interface became available. + */ + if (if_index_available > 0) + tcp_interface_send_probe(if_index_available); + + return (0); +} + +void +if_get_state(struct ifnet *ifp, + struct if_interface_state* if_interface_state) +{ + ifnet_lock_shared(ifp); + + if_interface_state->valid_bitmask = 0; + + if (ifp->if_interface_state.valid_bitmask & + IF_INTERFACE_STATE_RRC_STATE_VALID) { + if_interface_state->valid_bitmask |= + IF_INTERFACE_STATE_RRC_STATE_VALID; + if_interface_state->rrc_state = + ifp->if_interface_state.rrc_state; + } + if (ifp->if_interface_state.valid_bitmask & + IF_INTERFACE_STATE_LQM_STATE_VALID) { + if_interface_state->valid_bitmask |= + IF_INTERFACE_STATE_LQM_STATE_VALID; + if_interface_state->lqm_state = + ifp->if_interface_state.lqm_state; + } + if (ifp->if_interface_state.valid_bitmask & + IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) { + if_interface_state->valid_bitmask |= + IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID; + if_interface_state->interface_availability = + ifp->if_interface_state.interface_availability; + } + + ifnet_lock_done(ifp); +} + +errno_t +if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe) +{ + ifnet_lock_exclusive(ifp); + if (conn_probe > 1) { + ifnet_lock_done(ifp); + return (EINVAL); + } + if (conn_probe == 0) + ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY; + else + ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY; + ifnet_lock_done(ifp); + + tcp_probe_connectivity(ifp, conn_probe); + return (0); } /* for uuid.c */ @@ -6605,7 +7034,7 
@@ dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN], _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN); _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN); - bzero(&kev, sizeof (&kev)); + bzero(&kev, sizeof (kev)); microtime(&tv); kev.timestamp = tv.tv_sec; @@ -7075,6 +7504,125 @@ ifnet_calc_flowhash(struct ifnet *ifp) return (flowhash); } +int +ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len, + uint16_t flags, uint8_t *data) +{ +#pragma unused(flags) + int error = 0; + + switch (family) { + case AF_INET: + if_inetdata_lock_exclusive(ifp); + if (IN_IFEXTRA(ifp) != NULL) { + if (len == 0) { + /* Allow clearing the signature */ + IN_IFEXTRA(ifp)->netsig_len = 0; + bzero(IN_IFEXTRA(ifp)->netsig, + sizeof (IN_IFEXTRA(ifp)->netsig)); + if_inetdata_lock_done(ifp); + break; + } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) { + error = EINVAL; + if_inetdata_lock_done(ifp); + break; + } + IN_IFEXTRA(ifp)->netsig_len = len; + bcopy(data, IN_IFEXTRA(ifp)->netsig, len); + } else { + error = ENOMEM; + } + if_inetdata_lock_done(ifp); + break; + + case AF_INET6: + if_inet6data_lock_exclusive(ifp); + if (IN6_IFEXTRA(ifp) != NULL) { + if (len == 0) { + /* Allow clearing the signature */ + IN6_IFEXTRA(ifp)->netsig_len = 0; + bzero(IN6_IFEXTRA(ifp)->netsig, + sizeof (IN6_IFEXTRA(ifp)->netsig)); + if_inet6data_lock_done(ifp); + break; + } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) { + error = EINVAL; + if_inet6data_lock_done(ifp); + break; + } + IN6_IFEXTRA(ifp)->netsig_len = len; + bcopy(data, IN6_IFEXTRA(ifp)->netsig, len); + } else { + error = ENOMEM; + } + if_inet6data_lock_done(ifp); + break; + + default: + error = EINVAL; + break; + } + + return (error); +} + +int +ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len, + uint16_t *flags, uint8_t *data) +{ + int error = 0; + + if (ifp == NULL || len == NULL || flags == NULL || data == NULL) + return (EINVAL); + + switch (family) { + case AF_INET: + 
if_inetdata_lock_shared(ifp); + if (IN_IFEXTRA(ifp) != NULL) { + if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) { + error = EINVAL; + if_inetdata_lock_done(ifp); + break; + } + if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) + bcopy(IN_IFEXTRA(ifp)->netsig, data, *len); + else + error = ENOENT; + } else { + error = ENOMEM; + } + if_inetdata_lock_done(ifp); + break; + + case AF_INET6: + if_inet6data_lock_shared(ifp); + if (IN6_IFEXTRA(ifp) != NULL) { + if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) { + error = EINVAL; + if_inet6data_lock_done(ifp); + break; + } + if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) + bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len); + else + error = ENOENT; + } else { + error = ENOMEM; + } + if_inet6data_lock_done(ifp); + break; + + default: + error = EINVAL; + break; + } + + if (error == 0) + *flags = 0; + + return (error); +} + static void dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff, protocol_family_t pf) @@ -7304,6 +7852,25 @@ sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS return (err); } +static int +sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int err; + + if (req->oldptr == USER_ADDR_NULL) { + + } + if (req->newptr != USER_ADDR_NULL) { + return (EPERM); + } + err = SYSCTL_OUT(req, &tx_chain_len_stats, + sizeof(struct chain_len_stats)); + + return (err); +} + + #if DEBUG /* Blob for sum16 verification */ static uint8_t sumdata[] = { @@ -7469,3 +8036,73 @@ dlil_kev_dl_code_str(u_int32_t event_code) } return (""); } + +/* + * Mirror the arguments of ifnet_get_local_ports_extended() + * ifindex + * protocol + * flags + */ +static int +sysctl_get_ports_used SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp) + int *name = (int *)arg1; + int namelen = arg2; + int error = 0; + int idx; + protocol_family_t protocol; + u_int32_t flags; + ifnet_t ifp = NULL; + u_int8_t *bitfield = NULL; + + if (req->newptr) { + error = EPERM; + goto done; + } + if (namelen != 3) { + 
error = ENOENT; + goto done; + } + + if (req->oldptr == USER_ADDR_NULL) { + req->oldidx = bitstr_size(65536); + goto done; + } + if (req->oldlen < bitstr_size(65536)) { + error = ENOMEM; + goto done; + } + + idx = name[0]; + protocol = name[1]; + flags = name[2]; + + + ifnet_head_lock_shared(); + if (idx > if_index) { + ifnet_head_done(); + error = ENOENT; + goto done; + } + ifp = ifindex2ifnet[idx]; + ifnet_head_done(); + + bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK); + if (bitfield == NULL) { + error = ENOMEM; + goto done; + } + error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield); + if (error != 0) { + printf("%s: ifnet_get_local_ports_extended() error %d\n", + __func__, error); + goto done; + } + error = SYSCTL_OUT(req, bitfield, bitstr_size(65536)); +done: + if (bitfield != NULL) + _FREE(bitfield, M_TEMP); + return (error); +} + diff --git a/bsd/net/dlil.h b/bsd/net/dlil.h index da72b75f3..f2fb7161f 100644 --- a/bsd/net/dlil.h +++ b/bsd/net/dlil.h @@ -103,15 +103,39 @@ enum { *(nsp) += ((tvp)->tv_sec * (integer_t)NSEC_PER_SEC); \ } while (0) +#if defined(__x86_64__) || defined(__arm64__) #define net_nsectimer(nsp, tvp) do { \ u_int64_t __nsp = *(nsp); \ net_timerclear(tvp); \ - while ((__nsp) >= NSEC_PER_SEC) { \ - (tvp)->tv_sec++; \ - (__nsp) -= NSEC_PER_SEC; \ - } \ - (tvp)->tv_nsec = (__nsp); \ + uint64_t __sec = __nsp / NSEC_PER_SEC; \ + (tvp)->tv_sec = (__darwin_time_t)__sec; \ + (tvp)->tv_nsec = (long)(__nsp - __sec * NSEC_PER_SEC); \ } while (0) +#else /* 32 bit */ +/* + * NSEC needs to be < 2^31*10^9 to be representable in a struct timespec + * because __darwin_time_t is 32 bit on 32-bit platforms. This bound + * is < 2^61. We get a first approximation to convert into seconds using + * the following values. + * a = floor(NSEC / 2^29) + * inv = floor(2^61 / 10^9) + * + * The approximation of seconds is correct or too low by 1 unit. + * So we fix it by computing the remainder. 
+ */ +#define net_nsectimer(nsp, tvp) do { \ + u_int64_t __nsp = *(nsp); \ + net_timerclear(tvp); \ + uint32_t __a = (uint32_t)(__nsp >> 29); \ + const uint32_t __inv = 0x89705F41; \ + uint32_t __sec = (uint32_t)(((uint64_t)__a * __inv) >> 32); \ + uint32_t __rem = (uint32_t)(__nsp - __sec * NSEC_PER_SEC); \ + __sec += ((__rem >= NSEC_PER_SEC) ? 1 : 0); \ + (tvp)->tv_sec = (__darwin_time_t)__sec; \ + (tvp)->tv_nsec = \ + (long)((__rem >= NSEC_PER_SEC) ? (__rem - NSEC_PER_SEC) : __rem); \ +} while(0) +#endif /* 32 bit */ struct ifnet; struct mbuf; diff --git a/bsd/net/ether_if_module.c b/bsd/net/ether_if_module.c index 3a86d2674..8fd6074f1 100644 --- a/bsd/net/ether_if_module.c +++ b/bsd/net/ether_if_module.c @@ -550,7 +550,7 @@ ether_frameout_extended(struct ifnet *ifp, struct mbuf **m, * Add local net header. If no space in first mbuf, * allocate another. */ - M_PREPEND(*m, sizeof (struct ether_header), M_DONTWAIT); + M_PREPEND(*m, sizeof (struct ether_header), M_DONTWAIT, 0); if (*m == NULL) return (EJUSTRETURN); diff --git a/bsd/net/if.c b/bsd/net/if.c index c9bb74aa4..d65efe3a1 100644 --- a/bsd/net/if.c +++ b/bsd/net/if.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -97,6 +97,7 @@ #include #include #include +#include #include #include @@ -150,6 +151,7 @@ static int ifioctl_ifdesc(struct ifnet *, u_long, caddr_t, struct proc *); static int ifioctl_linkparams(struct ifnet *, u_long, caddr_t, struct proc *); static int ifioctl_qstats(struct ifnet *, u_long, caddr_t); static int ifioctl_throttle(struct ifnet *, u_long, caddr_t, struct proc *); +static int ifioctl_netsignature(struct ifnet *, u_long, caddr_t); static int ifconf(u_long cmd, user_addr_t ifrp, int * ret_space); __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *); void if_rtproto_del(struct ifnet *ifp, int protocol); @@ -417,7 +419,7 @@ if_next_index(void) /* allocate space for the larger arrays */ n = (2 * new_if_indexlim + 1) * sizeof(caddr_t); - new_ifnet_addrs = _MALLOC(n, M_IFADDR, M_WAITOK); + new_ifnet_addrs = _MALLOC(n, M_IFADDR, M_WAITOK | M_ZERO); if (new_ifnet_addrs == NULL) { --if_index; return -1; @@ -425,7 +427,6 @@ if_next_index(void) new_ifindex2ifnet = new_ifnet_addrs + new_if_indexlim * sizeof(caddr_t); - bzero(new_ifnet_addrs, n); if (ifnet_addrs != NULL) { /* copy the existing data */ bcopy((caddr_t)ifnet_addrs, new_ifnet_addrs, @@ -627,7 +628,6 @@ if_clone_attach(struct if_clone *ifc) ifc->ifc_units = _MALLOC(len, M_CLONE, M_WAITOK | M_ZERO); if (ifc->ifc_units == NULL) return ENOBUFS; - bzero(ifc->ifc_units, len); ifc->ifc_bmlen = len; LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list); @@ -691,6 +691,28 @@ if_clone_list(int count, int *ret_total, user_addr_t dst) return (error); } +u_int32_t +if_functional_type(struct ifnet *ifp) +{ + u_int32_t ret = IFRTYPE_FUNCTIONAL_UNKNOWN; + if (ifp != NULL) { + if (ifp->if_flags & IFF_LOOPBACK) { + ret = IFRTYPE_FUNCTIONAL_LOOPBACK; + } else if (IFNET_IS_WIFI(ifp)) { + if (ifp->if_eflags & IFEF_AWDL) + ret = IFRTYPE_FUNCTIONAL_WIFI_AWDL; + else + ret = IFRTYPE_FUNCTIONAL_WIFI_INFRA; + } else if (IFNET_IS_CELLULAR(ifp)) { + ret = 
IFRTYPE_FUNCTIONAL_CELLULAR; + } else if (IFNET_IS_WIRED(ifp)) { + ret = IFRTYPE_FUNCTIONAL_WIRED; + } + } + + return ret; +} + /* * Similar to ifa_ifwithaddr, except that this is IPv4 specific * and that it matches only the local (not broadcast) address. @@ -1698,6 +1720,173 @@ ifioctl_throttle(struct ifnet *ifp, u_long cmd, caddr_t data, struct proc *p) return (error); } +static int +ifioctl_getnetagents(struct ifnet *ifp, u_int32_t *count, user_addr_t uuid_p) +{ + int error = 0; + int index = 0; + u_int32_t valid_netagent_count = 0; + *count = 0; + for (index = 0; index < IF_MAXAGENTS; index++) { + uuid_t *netagent_uuid = &(ifp->if_agentids[index]); + if (!uuid_is_null(*netagent_uuid)) { + if (uuid_p != USER_ADDR_NULL) { + if ((error = copyout(netagent_uuid, + uuid_p + sizeof(uuid_t) * valid_netagent_count, + sizeof(uuid_t))) != 0) { + return (error); + } + } + valid_netagent_count++; + } + } + *count = valid_netagent_count; + + return (0); +} + +static __attribute__((noinline)) int +ifioctl_netagent(struct ifnet *ifp, u_long cmd, caddr_t data, struct proc *p) +{ + struct if_agentidreq *ifar = (struct if_agentidreq *)(void *)data; + union { + struct if_agentidsreq32 s32; + struct if_agentidsreq64 s64; + } u; + int error = 0; + int index = 0; + + VERIFY(ifp != NULL); + + switch (cmd) { + case SIOCAIFAGENTID: { /* struct if_agentidreq */ + uuid_t *first_empty_slot = NULL; + // TODO: Use priv_check_cred() instead of root check + if ((error = proc_suser(p)) != 0) { + break; + } + for (index = 0; index < IF_MAXAGENTS; index++) { + uuid_t *netagent_uuid = &(ifp->if_agentids[index]); + if (uuid_compare(*netagent_uuid, ifar->ifar_uuid) == 0) { + /* Already present, ignore */ + break; + } + if (first_empty_slot == NULL && + uuid_is_null(*netagent_uuid)) { + first_empty_slot = netagent_uuid; + } + } + if (first_empty_slot == NULL) { + error = ENOMEM; /* No empty slot for a netagent UUID, bail */ + break; + } + uuid_copy(*first_empty_slot, ifar->ifar_uuid); + 
netagent_post_updated_interfaces(ifar->ifar_uuid); + break; + } + case SIOCDIFAGENTID: { /* struct if_agentidreq */ + bool removed_agent_id = FALSE; + // TODO: Use priv_check_cred() instead of root check + if ((error = proc_suser(p)) != 0) { + break; + } + for (index = 0; index < IF_MAXAGENTS; index++) { + uuid_t *netagent_uuid = &(ifp->if_agentids[index]); + if (uuid_compare(*netagent_uuid, ifar->ifar_uuid) == 0) { + uuid_clear(*netagent_uuid); + removed_agent_id = TRUE; + break; + } + } + if (removed_agent_id) { + netagent_post_updated_interfaces(ifar->ifar_uuid); + } + break; + } + case SIOCGIFAGENTIDS32: { /* struct if_agentidsreq32 */ + bcopy(data, &u.s32, sizeof(u.s32)); + error = ifioctl_getnetagents(ifp, &u.s32.ifar_count, u.s32.ifar_uuids); + if (error == 0) { + bcopy(&u.s32, data, sizeof(u.s32)); + } + break; + } + case SIOCGIFAGENTIDS64: { /* struct if_agentidsreq64 */ + bcopy(data, &u.s64, sizeof(u.s64)); + error = ifioctl_getnetagents(ifp, &u.s64.ifar_count, u.s64.ifar_uuids); + if (error == 0) { + bcopy(&u.s64, data, sizeof(u.s64)); + } + break; + } + default: + VERIFY(0); + /* NOTREACHED */ + } + + return (error); +} + +void +ifnet_clear_netagent(uuid_t netagent_uuid) +{ + struct ifnet *ifp = NULL; + int index = 0; + bool removed_agent_id = FALSE; + + ifnet_head_lock_shared(); + + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { + for (index = 0; index < IF_MAXAGENTS; index++) { + uuid_t *ifp_netagent_uuid = &(ifp->if_agentids[index]); + if (uuid_compare(*ifp_netagent_uuid, netagent_uuid) == 0) { + uuid_clear(*ifp_netagent_uuid); + removed_agent_id = TRUE; + } + } + } + + ifnet_head_done(); +} + +static __attribute__((noinline)) int +ifioctl_netsignature(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct if_nsreq *ifnsr = (struct if_nsreq *)(void *)data; + u_int16_t flags; + int error = 0; + + VERIFY(ifp != NULL); + + switch (cmd) { + case SIOCSIFNETSIGNATURE: /* struct if_nsreq */ + if (ifnsr->ifnsr_len > sizeof (ifnsr->ifnsr_data)) { + error = 
EINVAL; + break; + } + bcopy(&ifnsr->ifnsr_flags, &flags, sizeof (flags)); + error = ifnet_set_netsignature(ifp, ifnsr->ifnsr_family, + ifnsr->ifnsr_len, flags, ifnsr->ifnsr_data); + break; + + case SIOCGIFNETSIGNATURE: /* struct if_nsreq */ + ifnsr->ifnsr_len = sizeof (ifnsr->ifnsr_data); + error = ifnet_get_netsignature(ifp, ifnsr->ifnsr_family, + &ifnsr->ifnsr_len, &flags, ifnsr->ifnsr_data); + if (error == 0) + bcopy(&flags, &ifnsr->ifnsr_flags, sizeof (flags)); + else + ifnsr->ifnsr_len = 0; + break; + + default: + VERIFY(0); + /* NOTREACHED */ + } + + return (error); +} + /* * Interface ioctls. * @@ -1732,6 +1921,11 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) error = ifioctl_ifclone(cmd, data); goto done; + case SIOCGIFAGENTDATA32: /* struct netagent_req32 */ + case SIOCGIFAGENTDATA64: /* struct netagent_req64 */ + error = netagent_ioctl(cmd, data); + goto done; + case SIOCSIFDSTADDR: /* struct ifreq */ case SIOCSIFADDR: /* struct ifreq */ case SIOCSIFBRDADDR: /* struct ifreq */ @@ -1775,6 +1969,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) case SIOCSIFBOND: /* struct ifreq */ case SIOCGIFLLADDR: /* struct ifreq */ case SIOCGIFTYPE: /* struct ifreq */ + case SIOCGIFFUNCTIONALTYPE: /* struct ifreq */ case SIOCGIFPSRCADDR: /* struct ifreq */ case SIOCGIFPDSTADDR: /* struct ifreq */ case SIOCGIFGENERIC: /* struct ifreq */ @@ -1792,7 +1987,12 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) case SIOCGIFEXPENSIVE: /* struct ifreq */ case SIOCSIFEXPENSIVE: /* struct ifreq */ case SIOCSIF2KCL: /* struct ifreq */ - case SIOCGIF2KCL: { /* struct ifreq */ + case SIOCGIF2KCL: /* struct ifreq */ + case SIOCSIFINTERFACESTATE: /* struct ifreq */ + case SIOCGIFINTERFACESTATE: /* struct ifreq */ + case SIOCSIFPROBECONNECTIVITY: /* struct ifreq */ + case SIOCGIFPROBECONNECTIVITY: /* struct ifreq */ + case SIOCGSTARTDELAY: { /* struct ifreq */ struct ifreq ifr; bcopy(data, &ifr, sizeof (ifr)); 
ifr.ifr_name[IFNAMSIZ - 1] = '\0'; @@ -1881,6 +2081,22 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) ifp = ifunit(ifname); break; + case SIOCAIFAGENTID: /* struct if_agentidreq */ + case SIOCDIFAGENTID: /* struct if_agentidreq */ + case SIOCGIFAGENTIDS32: /* struct if_agentidsreq32 */ + case SIOCGIFAGENTIDS64: /* struct if_agentidsreq64 */ + bcopy(((struct if_agentidreq *)(void *)data)->ifar_name, + ifname, IFNAMSIZ); + ifp = ifunit(ifname); + break; + + case SIOCSIFNETSIGNATURE: /* struct if_nsreq */ + case SIOCGIFNETSIGNATURE: /* struct if_nsreq */ + bcopy(((struct if_nsreq *)(void *)data)->ifnsr_name, + ifname, IFNAMSIZ); + ifp = ifunit(ifname); + break; + default: /* * This is a bad assumption, but the code seems to @@ -1948,6 +2164,18 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) error = ifioctl_throttle(ifp, cmd, data, p); break; + case SIOCAIFAGENTID: /* struct if_agentidreq */ + case SIOCDIFAGENTID: /* struct if_agentidreq */ + case SIOCGIFAGENTIDS32: /* struct if_agentidsreq32 */ + case SIOCGIFAGENTIDS64: /* struct if_agentidsreq64 */ + error = ifioctl_netagent(ifp, cmd, data, p); + break; + + case SIOCSIFNETSIGNATURE: /* struct if_nsreq */ + case SIOCGIFNETSIGNATURE: /* struct if_nsreq */ + error = ifioctl_netsignature(ifp, cmd, data); + break; + default: if (so->so_proto == NULL) { error = EOPNOTSUPP; @@ -2334,6 +2562,10 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) ifr->ifr_type.ift_subfamily = ifp->if_subfamily; break; + case SIOCGIFFUNCTIONALTYPE: + ifr->ifr_functional_type = if_functional_type(ifp); + break; + case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCGIFGENERIC: @@ -2362,7 +2594,17 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) case SIOCGIFLINKQUALITYMETRIC: ifnet_lock_shared(ifp); - ifr->ifr_link_quality_metric = ifp->if_lqm; + if ((ifp->if_interface_state.valid_bitmask & + IF_INTERFACE_STATE_LQM_STATE_VALID)) + 
ifr->ifr_link_quality_metric = + ifp->if_interface_state.lqm_state; + else if ((ifp->if_refflags & IFRF_ATTACHED)) { + ifr->ifr_link_quality_metric = + IFNET_LQM_THRESH_UNKNOWN; + } else { + ifr->ifr_link_quality_metric = + IFNET_LQM_THRESH_OFF; + } ifnet_lock_done(ifp); break; @@ -2438,7 +2680,19 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) ifp->if_eflags &= ~IFEF_2KCL; ifnet_lock_done(ifp); break; - + case SIOCGSTARTDELAY: + ifnet_lock_shared(ifp); + if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) { + ifr->ifr_start_delay_qlen = + ifp->if_start_delay_qlen; + ifr->ifr_start_delay_timeout = + ifp->if_start_delay_timeout; + } else { + ifr->ifr_start_delay_qlen = 0; + ifr->ifr_start_delay_timeout = 0; + } + ifnet_lock_done(ifp); + break; case SIOCSIFDSTADDR: case SIOCSIFADDR: case SIOCSIFBRDADDR: @@ -2498,6 +2752,34 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) } break; + case SIOCGIFINTERFACESTATE: + if_get_state(ifp, &ifr->ifr_interface_state); + + break; + case SIOCSIFINTERFACESTATE: + if ((error = priv_check_cred(kauth_cred_get(), + PRIV_NET_INTERFACE_CONTROL, 0)) != 0) + return (error); + + error = if_state_update(ifp, &ifr->ifr_interface_state); + + break; + case SIOCSIFPROBECONNECTIVITY: + if ((error = priv_check_cred(kauth_cred_get(), + PRIV_NET_INTERFACE_CONTROL, 0)) != 0) + return (error); + error = if_probe_connectivity(ifp, + ifr->ifr_probe_connectivity); + break; + case SIOCGIFPROBECONNECTIVITY: + if ((error = priv_check_cred(kauth_cred_get(), + PRIV_NET_INTERFACE_CONTROL, 0)) != 0) + return (error); + if (ifp->if_eflags & IFEF_PROBE_CONNECTIVITY) + ifr->ifr_probe_connectivity = 1; + else + ifr->ifr_probe_connectivity = 0; + break; default: VERIFY(0); /* NOTREACHED */ @@ -4012,6 +4294,15 @@ ifioctl_cassert(void) case SIOCGIFDELEGATE: case SIOCGIFLLADDR: case SIOCGIFTYPE: + case SIOCGIFFUNCTIONALTYPE: + case SIOCAIFAGENTID: + case SIOCDIFAGENTID: + case SIOCGIFAGENTIDS32: + case 
SIOCGIFAGENTIDS64: + case SIOCGIFAGENTDATA32: + case SIOCGIFAGENTDATA64: + case SIOCSIFINTERFACESTATE: + case SIOCGIFINTERFACESTATE: ; } } diff --git a/bsd/net/if.h b/bsd/net/if.h index fd7800d8a..62afa9cd4 100644 --- a/bsd/net/if.h +++ b/bsd/net/if.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -104,6 +104,7 @@ #define KEV_DL_IFDELEGATE_CHANGED 25 #define KEV_DL_AWDL_RESTRICTED 26 #define KEV_DL_AWDL_UNRESTRICTED 27 +#define KEV_DL_RRC_STATE_CHANGED 28 #include #include @@ -158,6 +159,9 @@ struct if_clonereq32 { #ifdef PRIVATE /* extended flags definitions: (all bits reserved for internal/future use) */ #define IFEF_AUTOCONFIGURING 0x00000001 /* allow BOOTP/DHCP replies to enter */ +#define IFEF_ENQUEUE_MULTI 0x00000002 /* enqueue multiple packets at once */ +#define IFEF_DELAY_START 0x00000004 /* delay start callback */ +#define IFEF_PROBE_CONNECTIVITY 0x00000008 /* Probe connections going over this interface */ #define IFEF_IPV6_DISABLED 0x00000020 /* coupled to ND6_IFF_IFDISABLED */ #define IFEF_ACCEPT_RTADV 0x00000040 /* accepts IPv6 RA on the interface */ #define IFEF_TXSTART 0x00000080 /* has start callback */ @@ -466,9 +470,22 @@ struct ifreq { #define IFRTYPE_SUBFAMILY_THUNDERBOLT 4 #define IFRTYPE_SUBFAMILY_RESERVED 5 } ifru_type; + u_int32_t ifru_functional_type; +#define IFRTYPE_FUNCTIONAL_UNKNOWN 0 +#define IFRTYPE_FUNCTIONAL_LOOPBACK 1 +#define IFRTYPE_FUNCTIONAL_WIRED 2 +#define IFRTYPE_FUNCTIONAL_WIFI_INFRA 3 +#define IFRTYPE_FUNCTIONAL_WIFI_AWDL 4 +#define IFRTYPE_FUNCTIONAL_CELLULAR 5 +#define IFRTYPE_FUNCTIONAL_LAST 5 u_int32_t ifru_expensive; - u_int32_t ifru_awdl_restricted; u_int32_t ifru_2kcl; + struct { + u_int32_t qlen; + u_int32_t timeout; + } ifru_start_delay; + struct if_interface_state ifru_interface_state; + u_int32_t ifru_probe_connectivity; #endif /* PRIVATE */ } ifr_ifru; #define ifr_addr 
ifr_ifru.ifru_addr /* address */ @@ -505,8 +522,12 @@ struct ifreq { #define ifr_delegated ifr_ifru.ifru_delegated /* delegated interface index */ #define ifr_expensive ifr_ifru.ifru_expensive #define ifr_type ifr_ifru.ifru_type /* interface type */ -#define ifr_awdl_restricted ifr_ifru.ifru_awdl_restricted +#define ifr_functional_type ifr_ifru.ifru_functional_type #define ifr_2kcl ifr_ifru.ifru_2kcl +#define ifr_start_delay_qlen ifr_ifru.ifru_start_delay.qlen +#define ifr_start_delay_timeout ifr_ifru.ifru_start_delay.timeout +#define ifr_interface_state ifr_ifru.ifru_interface_state +#define ifr_probe_connectivity ifr_ifru.ifru_probe_connectivity #endif /* PRIVATE */ }; @@ -818,6 +839,36 @@ enum { #endif /* XNU_KERNEL_PRIVATE */ }; +/* + * Structure for SIOC[A/D]IFAGENTID + */ +struct if_agentidreq { + char ifar_name[IFNAMSIZ]; /* interface name */ + uuid_t ifar_uuid; /* agent UUID to add or delete */ +}; + +/* + * Structure for SIOCGIFAGENTIDS + */ +struct if_agentidsreq { + char ifar_name[IFNAMSIZ]; /* interface name */ + u_int32_t ifar_count; /* number of agent UUIDs */ + uuid_t *ifar_uuids; /* array of agent UUIDs */ +}; + +#ifdef BSD_KERNEL_PRIVATE +struct if_agentidsreq32 { + char ifar_name[IFNAMSIZ]; + u_int32_t ifar_count; + user32_addr_t ifar_uuids; +}; +struct if_agentidsreq64 { + char ifar_name[IFNAMSIZ]; + u_int32_t ifar_count; + user64_addr_t ifar_uuids __attribute__((aligned(8))); +}; +#endif /* BSD_KERNEL_PRIVATE */ + #define DLIL_MODIDLEN 20 /* same as IFNET_MODIDLEN */ #define DLIL_MODARGLEN 12 /* same as IFNET_MODARGLEN */ @@ -830,6 +881,30 @@ struct kev_dl_issues { u_int64_t timestamp; u_int8_t info[DLIL_MODARGLEN]; }; + +/* + * DLIL KEV_DL_RRC_STATE_CHANGED structure + */ +struct kev_dl_rrc_state { + struct net_event_data link_data; + u_int32_t rrc_state; +}; + +/* + * Length of network signature/fingerprint blob. 
+ */ +#define IFNET_SIGNATURELEN 20 + +/* + * Structure for SIOC[S/G]IFNETSIGNATURE + */ +struct if_nsreq { + char ifnsr_name[IFNAMSIZ]; + u_int8_t ifnsr_family; /* address family */ + u_int8_t ifnsr_len; /* data length */ + u_int16_t ifnsr_flags; /* for future */ + u_int8_t ifnsr_data[IFNET_SIGNATURELEN]; +}; #endif /* PRIVATE */ #ifdef KERNEL diff --git a/bsd/net/if_bond.c b/bsd/net/if_bond.c index 2bb5113f1..34f6e03d4 100644 --- a/bsd/net/if_bond.c +++ b/bsd/net/if_bond.c @@ -801,6 +801,10 @@ link_speed(int active) case IFM_10G_SR: case IFM_10G_LR: return (10000); + case IFM_2500_T: + return (2500); + case IFM_5000_T: + return (5000); } } @@ -866,11 +870,10 @@ bond_globals_create(lacp_system_priority sys_pri, { bond_globals_ref b; - b = _MALLOC(sizeof(*b), M_BOND, M_WAITOK); + b = _MALLOC(sizeof(*b), M_BOND, M_WAITOK | M_ZERO); if (b == NULL) { return (NULL); } - bzero(b, sizeof(*b)); TAILQ_INIT(&b->ifbond_list); b->system = *sys; b->system_priority = sys_pri; @@ -1089,11 +1092,10 @@ bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params) return (error); } - ifb = _MALLOC(sizeof(ifbond), M_BOND, M_WAITOK); + ifb = _MALLOC(sizeof(ifbond), M_BOND, M_WAITOK | M_ZERO); if (ifb == NULL) { return (ENOMEM); } - bzero(ifb, sizeof(*ifb)); ifbond_retain(ifb); TAILQ_INIT(&ifb->ifb_port_list); @@ -1818,12 +1820,11 @@ bondport_create(struct ifnet * port_ifp, lacp_port_priority priority, lacp_actor_partner_state s; *ret_error = 0; - p = _MALLOC(sizeof(*p), M_BOND, M_WAITOK); + p = _MALLOC(sizeof(*p), M_BOND, M_WAITOK | M_ZERO); if (p == NULL) { *ret_error = ENOMEM; return (NULL); } - bzero(p, sizeof(*p)); multicast_list_init(&p->po_multicast); if ((u_int32_t)snprintf(p->po_name, sizeof(p->po_name), "%s%d", ifnet_name(port_ifp), ifnet_unit(port_ifp)) diff --git a/bsd/net/if_bridge.c b/bsd/net/if_bridge.c index 98fff2803..9a2b34f3d 100644 --- a/bsd/net/if_bridge.c +++ b/bsd/net/if_bridge.c @@ -5414,7 +5414,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet 
*bifp, struct ifnet *ifp, int dir) if (DUMMYNET_LOADED && (i == IP_FW_DUMMYNET)) { /* put the Ethernet header back on */ - M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT); + M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0); if (*mp == NULL) return (error); bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN); @@ -5545,13 +5545,13 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) * Finally, put everything back the way it was and return */ if (snap) { - M_PREPEND(*mp, sizeof (struct llc), M_DONTWAIT); + M_PREPEND(*mp, sizeof (struct llc), M_DONTWAIT, 0); if (*mp == NULL) return (error); bcopy(&llc1, mtod(*mp, caddr_t), sizeof (struct llc)); } - M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT); + M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0); if (*mp == NULL) return (error); bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN); @@ -5748,7 +5748,7 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh, for (m0 = m; m0; m0 = m0->m_nextpkt) { if (error == 0) { if (snap) { - M_PREPEND(m0, sizeof (struct llc), M_DONTWAIT); + M_PREPEND(m0, sizeof (struct llc), M_DONTWAIT, 0); if (m0 == NULL) { error = ENOBUFS; continue; @@ -5756,7 +5756,7 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh, bcopy(llc, mtod(m0, caddr_t), sizeof (struct llc)); } - M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT); + M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT, 0); if (m0 == NULL) { error = ENOBUFS; continue; diff --git a/bsd/net/if_gif.c b/bsd/net/if_gif.c index c33ef2f04..f144822ba 100644 --- a/bsd/net/if_gif.c +++ b/bsd/net/if_gif.c @@ -330,14 +330,13 @@ gif_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) goto done; } - sc = _MALLOC(sizeof (struct gif_softc), M_DEVBUF, M_WAITOK); + sc = _MALLOC(sizeof (struct gif_softc), M_DEVBUF, M_WAITOK | M_ZERO); if (sc == NULL) { log(LOG_ERR, "gif_clone_create: failed to allocate gif%d\n", unit); error = ENOBUFS; goto done; } - bzero(sc, sizeof (struct gif_softc)); /* use the interface name 
as the unique id for ifp recycle */ snprintf(sc->gif_ifname, sizeof (sc->gif_ifname), "%s%d", diff --git a/bsd/net/if_ipsec.c b/bsd/net/if_ipsec.c index 460fa731c..f8953609d 100644 --- a/bsd/net/if_ipsec.c +++ b/bsd/net/if_ipsec.c @@ -655,7 +655,12 @@ ipsec_output(ifnet_t interface, ipsec_state.dst = (struct sockaddr *)&ip->ip_dst; bzero(&ipsec_state.ro, sizeof(ipsec_state.ro)); - error = ipsec4_interface_output(&ipsec_state, interface); + error = ipsec4_interface_output(&ipsec_state, interface); + /* Tunneled in IPv6 - packet is gone */ + if (error == 0 && ipsec_state.tunneled == 6) { + goto done; + } + data = ipsec_state.m; if (error || data == NULL) { printf("ipsec_output: ipsec4_output error %d.\n", error); @@ -708,6 +713,11 @@ ipsec_output(ifnet_t interface, bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af)); data = ipsec6_splithdr(data); + if (data == NULL) { + printf("ipsec_output: ipsec6_splithdr returned NULL\n"); + goto ipsec_output_err; + } + ip6 = mtod(data, struct ip6_hdr *); bzero(&ipsec_state, sizeof(ipsec_state)); @@ -900,8 +910,12 @@ ipsec_proto_input(ifnet_t interface, mbuf_pkthdr_setrcvif(m, interface); bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af)); - if (proto_input(protocol, m) != 0) + if (proto_input(protocol, m) != 0) { + ifnet_stat_increment_in(interface, 0, 0, 1); m_freem(m); + } else { + ifnet_stat_increment_in(interface, 1, m->m_pkthdr.len, 0); + } return 0; } @@ -966,7 +980,7 @@ ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family) if (family == AF_INET) { struct ip *ip = mtod(packet, struct ip *); packet->m_pkthdr.pkt_proto = ip->ip_p; - } else if (family == AF_INET) { + } else if (family == AF_INET6) { struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *); packet->m_pkthdr.pkt_proto = ip6->ip6_nxt; } diff --git a/bsd/net/if_llreach.h b/bsd/net/if_llreach.h index e2ce299a8..4b1f5ff3a 100644 --- a/bsd/net/if_llreach.h +++ b/bsd/net/if_llreach.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 Apple 
Inc. All rights reserved. + * Copyright (c) 2011-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -76,7 +76,7 @@ struct if_llreach_info { #endif /* !INET6 */ /* - * Per-interface link-layer reachability. (Currently only for ARP/Ethernet.) + * Per-interface link-layer reachability. (Currently only for ARP/NDP/Ethernet.) */ #define IF_LLREACH_MAXLEN ETHER_ADDR_LEN diff --git a/bsd/net/if_loop.c b/bsd/net/if_loop.c index f659d3582..a22c68cf0 100644 --- a/bsd/net/if_loop.c +++ b/bsd/net/if_loop.c @@ -223,7 +223,7 @@ lo_framer(struct ifnet *ifp, struct mbuf **m, const struct sockaddr *dest, #pragma unused(ifp, dest, dest_linkaddr) struct loopback_header *header; - M_PREPEND(*m, sizeof (struct loopback_header), M_WAITOK); + M_PREPEND(*m, sizeof (struct loopback_header), M_WAITOK, 1); if (*m == NULL) { /* Tell caller not to try to free passed-in mbuf */ return (EJUSTRETURN); diff --git a/bsd/net/if_media.h b/bsd/net/if_media.h index afba92a43..633eb62cd 100644 --- a/bsd/net/if_media.h +++ b/bsd/net/if_media.h @@ -121,6 +121,8 @@ #define IFM_10G_LR 19 /* 10GbaseLR - single-mode fiber */ #define IFM_10G_CX4 20 /* 10GbaseCX4 - copper */ #define IFM_10G_T 21 /* 10GbaseT - 4 pair cat 6 */ +#define IFM_2500_T 22 /* 2500baseT - 4 pair cat 5 */ +#define IFM_5000_T 23 /* 5000baseT - 4 pair cat 5 */ /* * Token ring @@ -254,6 +256,8 @@ struct ifmedia_description { { IFM_10G_LR, "10GbaseLR" }, \ { IFM_10G_CX4, "10GbaseCX4" }, \ { IFM_10G_T, "10GbaseT" }, \ + { IFM_2500_T, "2500baseT" }, \ + { IFM_5000_T, "5000baseT" }, \ { 0, NULL }, \ } diff --git a/bsd/net/if_stf.c b/bsd/net/if_stf.c index bf29b91f6..72abbef09 100644 --- a/bsd/net/if_stf.c +++ b/bsd/net/if_stf.c @@ -321,14 +321,12 @@ stfattach(void) if (error != 0) printf("proto_register_plumber failed for AF_INET6 error=%d\n", error); - sc = _MALLOC(sizeof(struct stf_softc), M_DEVBUF, M_WAITOK); + sc = _MALLOC(sizeof(struct stf_softc), M_DEVBUF, M_WAITOK | M_ZERO); if (sc == 0) { printf("stf 
softc attach failed\n" ); return; } - bzero(sc, sizeof(*sc)); - p = encap_attach_func(AF_INET, IPPROTO_IPV6, stf_encapcheck, &in_stf_protosw, sc); if (p == NULL) { @@ -579,7 +577,7 @@ stf_pre_output( bpf_tap_out(ifp, 0, m, &af, sizeof(af)); } - M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); + M_PREPEND(m, sizeof(struct ip), M_DONTWAIT, 1); if (m && mbuf_len(m) < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) { @@ -600,7 +598,7 @@ stf_pre_output( ip->ip_ttl = ip_stf_ttl; ip->ip_len = m->m_pkthdr.len; /*host order*/ if (ifp->if_flags & IFF_LINK1) - ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos); + ip_ecn_ingress(ECN_NORMAL, &ip->ip_tos, &tos); else ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos); @@ -796,7 +794,7 @@ in_stf_input( itos = (ntohl(ip6.ip6_flow) >> 20) & 0xff; if ((ifnet_flags(ifp) & IFF_LINK1) != 0) - ip_ecn_egress(ECN_ALLOWED, &otos, &itos); + ip_ecn_egress(ECN_NORMAL, &otos, &itos); else ip_ecn_egress(ECN_NOCARE, &otos, &itos); ip6.ip6_flow &= ~htonl(0xff << 20); diff --git a/bsd/net/if_utun.c b/bsd/net/if_utun.c index 5570c8578..4261be968 100644 --- a/bsd/net/if_utun.c +++ b/bsd/net/if_utun.c @@ -202,7 +202,6 @@ utun_ctl_connect( *unitinfo = pcb; pcb->utun_ctlref = kctlref; pcb->utun_unit = sac->sc_unit; - pcb->utun_pending_packets = 0; pcb->utun_max_pending_packets = 1; printf("utun_ctl_connect: creating interface utun%d\n", pcb->utun_unit - 1); @@ -655,20 +654,25 @@ utun_ctl_getopt( static void utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags) { -#pragma unused(kctlref, unit, flags) +#pragma unused(flags) bool reenable_output = false; struct utun_pcb *pcb = unitinfo; if (pcb == NULL) { return; } ifnet_lock_exclusive(pcb->utun_ifp); - if (pcb->utun_pending_packets > 0) { - pcb->utun_pending_packets--; - if (pcb->utun_pending_packets < pcb->utun_max_pending_packets) { - reenable_output = true; - } + + u_int32_t utun_packet_cnt; + errno_t error_pc = ctl_getenqueuepacketcount(kctlref, unit, 
&utun_packet_cnt); + if (error_pc != 0) { + printf("utun_ctl_rcvd: ctl_getenqueuepacketcount returned error %d\n", error_pc); + utun_packet_cnt = 0; } - + + if (utun_packet_cnt < pcb->utun_max_pending_packets) { + reenable_output = true; + } + if (reenable_output) { errno_t error = ifnet_enable_output(pcb->utun_ifp); if (error != 0) { @@ -687,7 +691,15 @@ utun_start(ifnet_t interface) for (;;) { bool can_accept_packets = true; ifnet_lock_shared(pcb->utun_ifp); - can_accept_packets = (pcb->utun_pending_packets < pcb->utun_max_pending_packets); + + u_int32_t utun_packet_cnt; + errno_t error_pc = ctl_getenqueuepacketcount(pcb->utun_ctlref, pcb->utun_unit, &utun_packet_cnt); + if (error_pc != 0) { + printf("utun_start: ctl_getenqueuepacketcount returned error %d\n", error_pc); + utun_packet_cnt = 0; + } + + can_accept_packets = (utun_packet_cnt < pcb->utun_max_pending_packets); if (!can_accept_packets && pcb->utun_ctlref) { u_int32_t difference = 0; if (ctl_getenqueuereadable(pcb->utun_ctlref, pcb->utun_unit, &difference) == 0) { @@ -750,16 +762,8 @@ utun_output( *(u_int32_t *)mbuf_data(data) = htonl(*(u_int32_t *)mbuf_data(data)); length = mbuf_pkthdr_len(data); - // Increment packet count optimistically - ifnet_lock_exclusive(pcb->utun_ifp); - pcb->utun_pending_packets++; - ifnet_lock_done(pcb->utun_ifp); result = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, data, CTL_DATA_EOR); if (result != 0) { - // Decrement packet count if errored - ifnet_lock_exclusive(pcb->utun_ifp); - pcb->utun_pending_packets--; - ifnet_lock_done(pcb->utun_ifp); mbuf_freem(data); printf("utun_output - ctl_enqueuembuf failed: %d\n", result); diff --git a/bsd/net/if_utun.h b/bsd/net/if_utun.h index cc74d87b3..2ffd72ee1 100644 --- a/bsd/net/if_utun.h +++ b/bsd/net/if_utun.h @@ -46,7 +46,6 @@ struct utun_pcb { u_int32_t utun_flags; int utun_ext_ifdata_stats; u_int32_t utun_max_pending_packets; - u_int32_t utun_pending_packets; utun_crypto_ctx_t utun_crypto_ctx[UTUN_CRYPTO_CTX_NUM_DIRS]; }; 
diff --git a/bsd/net/if_utun_crypto_dtls.c b/bsd/net/if_utun_crypto_dtls.c index 966447af2..3565c4feb 100644 --- a/bsd/net/if_utun_crypto_dtls.c +++ b/bsd/net/if_utun_crypto_dtls.c @@ -910,7 +910,7 @@ utun_ctl_stop_datatraffic_crypto_dtls(struct utun_pcb *pcb) *(protocol_family_t *)mbuf_data(*pkt) = htonl(*(protocol_family_t *)mbuf_data(*pkt)); \ rc = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, *pkt, CTL_DATA_EOR); \ if (rc != 0) { \ - printf("%s: - ctl_enqueuembuf failed (rc %d) for %s:\n", __FUNCTION__, rc, (char *)errstr); \ + printf("%s: - ctl_enqueuembuf failed (rc %d) for %s:\n", __FUNCTION__, rc, errstr); \ mbuf_freem(*pkt); \ ifnet_stat_increment_out(pcb->utun_ifp, 0, 0, 1); \ lck_mtx_unlock(&dtls_ctl_mutex); \ @@ -965,7 +965,7 @@ utun_pkt_dtls_output(struct utun_pcb *pcb, mbuf_t *pkt) } else if (rc == EINVAL) { // unsupported proto... fall through and punt (but 1st undo the protocol strip) utun_pkt_dtls_prepend_proto(pkt, proto); - utun_pkt_dtls_puntup(pcb, pkt, (char *)"unsupported proto", rc); + utun_pkt_dtls_puntup(pcb, pkt, "unsupported proto", rc); } else { // mbuf_prepend failure... mbuf will be already freed printf("%s: failed to encrypsulate and send pkt %d\n", __FUNCTION__,rc); @@ -974,7 +974,7 @@ utun_pkt_dtls_output(struct utun_pcb *pcb, mbuf_t *pkt) return 0; // and drop packet } } else { - utun_pkt_dtls_puntup(pcb, pkt, (char *)"slowpath", rc); + utun_pkt_dtls_puntup(pcb, pkt, "slowpath", rc); } if (!rc) diff --git a/bsd/net/if_var.h b/bsd/net/if_var.h index efa21e8b8..29e253bc4 100644 --- a/bsd/net/if_var.h +++ b/bsd/net/if_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -326,6 +326,204 @@ struct if_rxpoll_stats { u_int32_t ifi_poll_packets_limit; /* max packets per poll call */ u_int64_t ifi_poll_interval_time; /* poll interval (nsec) */ }; + +/* + * Interface link status report -- includes statistics related to + * the link layer technology sent by the driver. The driver will monitor + * these statistics over an interval (3-4 secs) and will generate a report + * to the network stack. This will give first-hand information about the + * status of the first hop of the network path. The version and + * length values should be correct for the data to be processed correctly. + * The definitions are different for different kind of interfaces like + * Wifi, Cellular etc,. + */ +#define IF_CELLULAR_STATUS_REPORT_VERSION_1 1 +#define IF_WIFI_STATUS_REPORT_VERSION_1 1 +#define IF_CELLULAR_STATUS_REPORT_CURRENT_VERSION \ + IF_CELLULAR_STATUS_REPORT_VERSION_1 +#define IF_WIFI_STATUS_REPORT_CURRENT_VERSION IF_WIFI_STATUS_REPORT_VERSION_1 +/* + * For cellular interface -- + * There is no way to share common headers between the Baseband and + * the kernel. Any changes to this structure will need to be communicated + * to the Baseband team. It is better to use reserved space instead of + * changing the size or existing fields in the structure. 
+ */ +struct if_cellular_status_v1 { + u_int32_t valid_bitmask; /* indicates which fields are valid */ +#define IF_CELL_LINK_QUALITY_METRIC_VALID 0x1 +#define IF_CELL_UL_EFFECTIVE_BANDWIDTH_VALID 0x2 +#define IF_CELL_UL_MAX_BANDWIDTH_VALID 0x4 +#define IF_CELL_UL_MIN_LATENCY_VALID 0x8 +#define IF_CELL_UL_EFFECTIVE_LATENCY_VALID 0x10 +#define IF_CELL_UL_MAX_LATENCY_VALID 0x20 +#define IF_CELL_UL_RETXT_LEVEL_VALID 0x40 +#define IF_CELL_UL_BYTES_LOST_VALID 0x80 +#define IF_CELL_UL_MIN_QUEUE_SIZE_VALID 0x100 +#define IF_CELL_UL_AVG_QUEUE_SIZE_VALID 0x200 +#define IF_CELL_UL_MAX_QUEUE_SIZE_VALID 0x400 +#define IF_CELL_DL_EFFECTIVE_BANDWIDTH_VALID 0x800 +#define IF_CELL_DL_MAX_BANDWIDTH_VALID 0x1000 +#define IF_CELL_CONFIG_INACTIVITY_TIME_VALID 0x2000 +#define IF_CELL_CONFIG_BACKOFF_TIME_VALID 0x4000 + u_int32_t link_quality_metric; + u_int32_t ul_effective_bandwidth; /* Measured uplink bandwidth based on current activity (bps) */ + u_int32_t ul_max_bandwidth; /* Maximum supported uplink bandwidth (bps) */ + u_int32_t ul_min_latency; /* min expected uplink latency for first hop (ms) */ + u_int32_t ul_effective_latency; /* current expected uplink latency for first hop (ms) */ + u_int32_t ul_max_latency; /* max expected uplink latency first hop (ms) */ + u_int32_t ul_retxt_level; /* Retransmission metric */ +#define IF_CELL_UL_RETXT_LEVEL_NONE 1 +#define IF_CELL_UL_RETXT_LEVEL_LOW 2 +#define IF_CELL_UL_RETXT_LEVEL_MEDIUM 3 +#define IF_CELL_UL_RETXT_LEVEL_HIGH 4 + u_int32_t ul_bytes_lost; /* % of total bytes lost on uplink in Q10 format */ + u_int32_t ul_min_queue_size; /* minimum bytes in queue */ + u_int32_t ul_avg_queue_size; /* average bytes in queue */ + u_int32_t ul_max_queue_size; /* maximum bytes in queue */ + u_int32_t dl_effective_bandwidth; /* Measured downlink bandwidth based on current activity (bps) */ + u_int32_t dl_max_bandwidth; /* Maximum supported downlink bandwidth (bps) */ + u_int32_t config_inactivity_time; /* ms */ + u_int32_t config_backoff_time; /* 
new connections backoff time in ms */ + u_int64_t reserved_1; + u_int64_t reserved_2; + u_int64_t reserved_3; + u_int64_t reserved_4; + u_int64_t reserved_5; +} __attribute__((packed)); + +struct if_cellular_status { + union { + struct if_cellular_status_v1 if_status_v1; + } if_cell_u; +}; + +/* + * These statistics will be provided by the Wifi driver periodically. + * After sending each report, the driver should start computing again + * for the next report duration so that the values represent the link + * status for one report duration. + */ + +struct if_wifi_status_v1 { + u_int32_t valid_bitmask; +#define IF_WIFI_LINK_QUALITY_METRIC_VALID 0x1 +#define IF_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID 0x2 +#define IF_WIFI_UL_MAX_BANDWIDTH_VALID 0x4 +#define IF_WIFI_UL_MIN_LATENCY_VALID 0x8 +#define IF_WIFI_UL_EFFECTIVE_LATENCY_VALID 0x10 +#define IF_WIFI_UL_MAX_LATENCY_VALID 0x20 +#define IF_WIFI_UL_RETXT_LEVEL_VALID 0x40 +#define IF_WIFI_UL_ERROR_RATE_VALID 0x80 +#define IF_WIFI_UL_BYTES_LOST_VALID 0x100 +#define IF_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID 0x200 +#define IF_WIFI_DL_MAX_BANDWIDTH_VALID 0x400 +#define IF_WIFI_DL_MIN_LATENCY_VALID 0x800 +#define IF_WIFI_DL_EFFECTIVE_LATENCY_VALID 0x1000 +#define IF_WIFI_DL_MAX_LATENCY_VALID 0x2000 +#define IF_WIFI_DL_ERROR_RATE_VALID 0x4000 +#define IF_WIFI_CONFIG_FREQUENCY_VALID 0x8000 +#define IF_WIFI_CONFIG_MULTICAST_RATE_VALID 0x10000 +#define IF_WIFI_CONFIG_SCAN_COUNT_VALID 0x20000 +#define IF_WIFI_CONFIG_SCAN_DURATION_VALID 0x40000 + u_int32_t link_quality_metric; /* link quality metric */ + u_int32_t ul_effective_bandwidth; /* Measured uplink bandwidth based on current activity (bps) */ + u_int32_t ul_max_bandwidth; /* Maximum supported uplink bandwidth (bps) */ + u_int32_t ul_min_latency; /* min expected uplink latency for first hop (ms) */ + u_int32_t ul_effective_latency; /* current expected uplink latency for first hop (ms) */ + u_int32_t ul_max_latency; /* max expected uplink latency for first hop (ms) */ + u_int32_t 
ul_retxt_level; /* Retransmission metric */ +#define IF_WIFI_UL_RETXT_LEVEL_NONE 1 +#define IF_WIFI_UL_RETXT_LEVEL_LOW 2 +#define IF_WIFI_UL_RETXT_LEVEL_MEDIUM 3 +#define IF_WIFI_UL_RETXT_LEVEL_HIGH 4 + u_int32_t ul_bytes_lost; /* % of total bytes lost on uplink in Q10 format */ + u_int32_t ul_error_rate; /* % of bytes dropped on uplink after many retransmissions in Q10 format */ + u_int32_t dl_effective_bandwidth; /* Measured downlink bandwidth based on current activity (bps) */ + u_int32_t dl_max_bandwidth; /* Maximum supported downlink bandwidth (bps) */ + /* + * The download latency values indicate the time AP may have to wait for the + * driver to receive the packet. These values give the range of expected latency + * mainly due to co-existence events and channel hopping where the interface + * becomes unavailable. + */ + u_int32_t dl_min_latency; /* min expected latency for first hop in ms */ + u_int32_t dl_effective_latency; /* current expected latency for first hop in ms */ + u_int32_t dl_max_latency; /* max expected latency for first hop in ms */ + u_int32_t dl_error_rate; /* % of CRC or other errors in Q10 format */ + u_int32_t config_frequency; /* 2.4 or 5 GHz */ +#define IF_WIFI_CONFIG_FREQUENCY_2_4_GHZ 1 +#define IF_WIFI_CONFIG_FREQUENCY_5_0_GHZ 2 + u_int32_t config_multicast_rate; /* bps */ + u_int32_t scan_count; /* scan count during the previous period */ + u_int32_t scan_duration; /* scan duration in ms */ + u_int64_t reserved_1; + u_int64_t reserved_2; + u_int64_t reserved_3; + u_int64_t reserved_4; +} __attribute__((packed)); + +struct if_wifi_status { + union { + struct if_wifi_status_v1 if_status_v1; + } if_wifi_u; +}; + +struct if_link_status { + u_int32_t ifsr_version; /* version of this report */ + u_int32_t ifsr_len; /* length of the following struct */ + union { + struct if_cellular_status ifsr_cell; + struct if_wifi_status ifsr_wifi; + } ifsr_u; +}; + +struct if_interface_state { + /* + * The bitmask tells which of the fields + * to 
consider: + * - When setting, to control which fields + * are being modified; + * - When getting, it tells which fields are set. + */ + u_int8_t valid_bitmask; +#define IF_INTERFACE_STATE_RRC_STATE_VALID 0x1 +#define IF_INTERFACE_STATE_LQM_STATE_VALID 0x2 +#define IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID 0x4 + + /* + * Valid only for cellular interface + */ + u_int8_t rrc_state; +#define IF_INTERFACE_STATE_RRC_STATE_IDLE 0x0 +#define IF_INTERFACE_STATE_RRC_STATE_CONNECTED 0x1 + + /* + * Values normalized to the edge of the following values + * that are defined on : + * IFNET_LQM_THRESH_BAD + * IFNET_LQM_THRESH_POOR + * IFNET_LQM_THRESH_GOOD + */ + int8_t lqm_state; + + /* + * Indicate if the underlying link is currently + * available + */ + u_int8_t interface_availability; +#define IF_INTERFACE_STATE_INTERFACE_AVAILABLE 0x0 +#define IF_INTERFACE_STATE_INTERFACE_UNAVAILABLE 0x1 +}; + +struct chain_len_stats { + uint64_t cls_one; + uint64_t cls_two; + uint64_t cls_three; + uint64_t cls_four; + uint64_t cls_five_or_more; +}; + #endif /* PRIVATE */ #pragma pack() @@ -362,7 +560,7 @@ struct if_data_internal { u_int32_t ifi_mtu; /* maximum transmission unit */ u_int32_t ifi_metric; /* routing metric (external only) */ u_int32_t ifi_baudrate; /* linespeed */ - u_int32_t _pad; + u_int32_t ifi_preamblelen;/* length of the packet preamble */ /* volatile statistics */ u_int64_t ifi_ipackets; /* packets received on interface */ u_int64_t ifi_ierrors; /* input errors on interface */ @@ -414,6 +612,7 @@ struct if_measured_bw { #define if_physical if_data.ifi_physical #define if_addrlen if_data.ifi_addrlen #define if_hdrlen if_data.ifi_hdrlen +#define if_preamblelen if_data.ifi_preamblelen #define if_metric if_data.ifi_metric #define if_baudrate if_data.ifi_baudrate #define if_hwassist if_data.ifi_hwassist @@ -589,7 +788,13 @@ struct ifnet { decl_lck_mtx_data(, if_start_lock); u_int32_t if_start_flags; /* see IFSF flags below */ u_int32_t if_start_req; - u_int32_t 
if_start_active; /* output is active */ + u_int16_t if_start_active; /* output is active */ + u_int16_t if_start_delayed; + u_int16_t if_start_delay_qlen; + u_int16_t if_start_delay_idle; + u_int64_t if_start_delay_swin; + u_int32_t if_start_delay_cnt; + u_int32_t if_start_delay_timeout; /* nanoseconds */ struct timespec if_start_cycle; /* restart interval */ struct thread *if_start_thread; @@ -664,7 +869,6 @@ struct ifnet { struct mld_ifinfo *if_mli; /* for MLDv2 */ #endif /* INET6 */ - int if_lqm; /* link quality metric */ #if MEASURE_BW struct if_measured_bw if_bw; #endif /* MEASURE_BW */ @@ -686,13 +890,24 @@ struct ifnet { uint32_t expensive:1; /* delegated i/f expensive? */ } if_delegated; +#define IF_MAXAGENTS 8 + uuid_t if_agentids[IF_MAXAGENTS]; + u_int64_t if_data_threshold; u_int32_t if_fg_sendts; /* last send on a fg socket in seconds */ + u_int32_t if_rt_sendts; /* last of a real time packet */ +#if INET + decl_lck_rw_data(, if_inetdata_lock); + void *if_inetdata; +#endif /* INET */ #if INET6 decl_lck_rw_data(, if_inet6data_lock); void *if_inet6data; #endif + decl_lck_rw_data(, if_link_status_lock); + struct if_link_status *if_link_status; + struct if_interface_state if_interface_state; }; #define IF_TCP_STATINC(_ifp, _s) do { \ @@ -835,6 +1050,7 @@ struct ifaddr { void (*ifa_detached)(struct ifaddr *); /* callback fn for detaching */ }; + /* * Valid values for ifa_flags */ @@ -1064,7 +1280,10 @@ extern struct if_clone *if_clone_lookup(const char *, u_int32_t *); extern int if_clone_attach(struct if_clone *); extern void if_clone_detach(struct if_clone *); +extern u_int32_t if_functional_type(struct ifnet *); + extern errno_t if_mcasts_update(struct ifnet *); +extern int32_t total_snd_byte_count; typedef enum { IFNET_LCK_ASSERT_EXCLUSIVE, /* RW: held as writer */ @@ -1081,6 +1300,12 @@ __private_extern__ void ifnet_lock_shared(struct ifnet *ifp); __private_extern__ void ifnet_lock_exclusive(struct ifnet *ifp); __private_extern__ void 
ifnet_lock_done(struct ifnet *ifp); +#if INET +__private_extern__ void if_inetdata_lock_shared(struct ifnet *ifp); +__private_extern__ void if_inetdata_lock_exclusive(struct ifnet *ifp); +__private_extern__ void if_inetdata_lock_done(struct ifnet *ifp); +#endif + #if INET6 __private_extern__ void if_inet6data_lock_shared(struct ifnet *ifp); __private_extern__ void if_inet6data_lock_exclusive(struct ifnet *ifp); @@ -1176,7 +1401,13 @@ __private_extern__ struct rtentry *ifnet_cached_rtlookup_inet6(struct ifnet *, struct in6_addr *); #endif /* INET6 */ -__private_extern__ void if_lqm_update(struct ifnet *, int32_t); +__private_extern__ errno_t if_state_update(struct ifnet *, + struct if_interface_state *); +__private_extern__ void if_get_state(struct ifnet *, + struct if_interface_state *); +__private_extern__ errno_t if_probe_connectivity(struct ifnet *ifp, + u_int32_t conn_probe); +__private_extern__ void if_lqm_update(struct ifnet *, int32_t, int); __private_extern__ void ifnet_update_sndq(struct ifclassq *, cqev_t); __private_extern__ void ifnet_update_rcv(struct ifnet *, cqev_t); @@ -1194,6 +1425,13 @@ __private_extern__ errno_t ifnet_set_input_latencies(struct ifnet *, __private_extern__ errno_t ifnet_set_output_latencies(struct ifnet *, struct if_latencies *, boolean_t); +__private_extern__ void ifnet_clear_netagent(uuid_t); + +__private_extern__ int ifnet_set_netsignature(struct ifnet *, uint8_t, + uint8_t, uint16_t, uint8_t *); +__private_extern__ int ifnet_get_netsignature(struct ifnet *, uint8_t, + uint8_t *, uint16_t *, uint8_t *); + __private_extern__ errno_t ifnet_framer_stub(struct ifnet *, struct mbuf **, const struct sockaddr *, const char *, const char *, u_int32_t *, u_int32_t *); diff --git a/bsd/net/if_vlan.c b/bsd/net/if_vlan.c index 1a45bf4d8..ded46afbf 100644 --- a/bsd/net/if_vlan.c +++ b/bsd/net/if_vlan.c @@ -852,11 +852,10 @@ vlan_parent_create(struct ifnet * p, vlan_parent_ref * ret_vlp) vlan_parent_ref vlp; *ret_vlp = NULL; - vlp = 
_MALLOC(sizeof(*vlp), M_VLAN, M_WAITOK); + vlp = _MALLOC(sizeof(*vlp), M_VLAN, M_WAITOK | M_ZERO); if (vlp == NULL) { return (ENOMEM); } - bzero(vlp, sizeof(*vlp)); error = siocgifdevmtu(p, &vlp->vlp_devmtu); if (error != 0) { printf("vlan_parent_create (%s%d): siocgifdevmtu failed, %d\n", @@ -978,10 +977,9 @@ vlan_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params) if (error != 0) { return (error); } - ifv = _MALLOC(sizeof(struct ifvlan), M_VLAN, M_WAITOK); + ifv = _MALLOC(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO); if (ifv == NULL) return ENOBUFS; - bzero(ifv, sizeof(struct ifvlan)); ifv->ifv_retain_count = 1; ifv->ifv_signature = IFV_SIGNATURE; multicast_list_init(&ifv->ifv_multicast); @@ -1173,7 +1171,7 @@ vlan_output(struct ifnet * ifp, struct mbuf * m) m->m_pkthdr.csum_flags |= CSUM_VLAN_TAG_VALID; m->m_pkthdr.vlan_tag = tag; } else { - M_PREPEND(m, encaplen, M_DONTWAIT); + M_PREPEND(m, encaplen, M_DONTWAIT, 1); if (m == NULL) { printf("%s%d: unable to prepend VLAN header\n", ifnet_name(ifp), ifnet_unit(ifp)); diff --git a/bsd/net/kpi_interface.c b/bsd/net/kpi_interface.c index 22b18df05..a2289eca0 100644 --- a/bsd/net/kpi_interface.c +++ b/bsd/net/kpi_interface.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2014 Apple Inc. All rights reserved. + * Copyright (c) 2004-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -346,10 +346,25 @@ ifnet_allocate_extended(const struct ifnet_init_eparams *einit0, bzero(&ifp->if_broadcast, sizeof (ifp->if_broadcast)); } + /* + * output target queue delay is specified in millisecond + * convert it to nanoseconds + */ IFCQ_TARGET_QDELAY(&ifp->if_snd) = - einit.output_target_qdelay; + einit.output_target_qdelay * 1000 * 1000; IFCQ_MAXLEN(&ifp->if_snd) = einit.sndq_maxlen; + if (einit.start_delay_qlen > 0 && + einit.start_delay_timeout > 0) { + ifp->if_eflags |= IFEF_ENQUEUE_MULTI; + ifp->if_start_delay_qlen = + min(100, einit.start_delay_qlen); + ifp->if_start_delay_timeout = + min(20000, einit.start_delay_timeout); + /* convert timeout to nanoseconds */ + ifp->if_start_delay_timeout *= 1000; + } + if (error == 0) { *interface = ifp; // temporary - this should be done in dlil_if_acquire @@ -484,7 +499,7 @@ ifnet_flags(ifnet_t interface) * If IFEF_AWDL has been set on the interface and the caller attempts * to clear one or more of the associated flags in IFEF_AWDL_MASK, * return failure. - * + * * If IFEF_AWDL_RESTRICTED is set by the caller, make sure IFEF_AWDL is set * on the interface. 
* @@ -568,7 +583,7 @@ ifnet_set_eflags(ifnet_t interface, u_int32_t new_flags, u_int32_t mask) ev_data.if_unit = interface->if_unit; ev_msg.dv[0].data_length = sizeof(struct net_event_data); ev_msg.dv[0].data_ptr = &ev_data; - ev_msg.dv[1].data_length = 0; + ev_msg.dv[1].data_length = 0; kev_post_msg(&ev_msg); } @@ -658,7 +673,7 @@ ifnet_set_link_quality(ifnet_t ifp, int quality) goto done; } - if_lqm_update(ifp, quality); + if_lqm_update(ifp, quality, 0); done: return (err); @@ -673,12 +688,57 @@ ifnet_link_quality(ifnet_t ifp) return (IFNET_LQM_THRESH_OFF); ifnet_lock_shared(ifp); - lqm = ifp->if_lqm; + lqm = ifp->if_interface_state.lqm_state; ifnet_lock_done(ifp); return (lqm); } +errno_t +ifnet_set_interface_state(ifnet_t ifp, + struct if_interface_state *if_interface_state) +{ + errno_t err = 0; + + if (ifp == NULL || if_interface_state == NULL) { + err = EINVAL; + goto done; + } + + if (!ifnet_is_attached(ifp, 0)) { + err = ENXIO; + goto done; + } + + if_state_update(ifp, if_interface_state); + +done: + return (err); +} + +errno_t +ifnet_get_interface_state(ifnet_t ifp, + struct if_interface_state *if_interface_state) +{ + errno_t err = 0; + + if (ifp == NULL || if_interface_state == NULL) { + err = EINVAL; + goto done; + } + + if (!ifnet_is_attached(ifp, 0)) { + err = ENXIO; + goto done; + } + + if_get_state(ifp, if_interface_state); + +done: + return (err); +} + + static errno_t ifnet_defrouter_llreachinfo(ifnet_t ifp, int af, struct ifnet_llreach_info *iflri) @@ -824,7 +884,7 @@ ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload) ifcaps |= IFCAP_VLAN_MTU; if ((offload & IFNET_VLAN_TAGGING)) ifcaps |= IFCAP_VLAN_HWTAGGING; - if ((offload & IFNET_TX_STATUS)) + if ((offload & IFNET_TX_STATUS)) ifcaps |= IFNET_TX_STATUS; if (ifcaps != 0) { (void) ifnet_set_capabilities_supported(interface, ifcaps, @@ -1129,6 +1189,25 @@ ifnet_set_bandwidths(struct ifnet *ifp, struct if_bandwidths *output_bw, return (0); } +static void 
+ifnet_set_link_status_outbw(struct ifnet *ifp) +{ + struct if_wifi_status_v1 *sr; + sr = &ifp->if_link_status->ifsr_u.ifsr_wifi.if_wifi_u.if_status_v1; + if (ifp->if_output_bw.eff_bw != 0) { + sr->valid_bitmask |= + IF_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID; + sr->ul_effective_bandwidth = + ifp->if_output_bw.eff_bw; + } + if (ifp->if_output_bw.max_bw != 0) { + sr->valid_bitmask |= + IF_WIFI_UL_MAX_BANDWIDTH_VALID; + sr->ul_max_bandwidth = + ifp->if_output_bw.max_bw; + } +} + errno_t ifnet_set_output_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw, boolean_t locked) @@ -1167,9 +1246,38 @@ ifnet_set_output_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw, if (!locked) IFCQ_UNLOCK(ifq); + /* + * If this is a Wifi interface, update the values in + * if_link_status structure also. + */ + if (IFNET_IS_WIFI(ifp) && ifp->if_link_status != NULL) { + lck_rw_lock_exclusive(&ifp->if_link_status_lock); + ifnet_set_link_status_outbw(ifp); + lck_rw_done(&ifp->if_link_status_lock); + } + return (0); } +static void +ifnet_set_link_status_inbw(struct ifnet *ifp) +{ + struct if_wifi_status_v1 *sr; + + sr = &ifp->if_link_status->ifsr_u.ifsr_wifi.if_wifi_u.if_status_v1; + if (ifp->if_input_bw.eff_bw != 0) { + sr->valid_bitmask |= + IF_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID; + sr->dl_effective_bandwidth = + ifp->if_input_bw.eff_bw; + } + if (ifp->if_input_bw.max_bw != 0) { + sr->valid_bitmask |= + IF_WIFI_DL_MAX_BANDWIDTH_VALID; + sr->dl_max_bandwidth = ifp->if_input_bw.max_bw; + } +} + errno_t ifnet_set_input_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw) { @@ -1187,6 +1295,12 @@ ifnet_set_input_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw) else if (ifp->if_input_bw.eff_bw == 0) ifp->if_input_bw.eff_bw = ifp->if_input_bw.max_bw; + if (IFNET_IS_WIFI(ifp) && ifp->if_link_status != NULL) { + lck_rw_lock_exclusive(&ifp->if_link_status_lock); + ifnet_set_link_status_inbw(ifp); + lck_rw_done(&ifp->if_link_status_lock); + } + if (old_bw.eff_bw != ifp->if_input_bw.eff_bw 
|| old_bw.max_bw != ifp->if_input_bw.max_bw) ifnet_update_rcv(ifp, CLASSQ_EV_LINK_BANDWIDTH); @@ -1526,7 +1640,7 @@ errno_t ifnet_get_inuse_address_list(ifnet_t interface, ifaddr_t **addresses) { return (addresses == NULL ? EINVAL : - ifnet_get_address_list_family_internal(interface, addresses, + ifnet_get_address_list_family_internal(interface, addresses, 0, 0, M_NOWAIT, 1)); } @@ -1625,17 +1739,16 @@ ifnet_get_address_list_family_internal(ifnet_t interface, ifaddr_t **addresses, if (return_inuse_addrs) { usecount = tcp_find_anypcb_byaddr(ifal->ifal_ifa); usecount += udp_find_anypcb_byaddr(ifal->ifal_ifa); - if (usecount) { + if (usecount) { (*addresses)[index] = ifal->ifal_ifa; index++; - } - else + } else { IFA_REMREF(ifal->ifal_ifa); + } } else { (*addresses)[--count] = ifal->ifal_ifa; } - } - else { + } else { IFA_REMREF(ifal->ifal_ifa); } FREE(ifal, M_TEMP); @@ -2093,7 +2206,7 @@ ifnet_transmit_burst_end(ifnet_t ifp, mbuf_t pkt) uint64_t oseq, ots, bytes, ts, t; uint32_t flags; - if ( ifp == NULL || !(pkt->m_flags & M_PKTHDR)) + if (ifp == NULL || !(pkt->m_flags & M_PKTHDR)) return; flags = OSBitOrAtomic(IF_MEASURED_BW_CALCULATION, &ifp->if_bw.flags); @@ -2116,7 +2229,7 @@ ifnet_transmit_burst_end(ifnet_t ifp, mbuf_t pkt) if (ifp->if_bw.start_seq > 0 && oseq > ifp->if_bw.start_seq) { ts = ots - ifp->if_bw.start_ts; - if (ts > 0 ) { + if (ts > 0) { absolutetime_to_nanoseconds(ts, &t); bytes = oseq - ifp->if_bw.start_seq; ifp->if_bw.bytes = bytes; @@ -2495,6 +2608,12 @@ ifnet_get_local_ports_extended(ifnet_t ifp, protocol_family_t protocol, INPCB_GET_PORTS_USED_WILDCARDOK : 0); inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) ? INPCB_GET_PORTS_USED_NOWAKEUPOK : 0); + inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) ? + INPCB_GET_PORTS_USED_RECVANYIFONLY : 0); + inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) ? + INPCB_GET_PORTS_USED_EXTBGIDLEONLY : 0); + inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) ? 
+ INPCB_GET_PORTS_USED_ACTIVEONLY : 0); if (bitfield == NULL) return (EINVAL); @@ -2535,11 +2654,11 @@ ifnet_notice_node_presence(ifnet_t ifp, struct sockaddr* sa, int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48]) { if (ifp == NULL || sa == NULL || srvinfo == NULL) - return(EINVAL); + return (EINVAL); if (sa->sa_len > sizeof(struct sockaddr_storage)) - return(EINVAL); + return (EINVAL); if (sa->sa_family != AF_LINK && sa->sa_family != AF_INET6) - return(EINVAL); + return (EINVAL); dlil_node_present(ifp, sa, rssi, lqm, npm, srvinfo); return (0); @@ -2549,11 +2668,11 @@ errno_t ifnet_notice_node_absence(ifnet_t ifp, struct sockaddr* sa) { if (ifp == NULL || sa == NULL) - return(EINVAL); + return (EINVAL); if (sa->sa_len > sizeof(struct sockaddr_storage)) - return(EINVAL); + return (EINVAL); if (sa->sa_family != AF_LINK && sa->sa_family != AF_INET6) - return(EINVAL); + return (EINVAL); dlil_node_absent(ifp, sa); return (0); @@ -2563,7 +2682,7 @@ errno_t ifnet_notice_master_elected(ifnet_t ifp) { if (ifp == NULL) - return(EINVAL); + return (EINVAL); dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_MASTER_ELECTED, NULL, 0); return (0); @@ -2588,7 +2707,7 @@ ifnet_report_issues(ifnet_t ifp, u_int8_t modid[IFNET_MODIDLEN], return (0); } -extern errno_t +errno_t ifnet_set_delegate(ifnet_t ifp, ifnet_t delegated_ifp) { ifnet_t odifp = NULL; @@ -2612,7 +2731,7 @@ ifnet_set_delegate(ifnet_t ifp, ifnet_t delegated_ifp) ifp->if_delegated.type = delegated_ifp->if_type; ifp->if_delegated.family = delegated_ifp->if_family; ifp->if_delegated.subfamily = delegated_ifp->if_subfamily; - ifp->if_delegated.expensive = + ifp->if_delegated.expensive = delegated_ifp->if_eflags & IFEF_EXPENSIVE ? 
1 : 0; printf("%s: is now delegating %s (type 0x%x, family %u, " "sub-family %u)\n", ifp->if_xname, delegated_ifp->if_xname, @@ -2639,7 +2758,7 @@ ifnet_set_delegate(ifnet_t ifp, ifnet_t delegated_ifp) return (0); } -extern errno_t +errno_t ifnet_get_delegate(ifnet_t ifp, ifnet_t *pdelegated_ifp) { if (ifp == NULL || pdelegated_ifp == NULL) @@ -2659,28 +2778,207 @@ ifnet_get_delegate(ifnet_t ifp, ifnet_t *pdelegated_ifp) return (0); } -extern u_int32_t key_fill_offload_frames_for_savs (ifnet_t ifp, - struct ipsec_offload_frame *frames_array, u_int32_t frames_array_count, - size_t frame_data_offset); +extern u_int32_t +key_fill_offload_frames_for_savs(ifnet_t ifp, + struct ifnet_keepalive_offload_frame *frames_array, + u_int32_t frames_array_count, size_t frame_data_offset); + +extern void +udp_fill_keepalive_offload_frames(ifnet_t ifp, + struct ifnet_keepalive_offload_frame *frames_array, + u_int32_t frames_array_count, size_t frame_data_offset, + u_int32_t *used_frames_count); -extern errno_t -ifnet_get_ipsec_offload_frames(ifnet_t ifp, - struct ipsec_offload_frame *frames_array, - u_int32_t frames_array_count, - size_t frame_data_offset, - u_int32_t *used_frames_count) +errno_t +ifnet_get_keepalive_offload_frames(ifnet_t ifp, + struct ifnet_keepalive_offload_frame *frames_array, + u_int32_t frames_array_count, size_t frame_data_offset, + u_int32_t *used_frames_count) { - if (frames_array == NULL || used_frames_count == NULL) { + if (frames_array == NULL || used_frames_count == NULL) return (EINVAL); - } - *used_frames_count = 0; + /* frame_data_offset should be 32-bit aligned */ + if (P2ROUNDUP(frame_data_offset, sizeof(u_int32_t)) + != frame_data_offset) + return (EINVAL); - if (frames_array_count == 0) { + *used_frames_count = 0; + if (frames_array_count == 0) return (0); - } + /* First collect IPSec related keep-alive frames */ *used_frames_count = key_fill_offload_frames_for_savs(ifp, - frames_array, frames_array_count, frame_data_offset); + frames_array, 
frames_array_count, frame_data_offset); + + /* If there is more room, collect other UDP keep-alive frames */ + if (*used_frames_count < frames_array_count) + udp_fill_keepalive_offload_frames(ifp, frames_array, + frames_array_count, frame_data_offset, + used_frames_count); + + VERIFY(*used_frames_count <= frames_array_count); return (0); } + +errno_t +ifnet_link_status_report(ifnet_t ifp, const void *buffer, + size_t buffer_len) +{ + struct if_link_status *ifsr; + errno_t err = 0; + + if (ifp == NULL || buffer == NULL || buffer_len == 0) + return (EINVAL); + + ifnet_lock_shared(ifp); + + /* + * Make sure that the interface is attached but there is no need + * to take a reference because this call is coming from the driver. + */ + if (!ifnet_is_attached(ifp, 0)) { + ifnet_lock_done(ifp); + return (ENXIO); + } + + lck_rw_lock_exclusive(&ifp->if_link_status_lock); + + /* + * If this is the first status report then allocate memory + * to store it. + */ + if (ifp->if_link_status == NULL) { + MALLOC(ifp->if_link_status, struct if_link_status *, + sizeof(struct if_link_status), M_TEMP, M_ZERO); + if (ifp->if_link_status == NULL) { + err = ENOMEM; + goto done; + } + } + + ifsr = __DECONST(struct if_link_status *, buffer); + + if (ifp->if_type == IFT_CELLULAR) { + struct if_cellular_status_v1 *if_cell_sr, *new_cell_sr; + /* + * Currently we have a single version -- if it does + * not match, just return. 
+ */ + if (ifsr->ifsr_version != + IF_CELLULAR_STATUS_REPORT_CURRENT_VERSION) { + err = ENOTSUP; + goto done; + } + + if (ifsr->ifsr_len != sizeof(*if_cell_sr)) { + err = EINVAL; + goto done; + } + + if_cell_sr = + &ifp->if_link_status->ifsr_u.ifsr_cell.if_cell_u.if_status_v1; + new_cell_sr = &ifsr->ifsr_u.ifsr_cell.if_cell_u.if_status_v1; + ifp->if_link_status->ifsr_version = ifsr->ifsr_version; + ifp->if_link_status->ifsr_len = ifsr->ifsr_len; + if_cell_sr->valid_bitmask = 0; + bcopy(new_cell_sr, if_cell_sr, sizeof(*if_cell_sr)); + } else if (ifp->if_subfamily == IFNET_SUBFAMILY_WIFI) { + struct if_wifi_status_v1 *if_wifi_sr, *new_wifi_sr; + + /* Check version */ + if (ifsr->ifsr_version != + IF_WIFI_STATUS_REPORT_CURRENT_VERSION) { + err = ENOTSUP; + goto done; + } + + if (ifsr->ifsr_len != sizeof(*if_wifi_sr)) { + err = EINVAL; + goto done; + } + + if_wifi_sr = + &ifp->if_link_status->ifsr_u.ifsr_wifi.if_wifi_u.if_status_v1; + new_wifi_sr = + &ifsr->ifsr_u.ifsr_wifi.if_wifi_u.if_status_v1; + ifp->if_link_status->ifsr_version = ifsr->ifsr_version; + ifp->if_link_status->ifsr_len = ifsr->ifsr_len; + if_wifi_sr->valid_bitmask = 0; + bcopy(new_wifi_sr, if_wifi_sr, sizeof(*if_wifi_sr)); + + /* + * Update the bandwidth values if we got recent values + * reported through the other KPI. 
+ */ + if (!(new_wifi_sr->valid_bitmask & + IF_WIFI_UL_MAX_BANDWIDTH_VALID) && + ifp->if_output_bw.max_bw > 0) { + if_wifi_sr->valid_bitmask |= + IF_WIFI_UL_MAX_BANDWIDTH_VALID; + if_wifi_sr->ul_max_bandwidth = + ifp->if_output_bw.max_bw; + } + if (!(new_wifi_sr->valid_bitmask & + IF_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID) && + ifp->if_output_bw.eff_bw > 0) { + if_wifi_sr->valid_bitmask |= + IF_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID; + if_wifi_sr->ul_effective_bandwidth = + ifp->if_output_bw.eff_bw; + } + if (!(new_wifi_sr->valid_bitmask & + IF_WIFI_DL_MAX_BANDWIDTH_VALID) && + ifp->if_input_bw.max_bw > 0) { + if_wifi_sr->valid_bitmask |= + IF_WIFI_DL_MAX_BANDWIDTH_VALID; + if_wifi_sr->dl_max_bandwidth = + ifp->if_input_bw.max_bw; + } + if (!(new_wifi_sr->valid_bitmask & + IF_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID) && + ifp->if_input_bw.eff_bw > 0) { + if_wifi_sr->valid_bitmask |= + IF_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID; + if_wifi_sr->dl_effective_bandwidth = + ifp->if_input_bw.eff_bw; + } + } + +done: + lck_rw_done(&ifp->if_link_status_lock); + ifnet_lock_done(ifp); + return (err); +} + +/*************************************************************************/ +/* Packet preamble */ +/*************************************************************************/ + +#define MAX_IF_PACKET_PREAMBLE_LEN 32 + +errno_t +ifnet_set_packetpreamblelen(ifnet_t interface, u_int32_t len) +{ + errno_t err = 0; + + if (interface == NULL || len > MAX_IF_PACKET_PREAMBLE_LEN) { + err = EINVAL; + goto done; + } + interface->if_data.ifi_preamblelen = len; +done: + return (err); +} + +u_int32_t +ifnet_packetpreamblelen(ifnet_t interface) +{ + return ((interface == NULL) ? 
0 : interface->if_data.ifi_preamblelen); +} + +u_int32_t +ifnet_maxpacketpreamblelen(void) +{ + return (MAX_IF_PACKET_PREAMBLE_LEN); +} diff --git a/bsd/net/kpi_interface.h b/bsd/net/kpi_interface.h index 1f4a41534..2c6e8bbe9 100644 --- a/bsd/net/kpi_interface.h +++ b/bsd/net/kpi_interface.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2013 Apple Inc. All rights reserved. + * Copyright (c) 2004-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -42,6 +42,7 @@ #include #ifdef KERNEL_PRIVATE +struct if_interface_state; #include #endif /* KERNEL_PRIVATE */ @@ -988,6 +989,14 @@ typedef errno_t (*ifnet_ctl_func)(ifnet_t interface, ifnet_ctl_cmd_t cmd, @field output_lt The effective output latency (in nanosecond.) @field output_lt_max The maximum theoretical output latency (in nanosecond.) + @field start_delay_qlen The maximum length of output queue for + delaying start callback to the driver. This is an + optimization for coalescing output packets. + @field start_delay_timeout The timeout in microseconds to delay + start callback. If start_delay_qlen number of packets are + not in the output queue when the timer fires, the start + callback will be invoked. Maximum allowed value is + 20ms (in microseconds). @field input_poll The poll function for the interface, valid only if IFNET_INIT_LEGACY is not set and only if IFNET_INIT_INPUT_POLL is set. 
@@ -1045,12 +1054,14 @@ struct ifnet_init_eparams { ifnet_start_func start; /* required only for new model */ ifnet_ctl_func output_ctl; /* optional, only for new model */ u_int32_t output_sched_model; /* optional, only for new model */ - u_int32_t output_target_qdelay; /* optional, only for new model */ + u_int32_t output_target_qdelay; /* optional, only for new model, value in ms */ u_int64_t output_bw; /* optional */ u_int64_t output_bw_max; /* optional */ u_int64_t output_lt; /* optional */ u_int64_t output_lt_max; /* optional */ - u_int64_t _reserved[2]; /* for future use */ + u_int16_t start_delay_qlen; /* optional */ + u_int16_t start_delay_timeout; /* optional */ + u_int32_t _reserved[3]; /* for future use */ ifnet_input_poll_func input_poll; /* optional, ignored for legacy model */ ifnet_ctl_func input_ctl; /* required for opportunistic polling */ u_int32_t rcvq_maxlen; /* optional, only for opportunistic polling */ @@ -1846,6 +1857,27 @@ extern errno_t ifnet_set_link_quality(ifnet_t interface, int quality); */ extern int ifnet_link_quality(ifnet_t interface); +/* + @function ifnet_set_interface_state + @discussion Sets the interface state for the ifnet. + @param interface Interface whose interface state should + be set. + @param if_interface_state as defined in net/if_var.h. + @result 0 on success otherwise the errno error. EINVAL if interface + or if_interface_state is NULL. ENXIO if the interface is not attached. +*/ +extern errno_t ifnet_set_interface_state(ifnet_t interface, + struct if_interface_state *if_interface_state); + +/* + @function ifnet_get_interface_state + @discussion Returns the interface state for the ifnet. + @param if_interface_state Receives the current interface state.
+ @result 0 on success, errno otherwise +*/ +extern int ifnet_get_interface_state(ifnet_t interface, + struct if_interface_state *if_interface_state); + /* @struct ifnet_llreach_info @discussion This structure is used to describe the link-layer @@ -3118,10 +3150,13 @@ extern errno_t ifnet_clone_detach(if_clone_t ifcloner); */ extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield); -#define IFNET_GET_LOCAL_PORTS_WILDCARDOK 0x1 -#define IFNET_GET_LOCAL_PORTS_NOWAKEUPOK 0x2 -#define IFNET_GET_LOCAL_PORTS_TCPONLY 0x4 -#define IFNET_GET_LOCAL_PORTS_UDPONLY 0x8 +#define IFNET_GET_LOCAL_PORTS_WILDCARDOK 0x01 +#define IFNET_GET_LOCAL_PORTS_NOWAKEUPOK 0x02 +#define IFNET_GET_LOCAL_PORTS_TCPONLY 0x04 +#define IFNET_GET_LOCAL_PORTS_UDPONLY 0x08 +#define IFNET_GET_LOCAL_PORTS_RECVANYIFONLY 0x10 +#define IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY 0x20 +#define IFNET_GET_LOCAL_PORTS_ACTIVEONLY 0x40 /* @function ifnet_get_local_ports_extended @discussion Returns a bitfield indicating which local ports of the @@ -3137,10 +3172,10 @@ extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield); @param protocol The protocol family of the sockets. PF_UNSPEC (0) means all protocols, otherwise PF_INET or PF_INET6. @param flags A bitwise of the following flags: - IFNET_GET_LOCAL_PORTS_EXTENDED_WILDCARDOK: When bit is set, + IFNET_GET_LOCAL_PORTS_WILDCARDOK: When bit is set, the list of local ports should include those that are used by sockets that aren't bound to any local address. - IFNET_GET_LOCAL_PORTS_EXTENDED_NOWAKEUPOK: When bit is + IFNET_GET_LOCAL_PORTS_NOWAKEUPOK: When bit is set, the list of local ports should return all sockets including the ones that do not need a wakeup from sleep. Sockets that do not want to wake from sleep are marked @@ -3150,6 +3185,15 @@ extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield); IFNET_GET_LOCAL_PORTS_UDPONLY: When bit is set, the list of local ports should return the ports used by UDP sockets. only. 
+ IFNET_GET_LOCAL_PORTS_RECVANYIFONLY: When bit is set, the + port is in the list only if the socket has the option + SO_RECV_ANYIF set + IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY: When bit is set, the + port is in the list only if the socket has the option + SO_EXTENDED_BK_IDLE set + IFNET_GET_LOCAL_PORTS_ACTIVEONLY: When bit is set, the + port is in the list only if the socket is not in a final TCP + state or the connection is not idle in a final TCP state @param bitfield A pointer to 8192 bytes. @result Returns 0 on success. */ @@ -3303,37 +3347,110 @@ ifnet_set_delegate(ifnet_t ifp, ifnet_t delegated_ifp); extern errno_t ifnet_get_delegate(ifnet_t ifp, ifnet_t *pdelegated_ifp); -/******************************************************************************/ -/* for interface IPSec keepalive offload */ -/******************************************************************************/ - -#define IPSEC_OFFLOAD_FRAME_DATA_SIZE 128 -struct ipsec_offload_frame { - u_int8_t data[IPSEC_OFFLOAD_FRAME_DATA_SIZE]; /* Frame bytes */ - u_int16_t length; /* Number of valid bytes in data, including offset */ - u_int16_t interval; /* Interval in seconds */ +/*************************************************************************/ +/* for interface keep alive offload support */ +/*************************************************************************/ + +#define IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE 128 +struct ifnet_keepalive_offload_frame { + u_int8_t data[IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE]; /* data bytes */ +#define IFNET_KEEPALIVE_OFFLOAD_FRAME_IPSEC 0x0 +#define IFNET_KEEPALIVE_OFFLOAD_FRAME_AIRPLAY 0x1 + u_int8_t type; /* type of application */ + u_int8_t length; /* Number of valid data bytes including offset */ + u_int16_t interval; /* Keep alive interval in seconds */ +#define IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV4 0x0 +#define IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV6 0x1 + u_int8_t ether_type; /* Ether type IPv4 or IPv6 */ + u_int8_t __reserved[3];
/* For future */ }; /* - @function ifnet_get_ipsec_offload_frames - @discussion Fills out frames_array with IP packets to send at periodic - intervals on behalf of IPSec. - @param ifp The interface to send the frames out on. This is used to - select which IPSec SAs should generate the packets. - @param frames_array An array of ipsec_offload_frame structs. This is - allocated by the caller, and has frames_array_count frames of valid - memory. - @param frames_array_count The number of valid frames allocated in - frames_array. - @param frame_data_offset The offset in bytes into each frame data at - which IPSec should write the IP header and payload. - @param used_frames_count The returned number of frames that were filled - out with valid information. + @function ifnet_get_keepalive_offload_frames + @discussion Fills out frames_array with IP packets to send at + periodic intervals as Keep-alive or heartbeat messages. + These are UDP datagrams. This can be used to offload + IPSec keep alives. + @param ifp The interface to send frames out on. This is used to + select which sockets or IPSec SAs should generate the + packets. + @param frames_array An array of ifnet_keepalive_offload_frame + structs. This is allocated by the caller, and has + frames_array_count frames of valid memory. + @param frames_array_count The number of valid frames allocated + by the caller in frames_array + @param frame_data_offset The offset in bytes into each frame data + at which the IPv4/IPv6 packet and payload should be written + @param used_frames_count The returned number of frames that were + filled out with valid information. + @result Returns 0 on success, error number otherwise. 
+*/ +extern errno_t ifnet_get_keepalive_offload_frames(ifnet_t ifp, + struct ifnet_keepalive_offload_frame *frames_array, + u_int32_t frames_array_count, size_t frame_data_offset, + u_int32_t *used_frames_count); + +/*************************************************************************/ +/* Link level notifications */ +/*************************************************************************/ +/* + @function ifnet_link_status_report + @discussion A KPI to let the driver provide link specific + status information to the protocol stack. The KPI will + copy contents from the buffer based on the version and + length provided by the driver. The contents of the buffer + will be read but will not be modified. + @param ifp The interface that is generating the report + @param buffer Buffer containing the link specific information + for this interface. It is the caller's responsibility + to free this buffer. + @param buffer_len Valid length of the buffer provided by the caller @result Returns 0 on success, error number otherwise. +*/ +extern errno_t ifnet_link_status_report(ifnet_t ifp, const void *buffer, + size_t buffer_len); + +/*************************************************************************/ +/* Packet preamble */ +/*************************************************************************/ +/*! + @function ifnet_set_packetpreamblelen + @discussion + Allows a driver to specify a leading space to be + reserved in front of the link layer header. + The preamble is logically adjoining the link layer which + itself is logically contiguous to the network protocol header + (e.g. IP). + There is no guarantee that packets being sent to the + driver has leading space reserved for the preamble. + There is also no guarantee the packet will be laid out in a + contiguous block of memory. + The network protocol header is 32 bit aligned and this dictates + the alignment of the link layer header which in turn affects + the alignment the packet preamble. 
+ This function is intended to be called by the driver. A kext + must not call this function on an interface the kext does not + own. + @param interface The interface. + @param len The length of the packet preamble. + @result 0 on success otherwise the errno error. */ -extern errno_t ifnet_get_ipsec_offload_frames(ifnet_t ifp, - struct ipsec_offload_frame *frames_array, u_int32_t frames_array_count, - size_t frame_data_offset, u_int32_t *used_frames_count); +extern errno_t ifnet_set_packetpreamblelen(ifnet_t interface, u_int32_t len); + +/*! + @function ifnet_packetpreamblelen + @param interface The interface. + @result The current packet preamble length. + */ +extern u_int32_t ifnet_packetpreamblelen(ifnet_t interface); + +/*! + @function ifnet_maxpacketpreamblelen + @result The maximum packet preamble length supported by the system + */ +extern u_int32_t ifnet_maxpacketpreamblelen(void); + + #endif /* KERNEL_PRIVATE */ __END_DECLS diff --git a/bsd/net/kpi_protocol.c b/bsd/net/kpi_protocol.c index 170dd25f0..f35b2b10b 100644 --- a/bsd/net/kpi_protocol.c +++ b/bsd/net/kpi_protocol.c @@ -107,11 +107,10 @@ proto_register_input(protocol_family_t protocol, proto_input_handler input, struct domain *dp; domain_guard_t guard; - entry = _MALLOC(sizeof (*entry), M_IFADDR, M_WAITOK); + entry = _MALLOC(sizeof (*entry), M_IFADDR, M_WAITOK | M_ZERO); if (entry == NULL) return (ENOMEM); - bzero(entry, sizeof (*entry)); entry->protocol = protocol; entry->input = input; entry->detached = detached; @@ -365,13 +364,13 @@ proto_register_plumber(protocol_family_t protocol_family, } proto_family = (struct proto_family_str *) - _MALLOC(sizeof (struct proto_family_str), M_IFADDR, M_WAITOK); + _MALLOC(sizeof (struct proto_family_str), M_IFADDR, + M_WAITOK | M_ZERO); if (!proto_family) { lck_mtx_unlock(proto_family_mutex); return (ENOMEM); } - bzero(proto_family, sizeof (struct proto_family_str)); proto_family->proto_family = protocol_family; proto_family->if_family = interface_family & 
0xffff; proto_family->attach_proto = attach; diff --git a/bsd/net/lacp.h b/bsd/net/lacp.h index 04c81c167..71ff0d543 100644 --- a/bsd/net/lacp.h +++ b/bsd/net/lacp.h @@ -273,7 +273,7 @@ static __inline__ uint16_t lacp_uint16_get(const uint8_t * field) { uint16_t tmp_field; - memcpy((void *)&tmp_field, (void *)field, sizeof(uint16_t)); + memcpy((void *)&tmp_field, (const void *)field, sizeof(uint16_t)); return (ntohs(tmp_field)); } @@ -301,7 +301,7 @@ static __inline__ uint32_t lacp_uint32_get(const uint8_t * field) { uint32_t tmp_field; - memcpy((void *)&tmp_field, (void *)field, sizeof(uint32_t)); + memcpy((void *)&tmp_field, (const void *)field, sizeof(uint32_t)); return (ntohl(tmp_field)); } diff --git a/bsd/net/necp.c b/bsd/net/necp.c index 6c589c221..650e809cd 100644 --- a/bsd/net/necp.c +++ b/bsd/net/necp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 Apple Inc. All rights reserved. + * Copyright (c) 2013-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -39,20 +39,25 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include +#include #include #include #include #include #include #include +#include +#include #include /* @@ -130,13 +135,7 @@ u_int32_t necp_pass_keepalives = 1; // 0=Off, 1=On u_int32_t necp_debug = 0; // 0=None, 1=Basic, 2=EveryMatch -static int sysctl_handle_necp_level SYSCTL_HANDLER_ARGS; - -SYSCTL_NODE(_net, OID_AUTO, necp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "NECP"); -SYSCTL_INT(_net_necp, NECPCTL_PASS_LOOPBACK, pass_loopback, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_pass_loopback, 0, ""); -SYSCTL_INT(_net_necp, NECPCTL_PASS_KEEPALIVES, pass_keepalives, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_pass_keepalives, 0, ""); -SYSCTL_INT(_net_necp, NECPCTL_DEBUG, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_debug, 0, ""); -SYSCTL_PROC(_net_necp, NECPCTL_DROP_ALL_LEVEL, drop_all_level, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW, &necp_drop_all_level, 0, 
&sysctl_handle_necp_level, "IU", ""); +u_int32_t necp_session_count = 0; #define NECPLOG(level, format, ...) do { \ log((level > LOG_NOTICE ? LOG_NOTICE : level), "%s: " format "\n", __FUNCTION__, __VA_ARGS__); \ @@ -218,10 +217,11 @@ struct necp_session { bool proc_locked; // Messages must come from proc_uuid uuid_t proc_uuid; + int proc_pid; bool dirty; LIST_HEAD(_policies, necp_session_policy) policies; - + LIST_HEAD(_services, necp_service_registration) services; }; @@ -248,10 +248,17 @@ static lck_attr_t *necp_kernel_policy_mtx_attr = NULL; static lck_grp_t *necp_kernel_policy_mtx_grp = NULL; decl_lck_rw_data(static, necp_kernel_policy_lock); +static lck_grp_attr_t *necp_route_rule_grp_attr = NULL; +static lck_attr_t *necp_route_rule_mtx_attr = NULL; +static lck_grp_t *necp_route_rule_mtx_grp = NULL; +decl_lck_rw_data(static, necp_route_rule_lock); + static necp_policy_id necp_last_policy_id = 0; static necp_kernel_policy_id necp_last_kernel_policy_id = 0; static u_int32_t necp_last_uuid_id = 0; static u_int32_t necp_last_string_id = 0; +static u_int32_t necp_last_route_rule_id = 0; +static u_int32_t necp_last_aggregate_route_rule_id = 0; /* * On modification, invalidate cached lookups by bumping the generation count. 
@@ -305,20 +312,20 @@ static void necp_handle_lock_session_to_proc(struct necp_session *session, u_int static void necp_handle_register_service(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset); static void necp_handle_unregister_service(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset); -static struct necp_session_policy *necp_policy_create(struct necp_session *session, necp_policy_order order, u_int8_t *conditions_array, size_t conditions_array_size, u_int8_t *result, size_t result_size); +static struct necp_session_policy *necp_policy_create(struct necp_session *session, necp_policy_order order, u_int8_t *conditions_array, u_int32_t conditions_array_size, u_int8_t *route_rules_array, u_int32_t route_rules_array_size, u_int8_t *result, u_int32_t result_size); static struct necp_session_policy *necp_policy_find(struct necp_session *session, necp_policy_id policy_id); static bool necp_policy_mark_for_deletion(struct necp_session *session, struct necp_session_policy *policy); static bool necp_policy_mark_all_for_deletion(struct necp_session *session); static bool necp_policy_delete(struct necp_session *session, struct necp_session_policy *policy); static void necp_policy_apply_all(struct necp_session *session); -static necp_kernel_policy_id necp_kernel_socket_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, u_int32_t session_order, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, u_int32_t cond_account_id, char *domain, pid_t cond_pid, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result 
result, necp_kernel_policy_result_parameter result_parameter); +static necp_kernel_policy_id necp_kernel_socket_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, u_int32_t session_order, int session_pid, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, u_int32_t cond_account_id, char *domain, pid_t cond_pid, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter); static bool necp_kernel_socket_policy_delete(necp_kernel_policy_id policy_id); static bool necp_kernel_socket_policies_reprocess(void); static bool necp_kernel_socket_policies_update_uuid_table(void); -static inline struct necp_kernel_socket_policy *necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy **policy_search_array, struct necp_socket_info *info, necp_kernel_policy_filter *return_filter, necp_kernel_policy_result *return_service_action, necp_kernel_policy_service *return_service); +static inline struct necp_kernel_socket_policy *necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy **policy_search_array, struct necp_socket_info *info, necp_kernel_policy_filter *return_filter, u_int32_t *return_route_rule_id, necp_kernel_policy_result *return_service_action, necp_kernel_policy_service *return_service, u_int32_t *return_netagent_array, size_t netagent_array_count); -static necp_kernel_policy_id necp_kernel_ip_output_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, necp_policy_order suborder, u_int32_t session_order, u_int32_t condition_mask, u_int32_t 
condition_negated_mask, necp_kernel_policy_id cond_policy_id, ifnet_t cond_bound_interface, u_int32_t cond_last_interface_index, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter); +static necp_kernel_policy_id necp_kernel_ip_output_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, necp_policy_order suborder, u_int32_t session_order, int session_pid, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_kernel_policy_id cond_policy_id, ifnet_t cond_bound_interface, u_int32_t cond_last_interface_index, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter); static bool necp_kernel_ip_output_policy_delete(necp_kernel_policy_id policy_id); static bool necp_kernel_ip_output_policies_reprocess(void); @@ -366,6 +373,49 @@ static LIST_HEAD(_necp_kernel_service_list, necp_service_registration) necp_regi static char *necp_create_trimmed_domain(char *string, size_t length); static inline int necp_count_dots(char *string, size_t length); +#define ROUTE_RULE_IS_AGGREGATE(ruleid) (ruleid > UINT16_MAX) + +#define MAX_ROUTE_RULE_INTERFACES 10 +struct necp_route_rule { + LIST_ENTRY(necp_route_rule) chain; + u_int32_t id; + u_int32_t default_action; + u_int8_t cellular_action; + u_int8_t wifi_action; + u_int8_t wired_action; + u_int8_t expensive_action; + u_int exception_if_indices[MAX_ROUTE_RULE_INTERFACES]; + u_int8_t exception_if_actions[MAX_ROUTE_RULE_INTERFACES]; + 
u_int32_t refcount; +}; +static LIST_HEAD(necp_route_rule_list, necp_route_rule) necp_route_rules; +static u_int32_t necp_create_route_rule(struct necp_route_rule_list *list, u_int8_t *route_rules_array, u_int32_t route_rules_array_size); +static bool necp_remove_route_rule(struct necp_route_rule_list *list, u_int32_t route_rule_id); +static bool necp_route_is_allowed(struct rtentry *route, ifnet_t interface, u_int32_t route_rule_id, bool *cellular_denied); +static struct necp_route_rule *necp_lookup_route_rule_locked(struct necp_route_rule_list *list, u_int32_t route_rule_id); + +#define MAX_AGGREGATE_ROUTE_RULES 16 +struct necp_aggregate_route_rule { + LIST_ENTRY(necp_aggregate_route_rule) chain; + u_int32_t id; + u_int32_t rule_ids[MAX_AGGREGATE_ROUTE_RULES]; +}; +static LIST_HEAD(necp_aggregate_route_rule_list, necp_aggregate_route_rule) necp_aggregate_route_rules; +static u_int32_t necp_create_aggregate_route_rule(u_int32_t *rule_ids); + +// Sysctl definitions +static int sysctl_handle_necp_level SYSCTL_HANDLER_ARGS; + +SYSCTL_NODE(_net, OID_AUTO, necp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "NECP"); +SYSCTL_INT(_net_necp, NECPCTL_PASS_LOOPBACK, pass_loopback, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_pass_loopback, 0, ""); +SYSCTL_INT(_net_necp, NECPCTL_PASS_KEEPALIVES, pass_keepalives, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_pass_keepalives, 0, ""); +SYSCTL_INT(_net_necp, NECPCTL_DEBUG, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_debug, 0, ""); +SYSCTL_PROC(_net_necp, NECPCTL_DROP_ALL_LEVEL, drop_all_level, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW, &necp_drop_all_level, 0, &sysctl_handle_necp_level, "IU", ""); +SYSCTL_LONG(_net_necp, NECPCTL_SOCKET_POLICY_COUNT, socket_policy_count, CTLFLAG_LOCKED | CTLFLAG_RD, &necp_kernel_socket_policies_count, ""); +SYSCTL_LONG(_net_necp, NECPCTL_SOCKET_NON_APP_POLICY_COUNT, socket_non_app_policy_count, CTLFLAG_LOCKED | CTLFLAG_RD, &necp_kernel_socket_policies_non_app_count, ""); +SYSCTL_LONG(_net_necp, NECPCTL_IP_POLICY_COUNT, 
ip_policy_count, CTLFLAG_LOCKED | CTLFLAG_RD, &necp_kernel_ip_output_policies_count, ""); +SYSCTL_INT(_net_necp, NECPCTL_SESSION_COUNT, session_count, CTLFLAG_LOCKED | CTLFLAG_RD, &necp_session_count, 0, ""); + // Session order allocation static u_int32_t necp_allocate_new_session_order(u_int32_t priority, u_int32_t control_unit) @@ -448,15 +498,41 @@ necp_init(void) lck_rw_init(&necp_kernel_policy_lock, necp_kernel_policy_mtx_grp, necp_kernel_policy_mtx_attr); + necp_route_rule_grp_attr = lck_grp_attr_alloc_init(); + if (necp_route_rule_grp_attr == NULL) { + NECPLOG0(LOG_ERR, "lck_grp_attr_alloc_init failed"); + result = ENOMEM; + goto done; + } + + necp_route_rule_mtx_grp = lck_grp_alloc_init("necp_route_rule", necp_route_rule_grp_attr); + if (necp_route_rule_mtx_grp == NULL) { + NECPLOG0(LOG_ERR, "lck_grp_alloc_init failed"); + result = ENOMEM; + goto done; + } + + necp_route_rule_mtx_attr = lck_attr_alloc_init(); + if (necp_route_rule_mtx_attr == NULL) { + NECPLOG0(LOG_ERR, "lck_attr_alloc_init failed"); + result = ENOMEM; + goto done; + } + + lck_rw_init(&necp_route_rule_lock, necp_route_rule_mtx_grp, necp_route_rule_mtx_attr); + LIST_INIT(&necp_kernel_socket_policies); LIST_INIT(&necp_kernel_ip_output_policies); LIST_INIT(&necp_account_id_list); LIST_INIT(&necp_uuid_service_id_list); - + LIST_INIT(&necp_registered_service_list); + LIST_INIT(&necp_route_rules); + LIST_INIT(&necp_aggregate_route_rules); + necp_uuid_app_id_hashtbl = hashinit(NECP_UUID_APP_ID_HASH_SIZE, M_NECP, &necp_uuid_app_id_hash_mask); necp_uuid_app_id_hash_num_buckets = necp_uuid_app_id_hash_mask + 1; necp_num_uuid_app_id_mappings = 0; @@ -474,6 +550,10 @@ necp_init(void) necp_last_policy_id = 0; necp_last_kernel_policy_id = 0; + necp_last_uuid_id = 0; + necp_last_string_id = 0; + necp_last_route_rule_id = 0; + necp_last_aggregate_route_rule_id = 0; necp_kernel_socket_policies_gencount = 1; @@ -495,6 +575,18 @@ necp_init(void) lck_grp_attr_free(necp_kernel_policy_grp_attr); 
necp_kernel_policy_grp_attr = NULL; } + if (necp_route_rule_mtx_attr != NULL) { + lck_attr_free(necp_route_rule_mtx_attr); + necp_route_rule_mtx_attr = NULL; + } + if (necp_route_rule_mtx_grp != NULL) { + lck_grp_free(necp_route_rule_mtx_grp); + necp_route_rule_mtx_grp = NULL; + } + if (necp_route_rule_grp_attr != NULL) { + lck_grp_attr_free(necp_route_rule_grp_attr); + necp_route_rule_grp_attr = NULL; + } if (necp_kctlref != NULL) { ctl_deregister(necp_kctlref); necp_kctlref = NULL; @@ -541,6 +633,24 @@ necp_register_control(void) return (0); } +static void +necp_post_change_event(struct kev_necp_policies_changed_data *necp_event_data) +{ + struct kev_msg ev_msg; + memset(&ev_msg, 0, sizeof(ev_msg)); + + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_NECP_SUBCLASS; + ev_msg.event_code = KEV_NECP_POLICIES_CHANGED; + + ev_msg.dv[0].data_ptr = necp_event_data; + ev_msg.dv[0].data_length = sizeof(necp_event_data->changed_count); + ev_msg.dv[1].data_length = 0; + + kev_post_msg(&ev_msg); +} + static errno_t necp_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, void **unitinfo) { @@ -575,7 +685,7 @@ necp_packet_find_tlv(mbuf_t packet, int offset, u_int8_t type, int *err, int nex { size_t cursor = offset; int error = 0; - size_t curr_length; + u_int32_t curr_length; u_int8_t curr_type; *err = 0; @@ -607,10 +717,10 @@ necp_packet_find_tlv(mbuf_t packet, int offset, u_int8_t type, int *err, int nex } static int -necp_packet_get_tlv_at_offset(mbuf_t packet, int tlv_offset, size_t buff_len, void *buff, size_t *value_size) +necp_packet_get_tlv_at_offset(mbuf_t packet, int tlv_offset, u_int32_t buff_len, void *buff, u_int32_t *value_size) { - int error = 0; - size_t length; + int error = 0; + u_int32_t length; if (tlv_offset < 0) { return (error); @@ -626,7 +736,7 @@ necp_packet_get_tlv_at_offset(mbuf_t packet, int tlv_offset, size_t buff_len, vo } if (buff != NULL && buff_len > 0) { - size_t to_copy = 
(length < buff_len) ? length : buff_len; + u_int32_t to_copy = (length < buff_len) ? length : buff_len; error = mbuf_copydata(packet, tlv_offset + sizeof(u_int8_t) + sizeof(length), to_copy, buff); if (error) { return (error); @@ -637,7 +747,7 @@ necp_packet_get_tlv_at_offset(mbuf_t packet, int tlv_offset, size_t buff_len, vo } static int -necp_packet_get_tlv(mbuf_t packet, int offset, u_int8_t type, size_t buff_len, void *buff, size_t *value_size) +necp_packet_get_tlv(mbuf_t packet, int offset, u_int8_t type, u_int32_t buff_len, void *buff, u_int32_t *value_size) { int error = 0; int tlv_offset; @@ -660,10 +770,10 @@ necp_buffer_write_packet_header(u_int8_t *buffer, u_int8_t packet_type, u_int8_t } static u_int8_t * -necp_buffer_write_tlv(u_int8_t *buffer, u_int8_t type, size_t length, const void *value) +necp_buffer_write_tlv(u_int8_t *buffer, u_int8_t type, u_int32_t length, const void *value) { *(u_int8_t *)(buffer) = type; - *(size_t *)(void *)(buffer + sizeof(type)) = length; + *(u_int32_t *)(void *)(buffer + sizeof(type)) = length; if (length > 0) { memcpy((u_int8_t *)(buffer + sizeof(type) + sizeof(length)), value, length); } @@ -684,24 +794,24 @@ necp_buffer_get_tlv_type(u_int8_t *buffer, int tlv_offset) return (type ? *type : 0); } -static size_t +static u_int32_t necp_buffer_get_tlv_length(u_int8_t *buffer, int tlv_offset) { - size_t *length = NULL; + u_int32_t *length = NULL; if (buffer == NULL) { return (0); } - length = (size_t *)(void *)((u_int8_t *)buffer + tlv_offset + sizeof(u_int8_t)); + length = (u_int32_t *)(void *)((u_int8_t *)buffer + tlv_offset + sizeof(u_int8_t)); return (length ? 
*length : 0); } static u_int8_t * -necp_buffer_get_tlv_value(u_int8_t *buffer, int tlv_offset, size_t *value_size) +necp_buffer_get_tlv_value(u_int8_t *buffer, int tlv_offset, u_int32_t *value_size) { u_int8_t *value = NULL; - size_t length = necp_buffer_get_tlv_length(buffer, tlv_offset); + u_int32_t length = necp_buffer_get_tlv_length(buffer, tlv_offset); if (length == 0) { return (value); } @@ -710,19 +820,23 @@ necp_buffer_get_tlv_value(u_int8_t *buffer, int tlv_offset, size_t *value_size) *value_size = length; } - value = (u_int8_t *)((u_int8_t *)buffer + tlv_offset + sizeof(u_int8_t) + sizeof(size_t)); + value = (u_int8_t *)((u_int8_t *)buffer + tlv_offset + sizeof(u_int8_t) + sizeof(u_int32_t)); return (value); } static int -necp_buffer_find_tlv(u_int8_t *buffer, size_t buffer_length, int offset, u_int8_t type, int next) +necp_buffer_find_tlv(u_int8_t *buffer, u_int32_t buffer_length, int offset, u_int8_t type, int next) { - size_t cursor = offset; - size_t curr_length; + if (offset < 0) { + return (-1); + } + int cursor = offset; + int next_cursor; + u_int32_t curr_length; u_int8_t curr_type; - do { - if (cursor >= buffer_length) { + while (TRUE) { + if ((((u_int32_t)cursor) + sizeof(curr_type) + sizeof(curr_length)) > buffer_length) { return (-1); } if (!next) { @@ -731,13 +845,18 @@ necp_buffer_find_tlv(u_int8_t *buffer, size_t buffer_length, int offset, u_int8_ next = 0; curr_type = NECP_TLV_NIL; } - if (curr_type != type) { - curr_length = necp_buffer_get_tlv_length(buffer, cursor); - cursor += (sizeof(curr_type) + sizeof(curr_length) + curr_length); + curr_length = necp_buffer_get_tlv_length(buffer, cursor); + next_cursor = (cursor + sizeof(curr_type) + sizeof(curr_length) + curr_length); + if (curr_type == type) { + // check if entire TLV fits inside buffer + if (((u_int32_t)next_cursor) <= buffer_length) { + return (cursor); + } else { + return (-1); + } } - } while (curr_type != type); - - return (cursor); + cursor = next_cursor; + } } static bool 
@@ -760,7 +879,7 @@ necp_send_success_response(struct necp_session *session, u_int8_t packet_type, u bool success = TRUE; u_int8_t *response = NULL; u_int8_t *cursor = NULL; - size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(size_t); + size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(u_int32_t); MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK); if (response == NULL) { return (FALSE); @@ -783,7 +902,7 @@ necp_send_error_response(struct necp_session *session, u_int8_t packet_type, u_i bool success = TRUE; u_int8_t *response = NULL; u_int8_t *cursor = NULL; - size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(size_t) + sizeof(u_int32_t); + size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(u_int32_t) + sizeof(u_int32_t); MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK); if (response == NULL) { return (FALSE); @@ -806,7 +925,7 @@ necp_send_policy_id_response(struct necp_session *session, u_int8_t packet_type, bool success = TRUE; u_int8_t *response = NULL; u_int8_t *cursor = NULL; - size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(size_t) + sizeof(u_int32_t); + size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(u_int32_t) + sizeof(u_int32_t); MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK); if (response == NULL) { return (FALSE); @@ -858,6 +977,10 @@ necp_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, mbuf_t packe necp_send_error_response(session, header.packet_type, header.message_id, NECP_ERROR_INVALID_PROCESS); goto done; } + } else { + // If not locked, update the proc_uuid and proc_pid of the session + proc_getexecutableuuid(current_proc(), session->proc_uuid, sizeof(session->proc_uuid)); + session->proc_pid = proc_pid(current_proc()); } switch (header.packet_type) { @@ -954,6 +1077,10 @@ 
necp_create_session(u_int32_t control_unit) new_session->dirty = FALSE; LIST_INIT(&new_session->policies); + lck_rw_lock_exclusive(&necp_kernel_policy_lock); + necp_session_count++; + lck_rw_done(&necp_kernel_policy_lock); + done: return (new_session); } @@ -975,34 +1102,48 @@ necp_delete_session(struct necp_session *session) NECPLOG0(LOG_DEBUG, "Deleted NECP session"); } FREE(session, M_NECP); + + lck_rw_lock_exclusive(&necp_kernel_policy_lock); + necp_session_count--; + lck_rw_done(&necp_kernel_policy_lock); } } // Session Policy Management static inline u_int8_t -necp_policy_result_get_type_from_buffer(u_int8_t *buffer, size_t length) +necp_policy_result_get_type_from_buffer(u_int8_t *buffer, u_int32_t length) { return ((buffer && length >= sizeof(u_int8_t)) ? buffer[0] : 0); } -static inline size_t -necp_policy_result_get_parameter_length_from_buffer(u_int8_t *buffer, size_t length) +static inline u_int32_t +necp_policy_result_get_parameter_length_from_buffer(u_int8_t *buffer, u_int32_t length) { return ((buffer && length > sizeof(u_int8_t)) ? (length - sizeof(u_int8_t)) : 0); } static inline u_int8_t * -necp_policy_result_get_parameter_pointer_from_buffer(u_int8_t *buffer, size_t length) +necp_policy_result_get_parameter_pointer_from_buffer(u_int8_t *buffer, u_int32_t length) { return ((buffer && length > sizeof(u_int8_t)) ? 
(buffer + sizeof(u_int8_t)) : NULL); } static bool -necp_policy_result_is_valid(u_int8_t *buffer, size_t length) +necp_policy_result_requires_route_rules(u_int8_t *buffer, u_int32_t length) +{ + u_int8_t type = necp_policy_result_get_type_from_buffer(buffer, length); + if (type == NECP_POLICY_RESULT_ROUTE_RULES) { + return (TRUE); + } + return (FALSE); +} + +static bool +necp_policy_result_is_valid(u_int8_t *buffer, u_int32_t length) { bool validated = FALSE; u_int8_t type = necp_policy_result_get_type_from_buffer(buffer, length); - size_t parameter_length = necp_policy_result_get_parameter_length_from_buffer(buffer, length); + u_int32_t parameter_length = necp_policy_result_get_parameter_length_from_buffer(buffer, length); switch (type) { case NECP_POLICY_RESULT_PASS: { validated = TRUE; @@ -1042,10 +1183,15 @@ necp_policy_result_is_valid(u_int8_t *buffer, size_t length) } break; } + case NECP_POLICY_RESULT_ROUTE_RULES: { + validated = TRUE; + break; + } case NECP_POLICY_RESULT_TRIGGER: case NECP_POLICY_RESULT_TRIGGER_IF_NEEDED: case NECP_POLICY_RESULT_TRIGGER_SCOPED: - case NECP_POLICY_RESULT_NO_TRIGGER_SCOPED: { + case NECP_POLICY_RESULT_NO_TRIGGER_SCOPED: + case NECP_POLICY_RESULT_USE_NETAGENT: { if (parameter_length >= sizeof(uuid_t)) { validated = TRUE; } @@ -1065,43 +1211,43 @@ necp_policy_result_is_valid(u_int8_t *buffer, size_t length) } static inline u_int8_t -necp_policy_condition_get_type_from_buffer(u_int8_t *buffer, size_t length) +necp_policy_condition_get_type_from_buffer(u_int8_t *buffer, u_int32_t length) { return ((buffer && length >= sizeof(u_int8_t)) ? buffer[0] : 0); } static inline u_int8_t -necp_policy_condition_get_flags_from_buffer(u_int8_t *buffer, size_t length) +necp_policy_condition_get_flags_from_buffer(u_int8_t *buffer, u_int32_t length) { return ((buffer && length >= (2 * sizeof(u_int8_t))) ? 
buffer[1] : 0); } -static inline size_t -necp_policy_condition_get_value_length_from_buffer(u_int8_t *buffer, size_t length) +static inline u_int32_t +necp_policy_condition_get_value_length_from_buffer(u_int8_t *buffer, u_int32_t length) { return ((buffer && length >= (2 * sizeof(u_int8_t))) ? (length - (2 * sizeof(u_int8_t))) : 0); } static inline u_int8_t * -necp_policy_condition_get_value_pointer_from_buffer(u_int8_t *buffer, size_t length) +necp_policy_condition_get_value_pointer_from_buffer(u_int8_t *buffer, u_int32_t length) { return ((buffer && length > (2 * sizeof(u_int8_t))) ? (buffer + (2 * sizeof(u_int8_t))) : NULL); } static inline bool -necp_policy_condition_is_default(u_int8_t *buffer, size_t length) +necp_policy_condition_is_default(u_int8_t *buffer, u_int32_t length) { return (necp_policy_condition_get_type_from_buffer(buffer, length) == NECP_POLICY_CONDITION_DEFAULT); } static inline bool -necp_policy_condition_is_application(u_int8_t *buffer, size_t length) +necp_policy_condition_is_application(u_int8_t *buffer, u_int32_t length) { return (necp_policy_condition_get_type_from_buffer(buffer, length) == NECP_POLICY_CONDITION_APPLICATION); } static inline bool -necp_policy_condition_requires_application(u_int8_t *buffer, size_t length) +necp_policy_condition_requires_application(u_int8_t *buffer, u_int32_t length) { u_int8_t type = necp_policy_condition_get_type_from_buffer(buffer, length); return (type == NECP_POLICY_CONDITION_REAL_APPLICATION || @@ -1109,7 +1255,7 @@ necp_policy_condition_requires_application(u_int8_t *buffer, size_t length) } static bool -necp_policy_condition_is_valid(u_int8_t *buffer, size_t length, u_int8_t policy_result_type) +necp_policy_condition_is_valid(u_int8_t *buffer, u_int32_t length, u_int8_t policy_result_type) { bool validated = FALSE; bool result_cannot_have_ip_layer = (policy_result_type == NECP_POLICY_RESULT_SOCKET_DIVERT || @@ -1118,8 +1264,10 @@ necp_policy_condition_is_valid(u_int8_t *buffer, size_t length, 
u_int8_t policy_ policy_result_type == NECP_POLICY_RESULT_TRIGGER_IF_NEEDED || policy_result_type == NECP_POLICY_RESULT_TRIGGER_SCOPED || policy_result_type == NECP_POLICY_RESULT_NO_TRIGGER_SCOPED || - policy_result_type == NECP_POLICY_RESULT_SOCKET_SCOPED) ? TRUE : FALSE; - size_t condition_length = necp_policy_condition_get_value_length_from_buffer(buffer, length); + policy_result_type == NECP_POLICY_RESULT_SOCKET_SCOPED || + policy_result_type == NECP_POLICY_RESULT_ROUTE_RULES || + policy_result_type == NECP_POLICY_RESULT_USE_NETAGENT) ? TRUE : FALSE; + u_int32_t condition_length = necp_policy_condition_get_value_length_from_buffer(buffer, length); u_int8_t *condition_value = necp_policy_condition_get_value_pointer_from_buffer(buffer, length); u_int8_t type = necp_policy_condition_get_type_from_buffer(buffer, length); u_int8_t flags = necp_policy_condition_get_flags_from_buffer(buffer, length); @@ -1203,6 +1351,40 @@ necp_policy_condition_is_valid(u_int8_t *buffer, size_t length, u_int8_t policy_ return (validated); } +static bool +necp_policy_route_rule_is_default(u_int8_t *buffer, u_int32_t length) +{ + return (necp_policy_condition_get_value_length_from_buffer(buffer, length) == 0 && + necp_policy_condition_get_flags_from_buffer(buffer, length) == 0); +} + +static bool +necp_policy_route_rule_is_valid(u_int8_t *buffer, u_int32_t length) +{ + bool validated = FALSE; + u_int8_t type = necp_policy_condition_get_type_from_buffer(buffer, length); + switch (type) { + case NECP_ROUTE_RULE_ALLOW_INTERFACE: { + validated = TRUE; + break; + } + case NECP_ROUTE_RULE_DENY_INTERFACE: { + validated = TRUE; + break; + } + default: { + validated = FALSE; + break; + } + } + + if (necp_debug) { + NECPLOG(LOG_DEBUG, "Policy route rule type %d, valid %d", type, validated); + } + + return (validated); +} + static void necp_handle_set_session_priority(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) { @@ -1258,7 +1440,7 @@ static void 
necp_handle_lock_session_to_proc(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) { #pragma unused(packet, offset) - proc_getexecutableuuid(current_proc(), session->proc_uuid, sizeof(session->proc_uuid)); + // proc_uuid already filled out session->proc_locked = TRUE; necp_send_success_response(session, NECP_PACKET_TYPE_LOCK_SESSION_TO_PROC, message_id); } @@ -1353,10 +1535,10 @@ necp_handle_unregister_service(struct necp_session *session, u_int32_t message_i } lck_rw_done(&necp_kernel_policy_lock); - necp_send_success_response(session, NECP_PACKET_TYPE_REGISTER_SERVICE, message_id); + necp_send_success_response(session, NECP_PACKET_TYPE_UNREGISTER_SERVICE, message_id); return; fail: - necp_send_error_response(session, NECP_PACKET_TYPE_REGISTER_SERVICE, message_id, response_error); + necp_send_error_response(session, NECP_PACKET_TYPE_UNREGISTER_SERVICE, message_id, response_error); } static void @@ -1367,9 +1549,14 @@ necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_ bool has_application_condition = FALSE; bool requires_application_condition = FALSE; u_int8_t *conditions_array = NULL; - size_t conditions_array_size = 0; + u_int32_t conditions_array_size = 0; int conditions_array_cursor; + bool has_default_route_rule = FALSE; + u_int8_t *route_rules_array = NULL; + u_int32_t route_rules_array_size = 0; + int route_rules_array_cursor; + int cursor; int error = 0; u_int32_t response_error = NECP_ERROR_INTERNAL; @@ -1377,7 +1564,7 @@ necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_ necp_policy_order order = 0; struct necp_session_policy *policy = NULL; u_int8_t *policy_result = NULL; - size_t policy_result_size = 0; + u_int32_t policy_result_size = 0; // Read policy order error = necp_packet_get_tlv(packet, offset, NECP_TLV_POLICY_ORDER, sizeof(order), &order, NULL); @@ -1413,15 +1600,79 @@ necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_ goto 
fail; } + if (necp_policy_result_requires_route_rules(policy_result, policy_result_size)) { + // Read route rules conditions + for (cursor = necp_packet_find_tlv(packet, offset, NECP_TLV_ROUTE_RULE, &error, 0); + cursor >= 0; + cursor = necp_packet_find_tlv(packet, cursor, NECP_TLV_ROUTE_RULE, &error, 1)) { + u_int32_t route_rule_size = 0; + necp_packet_get_tlv_at_offset(packet, cursor, 0, NULL, &route_rule_size); + if (route_rule_size > 0) { + route_rules_array_size += (sizeof(u_int8_t) + sizeof(u_int32_t) + route_rule_size); + } + } + + if (route_rules_array_size == 0) { + NECPLOG0(LOG_ERR, "Failed to get policy route rules"); + response_error = NECP_ERROR_INVALID_TLV; + goto fail; + } + + MALLOC(route_rules_array, u_int8_t *, route_rules_array_size, M_NECP, M_WAITOK); + if (route_rules_array == NULL) { + NECPLOG(LOG_ERR, "Failed to allocate a policy route rules array (size %d)", route_rules_array_size); + response_error = NECP_ERROR_INTERNAL; + goto fail; + } + + route_rules_array_cursor = 0; + for (cursor = necp_packet_find_tlv(packet, offset, NECP_TLV_ROUTE_RULE, &error, 0); + cursor >= 0; + cursor = necp_packet_find_tlv(packet, cursor, NECP_TLV_ROUTE_RULE, &error, 1)) { + u_int8_t route_rule_type = NECP_TLV_ROUTE_RULE; + u_int32_t route_rule_size = 0; + necp_packet_get_tlv_at_offset(packet, cursor, 0, NULL, &route_rule_size); + if (route_rule_size > 0 && route_rule_size <= (route_rules_array_size - route_rules_array_cursor)) { + // Add type + memcpy((route_rules_array + route_rules_array_cursor), &route_rule_type, sizeof(route_rule_type)); + route_rules_array_cursor += sizeof(route_rule_type); + + // Add length + memcpy((route_rules_array + route_rules_array_cursor), &route_rule_size, sizeof(route_rule_size)); + route_rules_array_cursor += sizeof(route_rule_size); + + // Add value + necp_packet_get_tlv_at_offset(packet, cursor, route_rule_size, (route_rules_array + route_rules_array_cursor), NULL); + + if (!necp_policy_route_rule_is_valid((route_rules_array + 
route_rules_array_cursor), route_rule_size)) { + NECPLOG0(LOG_ERR, "Failed to validate policy route rule"); + response_error = NECP_ERROR_ROUTE_RULES_INVALID; + goto fail; + } + + if (necp_policy_route_rule_is_default((route_rules_array + route_rules_array_cursor), route_rule_size)) { + if (has_default_route_rule) { + NECPLOG0(LOG_ERR, "Failed to validate route rule; contained multiple default route rules"); + response_error = NECP_ERROR_ROUTE_RULES_INVALID; + goto fail; + } + has_default_route_rule = TRUE; + } + + route_rules_array_cursor += route_rule_size; + } + } + } + // Read policy conditions for (cursor = necp_packet_find_tlv(packet, offset, NECP_TLV_POLICY_CONDITION, &error, 0); cursor >= 0; cursor = necp_packet_find_tlv(packet, cursor, NECP_TLV_POLICY_CONDITION, &error, 1)) { - size_t condition_size = 0; + u_int32_t condition_size = 0; necp_packet_get_tlv_at_offset(packet, cursor, 0, NULL, &condition_size); if (condition_size > 0) { - conditions_array_size += (sizeof(u_int8_t) + sizeof(size_t) + condition_size); + conditions_array_size += (sizeof(u_int8_t) + sizeof(u_int32_t) + condition_size); } } @@ -1442,7 +1693,7 @@ necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_ cursor >= 0; cursor = necp_packet_find_tlv(packet, cursor, NECP_TLV_POLICY_CONDITION, &error, 1)) { u_int8_t condition_type = NECP_TLV_POLICY_CONDITION; - size_t condition_size = 0; + u_int32_t condition_size = 0; necp_packet_get_tlv_at_offset(packet, cursor, 0, NULL, &condition_size); if (condition_size > 0 && condition_size <= (conditions_array_size - conditions_array_cursor)) { // Add type @@ -1490,7 +1741,7 @@ necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_ goto fail; } - if ((policy = necp_policy_create(session, order, conditions_array, conditions_array_size, policy_result, policy_result_size)) == NULL) { + if ((policy = necp_policy_create(session, order, conditions_array, conditions_array_size, route_rules_array, 
route_rules_array_size, policy_result, policy_result_size)) == NULL) { response_error = NECP_ERROR_INTERNAL; goto fail; } @@ -1505,6 +1756,9 @@ necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_ if (conditions_array != NULL) { FREE(conditions_array, M_NECP); } + if (route_rules_array != NULL) { + FREE(route_rules_array, M_NECP); + } necp_send_error_response(session, NECP_PACKET_TYPE_POLICY_ADD, message_id, response_error); } @@ -1518,9 +1772,9 @@ necp_handle_policy_get(struct necp_session *session, u_int32_t message_id, mbuf_ u_int8_t *cursor = NULL; u_int32_t response_error = NECP_ERROR_INTERNAL; necp_policy_id policy_id = 0; - size_t order_tlv_size = 0; - size_t result_tlv_size = 0; - size_t response_size = 0; + u_int32_t order_tlv_size = 0; + u_int32_t result_tlv_size = 0; + u_int32_t response_size = 0; struct necp_session_policy *policy = NULL; @@ -1539,8 +1793,8 @@ necp_handle_policy_get(struct necp_session *session, u_int32_t message_id, mbuf_ goto fail; } - order_tlv_size = sizeof(u_int8_t) + sizeof(size_t) + sizeof(necp_policy_order); - result_tlv_size = (policy->result_size ? (sizeof(u_int8_t) + sizeof(size_t) + policy->result_size) : 0); + order_tlv_size = sizeof(u_int8_t) + sizeof(u_int32_t) + sizeof(necp_policy_order); + result_tlv_size = (policy->result_size ? 
(sizeof(u_int8_t) + sizeof(u_int32_t) + policy->result_size) : 0); response_size = sizeof(struct necp_packet_header) + order_tlv_size + result_tlv_size + policy->conditions_size; MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK); if (response == NULL) { @@ -1615,8 +1869,8 @@ static void necp_handle_policy_list_all(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) { #pragma unused(packet, offset) - size_t tlv_size = (sizeof(u_int8_t) + sizeof(size_t) + sizeof(u_int32_t)); - size_t response_size = 0; + u_int32_t tlv_size = (sizeof(u_int8_t) + sizeof(u_int32_t) + sizeof(u_int32_t)); + u_int32_t response_size = 0; u_int8_t *response = NULL; u_int8_t *cursor = NULL; int num_policies = 0; @@ -1686,7 +1940,7 @@ necp_policy_get_new_id(void) } static struct necp_session_policy * -necp_policy_create(struct necp_session *session, necp_policy_order order, u_int8_t *conditions_array, size_t conditions_array_size, u_int8_t *result, size_t result_size) +necp_policy_create(struct necp_session *session, necp_policy_order order, u_int8_t *conditions_array, u_int32_t conditions_array_size, u_int8_t *route_rules_array, u_int32_t route_rules_array_size, u_int8_t *result, u_int32_t result_size) { struct necp_session_policy *new_policy = NULL; struct necp_session_policy *tmp_policy = NULL; @@ -1707,6 +1961,8 @@ necp_policy_create(struct necp_session *session, necp_policy_order order, u_int8 new_policy->order = order; new_policy->conditions = conditions_array; new_policy->conditions_size = conditions_array_size; + new_policy->route_rules = route_rules_array; + new_policy->route_rules_size = route_rules_array_size; new_policy->result = result; new_policy->result_size = result_size; new_policy->id = necp_policy_get_new_id(); @@ -1745,17 +2001,17 @@ necp_policy_get_result_type(struct necp_session_policy *policy) return (policy ? 
necp_policy_result_get_type_from_buffer(policy->result, policy->result_size) : 0); } -static inline size_t +static inline u_int32_t necp_policy_get_result_parameter_length(struct necp_session_policy *policy) { return (policy ? necp_policy_result_get_parameter_length_from_buffer(policy->result, policy->result_size) : 0); } static bool -necp_policy_get_result_parameter(struct necp_session_policy *policy, u_int8_t *parameter_buffer, size_t parameter_buffer_length) +necp_policy_get_result_parameter(struct necp_session_policy *policy, u_int8_t *parameter_buffer, u_int32_t parameter_buffer_length) { if (policy) { - size_t parameter_length = necp_policy_result_get_parameter_length_from_buffer(policy->result, policy->result_size); + u_int32_t parameter_length = necp_policy_result_get_parameter_length_from_buffer(policy->result, policy->result_size); if (parameter_buffer_length >= parameter_length) { u_int8_t *parameter = necp_policy_result_get_parameter_pointer_from_buffer(policy->result, policy->result_size); if (parameter && parameter_buffer) { @@ -1847,9 +2103,9 @@ necp_policy_unapply(struct necp_session_policy *policy) necp_remove_uuid_app_id_mapping(policy->applied_real_app_uuid, NULL, FALSE); uuid_clear(policy->applied_real_app_uuid); } - if (!uuid_is_null(policy->applied_service_uuid)) { - necp_remove_uuid_service_id_mapping(policy->applied_service_uuid); - uuid_clear(policy->applied_service_uuid); + if (!uuid_is_null(policy->applied_result_uuid)) { + necp_remove_uuid_service_id_mapping(policy->applied_result_uuid); + uuid_clear(policy->applied_result_uuid); } // Release string mappings @@ -1859,6 +2115,12 @@ necp_policy_unapply(struct necp_session_policy *policy) policy->applied_account = NULL; } + // Release route rule + if (policy->applied_route_rules_id != 0) { + necp_remove_route_rule(&necp_route_rules, policy->applied_route_rules_id); + policy->applied_route_rules_id = 0; + } + // Remove socket policies for (i = 0; i < MAX_KERNEL_SOCKET_POLICIES; i++) { if 
(policy->kernel_socket_policies[i] != 0) { @@ -1926,7 +2188,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli union necp_sockaddr_union cond_remote_start; union necp_sockaddr_union cond_remote_end; u_int8_t cond_remote_prefix = 0; - size_t offset = 0; + u_int32_t offset = 0; u_int8_t ultimate_result = 0; u_int32_t secondary_result = 0; necp_kernel_policy_result_parameter secondary_result_parameter; @@ -1943,13 +2205,13 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli // Process conditions while (offset < policy->conditions_size) { - size_t length = 0; + u_int32_t length = 0; u_int8_t *value = necp_buffer_get_tlv_value(policy->conditions, offset, &length); u_int8_t condition_type = necp_policy_condition_get_type_from_buffer(value, length); u_int8_t condition_flags = necp_policy_condition_get_flags_from_buffer(value, length); bool condition_is_negative = condition_flags & NECP_POLICY_CONDITION_FLAGS_NEGATIVE; - size_t condition_length = necp_policy_condition_get_value_length_from_buffer(value, length); + u_int32_t condition_length = necp_policy_condition_get_value_length_from_buffer(value, length); u_int8_t *condition_value = necp_policy_condition_get_value_pointer_from_buffer(value, length); switch (condition_type) { case NECP_POLICY_CONDITION_DEFAULT: { @@ -2158,7 +2420,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli } } - offset += sizeof(u_int8_t) + sizeof(size_t) + length; + offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length; } // Process result @@ -2210,7 +2472,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli } case NECP_POLICY_RESULT_IP_TUNNEL: { struct necp_policy_result_ip_tunnel tunnel_parameters; - size_t tunnel_parameters_length = necp_policy_get_result_parameter_length(policy); + u_int32_t tunnel_parameters_length = necp_policy_get_result_parameter_length(policy); if (tunnel_parameters_length > 
sizeof(u_int32_t) && tunnel_parameters_length <= sizeof(struct necp_policy_result_ip_tunnel) && necp_policy_get_result_parameter(policy, (u_int8_t *)&tunnel_parameters, sizeof(tunnel_parameters))) { @@ -2248,7 +2510,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli case NECP_POLICY_RESULT_TRIGGER_SCOPED: case NECP_POLICY_RESULT_NO_TRIGGER_SCOPED: { struct necp_policy_result_service service_parameters; - size_t service_result_length = necp_policy_get_result_parameter_length(policy); + u_int32_t service_result_length = necp_policy_get_result_parameter_length(policy); bool has_extra_service_data = FALSE; if (service_result_length >= (sizeof(service_parameters))) { has_extra_service_data = TRUE; @@ -2256,7 +2518,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli if (necp_policy_get_result_parameter(policy, (u_int8_t *)&service_parameters, sizeof(service_parameters))) { ultimate_result_parameter.service.identifier = necp_create_uuid_service_id_mapping(service_parameters.identifier); if (ultimate_result_parameter.service.identifier != 0) { - uuid_copy(policy->applied_service_uuid, service_parameters.identifier); + uuid_copy(policy->applied_result_uuid, service_parameters.identifier); socket_layer_non_id_conditions = TRUE; if (has_extra_service_data) { ultimate_result_parameter.service.data = service_parameters.data; @@ -2267,8 +2529,19 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli } break; } + case NECP_POLICY_RESULT_USE_NETAGENT: { + uuid_t netagent_uuid; + if (necp_policy_get_result_parameter(policy, (u_int8_t *)&netagent_uuid, sizeof(netagent_uuid))) { + ultimate_result_parameter.netagent_id = necp_create_uuid_service_id_mapping(netagent_uuid); + if (ultimate_result_parameter.netagent_id != 0) { + uuid_copy(policy->applied_result_uuid, netagent_uuid); + socket_layer_non_id_conditions = TRUE; + } + } + break; + } case NECP_POLICY_RESULT_SOCKET_SCOPED: { - size_t 
interface_name_length = necp_policy_get_result_parameter_length(policy); + u_int32_t interface_name_length = necp_policy_get_result_parameter_length(policy); if (interface_name_length <= IFXNAMSIZ && interface_name_length > 0) { char interface_name[IFXNAMSIZ]; ifnet_t scope_interface = NULL; @@ -2280,13 +2553,23 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli } } } + case NECP_POLICY_RESULT_ROUTE_RULES: { + if (policy->route_rules != NULL && policy->route_rules_size > 0) { + u_int32_t route_rule_id = necp_create_route_rule(&necp_route_rules, policy->route_rules, policy->route_rules_size); + if (route_rule_id > 0) { + policy->applied_route_rules_id = route_rule_id; + ultimate_result_parameter.route_rule_id = route_rule_id; + socket_layer_non_id_conditions = TRUE; + } + } + } default: { break; } } if (socket_layer_non_id_conditions) { - necp_kernel_policy_id policy_id = necp_kernel_socket_policy_add(policy->id, policy->order, session->session_order, master_condition_mask, master_condition_negated_mask, cond_app_id, cond_real_app_id, cond_account_id, cond_domain, cond_pid, cond_uid, cond_bound_interface, cond_traffic_class, cond_protocol, &cond_local_start, &cond_local_end, cond_local_prefix, &cond_remote_start, &cond_remote_end, cond_remote_prefix, ultimate_result, ultimate_result_parameter); + necp_kernel_policy_id policy_id = necp_kernel_socket_policy_add(policy->id, policy->order, session->session_order, session->proc_pid, master_condition_mask, master_condition_negated_mask, cond_app_id, cond_real_app_id, cond_account_id, cond_domain, cond_pid, cond_uid, cond_bound_interface, cond_traffic_class, cond_protocol, &cond_local_start, &cond_local_end, cond_local_prefix, &cond_remote_start, &cond_remote_end, cond_remote_prefix, ultimate_result, ultimate_result_parameter); if (policy_id == 0) { NECPLOG0(LOG_DEBUG, "Error applying socket kernel policy"); @@ -2298,7 +2581,7 @@ necp_policy_apply(struct necp_session *session, struct 
necp_session_policy *poli } if (ip_output_layer_non_id_conditions) { - necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_NON_ID_CONDITIONS, session->session_order, master_condition_mask, master_condition_negated_mask, NECP_KERNEL_POLICY_ID_NONE, cond_bound_interface, 0, cond_protocol, &cond_local_start, &cond_local_end, cond_local_prefix, &cond_remote_start, &cond_remote_end, cond_remote_prefix, ultimate_result, ultimate_result_parameter); + necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_NON_ID_CONDITIONS, session->session_order, session->proc_pid, master_condition_mask, master_condition_negated_mask, NECP_KERNEL_POLICY_ID_NONE, cond_bound_interface, 0, cond_protocol, &cond_local_start, &cond_local_end, cond_local_prefix, &cond_remote_start, &cond_remote_end, cond_remote_prefix, ultimate_result, ultimate_result_parameter); if (policy_id == 0) { NECPLOG0(LOG_DEBUG, "Error applying IP output kernel policy"); @@ -2309,7 +2592,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli } if (ip_output_layer_id_condition) { - necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_ID_CONDITION, session->session_order, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, cond_ip_output_layer_id, NULL, 0, 0, NULL, NULL, 0, NULL, NULL, 0, ultimate_result, ultimate_result_parameter); + necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_ID_CONDITION, session->session_order, session->proc_pid, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, cond_ip_output_layer_id, NULL, 0, 0, NULL, NULL, 0, NULL, NULL, 0, ultimate_result, ultimate_result_parameter); if (policy_id == 0) { NECPLOG0(LOG_DEBUG, "Error applying IP output kernel 
policy"); @@ -2321,7 +2604,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli // Extra policies for IP Output tunnels for when packets loop back if (ip_output_layer_tunnel_condition_from_id) { - necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_NON_ID_TUNNEL_CONDITION, session->session_order, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_NON_ID_CONDITIONS], NULL, cond_last_interface_index, 0, NULL, NULL, 0, NULL, NULL, 0, secondary_result, secondary_result_parameter); + necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_NON_ID_TUNNEL_CONDITION, session->session_order, session->proc_pid, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_NON_ID_CONDITIONS], NULL, cond_last_interface_index, 0, NULL, NULL, 0, NULL, NULL, 0, secondary_result, secondary_result_parameter); if (policy_id == 0) { NECPLOG0(LOG_DEBUG, "Error applying IP output kernel policy"); @@ -2332,7 +2615,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli } if (ip_output_layer_tunnel_condition_from_id) { - necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_ID_TUNNEL_CONDITION, session->session_order, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_ID_CONDITION], NULL, cond_last_interface_index, 0, NULL, NULL, 0, NULL, NULL, 0, secondary_result, secondary_result_parameter); + necp_kernel_policy_id policy_id = 
necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_ID_TUNNEL_CONDITION, session->session_order, session->proc_pid, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_ID_CONDITION], NULL, cond_last_interface_index, 0, NULL, NULL, 0, NULL, NULL, 0, secondary_result, secondary_result_parameter); if (policy_id == 0) { NECPLOG0(LOG_DEBUG, "Error applying IP output kernel policy"); @@ -2355,6 +2638,8 @@ necp_policy_apply_all(struct necp_session *session) { struct necp_session_policy *policy = NULL; struct necp_session_policy *temp_policy = NULL; + struct kev_necp_policies_changed_data kev_data; + kev_data.changed_count = 0; lck_rw_lock_exclusive(&necp_kernel_policy_lock); @@ -2386,6 +2671,8 @@ necp_policy_apply_all(struct necp_session *session) lck_rw_done(&necp_kernel_policy_lock); + necp_post_change_event(&kev_data); + if (necp_debug) { NECPLOG0(LOG_DEBUG, "Applied NECP policies"); } @@ -2417,7 +2704,7 @@ necp_kernel_policy_get_new_id(void) #define NECP_KERNEL_VALID_SOCKET_CONDITIONS (NECP_KERNEL_CONDITION_APP_ID | NECP_KERNEL_CONDITION_REAL_APP_ID | NECP_KERNEL_CONDITION_DOMAIN | NECP_KERNEL_CONDITION_ACCOUNT_ID | NECP_KERNEL_CONDITION_PID | NECP_KERNEL_CONDITION_UID | NECP_KERNEL_CONDITION_ALL_INTERFACES | NECP_KERNEL_CONDITION_BOUND_INTERFACE | NECP_KERNEL_CONDITION_TRAFFIC_CLASS | NECP_KERNEL_CONDITION_PROTOCOL | NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX | NECP_KERNEL_CONDITION_ENTITLEMENT) static necp_kernel_policy_id -necp_kernel_socket_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, u_int32_t session_order, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id 
cond_real_app_id, u_int32_t cond_account_id, char *cond_domain, pid_t cond_pid, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter) +necp_kernel_socket_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, u_int32_t session_order, int session_pid, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, u_int32_t cond_account_id, char *cond_domain, pid_t cond_pid, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter) { struct necp_kernel_socket_policy *new_kernel_policy = NULL; struct necp_kernel_socket_policy *tmp_kernel_policy = NULL; @@ -2432,6 +2719,7 @@ necp_kernel_socket_policy_add(necp_policy_id parent_policy_id, necp_policy_order new_kernel_policy->id = necp_kernel_policy_get_new_id(); new_kernel_policy->order = order; new_kernel_policy->session_order = session_order; + new_kernel_policy->session_pid = session_pid; // Sanitize condition mask new_kernel_policy->condition_mask = (condition_mask & NECP_KERNEL_VALID_SOCKET_CONDITIONS); @@ -2561,19 +2849,192 @@ necp_kernel_socket_policy_delete(necp_kernel_policy_id policy_id) return (FALSE); } +#define MAX_RESULT_STRING_LEN 64 +static inline const char * 
+necp_get_result_description(char *result_string, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter) +{ + uuid_string_t uuid_string; + switch (result) { + case NECP_KERNEL_POLICY_RESULT_NONE: { + return ("None"); + } + case NECP_KERNEL_POLICY_RESULT_PASS: { + return ("Pass"); + } + case NECP_KERNEL_POLICY_RESULT_SKIP: { + return ("Skip"); + } + case NECP_KERNEL_POLICY_RESULT_DROP: { + return ("Drop"); + } + case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT: { + snprintf(result_string, MAX_RESULT_STRING_LEN, "SocketDivert (%d)", result_parameter.flow_divert_control_unit); + break; + } + case NECP_KERNEL_POLICY_RESULT_SOCKET_FILTER: { + snprintf(result_string, MAX_RESULT_STRING_LEN, "SocketFilter (%d)", result_parameter.filter_control_unit); + break; + } + case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: { + ifnet_t interface = ifindex2ifnet[result_parameter.tunnel_interface_index]; + snprintf(result_string, MAX_RESULT_STRING_LEN, "IPTunnel (%s%d)", ifnet_name(interface), ifnet_unit(interface)); + break; + } + case NECP_KERNEL_POLICY_RESULT_IP_FILTER: { + return ("IPFilter"); + } + case NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED: { + ifnet_t interface = ifindex2ifnet[result_parameter.scoped_interface_index]; + snprintf(result_string, MAX_RESULT_STRING_LEN, "SocketScoped (%s%d)", ifnet_name(interface), ifnet_unit(interface)); + break; + } + case NECP_KERNEL_POLICY_RESULT_ROUTE_RULES: { + int index = 0; + char interface_names[IFXNAMSIZ][MAX_ROUTE_RULE_INTERFACES]; + struct necp_route_rule *route_rule = necp_lookup_route_rule_locked(&necp_route_rules, result_parameter.route_rule_id); + if (route_rule != NULL) { + bool default_drop = (route_rule->default_action == NECP_ROUTE_RULE_DENY_INTERFACE); + for (index = 0; index < MAX_ROUTE_RULE_INTERFACES; index++) { + if (route_rule->exception_if_indices[index] != 0) { + ifnet_t interface = ifindex2ifnet[route_rule->exception_if_indices[index]]; + snprintf(interface_names[index], IFXNAMSIZ, "%s%d", 
ifnet_name(interface), ifnet_unit(interface)); + } else { + memset(interface_names[index], 0, IFXNAMSIZ); + } + } + if (default_drop) { + snprintf(result_string, MAX_RESULT_STRING_LEN, "RouteRules (Only %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)", + (route_rule->cellular_action == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? "Cell " : "", + (route_rule->wifi_action == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? "WiFi " : "", + (route_rule->wired_action == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? "Wired " : "", + (route_rule->expensive_action == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? "Exp " : "", + (route_rule->exception_if_actions[0] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[0] : "", + (route_rule->exception_if_actions[0] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "", + (route_rule->exception_if_actions[1] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[1] : "", + (route_rule->exception_if_actions[1] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "", + (route_rule->exception_if_actions[2] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[2] : "", + (route_rule->exception_if_actions[2] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "", + (route_rule->exception_if_actions[3] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[3] : "", + (route_rule->exception_if_actions[3] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "", + (route_rule->exception_if_actions[4] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[4] : "", + (route_rule->exception_if_actions[4] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "", + (route_rule->exception_if_actions[5] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[5] : "", + (route_rule->exception_if_actions[5] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "", + (route_rule->exception_if_actions[6] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[6] : "", + (route_rule->exception_if_actions[6] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "", + (route_rule->exception_if_actions[7] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? 
interface_names[7] : "", + (route_rule->exception_if_actions[7] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "", + (route_rule->exception_if_actions[8] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[8] : "", + (route_rule->exception_if_actions[8] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "", + (route_rule->exception_if_actions[9] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[9] : ""); + } else { + snprintf(result_string, MAX_RESULT_STRING_LEN, "RouteRules (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)", + (route_rule->cellular_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!Cell " : "", + (route_rule->wifi_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!WiFi " : "", + (route_rule->wired_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!Wired " : "", + (route_rule->expensive_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!Exp " : "", + (route_rule->exception_if_actions[0] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "", + (route_rule->exception_if_actions[0] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[0] : "", + (route_rule->exception_if_actions[1] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "", + (route_rule->exception_if_actions[1] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[1] : "", + (route_rule->exception_if_actions[2] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "", + (route_rule->exception_if_actions[2] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[2] : "", + (route_rule->exception_if_actions[3] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "", + (route_rule->exception_if_actions[3] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[3] : "", + (route_rule->exception_if_actions[4] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "", + (route_rule->exception_if_actions[4] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[4] : "", + (route_rule->exception_if_actions[5] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "", + (route_rule->exception_if_actions[5] == NECP_ROUTE_RULE_DENY_INTERFACE) ? 
interface_names[5] : "", + (route_rule->exception_if_actions[6] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "", + (route_rule->exception_if_actions[6] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[6] : "", + (route_rule->exception_if_actions[7] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "", + (route_rule->exception_if_actions[7] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[7] : "", + (route_rule->exception_if_actions[8] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "", + (route_rule->exception_if_actions[8] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[8] : "", + (route_rule->exception_if_actions[9] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "", + (route_rule->exception_if_actions[9] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[9] : ""); + } + } else { + snprintf(result_string, MAX_RESULT_STRING_LEN, "RouteRules (Unknown)"); + } + break; + } + case NECP_KERNEL_POLICY_RESULT_USE_NETAGENT: { + bool found_mapping = FALSE; + struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(result_parameter.netagent_id); + if (mapping != NULL) { + uuid_unparse(mapping->uuid, uuid_string); + found_mapping = TRUE; + } + snprintf(result_string, MAX_RESULT_STRING_LEN, "UseNetAgent (%s)", found_mapping ? uuid_string : "Unknown"); + break; + } + case NECP_POLICY_RESULT_TRIGGER: { + bool found_mapping = FALSE; + struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(result_parameter.service.identifier); + if (mapping != NULL) { + uuid_unparse(mapping->uuid, uuid_string); + found_mapping = TRUE; + } + snprintf(result_string, MAX_RESULT_STRING_LEN, "Trigger (%s.%d)", found_mapping ? 
uuid_string : "Unknown", result_parameter.service.data); + break; + } + case NECP_POLICY_RESULT_TRIGGER_IF_NEEDED: { + bool found_mapping = FALSE; + struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(result_parameter.service.identifier); + if (mapping != NULL) { + uuid_unparse(mapping->uuid, uuid_string); + found_mapping = TRUE; + } + snprintf(result_string, MAX_RESULT_STRING_LEN, "TriggerIfNeeded (%s.%d)", found_mapping ? uuid_string : "Unknown", result_parameter.service.data); + break; + } + case NECP_POLICY_RESULT_TRIGGER_SCOPED: { + bool found_mapping = FALSE; + struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(result_parameter.service.identifier); + if (mapping != NULL) { + uuid_unparse(mapping->uuid, uuid_string); + found_mapping = TRUE; + } + snprintf(result_string, MAX_RESULT_STRING_LEN, "TriggerScoped (%s.%d)", found_mapping ? uuid_string : "Unknown", result_parameter.service.data); + break; + } + case NECP_POLICY_RESULT_NO_TRIGGER_SCOPED: { + bool found_mapping = FALSE; + struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(result_parameter.service.identifier); + if (mapping != NULL) { + uuid_unparse(mapping->uuid, uuid_string); + found_mapping = TRUE; + } + snprintf(result_string, MAX_RESULT_STRING_LEN, "NoTriggerScoped (%s.%d)", found_mapping ? 
uuid_string : "Unknown", result_parameter.service.data); + break; + } + default: { + snprintf(result_string, MAX_RESULT_STRING_LEN, "Unknown %d (%d)", result, result_parameter.tunnel_interface_index); + break; + } + } + return (result_string); +} + static void necp_kernel_socket_policies_dump_all(void) { - struct necp_kernel_socket_policy *policy = NULL; - int policy_i; - int app_i; - if (necp_debug) { + struct necp_kernel_socket_policy *policy = NULL; + int policy_i; + int app_i; + char result_string[MAX_RESULT_STRING_LEN]; + char proc_name_string[MAXCOMLEN + 1]; + memset(result_string, 0, MAX_RESULT_STRING_LEN); + memset(proc_name_string, 0, MAXCOMLEN + 1); + NECPLOG0(LOG_DEBUG, "NECP Application Policies:\n"); NECPLOG0(LOG_DEBUG, "-----------\n"); for (policy_i = 0; necp_kernel_socket_policies_app_layer_map != NULL && necp_kernel_socket_policies_app_layer_map[policy_i] != NULL; policy_i++) { policy = necp_kernel_socket_policies_app_layer_map[policy_i]; - NECPLOG(LOG_DEBUG, "\t%d. Policy ID: %d, Order: %d.%d, Mask: %x, Result: %d, Parameter: %d\n", policy_i, policy->id, policy->session_order, policy->order, policy->condition_mask, policy->result, policy->result_parameter); + proc_name(policy->session_pid, proc_name_string, MAXCOMLEN); + NECPLOG(LOG_DEBUG, "\t%3d. Policy ID: %5d\tProcess: %10.10s\tOrder: %04d.%04d\tMask: %5x\tResult: %s\n", policy_i, policy->id, proc_name_string, policy->session_order, policy->order, policy->condition_mask, necp_get_result_description(result_string, policy->result, policy->result_parameter)); } if (necp_kernel_socket_policies_app_layer_map[0] != NULL) { NECPLOG0(LOG_DEBUG, "-----------\n"); @@ -2585,7 +3046,8 @@ necp_kernel_socket_policies_dump_all(void) NECPLOG(LOG_DEBUG, "\tApp Bucket: %d\n", app_i); for (policy_i = 0; necp_kernel_socket_policies_map[app_i] != NULL && (necp_kernel_socket_policies_map[app_i])[policy_i] != NULL; policy_i++) { policy = (necp_kernel_socket_policies_map[app_i])[policy_i]; - NECPLOG(LOG_DEBUG, "\t%d. 
Policy ID: %d, Order: %d.%d, Mask: %x, Result: %d, Parameter: %d\n", policy_i, policy->id, policy->session_order, policy->order, policy->condition_mask, policy->result, policy->result_parameter); + proc_name(policy->session_pid, proc_name_string, MAXCOMLEN); + NECPLOG(LOG_DEBUG, "\t%3d. Policy ID: %5d\tProcess: %10.10s\tOrder: %04d.%04d\tMask: %5x\tResult: %s\n", policy_i, policy->id, proc_name_string, policy->session_order, policy->order, policy->condition_mask, necp_get_result_description(result_string, policy->result, policy->result_parameter)); } NECPLOG0(LOG_DEBUG, "-----------\n"); } @@ -2593,7 +3055,7 @@ necp_kernel_socket_policies_dump_all(void) } static inline bool -necp_kernel_socket_result_is_service_type(struct necp_kernel_socket_policy *kernel_policy) +necp_kernel_socket_result_is_trigger_service_type(struct necp_kernel_socket_policy *kernel_policy) { return (kernel_policy->result >= NECP_KERNEL_POLICY_RESULT_TRIGGER && kernel_policy->result <= NECP_KERNEL_POLICY_RESULT_NO_TRIGGER_SCOPED); } @@ -2604,12 +3066,14 @@ necp_kernel_socket_policy_results_overlap(struct necp_kernel_socket_policy *uppe if (upper_policy->result == NECP_KERNEL_POLICY_RESULT_DROP) { // Drop always cancels out lower policies return (TRUE); - } else if (upper_policy->result == NECP_KERNEL_POLICY_RESULT_SOCKET_FILTER) { - // Filters never cancel out lower policies + } else if (upper_policy->result == NECP_KERNEL_POLICY_RESULT_SOCKET_FILTER || + upper_policy->result == NECP_KERNEL_POLICY_RESULT_ROUTE_RULES || + upper_policy->result == NECP_KERNEL_POLICY_RESULT_USE_NETAGENT) { + // Filters and route rules never cancel out lower policies return (FALSE); - } else if (necp_kernel_socket_result_is_service_type(upper_policy)) { + } else if (necp_kernel_socket_result_is_trigger_service_type(upper_policy)) { // Trigger/Scoping policies can overlap one another, but not other results - return (necp_kernel_socket_result_is_service_type(lower_policy)); + return 
(necp_kernel_socket_result_is_trigger_service_type(lower_policy)); } else if (upper_policy->result == NECP_KERNEL_POLICY_RESULT_SKIP) { if (upper_policy->session_order != lower_policy->session_order) { // A skip cannot override a policy of a different session @@ -2626,7 +3090,7 @@ necp_kernel_socket_policy_results_overlap(struct necp_kernel_socket_policy *uppe } } - // A hard pass, flow divert, or tunnel will currently block out lower policies + // A hard pass, flow divert, tunnel, or scope will currently block out lower policies return (TRUE); } @@ -2993,36 +3457,56 @@ necp_remove_string_to_id_mapping(struct necp_string_id_mapping_list *list, char return (FALSE); } -#define NECP_NULL_SERVICE_ID 1 static u_int32_t -necp_get_new_uuid_id(void) +necp_get_new_route_rule_id(void) { u_int32_t newid = 0; lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); - necp_last_uuid_id++; - if (necp_last_uuid_id < (NECP_NULL_SERVICE_ID + 1)) { - necp_last_uuid_id = (NECP_NULL_SERVICE_ID + 1); + necp_last_route_rule_id++; + if (necp_last_route_rule_id < 1 || necp_last_route_rule_id > UINT16_MAX) { + necp_last_route_rule_id = 1; } - newid = necp_last_uuid_id; + newid = necp_last_route_rule_id; if (newid == 0) { - NECPLOG0(LOG_DEBUG, "Allocate uuid id failed.\n"); + NECPLOG0(LOG_DEBUG, "Allocate route rule id failed.\n"); return (0); } return (newid); } -static struct necp_uuid_id_mapping * -necp_uuid_lookup_app_id_locked(uuid_t uuid) +static u_int32_t +necp_get_new_aggregate_route_rule_id(void) { - struct necp_uuid_id_mapping *searchentry = NULL; - struct necp_uuid_id_mapping *foundentry = NULL; + u_int32_t newid = 0; - LIST_FOREACH(searchentry, APPUUIDHASH(uuid), chain) { - if (uuid_compare(searchentry->uuid, uuid) == 0) { + lck_rw_assert(&necp_route_rule_lock, LCK_RW_ASSERT_EXCLUSIVE); + + necp_last_aggregate_route_rule_id++; + if (necp_last_aggregate_route_rule_id <= UINT16_MAX) { + necp_last_aggregate_route_rule_id = UINT16_MAX + 1; + } + + newid = 
necp_last_aggregate_route_rule_id; + if (newid == 0) { + NECPLOG0(LOG_DEBUG, "Allocate aggregate route rule id failed.\n"); + return (0); + } + + return (newid); +} + +static struct necp_route_rule * +necp_lookup_route_rule_locked(struct necp_route_rule_list *list, u_int32_t route_rule_id) +{ + struct necp_route_rule *searchentry = NULL; + struct necp_route_rule *foundentry = NULL; + + LIST_FOREACH(searchentry, list, chain) { + if (searchentry->id == route_rule_id) { foundentry = searchentry; break; } @@ -3031,21 +3515,310 @@ necp_uuid_lookup_app_id_locked(uuid_t uuid) return (foundentry); } +static struct necp_route_rule * +necp_lookup_route_rule_by_contents_locked(struct necp_route_rule_list *list, u_int32_t default_action, u_int8_t cellular_action, u_int8_t wifi_action, u_int8_t wired_action, u_int8_t expensive_action, u_int32_t *if_indices, u_int8_t *if_actions) +{ + struct necp_route_rule *searchentry = NULL; + struct necp_route_rule *foundentry = NULL; + + LIST_FOREACH(searchentry, list, chain) { + if (searchentry->default_action == default_action && + searchentry->cellular_action == cellular_action && + searchentry->wifi_action == wifi_action && + searchentry->wired_action == wired_action && + searchentry->expensive_action == expensive_action) { + bool match_failed = FALSE; + size_t index_a = 0; + size_t index_b = 0; + size_t count_a = 0; + size_t count_b = 0; + for (index_a = 0; index_a < MAX_ROUTE_RULE_INTERFACES; index_a++) { + bool found_index = FALSE; + if (searchentry->exception_if_indices[index_a] == 0) { + break; + } + count_a++; + for (index_b = 0; index_b < MAX_ROUTE_RULE_INTERFACES; index_b++) { + if (if_indices[index_b] == 0) { + break; + } + if (index_b >= count_b) { + count_b = index_b + 1; + } + if (searchentry->exception_if_indices[index_a] == if_indices[index_b] && + searchentry->exception_if_actions[index_a] == if_actions[index_b]) { + found_index = TRUE; + break; + } + } + if (!found_index) { + match_failed = TRUE; + break; + } + } + if 
(!match_failed && count_a == count_b) { + foundentry = searchentry; + break; + } + } + } + + return (foundentry); +} + static u_int32_t -necp_create_uuid_app_id_mapping(uuid_t uuid, bool *allocated_mapping, bool uuid_policy_table) +necp_create_route_rule(struct necp_route_rule_list *list, u_int8_t *route_rules_array, u_int32_t route_rules_array_size) { - u_int32_t local_id = 0; - struct necp_uuid_id_mapping *existing_mapping = NULL; + size_t offset = 0; + u_int32_t route_rule_id = 0; + struct necp_route_rule *existing_rule = NULL; + u_int32_t default_action = NECP_ROUTE_RULE_ALLOW_INTERFACE; + u_int8_t cellular_action = NECP_ROUTE_RULE_NONE; + u_int8_t wifi_action = NECP_ROUTE_RULE_NONE; + u_int8_t wired_action = NECP_ROUTE_RULE_NONE; + u_int8_t expensive_action = NECP_ROUTE_RULE_NONE; + u_int32_t if_indices[MAX_ROUTE_RULE_INTERFACES]; + size_t num_valid_indices = 0; + memset(&if_indices, 0, sizeof(if_indices)); + u_int8_t if_actions[MAX_ROUTE_RULE_INTERFACES]; + memset(&if_actions, 0, sizeof(if_actions)); lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); - if (allocated_mapping) { - *allocated_mapping = FALSE; + if (route_rules_array == NULL || route_rules_array_size == 0) { + return (0); } - existing_mapping = necp_uuid_lookup_app_id_locked(uuid); - if (existing_mapping != NULL) { - local_id = existing_mapping->id; + // Process rules + while (offset < route_rules_array_size) { + ifnet_t rule_interface = NULL; + char interface_name[IFXNAMSIZ]; + u_int32_t length = 0; + u_int8_t *value = necp_buffer_get_tlv_value(route_rules_array, offset, &length); + + u_int8_t rule_type = necp_policy_condition_get_type_from_buffer(value, length); + u_int8_t rule_flags = necp_policy_condition_get_flags_from_buffer(value, length); + u_int32_t rule_length = necp_policy_condition_get_value_length_from_buffer(value, length); + u_int8_t *rule_value = necp_policy_condition_get_value_pointer_from_buffer(value, length); + + if (rule_type == NECP_ROUTE_RULE_NONE) { + // 
Don't allow an explicit rule to be None action + continue; + } + + if (rule_length == 0) { + if (rule_flags & NECP_ROUTE_RULE_FLAG_CELLULAR) { + cellular_action = rule_type; + } + if (rule_flags & NECP_ROUTE_RULE_FLAG_WIFI) { + wifi_action = rule_type; + } + if (rule_flags & NECP_ROUTE_RULE_FLAG_WIRED) { + wired_action = rule_type; + } + if (rule_flags & NECP_ROUTE_RULE_FLAG_EXPENSIVE) { + expensive_action = rule_type; + } + if (rule_flags == 0) { + default_action = rule_type; + } + offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length; + continue; + } + + if (num_valid_indices >= MAX_ROUTE_RULE_INTERFACES) { + offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length; + continue; + } + + memcpy(interface_name, rule_value, rule_length); + interface_name[length - 1] = 0; // Make sure the string is NULL terminated + if (ifnet_find_by_name(interface_name, &rule_interface) == 0) { + if_actions[num_valid_indices] = rule_type; + if_indices[num_valid_indices++] = rule_interface->if_index; + } + + offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length; + } + + existing_rule = necp_lookup_route_rule_by_contents_locked(list, default_action, cellular_action, wifi_action, wired_action, expensive_action, if_indices, if_actions); + if (existing_rule != NULL) { + route_rule_id = existing_rule->id; + existing_rule->refcount++; + } else { + struct necp_route_rule *new_rule = NULL; + MALLOC(new_rule, struct necp_route_rule *, sizeof(struct necp_route_rule), M_NECP, M_WAITOK); + if (new_rule != NULL) { + memset(new_rule, 0, sizeof(struct necp_route_rule)); + route_rule_id = new_rule->id = necp_get_new_route_rule_id(); + new_rule->default_action = default_action; + new_rule->cellular_action = cellular_action; + new_rule->wifi_action = wifi_action; + new_rule->wired_action = wired_action; + new_rule->expensive_action = expensive_action; + memcpy(&new_rule->exception_if_indices, &if_indices, sizeof(if_indices)); + memcpy(&new_rule->exception_if_actions, &if_actions, 
sizeof(if_actions)); + new_rule->refcount = 1; + LIST_INSERT_HEAD(list, new_rule, chain); + } + } + return (route_rule_id); +} + +static void +necp_remove_aggregate_route_rule_for_id(u_int32_t rule_id) +{ + if (rule_id) { + lck_rw_lock_exclusive(&necp_route_rule_lock); + + struct necp_aggregate_route_rule *existing_rule = NULL; + struct necp_aggregate_route_rule *tmp_rule = NULL; + + LIST_FOREACH_SAFE(existing_rule, &necp_aggregate_route_rules, chain, tmp_rule) { + int index = 0; + for (index = 0; index < MAX_AGGREGATE_ROUTE_RULES; index++) { + u_int32_t route_rule_id = existing_rule->rule_ids[index]; + if (route_rule_id == rule_id) { + LIST_REMOVE(existing_rule, chain); + FREE(existing_rule, M_NECP); + break; + } + } + } + + lck_rw_done(&necp_route_rule_lock); + } +} + +static bool +necp_remove_route_rule(struct necp_route_rule_list *list, u_int32_t route_rule_id) +{ + struct necp_route_rule *existing_rule = NULL; + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + existing_rule = necp_lookup_route_rule_locked(list, route_rule_id); + if (existing_rule != NULL) { + if (--existing_rule->refcount == 0) { + necp_remove_aggregate_route_rule_for_id(existing_rule->id); + LIST_REMOVE(existing_rule, chain); + FREE(existing_rule, M_NECP); + } + return (TRUE); + } + + return (FALSE); +} + +static struct necp_aggregate_route_rule * +necp_lookup_aggregate_route_rule_locked(u_int32_t route_rule_id) +{ + struct necp_aggregate_route_rule *searchentry = NULL; + struct necp_aggregate_route_rule *foundentry = NULL; + + lck_rw_lock_shared(&necp_route_rule_lock); + + LIST_FOREACH(searchentry, &necp_aggregate_route_rules, chain) { + if (searchentry->id == route_rule_id) { + foundentry = searchentry; + break; + } + } + + lck_rw_done(&necp_route_rule_lock); + + return (foundentry); +} + +static u_int32_t +necp_create_aggregate_route_rule(u_int32_t *rule_ids) +{ + u_int32_t aggregate_route_rule_id = 0; + struct necp_aggregate_route_rule *new_rule = NULL; + struct 
necp_aggregate_route_rule *existing_rule = NULL; + + LIST_FOREACH(existing_rule, &necp_aggregate_route_rules, chain) { + if (memcmp(existing_rule->rule_ids, rule_ids, (sizeof(u_int32_t) * MAX_AGGREGATE_ROUTE_RULES)) == 0) { + return (existing_rule->id); + } + } + + lck_rw_lock_exclusive(&necp_route_rule_lock); + + LIST_FOREACH(existing_rule, &necp_aggregate_route_rules, chain) { + // Re-check, in case something else created the rule while we are waiting to lock + if (memcmp(existing_rule->rule_ids, rule_ids, (sizeof(u_int32_t) * MAX_AGGREGATE_ROUTE_RULES)) == 0) { + lck_rw_done(&necp_route_rule_lock); + return (existing_rule->id); + } + } + + MALLOC(new_rule, struct necp_aggregate_route_rule *, sizeof(struct necp_aggregate_route_rule), M_NECP, M_WAITOK); + if (new_rule != NULL) { + memset(new_rule, 0, sizeof(struct necp_aggregate_route_rule)); + aggregate_route_rule_id = new_rule->id = necp_get_new_aggregate_route_rule_id(); + new_rule->id = aggregate_route_rule_id; + memcpy(new_rule->rule_ids, rule_ids, (sizeof(u_int32_t) * MAX_AGGREGATE_ROUTE_RULES)); + LIST_INSERT_HEAD(&necp_aggregate_route_rules, new_rule, chain); + } + lck_rw_done(&necp_route_rule_lock); + + return (aggregate_route_rule_id); +} + +#define NECP_NULL_SERVICE_ID 1 +static u_int32_t +necp_get_new_uuid_id(void) +{ + u_int32_t newid = 0; + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + necp_last_uuid_id++; + if (necp_last_uuid_id < (NECP_NULL_SERVICE_ID + 1)) { + necp_last_uuid_id = (NECP_NULL_SERVICE_ID + 1); + } + + newid = necp_last_uuid_id; + if (newid == 0) { + NECPLOG0(LOG_DEBUG, "Allocate uuid id failed.\n"); + return (0); + } + + return (newid); +} + +static struct necp_uuid_id_mapping * +necp_uuid_lookup_app_id_locked(uuid_t uuid) +{ + struct necp_uuid_id_mapping *searchentry = NULL; + struct necp_uuid_id_mapping *foundentry = NULL; + + LIST_FOREACH(searchentry, APPUUIDHASH(uuid), chain) { + if (uuid_compare(searchentry->uuid, uuid) == 0) { + foundentry = 
searchentry; + break; + } + } + + return (foundentry); +} + +static u_int32_t +necp_create_uuid_app_id_mapping(uuid_t uuid, bool *allocated_mapping, bool uuid_policy_table) +{ + u_int32_t local_id = 0; + struct necp_uuid_id_mapping *existing_mapping = NULL; + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + if (allocated_mapping) { + *allocated_mapping = FALSE; + } + + existing_mapping = necp_uuid_lookup_app_id_locked(uuid); + if (existing_mapping != NULL) { + local_id = existing_mapping->id; existing_mapping->refcount++; if (uuid_policy_table) { existing_mapping->table_refcount++; @@ -3244,7 +4017,7 @@ necp_kernel_socket_policies_update_uuid_table(void) #define NECP_KERNEL_VALID_IP_OUTPUT_CONDITIONS (NECP_KERNEL_CONDITION_ALL_INTERFACES | NECP_KERNEL_CONDITION_BOUND_INTERFACE | NECP_KERNEL_CONDITION_PROTOCOL | NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX | NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE) static necp_kernel_policy_id -necp_kernel_ip_output_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, necp_policy_order suborder, u_int32_t session_order, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_kernel_policy_id cond_policy_id, ifnet_t cond_bound_interface, u_int32_t cond_last_interface_index, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter) +necp_kernel_ip_output_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, necp_policy_order suborder, u_int32_t session_order, int session_pid, u_int32_t 
condition_mask, u_int32_t condition_negated_mask, necp_kernel_policy_id cond_policy_id, ifnet_t cond_bound_interface, u_int32_t cond_last_interface_index, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter) { struct necp_kernel_ip_output_policy *new_kernel_policy = NULL; struct necp_kernel_ip_output_policy *tmp_kernel_policy = NULL; @@ -3260,6 +4033,7 @@ necp_kernel_ip_output_policy_add(necp_policy_id parent_policy_id, necp_policy_or new_kernel_policy->suborder = suborder; new_kernel_policy->order = order; new_kernel_policy->session_order = session_order; + new_kernel_policy->session_pid = session_pid; // Sanitize condition mask new_kernel_policy->condition_mask = (condition_mask & NECP_KERNEL_VALID_IP_OUTPUT_CONDITIONS); @@ -3365,18 +4139,23 @@ necp_kernel_ip_output_policy_delete(necp_kernel_policy_id policy_id) static void necp_kernel_ip_output_policies_dump_all(void) { - struct necp_kernel_ip_output_policy *policy = NULL; - int policy_i; - int id_i; - if (necp_debug) { + struct necp_kernel_ip_output_policy *policy = NULL; + int policy_i; + int id_i; + char result_string[MAX_RESULT_STRING_LEN]; + char proc_name_string[MAXCOMLEN + 1]; + memset(result_string, 0, MAX_RESULT_STRING_LEN); + memset(proc_name_string, 0, MAXCOMLEN + 1); + NECPLOG0(LOG_DEBUG, "NECP IP Output Policies:\n"); NECPLOG0(LOG_DEBUG, "-----------\n"); for (id_i = 0; id_i < NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS; id_i++) { NECPLOG(LOG_DEBUG, " ID Bucket: %d\n", id_i); for (policy_i = 0; necp_kernel_ip_output_policies_map[id_i] != NULL && (necp_kernel_ip_output_policies_map[id_i])[policy_i] != NULL; policy_i++) { policy = (necp_kernel_ip_output_policies_map[id_i])[policy_i]; - NECPLOG(LOG_DEBUG, 
"\t%d. Policy ID: %d, Order: %d.%d.%d, Mask: %x, Result: %d, Parameter: %d\n", policy_i, policy->id, policy->session_order, policy->order, policy->suborder, policy->condition_mask, policy->result, policy->result_parameter); + proc_name(policy->session_pid, proc_name_string, MAXCOMLEN); + NECPLOG(LOG_DEBUG, "\t%3d. Policy ID: %5d\tProcess: %10.10s\tOrder: %04d.%04d.%d\tMask: %5x\tResult: %s\n", policy_i, policy->id, proc_name_string, policy->session_order, policy->order, policy->suborder, policy->condition_mask, necp_get_result_description(result_string, policy->result, policy->result_parameter)); } NECPLOG0(LOG_DEBUG, "-----------\n"); } @@ -3697,7 +4476,7 @@ necp_hostname_matches_domain(struct substring hostname_substring, u_int8_t hostn memcmp(hostname_substring.string, domain_substring.string, hostname_substring.length) == 0) { return (TRUE); } - } else if (domain_dot_count > 0 && domain_dot_count < hostname_dot_count) { + } else if (domain_dot_count < hostname_dot_count) { if (necp_check_suffix(hostname_substring, domain_substring, TRUE)) { return (TRUE); } @@ -3706,8 +4485,9 @@ necp_hostname_matches_domain(struct substring hostname_substring, u_int8_t hostn return (FALSE); } +#define NECP_KERNEL_ADDRESS_TYPE_CONDITIONS (NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX) static void -necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_application_uuid, char *account, char *domain, pid_t pid, uid_t uid, u_int16_t protocol, u_int32_t bound_interface_index, u_int32_t traffic_class, struct necp_socket_info *info) +necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_application_uuid, char *account, char *domain, pid_t pid, uid_t uid, u_int16_t protocol, u_int32_t bound_interface_index, u_int32_t traffic_class, union necp_sockaddr_union *local_addr, union 
necp_sockaddr_union *remote_addr, struct necp_socket_info *info) { memset(info, 0, sizeof(struct necp_socket_info)); @@ -3746,10 +4526,32 @@ necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_applic if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_DOMAIN) { info->domain = domain; } + + if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_ADDRESS_TYPE_CONDITIONS) { + if (local_addr && local_addr->sa.sa_len > 0) { + memcpy(&info->local_addr, local_addr, local_addr->sa.sa_len); + } + if (remote_addr && remote_addr->sa.sa_len > 0) { + memcpy(&info->remote_addr, remote_addr, remote_addr->sa.sa_len); + } + } +} + +static void +necp_send_application_cell_denied_event(pid_t pid, uuid_t proc_uuid) +{ + struct kev_netpolicy_ifdenied ev_ifdenied; + + bzero(&ev_ifdenied, sizeof(ev_ifdenied)); + + ev_ifdenied.ev_data.epid = pid; + uuid_copy(ev_ifdenied.ev_data.euuid, proc_uuid); + + netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev_ifdenied.ev_data, sizeof(ev_ifdenied)); } static int -necp_application_find_policy_match_internal(u_int8_t *parameters, size_t parameters_size, struct necp_aggregate_result *returned_result) +necp_application_find_policy_match_internal(u_int8_t *parameters, u_int32_t parameters_size, struct necp_aggregate_result *returned_result) { int error = 0; size_t offset = 0; @@ -3757,6 +4559,7 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet struct necp_kernel_socket_policy *matched_policy = NULL; struct necp_socket_info info; necp_kernel_policy_filter filter_control_unit = 0; + u_int32_t route_rule_id = 0; necp_kernel_policy_result service_action = 0; necp_kernel_policy_service service = { 0, 0 }; @@ -3765,7 +4568,12 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet u_int16_t protocol = 0; u_int32_t bound_interface_index = 0; u_int32_t traffic_class = 0; + union necp_sockaddr_union local_addr; + union necp_sockaddr_union 
remote_addr; + bool no_remote_addr = FALSE; + memset(&local_addr, 0, sizeof(local_addr)); + memset(&remote_addr, 0, sizeof(remote_addr)); uuid_t application_uuid; uuid_clear(application_uuid); uuid_t real_application_uuid; @@ -3773,6 +4581,10 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet char *domain = NULL; char *account = NULL; + u_int32_t netagent_ids[NECP_MAX_NETAGENTS]; + memset(&netagent_ids, 0, sizeof(netagent_ids)); + int netagent_cursor; + if (returned_result == NULL) { return (EINVAL); } @@ -3789,11 +4601,11 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet } lck_rw_done(&necp_kernel_policy_lock); - while (offset < parameters_size) { + while ((offset + sizeof(u_int8_t) + sizeof(u_int32_t)) <= parameters_size) { u_int8_t type = necp_buffer_get_tlv_type(parameters, offset); - size_t length = necp_buffer_get_tlv_length(parameters, offset); + u_int32_t length = necp_buffer_get_tlv_length(parameters, offset); - if (length > 0 && (offset + sizeof(u_int8_t) + sizeof(size_t) + length) <= parameters_size) { + if (length > 0 && (offset + sizeof(u_int8_t) + sizeof(u_int32_t) + length) <= parameters_size) { u_int8_t *value = necp_buffer_get_tlv_value(parameters, offset, NULL); if (value != NULL) { switch (type) { @@ -3855,6 +4667,20 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet } break; } + case NECP_POLICY_CONDITION_LOCAL_ADDR: { + if (length >= sizeof(struct necp_policy_condition_addr)) { + struct necp_policy_condition_addr *address_struct = (struct necp_policy_condition_addr *)(void *)value; + memcpy(&local_addr, &address_struct->address, sizeof(address_struct->address)); + } + break; + } + case NECP_POLICY_CONDITION_REMOTE_ADDR: { + if (length >= sizeof(struct necp_policy_condition_addr)) { + struct necp_policy_condition_addr *address_struct = (struct necp_policy_condition_addr *)(void *)value; + memcpy(&remote_addr, &address_struct->address, 
sizeof(address_struct->address)); + } + break; + } default: { break; } @@ -3862,23 +4688,26 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet } } - offset += sizeof(u_int8_t) + sizeof(size_t) + length; + offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length; } // Lock lck_rw_lock_shared(&necp_kernel_policy_lock); - necp_application_fillout_info_locked(application_uuid, real_application_uuid, account, domain, pid, uid, protocol, bound_interface_index, traffic_class, &info); - matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_app_layer_map, &info, &filter_control_unit, &service_action, &service); + necp_application_fillout_info_locked(application_uuid, real_application_uuid, account, domain, pid, uid, protocol, bound_interface_index, traffic_class, &local_addr, &remote_addr, &info); + matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_app_layer_map, &info, &filter_control_unit, &route_rule_id, &service_action, &service, netagent_ids, NECP_MAX_NETAGENTS); if (matched_policy) { + returned_result->policy_id = matched_policy->id; returned_result->routing_result = matched_policy->result; memcpy(&returned_result->routing_result_parameter, &matched_policy->result_parameter, sizeof(returned_result->routing_result_parameter)); } else { + returned_result->policy_id = 0; returned_result->routing_result = NECP_KERNEL_POLICY_RESULT_NONE; } returned_result->filter_control_unit = filter_control_unit; returned_result->service_action = service_action; + // Handle trigger service if (service.identifier != 0) { struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(service.identifier); if (mapping != NULL) { @@ -3899,6 +4728,103 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet } } + // Handle netagents + for (netagent_cursor = 0; netagent_cursor < NECP_MAX_NETAGENTS; netagent_cursor++) { + struct 
necp_uuid_id_mapping *mapping = NULL; + u_int32_t netagent_id = netagent_ids[netagent_cursor]; + if (netagent_id == 0) { + break; + } + mapping = necp_uuid_lookup_uuid_with_service_id_locked(netagent_id); + if (mapping != NULL) { + uuid_copy(returned_result->netagents[netagent_cursor], mapping->uuid); + returned_result->netagent_flags[netagent_cursor] = netagent_get_flags(mapping->uuid); + } + } + + // Do routing evaluation + u_int output_bound_interface = bound_interface_index; + if (returned_result->routing_result == NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED) { + output_bound_interface = returned_result->routing_result_parameter.scoped_interface_index; + } else if (returned_result->routing_result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL) { + output_bound_interface = returned_result->routing_result_parameter.tunnel_interface_index; + } + + if (remote_addr.sa.sa_len == 0) { + no_remote_addr = TRUE; + // Default to 0.0.0.0:0 + remote_addr.sa.sa_family = AF_INET; + remote_addr.sa.sa_len = sizeof(struct sockaddr_in); + } + + struct rtentry *rt = NULL; + rt = rtalloc1_scoped((struct sockaddr *)&remote_addr, 0, 0, output_bound_interface); + + if (no_remote_addr && + (rt == NULL || rt->rt_ifp == NULL)) { + // Route lookup for default IPv4 failed, try IPv6 + + // Cleanup old route if necessary + if (rt != NULL) { + rtfree(rt); + rt = NULL; + } + + // Reset address to :: + memset(&remote_addr, 0, sizeof(remote_addr)); + remote_addr.sa.sa_family = AF_INET6; + remote_addr.sa.sa_len = sizeof(struct sockaddr_in6); + + // Get route + rt = rtalloc1_scoped((struct sockaddr *)&remote_addr, 0, 0, output_bound_interface); + } + + returned_result->routed_interface_index = 0; + if (rt != NULL && + rt->rt_ifp != NULL) { + returned_result->routed_interface_index = rt->rt_ifp->if_index; + /* + * For local addresses, we allow the interface scope to be + * either the loopback interface or the interface hosting the + * local address. 
+ */ + if (bound_interface_index != IFSCOPE_NONE && + rt->rt_ifa != NULL && rt->rt_ifa->ifa_ifp && + (output_bound_interface == lo_ifp->if_index || + rt->rt_ifp->if_index == lo_ifp->if_index || + rt->rt_ifa->ifa_ifp->if_index == bound_interface_index)) { + struct sockaddr_storage dst; + unsigned int ifscope = bound_interface_index; + + /* + * Transform dst into the internal routing table form + */ + (void) sa_copy((struct sockaddr *)&remote_addr, + &dst, &ifscope); + + if ((rt->rt_ifp->if_index == lo_ifp->if_index) || + rt_ifa_is_dst((struct sockaddr *)&dst, rt->rt_ifa)) + returned_result->routed_interface_index = + bound_interface_index; + } + } + + bool cellular_denied = FALSE; + bool route_is_allowed = necp_route_is_allowed(rt, NULL, route_rule_id, &cellular_denied); + if (!route_is_allowed) { + // If the route is blocked, treat the lookup as a drop + returned_result->routing_result = NECP_KERNEL_POLICY_RESULT_DROP; + memset(&returned_result->routing_result_parameter, 0, sizeof(returned_result->routing_result_parameter)); + + if (cellular_denied) { + necp_send_application_cell_denied_event(pid, application_uuid); + } + } + + if (rt != NULL) { + rtfree(rt); + rt = NULL; + } // Unlock lck_rw_done(&necp_kernel_policy_lock); @@ -4159,7 +5085,6 @@ necp_socket_calc_flowhash_locked(struct necp_socket_info *info) return (net_flowhash(info, sizeof(*info), necp_kernel_socket_policies_gencount)); } -#define NECP_KERNEL_ADDRESS_TYPE_CONDITIONS (NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX) static void necp_socket_fillout_info_locked(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int32_t override_bound_interface, struct necp_socket_info *info) { @@ -4270,13 +5195,16 @@ necp_socket_fillout_info_locked(struct inpcb *inp, struct sockaddr *override_loc } static 
inline struct necp_kernel_socket_policy * -necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy **policy_search_array, struct necp_socket_info *info, necp_kernel_policy_filter *return_filter, necp_kernel_policy_result *return_service_action, necp_kernel_policy_service *return_service) +necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy **policy_search_array, struct necp_socket_info *info, necp_kernel_policy_filter *return_filter, u_int32_t *return_route_rule_id, necp_kernel_policy_result *return_service_action, necp_kernel_policy_service *return_service, u_int32_t *return_netagent_array, size_t netagent_array_count) { struct necp_kernel_socket_policy *matched_policy = NULL; u_int32_t skip_order = 0; u_int32_t skip_session_order = 0; + u_int32_t route_rule_id_array[MAX_AGGREGATE_ROUTE_RULES]; + size_t route_rule_id_count = 0; int i; - + size_t netagent_cursor = 0; + // Pre-process domain for quick matching struct substring domain_substring = necp_trim_dots_and_stars(info->domain, info->domain ? 
strlen(info->domain) : 0); u_int8_t domain_dot_count = necp_count_dots(domain_substring.string, domain_substring.length); @@ -4285,6 +5213,10 @@ necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy *return_filter = 0; } + if (return_route_rule_id) { + *return_route_rule_id = 0; + } + if (return_service_action) { *return_service_action = 0; } @@ -4327,7 +5259,15 @@ necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy } } continue; - } else if (necp_kernel_socket_result_is_service_type(policy_search_array[i])) { + } else if (policy_search_array[i]->result == NECP_KERNEL_POLICY_RESULT_ROUTE_RULES) { + if (return_route_rule_id && route_rule_id_count < MAX_AGGREGATE_ROUTE_RULES) { + route_rule_id_array[route_rule_id_count++] = policy_search_array[i]->result_parameter.route_rule_id; + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "Socket Policy: (Application %d Real Application %d BoundInterface %d Proto %d) Route Rule %d", info->application_id, info->real_application_id, info->bound_interface_index, info->protocol, policy_search_array[i]->result_parameter.route_rule_id); + } + } + continue; + } else if (necp_kernel_socket_result_is_trigger_service_type(policy_search_array[i])) { if (return_service_action && *return_service_action == 0) { *return_service_action = policy_search_array[i]->result; if (necp_debug > 1) { @@ -4342,6 +5282,16 @@ necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy } } continue; + } else if (policy_search_array[i]->result == NECP_KERNEL_POLICY_RESULT_USE_NETAGENT) { + if (return_netagent_array != NULL && + netagent_cursor < netagent_array_count) { + return_netagent_array[netagent_cursor] = policy_search_array[i]->result_parameter.netagent_id; + netagent_cursor++; + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "Socket Policy: (Application %d Real Application %d BoundInterface %d Proto %d) Use Netagent %d", info->application_id, info->real_application_id, 
info->bound_interface_index, info->protocol, policy_search_array[i]->result_parameter.netagent_id); + } + } + continue; } // Passed all tests, found a match @@ -4356,6 +5306,11 @@ necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy } } + if (route_rule_id_count == 1) { + *return_route_rule_id = route_rule_id_array[0]; + } else if (route_rule_id_count > 1) { + *return_route_rule_id = necp_create_aggregate_route_rule(route_rule_id_array); + } return (matched_policy); } @@ -4419,11 +5374,16 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local { struct socket *so = NULL; necp_kernel_policy_filter filter_control_unit = 0; + u_int32_t route_rule_id = 0; struct necp_kernel_socket_policy *matched_policy = NULL; necp_kernel_policy_id matched_policy_id = NECP_KERNEL_POLICY_ID_NONE; necp_kernel_policy_result service_action = 0; necp_kernel_policy_service service = { 0, 0 }; + u_int32_t netagent_ids[NECP_MAX_NETAGENTS]; + memset(&netagent_ids, 0, sizeof(netagent_ids)); + int netagent_cursor; + struct necp_socket_info info; if (inp == NULL) { @@ -4440,6 +5400,7 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local inp->inp_policyresult.policy_gencount = 0; inp->inp_policyresult.flowhash = 0; inp->inp_policyresult.results.filter_control_unit = 0; + inp->inp_policyresult.results.route_rule_id = 0; if (necp_pass_loopback > 0 && necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) { inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_PASS; @@ -4458,6 +5419,7 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local inp->inp_policyresult.policy_gencount = 0; inp->inp_policyresult.flowhash = 0; inp->inp_policyresult.results.filter_control_unit = 0; + inp->inp_policyresult.results.route_rule_id = 0; inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_PASS; return (NECP_KERNEL_POLICY_ID_NONE); } @@ -4481,7 +5443,7 @@ 
necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local } // Match socket to policy - matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_map[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(info.application_id)], &info, &filter_control_unit, &service_action, &service); + matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_map[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(info.application_id)], &info, &filter_control_unit, &route_rule_id, &service_action, &service, netagent_ids, NECP_MAX_NETAGENTS); // If the socket matched a scoped service policy, mark as Drop if not registered. // This covers the cases in which a service is required (on demand) but hasn't started yet. if ((service_action == NECP_KERNEL_POLICY_RESULT_TRIGGER_SCOPED || @@ -4502,23 +5464,67 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount; inp->inp_policyresult.flowhash = flowhash; inp->inp_policyresult.results.filter_control_unit = 0; + inp->inp_policyresult.results.route_rule_id = 0; inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_DROP; - + if (necp_debug > 1) { NECPLOG(LOG_DEBUG, "Socket Policy: (BoundInterface %d Proto %d) Dropping packet because service is not registered", info.bound_interface_index, info.protocol); } - + // Unlock lck_rw_done(&necp_kernel_policy_lock); return (NECP_KERNEL_POLICY_ID_NONE); } } + // Verify netagents + for (netagent_cursor = 0; netagent_cursor < NECP_MAX_NETAGENTS; netagent_cursor++) { + struct necp_uuid_id_mapping *mapping = NULL; + u_int32_t netagent_id = netagent_ids[netagent_cursor]; + if (netagent_id == 0) { + break; + } + mapping = necp_uuid_lookup_uuid_with_service_id_locked(netagent_id); + if (mapping != NULL) { + u_int32_t agent_flags = 0; + agent_flags = netagent_get_flags(mapping->uuid); + if (agent_flags & NETAGENT_FLAG_REGISTERED) { + if 
(agent_flags & NETAGENT_FLAG_ACTIVE) { + continue; + } else if ((agent_flags & NETAGENT_FLAG_VOLUNTARY) == 0) { + if (agent_flags & NETAGENT_FLAG_KERNEL_ACTIVATED) { + int trigger_error = 0; + trigger_error = netagent_kernel_trigger(mapping->uuid); + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "Socket Policy: Triggering inactive agent, error %d", trigger_error); + } + } + + // Mark socket as a drop if required agent is not active + inp->inp_policyresult.policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount; + inp->inp_policyresult.flowhash = flowhash; + inp->inp_policyresult.results.filter_control_unit = 0; + inp->inp_policyresult.results.route_rule_id = 0; + inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_DROP; + + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "Socket Policy: (BoundInterface %d Proto %d) Dropping packet because agent is not active", info.bound_interface_index, info.protocol); + } + + // Unlock + lck_rw_done(&necp_kernel_policy_lock); + return (NECP_KERNEL_POLICY_ID_NONE); + } + } + } + } if (matched_policy) { matched_policy_id = matched_policy->id; inp->inp_policyresult.policy_id = matched_policy->id; inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount; inp->inp_policyresult.flowhash = flowhash; inp->inp_policyresult.results.filter_control_unit = filter_control_unit; + inp->inp_policyresult.results.route_rule_id = route_rule_id; inp->inp_policyresult.results.result = matched_policy->result; memcpy(&inp->inp_policyresult.results.result_parameter, &matched_policy->result_parameter, sizeof(matched_policy->result_parameter)); @@ -4529,10 +5535,15 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local NECPLOG(LOG_DEBUG, "Marking socket in state %d as defunct", so->so_state); } sosetdefunct(current_proc(), so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE); + } else if (necp_socket_is_connected(inp) && + 
matched_policy->result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && + info.protocol == IPPROTO_TCP) { + // Reset MSS on TCP socket if tunnel policy changes + tcp_mtudisc(inp, 0); } if (necp_debug > 1) { - NECPLOG(LOG_DEBUG, "Socket Policy: (BoundInterface %d Proto %d) Policy %d Result %d Parameter %d", info.bound_interface_index, info.protocol, matched_policy->id, matched_policy->result, matched_policy->result_parameter.tunnel_interface_index); + NECPLOG(LOG_DEBUG, "Socket Policy: %p (BoundInterface %d Proto %d) Policy %d Result %d Parameter %d", inp->inp_socket, info.bound_interface_index, info.protocol, matched_policy->id, matched_policy->result, matched_policy->result_parameter.tunnel_interface_index); } } else if (necp_drop_all_order > 0) { // Mark socket as a drop if set @@ -4540,6 +5551,7 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount; inp->inp_policyresult.flowhash = flowhash; inp->inp_policyresult.results.filter_control_unit = 0; + inp->inp_policyresult.results.route_rule_id = 0; inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_DROP; } else { // Mark non-matching socket so we don't re-check it @@ -4547,6 +5559,7 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount; inp->inp_policyresult.flowhash = flowhash; inp->inp_policyresult.results.filter_control_unit = filter_control_unit; // We may have matched a filter, so mark it! + inp->inp_policyresult.results.route_rule_id = route_rule_id; // We may have matched a route rule, so mark it! 
inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_NONE; } @@ -5257,11 +6270,22 @@ necp_set_socket_attributes(struct socket *so, struct sockopt *sopt) { int error = 0; u_int8_t *buffer = NULL; - struct inpcb *inp = sotoinpcb(so); + struct inpcb *inp = NULL; + + if ((SOCK_DOM(so) != PF_INET +#if INET6 + && SOCK_DOM(so) != PF_INET6 +#endif + )) { + error = EINVAL; + goto done; + } + + inp = sotoinpcb(so); size_t valsize = sopt->sopt_valsize; if (valsize == 0 || - valsize > ((sizeof(u_int8_t) + sizeof(size_t) + NECP_MAX_SOCKET_ATTRIBUTE_STRING_LENGTH) * 2)) { + valsize > ((sizeof(u_int8_t) + sizeof(u_int32_t) + NECP_MAX_SOCKET_ATTRIBUTE_STRING_LENGTH) * 2)) { goto done; } @@ -5308,10 +6332,10 @@ necp_get_socket_attributes(struct socket *so, struct sockopt *sopt) struct inpcb *inp = sotoinpcb(so); if (inp->inp_necp_attributes.inp_domain != NULL) { - valsize += sizeof(u_int8_t) + sizeof(size_t) + strlen(inp->inp_necp_attributes.inp_domain); + valsize += sizeof(u_int8_t) + sizeof(u_int32_t) + strlen(inp->inp_necp_attributes.inp_domain); } if (inp->inp_necp_attributes.inp_account != NULL) { - valsize += sizeof(u_int8_t) + sizeof(size_t) + strlen(inp->inp_necp_attributes.inp_account); + valsize += sizeof(u_int8_t) + sizeof(u_int32_t) + strlen(inp->inp_necp_attributes.inp_account); } if (valsize == 0) { goto done; @@ -5344,7 +6368,174 @@ necp_get_socket_attributes(struct socket *so, struct sockopt *sopt) } static bool -necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id) +necp_route_is_allowed_inner(struct rtentry *route, struct ifnet *ifp, u_int32_t route_rule_id, bool *cellular_denied) +{ + bool default_is_allowed = TRUE; + u_int8_t type_aggregate_action = NECP_ROUTE_RULE_NONE; + int exception_index = 0; + struct ifnet *delegated_ifp = NULL; + struct necp_route_rule *route_rule = NULL; + + route_rule = 
necp_lookup_route_rule_locked(&necp_route_rules, route_rule_id); + if (route_rule == NULL) { + return (TRUE); + } + + default_is_allowed = (route_rule->default_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? FALSE : TRUE; + if (ifp == NULL) { + ifp = route->rt_ifp; + } + if (ifp == NULL) { + if (necp_debug > 1 && !default_is_allowed) { + NECPLOG(LOG_DEBUG, "Route Allowed: No interface for route, using default for Rule %d Allowed %d", route_rule_id, default_is_allowed); + } + return (default_is_allowed); + } + + delegated_ifp = ifp->if_delegated.ifp; + for (exception_index = 0; exception_index < MAX_ROUTE_RULE_INTERFACES; exception_index++) { + if (route_rule->exception_if_indices[exception_index] == 0) { + break; + } + if (route_rule->exception_if_indices[exception_index] == ifp->if_index || + (delegated_ifp != NULL && route_rule->exception_if_indices[exception_index] == delegated_ifp->if_index)) { + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "Route Allowed: Interface match %d for Rule %d Allowed %d", route_rule->exception_if_indices[exception_index], route_rule_id, ((route_rule->exception_if_actions[exception_index] == NECP_ROUTE_RULE_DENY_INTERFACE) ? FALSE : TRUE)); + } + return ((route_rule->exception_if_actions[exception_index] == NECP_ROUTE_RULE_DENY_INTERFACE) ? 
FALSE : TRUE); + } + } + + if (route_rule->cellular_action != NECP_ROUTE_RULE_NONE && + IFNET_IS_CELLULAR(ifp)) { + if (cellular_denied != NULL) { + // Let clients know that cellular was blocked + *cellular_denied = TRUE; + } + if (type_aggregate_action == NECP_ROUTE_RULE_NONE || + (type_aggregate_action == NECP_ROUTE_RULE_ALLOW_INTERFACE && + route_rule->cellular_action == NECP_ROUTE_RULE_DENY_INTERFACE)) { + // Deny wins if there is a conflict + type_aggregate_action = route_rule->cellular_action; + } + } + + if (route_rule->wifi_action != NECP_ROUTE_RULE_NONE && + IFNET_IS_WIFI(ifp)) { + if (type_aggregate_action == NECP_ROUTE_RULE_NONE || + (type_aggregate_action == NECP_ROUTE_RULE_ALLOW_INTERFACE && + route_rule->wifi_action == NECP_ROUTE_RULE_DENY_INTERFACE)) { + // Deny wins if there is a conflict + type_aggregate_action = route_rule->wifi_action; + } + } + + if (route_rule->wired_action != NECP_ROUTE_RULE_NONE && + IFNET_IS_WIRED(ifp)) { + if (type_aggregate_action == NECP_ROUTE_RULE_NONE || + (type_aggregate_action == NECP_ROUTE_RULE_ALLOW_INTERFACE && + route_rule->wired_action == NECP_ROUTE_RULE_DENY_INTERFACE)) { + // Deny wins if there is a conflict + type_aggregate_action = route_rule->wired_action; + } + } + + if (route_rule->expensive_action != NECP_ROUTE_RULE_NONE && + IFNET_IS_EXPENSIVE(ifp)) { + if (type_aggregate_action == NECP_ROUTE_RULE_NONE || + (type_aggregate_action == NECP_ROUTE_RULE_ALLOW_INTERFACE && + route_rule->expensive_action == NECP_ROUTE_RULE_DENY_INTERFACE)) { + // Deny wins if there is a conflict + type_aggregate_action = route_rule->expensive_action; + } + } + + if (type_aggregate_action != NECP_ROUTE_RULE_NONE) { + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "Route Allowed: C:%d WF:%d W:%d E:%d for Rule %d Allowed %d", route_rule->cellular_action, route_rule->wifi_action, route_rule->wired_action, route_rule->expensive_action, route_rule_id, ((type_aggregate_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? 
FALSE : TRUE)); + } + return ((type_aggregate_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? FALSE : TRUE); + } + + if (necp_debug > 1 && !default_is_allowed) { + NECPLOG(LOG_DEBUG, "Route Allowed: Using default for Rule %d Allowed %d", route_rule_id, default_is_allowed); + } + return (default_is_allowed); +} + +static bool +necp_route_is_allowed(struct rtentry *route, struct ifnet *interface, u_int32_t route_rule_id, bool *cellular_denied) +{ + if ((route == NULL && interface == NULL) || route_rule_id == 0) { + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "Route Allowed: no route or interface, Rule %d Allowed %d", route_rule_id, TRUE); + } + return (TRUE); + } + + if (ROUTE_RULE_IS_AGGREGATE(route_rule_id)) { + struct necp_aggregate_route_rule *aggregate_route_rule = necp_lookup_aggregate_route_rule_locked(route_rule_id); + if (aggregate_route_rule != NULL) { + int index = 0; + for (index = 0; index < MAX_AGGREGATE_ROUTE_RULES; index++) { + u_int32_t sub_route_rule_id = aggregate_route_rule->rule_ids[index]; + if (sub_route_rule_id == 0) { + break; + } + if (!necp_route_is_allowed_inner(route, interface, sub_route_rule_id, cellular_denied)) { + return (FALSE); + } + } + } + } else { + return (necp_route_is_allowed_inner(route, interface, route_rule_id, cellular_denied)); + } + + return (TRUE); +} + +bool +necp_packet_is_allowed_over_interface(struct mbuf *packet, struct ifnet *interface) +{ + bool is_allowed = TRUE; + u_int32_t route_rule_id = necp_get_route_rule_id_from_packet(packet); + if (route_rule_id != 0 && + interface != NULL) { + lck_rw_lock_shared(&necp_kernel_policy_lock); + is_allowed = necp_route_is_allowed(NULL, interface, necp_get_route_rule_id_from_packet(packet), NULL); + lck_rw_done(&necp_kernel_policy_lock); + } + return (is_allowed); +} + +static bool +necp_netagents_allow_traffic(u_int32_t *netagent_ids, size_t netagent_id_count) +{ + size_t netagent_cursor; + for (netagent_cursor = 0; netagent_cursor < netagent_id_count; netagent_cursor++) { + 
struct necp_uuid_id_mapping *mapping = NULL; + u_int32_t netagent_id = netagent_ids[netagent_cursor]; + if (netagent_id == 0) { + break; + } + mapping = necp_uuid_lookup_uuid_with_service_id_locked(netagent_id); + if (mapping != NULL) { + u_int32_t agent_flags = 0; + agent_flags = netagent_get_flags(mapping->uuid); + if (agent_flags & NETAGENT_FLAG_REGISTERED) { + if (agent_flags & NETAGENT_FLAG_ACTIVE) { + continue; + } else if ((agent_flags & NETAGENT_FLAG_VOLUNTARY) == 0) { + return (FALSE); + } + } + } + } + return (TRUE); +} + +static bool +necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id) { u_int32_t verifyifindex = interface ? interface->if_index : 0; bool allowed_to_receive = TRUE; @@ -5352,15 +6543,26 @@ necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr u_int32_t flowhash = 0; necp_kernel_policy_result service_action = 0; necp_kernel_policy_service service = { 0, 0 }; + u_int32_t route_rule_id = 0; + struct rtentry *route = NULL; + bool cellular_denied = FALSE; + + u_int32_t netagent_ids[NECP_MAX_NETAGENTS]; + memset(&netagent_ids, 0, sizeof(netagent_ids)); if (return_policy_id) { *return_policy_id = NECP_KERNEL_POLICY_ID_NONE; } + if (return_route_rule_id) { + *return_route_rule_id = 0; + } if (inp == NULL) { goto done; } + route = inp->inp_route.ro_rt; + // Don't lock. Possible race condition, but we don't want the performance hit. 
if (necp_kernel_socket_policies_count == 0 || (!(inp->inp_flags2 & INP2_WANT_APP_POLICY) && necp_kernel_socket_policies_non_app_count == 0)) { @@ -5378,25 +6580,37 @@ necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr // If this socket is connected, or we are not taking addresses into account, try to reuse last result if ((necp_socket_is_connected(inp) || (override_local_addr == NULL && override_remote_addr == NULL)) && inp->inp_policyresult.policy_id != NECP_KERNEL_POLICY_ID_NONE) { bool policies_have_changed = FALSE; + bool route_allowed = TRUE; lck_rw_lock_shared(&necp_kernel_policy_lock); if (inp->inp_policyresult.policy_gencount != necp_kernel_socket_policies_gencount) { policies_have_changed = TRUE; + } else { + if (inp->inp_policyresult.results.route_rule_id != 0 && + !necp_route_is_allowed(route, interface, inp->inp_policyresult.results.route_rule_id, &cellular_denied)) { + route_allowed = FALSE; + } } lck_rw_done(&necp_kernel_policy_lock); if (!policies_have_changed) { - if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_DROP || + if (!route_allowed || + inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_DROP || inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT || (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && interface && inp->inp_policyresult.results.result_parameter.tunnel_interface_index != verifyifindex)) { - allowed_to_receive = FALSE; - } else if (return_policy_id) { - *return_policy_id = inp->inp_policyresult.policy_id; - } + allowed_to_receive = FALSE; + } else { + if (return_policy_id) { + *return_policy_id = inp->inp_policyresult.policy_id; + } + if (return_route_rule_id) { + *return_route_rule_id = inp->inp_policyresult.results.route_rule_id; + } + } goto done; } } - + // Check for loopback exception if (necp_pass_loopback > 0 && necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) { @@ -5415,16 
+6629,23 @@ necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_DROP || inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT || (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && interface && - inp->inp_policyresult.results.result_parameter.tunnel_interface_index != verifyifindex)) { + inp->inp_policyresult.results.result_parameter.tunnel_interface_index != verifyifindex) || + (inp->inp_policyresult.results.route_rule_id != 0 && + !necp_route_is_allowed(route, interface, inp->inp_policyresult.results.route_rule_id, &cellular_denied))) { allowed_to_receive = FALSE; - } else if (return_policy_id) { - *return_policy_id = inp->inp_policyresult.policy_id; + } else { + if (return_policy_id) { + *return_policy_id = inp->inp_policyresult.policy_id; + } + if (return_route_rule_id) { + *return_route_rule_id = inp->inp_policyresult.results.route_rule_id; + } } lck_rw_done(&necp_kernel_policy_lock); goto done; } - struct necp_kernel_socket_policy *matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_map[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(info.application_id)], &info, NULL, &service_action, &service); + struct necp_kernel_socket_policy *matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_map[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(info.application_id)], &info, NULL, &route_rule_id, &service_action, &service, netagent_ids, NECP_MAX_NETAGENTS); if (matched_policy != NULL) { if (matched_policy->result == NECP_KERNEL_POLICY_RESULT_DROP || matched_policy->result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT || @@ -5432,10 +6653,18 @@ necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr matched_policy->result_parameter.tunnel_interface_index != verifyifindex) || ((service_action == NECP_KERNEL_POLICY_RESULT_TRIGGER_SCOPED || service_action 
== NECP_KERNEL_POLICY_RESULT_NO_TRIGGER_SCOPED) && - service.identifier != 0 && service.identifier != NECP_NULL_SERVICE_ID)) { + service.identifier != 0 && service.identifier != NECP_NULL_SERVICE_ID) || + (route_rule_id != 0 && + !necp_route_is_allowed(route, interface, route_rule_id, &cellular_denied)) || + !necp_netagents_allow_traffic(netagent_ids, NECP_MAX_NETAGENTS)) { allowed_to_receive = FALSE; - } else if (return_policy_id) { - *return_policy_id = matched_policy->id; + } else { + if (return_policy_id) { + *return_policy_id = matched_policy->id; + } + if (return_route_rule_id) { + *return_route_rule_id = route_rule_id; + } } lck_rw_done(&necp_kernel_policy_lock); @@ -5450,11 +6679,15 @@ necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr lck_rw_done(&necp_kernel_policy_lock); done: + if (!allowed_to_receive && cellular_denied) { + soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED)); + } + return (allowed_to_receive); } bool -necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in_addr *local_addr, struct in_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id) +necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in_addr *local_addr, struct in_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id) { struct sockaddr_in local; struct sockaddr_in remote; @@ -5465,11 +6698,11 @@ necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, memcpy(&local.sin_addr, local_addr, sizeof(local.sin_addr)); memcpy(&remote.sin_addr, remote_addr, sizeof(remote.sin_addr)); - return (necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, interface, return_policy_id)); + return (necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct 
sockaddr *)&remote, interface, return_policy_id, return_route_rule_id)); } bool -necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in6_addr *local_addr, struct in6_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id) +necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in6_addr *local_addr, struct in6_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id) { struct sockaddr_in6 local; struct sockaddr_in6 remote; @@ -5480,17 +6713,17 @@ necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, memcpy(&local.sin6_addr, local_addr, sizeof(local.sin6_addr)); memcpy(&remote.sin6_addr, remote_addr, sizeof(remote.sin6_addr)); - return (necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, interface, return_policy_id)); + return (necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, interface, return_policy_id, return_route_rule_id)); } bool -necp_socket_is_allowed_to_send_recv(struct inpcb *inp, necp_kernel_policy_id *return_policy_id) +necp_socket_is_allowed_to_send_recv(struct inpcb *inp, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id) { - return (necp_socket_is_allowed_to_send_recv_internal(inp, NULL, NULL, NULL, return_policy_id)); + return (necp_socket_is_allowed_to_send_recv_internal(inp, NULL, NULL, NULL, return_policy_id, return_route_rule_id)); } int -necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb *inp, necp_kernel_policy_id policy_id) +necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb *inp, necp_kernel_policy_id policy_id, u_int32_t route_rule_id) { if (packet == NULL || inp == NULL) { return (EINVAL); @@ -5506,6 +6739,11 @@ necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb 
*inp, necp_kernel packet->m_pkthdr.necp_mtag.necp_policy_id = NECP_KERNEL_POLICY_ID_NONE; } packet->m_pkthdr.necp_mtag.necp_last_interface_index = 0; + if (route_rule_id != 0) { + packet->m_pkthdr.necp_mtag.necp_route_rule_id = route_rule_id; + } else { + packet->m_pkthdr.necp_mtag.necp_route_rule_id = inp->inp_policyresult.results.route_rule_id; + } return (0); } @@ -5574,10 +6812,20 @@ necp_get_last_interface_index_from_packet(struct mbuf *packet) if (packet == NULL) { return (0); } - + return (packet->m_pkthdr.necp_mtag.necp_last_interface_index); } +u_int32_t +necp_get_route_rule_id_from_packet(struct mbuf *packet) +{ + if (packet == NULL) { + return (0); + } + + return (packet->m_pkthdr.necp_mtag.necp_route_rule_id); +} + bool necp_get_is_keepalive_from_packet(struct mbuf *packet) { @@ -5647,6 +6895,58 @@ necp_socket_get_rescope_if_index(struct inpcb *inp) return (0); } +u_int32_t +necp_socket_get_effective_mtu(struct inpcb *inp, u_int32_t current_mtu) +{ + if (inp == NULL) { + return (current_mtu); + } + + if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && + (inp->inp_flags & INP_BOUND_IF) && + inp->inp_boundifp) { + + u_int bound_interface_index = inp->inp_boundifp->if_index; + u_int tunnel_interface_index = inp->inp_policyresult.results.result_parameter.tunnel_interface_index; + + // The result is IP Tunnel, and is rescoping from one interface to another. Recalculate MTU. + if (bound_interface_index != tunnel_interface_index) { + ifnet_t tunnel_interface = NULL; + + ifnet_head_lock_shared(); + tunnel_interface = ifindex2ifnet[tunnel_interface_index]; + ifnet_head_done(); + + if (tunnel_interface != NULL) { + u_int32_t direct_tunnel_mtu = tunnel_interface->if_mtu; + u_int32_t delegate_tunnel_mtu = (tunnel_interface->if_delegated.ifp != NULL) ? 
tunnel_interface->if_delegated.ifp->if_mtu : 0; + if (delegate_tunnel_mtu != 0 && + strncmp(tunnel_interface->if_name, "ipsec", strlen("ipsec")) == 0) { + // For ipsec interfaces, calculate the overhead from the delegate interface + u_int32_t tunnel_overhead = (u_int32_t)(esp_hdrsiz(NULL) + sizeof(struct ip6_hdr)); + if (delegate_tunnel_mtu > tunnel_overhead) { + delegate_tunnel_mtu -= tunnel_overhead; + } + + if (delegate_tunnel_mtu < direct_tunnel_mtu) { + // If the (delegate - overhead) < direct, return (delegate - overhead) + return (delegate_tunnel_mtu); + } else { + // Otherwise return direct + return (direct_tunnel_mtu); + } + } else { + // For non-ipsec interfaces, just return the tunnel MTU + return (direct_tunnel_mtu); + } + } + } + } + + // By default, just return the MTU passed in + return (current_mtu); +} + ifnet_t necp_get_ifnet_from_result_parameter(necp_kernel_policy_result_parameter *result_parameter) { diff --git a/bsd/net/necp.h b/bsd/net/necp.h index 519995b85..2aebe9c27 100644 --- a/bsd/net/necp.h +++ b/bsd/net/necp.h @@ -37,6 +37,8 @@ */ #define NECP_CONTROL_NAME "com.apple.net.necp_control" +#define NECP_TLV_LENGTH_UINT32 1 + struct necp_packet_header { u_int8_t packet_type; u_int8_t flags; @@ -65,6 +67,7 @@ struct necp_packet_header { #define NECP_TLV_ATTRIBUTE_DOMAIN 7 // char[] #define NECP_TLV_ATTRIBUTE_ACCOUNT 8 // char[] #define NECP_TLV_SERVICE_UUID 9 // uuid_t +#define NECP_TLV_ROUTE_RULE 10 #define NECP_POLICY_CONDITION_FLAGS_NEGATIVE 0x01 // Negative @@ -103,8 +106,20 @@ struct necp_packet_header { #define NECP_POLICY_RESULT_TRIGGER_SCOPED 10 // service uuid_t #define NECP_POLICY_RESULT_NO_TRIGGER_SCOPED 11 // service uuid_t #define NECP_POLICY_RESULT_SOCKET_SCOPED 12 // String, interface name +#define NECP_POLICY_RESULT_ROUTE_RULES 13 // N/A, must have route rules defined +#define NECP_POLICY_RESULT_USE_NETAGENT 14 // netagent uuid_t + +#define NECP_POLICY_RESULT_MAX NECP_POLICY_RESULT_USE_NETAGENT -#define NECP_POLICY_RESULT_MAX 
NECP_POLICY_RESULT_SOCKET_SCOPED +// Route rule +#define NECP_ROUTE_RULE_NONE 0 // N/A +#define NECP_ROUTE_RULE_DENY_INTERFACE 1 // String, or empty to match all +#define NECP_ROUTE_RULE_ALLOW_INTERFACE 2 // String, or empty to match all + +#define NECP_ROUTE_RULE_FLAG_CELLULAR 0x01 +#define NECP_ROUTE_RULE_FLAG_WIFI 0x02 +#define NECP_ROUTE_RULE_FLAG_WIRED 0x04 +#define NECP_ROUTE_RULE_FLAG_EXPENSIVE 0x08 // Errors #define NECP_ERROR_INTERNAL 0 @@ -114,6 +129,7 @@ struct necp_packet_header { #define NECP_ERROR_POLICY_CONDITIONS_INVALID 4 #define NECP_ERROR_POLICY_ID_NOT_FOUND 5 #define NECP_ERROR_INVALID_PROCESS 6 +#define NECP_ERROR_ROUTE_RULES_INVALID 7 // Modifiers #define NECP_MASK_USERSPACE_ONLY 0x80000000 // on filter_control_unit value @@ -168,6 +184,7 @@ typedef union { } necp_kernel_policy_routing_result_parameter; #define NECP_SERVICE_FLAGS_REGISTERED 0x01 +#define NECP_MAX_NETAGENTS 8 struct necp_aggregate_result { necp_kernel_policy_result routing_result; necp_kernel_policy_routing_result_parameter routing_result_parameter; @@ -176,6 +193,17 @@ struct necp_aggregate_result { uuid_t service_uuid; u_int32_t service_flags; u_int32_t service_data; + u_int routed_interface_index; + u_int32_t policy_id; + uuid_t netagents[NECP_MAX_NETAGENTS]; + u_int32_t netagent_flags[NECP_MAX_NETAGENTS]; +}; + +#define KEV_NECP_SUBCLASS 8 +#define KEV_NECP_POLICIES_CHANGED 1 + +struct kev_necp_policies_changed_data { + u_int32_t changed_count; // Defaults to 0. 
}; #ifdef BSD_KERNEL_PRIVATE @@ -185,10 +213,14 @@ struct necp_aggregate_result { #include #include -#define NECPCTL_DROP_ALL_LEVEL 1 /* Drop all packets if no policy matches above this level */ -#define NECPCTL_DEBUG 2 /* Log all kernel policy matches */ -#define NECPCTL_PASS_LOOPBACK 3 /* Pass all loopback traffic */ -#define NECPCTL_PASS_KEEPALIVES 4 /* Pass all kernel-generated keepalive traffic */ +#define NECPCTL_DROP_ALL_LEVEL 1 /* Drop all packets if no policy matches above this level */ +#define NECPCTL_DEBUG 2 /* Log all kernel policy matches */ +#define NECPCTL_PASS_LOOPBACK 3 /* Pass all loopback traffic */ +#define NECPCTL_PASS_KEEPALIVES 4 /* Pass all kernel-generated keepalive traffic */ +#define NECPCTL_SOCKET_POLICY_COUNT 5 /* Count of all socket-level policies */ +#define NECPCTL_SOCKET_NON_APP_POLICY_COUNT 6 /* Count of non-per-app socket-level policies */ +#define NECPCTL_IP_POLICY_COUNT 7 /* Count of all ip-level policies */ +#define NECPCTL_SESSION_COUNT 8 /* Count of NECP sessions */ #define NECPCTL_NAMES { \ { 0, 0 }, \ @@ -218,6 +250,8 @@ typedef u_int32_t necp_app_id; #define NECP_KERNEL_POLICY_RESULT_TRIGGER_SCOPED NECP_POLICY_RESULT_TRIGGER_SCOPED #define NECP_KERNEL_POLICY_RESULT_NO_TRIGGER_SCOPED NECP_POLICY_RESULT_NO_TRIGGER_SCOPED #define NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED NECP_POLICY_RESULT_SOCKET_SCOPED +#define NECP_KERNEL_POLICY_RESULT_ROUTE_RULES NECP_POLICY_RESULT_ROUTE_RULES +#define NECP_KERNEL_POLICY_RESULT_USE_NETAGENT NECP_POLICY_RESULT_USE_NETAGENT typedef struct { u_int32_t identifier; @@ -230,6 +264,8 @@ typedef union { u_int32_t flow_divert_control_unit; u_int32_t filter_control_unit; u_int32_t skip_policy_order; + u_int32_t route_rule_id; + u_int32_t netagent_id; necp_kernel_policy_service service; } necp_kernel_policy_result_parameter; @@ -245,7 +281,8 @@ struct necp_kernel_socket_policy { necp_kernel_policy_id id; necp_policy_order order; u_int32_t session_order; - + int session_pid; + u_int32_t condition_mask; 
u_int32_t condition_negated_mask; necp_kernel_policy_id cond_policy_id; @@ -265,7 +302,7 @@ struct necp_kernel_socket_policy { union necp_sockaddr_union cond_remote_start; // Matches remote IP address (or start) union necp_sockaddr_union cond_remote_end; // Matches IP address range u_int8_t cond_remote_prefix; // Defines subnet - + necp_kernel_policy_result result; necp_kernel_policy_result_parameter result_parameter; }; @@ -277,7 +314,8 @@ struct necp_kernel_ip_output_policy { necp_policy_order suborder; necp_policy_order order; u_int32_t session_order; - + int session_pid; + u_int32_t condition_mask; u_int32_t condition_negated_mask; necp_kernel_policy_id cond_policy_id; @@ -290,7 +328,7 @@ struct necp_kernel_ip_output_policy { union necp_sockaddr_union cond_remote_end; // Matches IP address range u_int8_t cond_remote_prefix; // Defines subnet u_int32_t cond_last_interface_index; - + necp_kernel_policy_result result; necp_kernel_policy_result_parameter result_parameter; }; @@ -305,17 +343,21 @@ struct necp_session_policy { necp_policy_id id; necp_policy_order order; u_int8_t *result; - size_t result_size; - u_int8_t *conditions; // Array of conditions, each with a size_t length at start - size_t conditions_size; - + u_int32_t result_size; + u_int8_t *conditions; // Array of conditions, each with a u_int32_t length at start + u_int32_t conditions_size; + u_int8_t *route_rules; // Array of route rules, each with a u_int32_t length at start + u_int32_t route_rules_size; + uuid_t applied_app_uuid; uuid_t applied_real_app_uuid; char *applied_domain; char *applied_account; - - uuid_t applied_service_uuid; - + + uuid_t applied_result_uuid; + + u_int32_t applied_route_rules_id; + necp_kernel_policy_id kernel_socket_policies[MAX_KERNEL_SOCKET_POLICIES]; necp_kernel_policy_id kernel_ip_output_policies[MAX_KERNEL_IP_OUTPUT_POLICIES]; }; @@ -324,6 +366,7 @@ struct necp_aggregate_socket_result { necp_kernel_policy_result result; necp_kernel_policy_result_parameter 
result_parameter; necp_kernel_policy_filter filter_control_unit; + u_int32_t route_rule_id; }; struct necp_inpcb_result { @@ -347,13 +390,15 @@ u_int32_t necp_socket_get_flow_divert_control_unit(struct inpcb *inp); bool necp_socket_should_rescope(struct inpcb *inp); u_int necp_socket_get_rescope_if_index(struct inpcb *inp); +u_int32_t necp_socket_get_effective_mtu(struct inpcb *inp, u_int32_t current_mtu); -bool necp_socket_is_allowed_to_send_recv(struct inpcb *inp, necp_kernel_policy_id *return_policy_id); -bool necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in_addr *local_addr, struct in_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id); -bool necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in6_addr *local_addr, struct in6_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id); -int necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb *inp, necp_kernel_policy_id policy_id); +bool necp_socket_is_allowed_to_send_recv(struct inpcb *inp, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id); +bool necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in_addr *local_addr, struct in_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id); +bool necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in6_addr *local_addr, struct in6_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id); +int necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb *inp, necp_kernel_policy_id policy_id, u_int32_t route_rule_id); necp_kernel_policy_id necp_get_policy_id_from_packet(struct mbuf *packet); u_int32_t 
necp_get_last_interface_index_from_packet(struct mbuf *packet); +u_int32_t necp_get_route_rule_id_from_packet(struct mbuf *packet); necp_kernel_policy_id necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int32_t override_bound_interface); necp_kernel_policy_id necp_ip_output_find_policy_match(struct mbuf *packet, int flags, struct ip_out_args *ipoa, necp_kernel_policy_result *result, necp_kernel_policy_result_parameter *result_parameter); @@ -365,6 +410,8 @@ int necp_mark_packet_from_interface(struct mbuf *packet, ifnet_t interface); ifnet_t necp_get_ifnet_from_result_parameter(necp_kernel_policy_result_parameter *result_parameter); bool necp_packet_can_rebind_to_ifnet(struct mbuf *packet, struct ifnet *interface, struct route *new_route, int family); +bool necp_packet_is_allowed_over_interface(struct mbuf *packet, struct ifnet *interface); + int necp_mark_packet_as_keepalive(struct mbuf *packet, bool is_keepalive); bool necp_get_is_keepalive_from_packet(struct mbuf *packet); diff --git a/bsd/net/net_perf.c b/bsd/net/net_perf.c new file mode 100644 index 000000000..b475644ed --- /dev/null +++ b/bsd/net/net_perf.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include
+#include
+#include
+#include
+
+static void ip_perf_record_stats(net_perf_t *npp, struct timeval *tv1,
+	struct timeval *tv2, uint64_t num_pkts);
+static void update_bins(net_perf_t *npp, uint64_t bins);
+
+/* Stamp *tv with microtime() as the start of a measurement; npp is unused. */
+void net_perf_start_time(net_perf_t *npp, struct timeval *tv)
+{
+#pragma unused(npp)
+	microtime(tv);
+}
+
+/*
+ * Take an end timestamp with microtime() and charge the interval since
+ * *start, together with num_pkts, to the running totals in npp.
+ */
+void net_perf_measure_time(net_perf_t *npp, struct timeval *start, uint64_t num_pkts)
+{
+	struct timeval now;
+
+	microtime(&now);
+	ip_perf_record_stats(npp, start, &now, num_pkts);
+}
+
+/*
+ * Atomically add the microseconds between tv1 and tv2 to np_total_usecs
+ * and num_pkts to np_total_pkts.
+ */
+static void
+ip_perf_record_stats(net_perf_t *npp, struct timeval *tv1, struct timeval *tv2, uint64_t num_pkts)
+{
+	struct timeval elapsed;
+
+	timersub(tv2, tv1, &elapsed);
+
+	uint64_t elapsed_usecs = elapsed.tv_sec * 1000000ULL + elapsed.tv_usec;
+
+	OSAddAtomic64(elapsed_usecs, &npp->np_total_usecs);
+	OSAddAtomic64(num_pkts, &npp->np_total_pkts);
+}
+
+/*
+ * Rebuild np_hist_bars from a bitmask.  Scanning from the least significant
+ * bit, the 1-based position of each set bit becomes the next bar, until
+ * NET_PERF_BARS bars are filled or all 64 bits have been examined.  Bars
+ * that are never filled stay zero.
+ */
+static void
+update_bins(net_perf_t *npp, uint64_t bins)
+{
+	int next_bar = 0;
+
+	bzero(&npp->np_hist_bars, sizeof(npp->np_hist_bars));
+
+	for (int bit_pos = 1; bit_pos <= 64 && next_bar < NET_PERF_BARS; bit_pos++, bins >>= 1) {
+		if ((bins & 0x1) == 0) {
+			continue;
+		}
+		npp->np_hist_bars[next_bar++] = bit_pos;
+	}
+}
+
+/* Zero the whole net_perf_t, then derive the histogram bars from bins. */
+void
+net_perf_initialize(net_perf_t *npp, uint64_t bins)
+{
+	bzero(npp, sizeof(*npp));
+	update_bins(npp, bins);
+}
+
+/*
+ * Add num_pkts to the histogram bin selected by num_pkts itself: bin 1 for
+ * values up to np_hist_bars[0], bins 2-4 for values up to the next bar in
+ * turn, and bin 5 for everything above np_hist_bars[3].  Each guard is only
+ * reached once the previous one has failed, so the lower bound of a bin is
+ * implied and need not be re-tested.
+ */
+void
+net_perf_histogram(net_perf_t *npp, uint64_t num_pkts)
+{
+	if (num_pkts <= npp->np_hist_bars[0]) {
+		OSAddAtomic64(num_pkts, &npp->np_hist1);
+		return;
+	}
+	if (num_pkts <= npp->np_hist_bars[1]) {
+		OSAddAtomic64(num_pkts, &npp->np_hist2);
+		return;
+	}
+	if (num_pkts <= npp->np_hist_bars[2]) {
+		OSAddAtomic64(num_pkts, &npp->np_hist3);
+		return;
+	}
+	if (num_pkts <= npp->np_hist_bars[3]) {
+		OSAddAtomic64(num_pkts, &npp->np_hist4);
+		return;
+	}
+	OSAddAtomic64(num_pkts, &npp->np_hist5);
+}
+
+/* A bin mask is accepted only when exactly NET_PERF_BARS bits are set. */
+boolean_t
+net_perf_validate_bins(uint64_t bins)
+{
+	return (__builtin_popcountll(bins) == NET_PERF_BARS);
+}
+
diff --git a/bsd/net/net_perf.h b/bsd/net/net_perf.h
new file mode 100644
index 000000000..7c37356b4
--- /dev/null
+++ b/bsd/net/net_perf.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _NET_NET_PERF_H_ +#define _NET_NET_PERF_H_ + +#include + +#ifdef KERNEL_PRIVATE +#include +#include +#endif /* KERNEL_PRIVATE */ + +/* five histogram bins are separated by four dividing "bars" */ +#define NET_PERF_BARS 4 + +typedef struct net_perf { + uint64_t np_total_pkts; /* total packets input or output during measurement */ + uint64_t np_total_usecs; /* microseconds elapsed during measurement */ + uint64_t np_hist1; /* histogram bin 1 */ + uint64_t np_hist2; /* histogram bin 2 */ + uint64_t np_hist3; /* histogram bin 3 */ + uint64_t np_hist4; /* histogram bin 4 */ + uint64_t np_hist5; /* histogram bin 5 */ + uint8_t np_hist_bars[NET_PERF_BARS]; +} net_perf_t; + +#ifdef KERNEL_PRIVATE +void net_perf_initialize(net_perf_t *npp, uint64_t bins); +void net_perf_start_time(net_perf_t *npp, struct timeval *tv); +void net_perf_measure_time(net_perf_t *npp, struct timeval *start, uint64_t num_pkts); +void net_perf_histogram(net_perf_t *npp, uint64_t num_pkts); +boolean_t net_perf_validate_bins(uint64_t bins); + +#endif /* KERNEL_PRIVATE */ + +#endif /* _NET_NET_PERF_H_ */ + diff --git a/bsd/net/net_stubs.c b/bsd/net/net_stubs.c index 54211edde..052ae2ffe 100644 --- a/bsd/net/net_stubs.c +++ b/bsd/net/net_stubs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014 Apple Inc. All rights reserved. + * Copyright (c) 2012-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -330,7 +330,11 @@ STUB(ifnet_tx_compl_status); STUB(ifnet_flowid); STUB(ifnet_enable_output); STUB(ifnet_disable_output); -STUB(ifnet_get_ipsec_offload_frames); +STUB(ifnet_get_keepalive_offload_frames); +STUB(ifnet_link_status_report); +STUB(ifnet_set_packetpreamblelen); +STUB(ifnet_packetpreamblelen); +STUB(ifnet_maxpacketpreamblelen); STUB(in6_localaddr); STUB(in_localaddr); STUB(in6addr_local); @@ -348,6 +352,7 @@ STUB(m_pullup); STUB(m_split); STUB(m_trailingspace); STUB(mbuf_get_driver_scratch); +STUB(mbuf_get_unsent_data_bytes); STUB(mbuf_get_priority); STUB(mbuf_get_service_class); STUB(mbuf_get_service_class_index); diff --git a/bsd/net/network_agent.c b/bsd/net/network_agent.c new file mode 100644 index 000000000..685437908 --- /dev/null +++ b/bsd/net/network_agent.c @@ -0,0 +1,1150 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +u_int32_t netagent_debug = LOG_NOTICE; // 0=None, 1=Basic + +SYSCTL_NODE(_net, OID_AUTO, netagent, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "NetworkAgent"); +SYSCTL_INT(_net_netagent, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &netagent_debug, 0, ""); + +static int netagent_registered_count = 0; +SYSCTL_INT(_net_netagent, OID_AUTO, registered_count , CTLFLAG_RD | CTLFLAG_LOCKED, + &netagent_registered_count, 0, ""); + +static int netagent_active_count = 0; +SYSCTL_INT(_net_netagent, OID_AUTO, active_count , CTLFLAG_RD | CTLFLAG_LOCKED, + &netagent_active_count, 0, ""); + +#define NETAGENTLOG(level, format, ...) do { \ + if (level <= netagent_debug) \ + log((level > LOG_NOTICE ? LOG_NOTICE : level), "%s: " format "\n", __FUNCTION__, __VA_ARGS__); \ +} while (0) + +#define NETAGENTLOG0(level, msg) do { \ + if (level <= netagent_debug) \ + log((level > LOG_NOTICE ? 
LOG_NOTICE : level), "%s: %s\n", __FUNCTION__, msg); \ +} while (0) + +struct netagent_assertion { + LIST_ENTRY(netagent_assertion) assertion_chain; + uuid_t asserted_uuid; +}; + +struct netagent_wrapper { + LIST_ENTRY(netagent_wrapper) master_chain; + u_int32_t control_unit; + struct netagent netagent; +}; + +struct netagent_session { + u_int32_t control_unit; + struct netagent_wrapper *wrapper; + LIST_HEAD(_netagent_assertion_list, netagent_assertion) assertion_list; +}; + +static LIST_HEAD(_netagent_list, netagent_wrapper) master_netagent_list; + +static kern_ctl_ref netagent_kctlref; +static u_int32_t netagent_family; +static OSMallocTag netagent_malloc_tag; +static lck_grp_attr_t *netagent_grp_attr = NULL; +static lck_attr_t *netagent_mtx_attr = NULL; +static lck_grp_t *netagent_mtx_grp = NULL; +decl_lck_rw_data(static, netagent_lock); + +static errno_t netagent_register_control(void); +static errno_t netagent_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, + void **unitinfo); +static errno_t netagent_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo); +static errno_t netagent_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + mbuf_t m, int flags); +static void netagent_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags); +static errno_t netagent_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + int opt, void *data, size_t *len); +static errno_t netagent_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + int opt, void *data, size_t len); + +static int netagent_send_ctl_data(u_int32_t control_unit, u_int8_t *buffer, size_t buffer_size); + +static struct netagent_session *netagent_create_session(u_int32_t control_unit); +static void netagent_delete_session(struct netagent_session *session); + +static void netagent_handle_register(struct netagent_session *session, u_int32_t message_id, + u_int32_t payload_length, mbuf_t packet, int offset); +static void 
netagent_handle_unregister(struct netagent_session *session, u_int32_t message_id, + u_int32_t payload_length, mbuf_t packet, int offset); +static void netagent_handle_update(struct netagent_session *session, u_int32_t message_id, + u_int32_t payload_length, mbuf_t packet, int offset); +static void netagent_handle_get(struct netagent_session *session, u_int32_t message_id, + u_int32_t payload_length, mbuf_t packet, int offset); +static void netagent_handle_assert(struct netagent_session *session, u_int32_t message_id, + u_int32_t payload_length, mbuf_t packet, int offset); +static void netagent_handle_unassert(struct netagent_session *session, u_int32_t message_id, + u_int32_t payload_length, mbuf_t packet, int offset); + +static struct netagent_wrapper *netagent_find_agent_with_uuid(uuid_t uuid); + +errno_t +netagent_init(void) +{ + errno_t result = 0; + + result = netagent_register_control(); + if (result != 0) { + goto done; + } + + netagent_grp_attr = lck_grp_attr_alloc_init(); + if (netagent_grp_attr == NULL) { + NETAGENTLOG0(LOG_ERR, "lck_grp_attr_alloc_init failed"); + result = ENOMEM; + goto done; + } + + netagent_mtx_grp = lck_grp_alloc_init(NETAGENT_CONTROL_NAME, netagent_grp_attr); + if (netagent_mtx_grp == NULL) { + NETAGENTLOG0(LOG_ERR, "lck_grp_alloc_init failed"); + result = ENOMEM; + goto done; + } + + netagent_mtx_attr = lck_attr_alloc_init(); + if (netagent_mtx_attr == NULL) { + NETAGENTLOG0(LOG_ERR, "lck_attr_alloc_init failed"); + result = ENOMEM; + goto done; + } + + lck_rw_init(&netagent_lock, netagent_mtx_grp, netagent_mtx_attr); + + LIST_INIT(&master_netagent_list); + +done: + if (result != 0) { + if (netagent_mtx_attr != NULL) { + lck_attr_free(netagent_mtx_attr); + netagent_mtx_attr = NULL; + } + if (netagent_mtx_grp != NULL) { + lck_grp_free(netagent_mtx_grp); + netagent_mtx_grp = NULL; + } + if (netagent_grp_attr != NULL) { + lck_grp_attr_free(netagent_grp_attr); + netagent_grp_attr = NULL; + } + if (netagent_kctlref != NULL) { + 
ctl_deregister(netagent_kctlref);
+			netagent_kctlref = NULL;
+		}
+	}
+	return (result);
+}
+
+// Register the NETAGENT_CONTROL_NAME kernel control.
+// Allocates the OSMalloc tag, looks up the mbuf tag id stored in
+// netagent_family, then registers a root-only control with 64KB send and
+// receive sizes and the netagent_ctl_* callbacks.
+// Returns 0 on success, otherwise the error from mbuf_tag_id_find() or
+// ctl_register().
+static errno_t
+netagent_register_control(void)
+{
+	struct kern_ctl_reg kern_ctl;
+	errno_t result = 0;
+
+	// Create a tag to allocate memory
+	netagent_malloc_tag = OSMalloc_Tagalloc(NETAGENT_CONTROL_NAME, OSMT_DEFAULT);
+
+	// Find a unique value for our interface family
+	result = mbuf_tag_id_find(NETAGENT_CONTROL_NAME, &netagent_family);
+	if (result != 0) {
+		// Name the function that was actually called so the log is searchable
+		NETAGENTLOG(LOG_ERR, "mbuf_tag_id_find failed: %d", result);
+		return (result);
+	}
+
+	bzero(&kern_ctl, sizeof(kern_ctl));
+	strlcpy(kern_ctl.ctl_name, NETAGENT_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
+	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
+	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; // Require root
+	kern_ctl.ctl_sendsize = 64 * 1024;
+	kern_ctl.ctl_recvsize = 64 * 1024;
+	kern_ctl.ctl_connect = netagent_ctl_connect;
+	kern_ctl.ctl_disconnect = netagent_ctl_disconnect;
+	kern_ctl.ctl_send = netagent_ctl_send;
+	kern_ctl.ctl_rcvd = netagent_ctl_rcvd;
+	kern_ctl.ctl_setopt = netagent_ctl_setopt;
+	kern_ctl.ctl_getopt = netagent_ctl_getopt;
+
+	result = ctl_register(&kern_ctl, &netagent_kctlref);
+	if (result != 0) {
+		NETAGENTLOG(LOG_ERR, "ctl_register failed: %d", result);
+		return (result);
+	}
+
+	return (0);
+}
+
+// Connect callback: create a session for the connecting control unit and
+// hand it back through *unitinfo. Returns ENOBUFS if no session was created.
+static errno_t
+netagent_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, void **unitinfo)
+{
+#pragma unused(kctlref)
+	*unitinfo = netagent_create_session(sac->sc_unit);
+	if (*unitinfo == NULL) {
+		// Could not allocate session
+		return (ENOBUFS);
+	}
+
+	return (0);
+}
+
+// Disconnect callback: delete the session stored in unitinfo, if any.
+// Always returns 0.
+static errno_t
+netagent_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo)
+{
+#pragma unused(kctlref, unit)
+	struct netagent_session *session = (struct netagent_session *)unitinfo;
+	if (session != NULL) {
+		netagent_delete_session(session);
+	}
+
+	return (0);
+}
+
+// Kernel events
+static void
+netagent_post_event(uuid_t agent_uuid, u_int32_t event_code)
+{
+
struct kev_msg ev_msg; + memset(&ev_msg, 0, sizeof(ev_msg)); + + struct kev_netagent_data event_data; + + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_NETAGENT_SUBCLASS; + ev_msg.event_code = event_code; + + uuid_copy(event_data.netagent_uuid, agent_uuid); + ev_msg.dv[0].data_ptr = &event_data; + ev_msg.dv[0].data_length = sizeof(event_data); + + kev_post_msg(&ev_msg); +} + +// Message handling +static u_int8_t * +netagent_buffer_write_message_header(u_int8_t *buffer, u_int8_t message_type, u_int8_t flags, + u_int32_t message_id, u_int32_t error, u_int32_t payload_length) +{ + ((struct netagent_message_header *)(void *)buffer)->message_type = message_type; + ((struct netagent_message_header *)(void *)buffer)->message_flags = flags; + ((struct netagent_message_header *)(void *)buffer)->message_id = message_id; + ((struct netagent_message_header *)(void *)buffer)->message_error = error; + ((struct netagent_message_header *)(void *)buffer)->message_payload_length = payload_length; + return (buffer + sizeof(struct netagent_message_header)); +} + +static int +netagent_send_ctl_data(u_int32_t control_unit, u_int8_t *buffer, size_t buffer_size) +{ + if (netagent_kctlref == NULL || control_unit == 0 || buffer == NULL || buffer_size == 0) { + return (EINVAL); + } + + return ctl_enqueuedata(netagent_kctlref, control_unit, buffer, buffer_size, CTL_DATA_EOR); +} + +static int +netagent_send_trigger(struct netagent_wrapper *wrapper, struct proc *p, u_int32_t flags, u_int32_t trigger_type) +{ + int error = 0; + struct netagent_trigger_message *trigger_message = NULL; + u_int8_t *trigger = NULL; + size_t trigger_size = sizeof(struct netagent_message_header) + sizeof(struct netagent_trigger_message); + + MALLOC(trigger, u_int8_t *, trigger_size, M_NETAGENT, M_WAITOK); + if (trigger == NULL) { + return (ENOMEM); + } + + (void)netagent_buffer_write_message_header(trigger, trigger_type, 0, 0, 0, sizeof(struct 
netagent_trigger_message)); + + trigger_message = (struct netagent_trigger_message *)(void *)(trigger + sizeof(struct netagent_message_header)); + trigger_message->trigger_flags = flags; + if (p != NULL) { + trigger_message->trigger_pid = proc_pid(p); + proc_getexecutableuuid(p, trigger_message->trigger_proc_uuid, sizeof(trigger_message->trigger_proc_uuid)); + } else { + trigger_message->trigger_pid = 0; + uuid_clear(trigger_message->trigger_proc_uuid); + } + + if ((error = netagent_send_ctl_data(wrapper->control_unit, (u_int8_t *)trigger, trigger_size))) { + NETAGENTLOG(LOG_ERR, "Failed to send trigger message on control unit %d", wrapper->control_unit); + } + + FREE(trigger, M_NETAGENT); + return (error); +} + +static int +netagent_send_success_response(struct netagent_session *session, u_int8_t message_type, u_int32_t message_id) +{ + int error = 0; + u_int8_t *response = NULL; + size_t response_size = sizeof(struct netagent_message_header); + MALLOC(response, u_int8_t *, response_size, M_NETAGENT, M_WAITOK); + if (response == NULL) { + return (ENOMEM); + } + (void)netagent_buffer_write_message_header(response, message_type, NETAGENT_MESSAGE_FLAGS_RESPONSE, message_id, 0, 0); + + if ((error = netagent_send_ctl_data(session->control_unit, (u_int8_t *)response, response_size))) { + NETAGENTLOG0(LOG_ERR, "Failed to send response"); + } + + FREE(response, M_NETAGENT); + return (error); +} + +static int +netagent_send_error_response(struct netagent_session *session, u_int8_t message_type, + u_int32_t message_id, u_int32_t error_code) +{ + int error = 0; + u_int8_t *response = NULL; + size_t response_size = sizeof(struct netagent_message_header); + MALLOC(response, u_int8_t *, response_size, M_NETAGENT, M_WAITOK); + if (response == NULL) { + return (ENOMEM); + } + (void)netagent_buffer_write_message_header(response, message_type, NETAGENT_MESSAGE_FLAGS_RESPONSE, + message_id, error_code, 0); + + if ((error = netagent_send_ctl_data(session->control_unit, (u_int8_t 
*)response, response_size))) { + NETAGENTLOG0(LOG_ERR, "Failed to send response"); + } + + FREE(response, M_NETAGENT); + return (error); +} + +static errno_t +netagent_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, mbuf_t packet, int flags) +{ +#pragma unused(kctlref, unit, flags) + struct netagent_session *session = (struct netagent_session *)unitinfo; + struct netagent_message_header header; + int error = 0; + + if (session == NULL) { + NETAGENTLOG0(LOG_ERR, "Got a NULL session"); + error = EINVAL; + goto done; + } + + if (mbuf_pkthdr_len(packet) < sizeof(header)) { + NETAGENTLOG(LOG_ERR, "Got a bad packet, length (%lu) < sizeof header (%lu)", + mbuf_pkthdr_len(packet), sizeof(header)); + error = EINVAL; + goto done; + } + + error = mbuf_copydata(packet, 0, sizeof(header), &header); + if (error) { + NETAGENTLOG(LOG_ERR, "mbuf_copydata failed for the header: %d", error); + error = ENOBUFS; + goto done; + } + + switch (header.message_type) { + case NETAGENT_MESSAGE_TYPE_REGISTER: { + netagent_handle_register(session, header.message_id, header.message_payload_length, + packet, sizeof(header)); + break; + } + case NETAGENT_MESSAGE_TYPE_UNREGISTER: { + netagent_handle_unregister(session, header.message_id, header.message_payload_length, + packet, sizeof(header)); + break; + } + case NETAGENT_MESSAGE_TYPE_UPDATE: { + netagent_handle_update(session, header.message_id, header.message_payload_length, + packet, sizeof(header)); + break; + } + case NETAGENT_MESSAGE_TYPE_GET: { + netagent_handle_get(session, header.message_id, header.message_payload_length, + packet, sizeof(header)); + break; + } + case NETAGENT_MESSAGE_TYPE_ASSERT: { + netagent_handle_assert(session, header.message_id, header.message_payload_length, + packet, sizeof(header)); + break; + } + case NETAGENT_MESSAGE_TYPE_UNASSERT: { + netagent_handle_unassert(session, header.message_id, header.message_payload_length, + packet, sizeof(header)); + break; + } + default: { + NETAGENTLOG(LOG_ERR, 
"Received unknown message type %d", header.message_type); + netagent_send_error_response(session, header.message_type, header.message_id, + NETAGENT_MESSAGE_ERROR_UNKNOWN_TYPE); + break; + } + } + +done: + mbuf_freem(packet); + return (error); +} + +static void +netagent_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags) +{ +#pragma unused(kctlref, unit, unitinfo, flags) + return; +} + +static errno_t +netagent_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt, + void *data, size_t *len) +{ +#pragma unused(kctlref, unit, unitinfo, opt, data, len) + return (0); +} + +static errno_t +netagent_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt, + void *data, size_t len) +{ +#pragma unused(kctlref, unit, unitinfo, opt, data, len) + return (0); +} + +// Session Management +static struct netagent_session * +netagent_create_session(u_int32_t control_unit) +{ + struct netagent_session *new_session = NULL; + + MALLOC(new_session, struct netagent_session *, sizeof(*new_session), M_NETAGENT, M_WAITOK); + if (new_session == NULL) { + goto done; + } + NETAGENTLOG(LOG_DEBUG, "Create agent session, control unit %d", control_unit); + memset(new_session, 0, sizeof(*new_session)); + new_session->control_unit = control_unit; + LIST_INIT(&new_session->assertion_list); + new_session->wrapper = NULL; +done: + return (new_session); +} + +static void +netagent_unregister_session_wrapper(struct netagent_session *session) +{ + bool unregistered = FALSE; + uuid_t unregistered_uuid; + struct netagent_wrapper *wrapper = NULL; + lck_rw_lock_exclusive(&netagent_lock); + if (session != NULL) { + wrapper = session->wrapper; + if (wrapper != NULL) { + if (netagent_registered_count > 0) { + netagent_registered_count--; + } + if ((session->wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE) && + netagent_active_count > 0) { + netagent_active_count--; + } + + LIST_REMOVE(wrapper, master_chain); + + unregistered = TRUE; + 
uuid_copy(unregistered_uuid, session->wrapper->netagent.netagent_uuid); + + FREE(wrapper, M_NETAGENT); + session->wrapper = NULL; + NETAGENTLOG0(LOG_DEBUG, "Unregistered agent"); + } + } + lck_rw_done(&netagent_lock); + + if (unregistered) { + netagent_post_event(unregistered_uuid, KEV_NETAGENT_UNREGISTERED); + ifnet_clear_netagent(unregistered_uuid); + } +} + +static void +netagent_delete_session(struct netagent_session *session) +{ + if (session != NULL) { + netagent_unregister_session_wrapper(session); + + // Unassert any pending assertions + lck_rw_lock_shared(&netagent_lock); + struct netagent_assertion *search_assertion = NULL; + struct netagent_assertion *temp_assertion = NULL; + LIST_FOREACH_SAFE(search_assertion, &session->assertion_list, assertion_chain, temp_assertion) { + struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(search_assertion->asserted_uuid); + if (wrapper != NULL) { + netagent_send_trigger(wrapper, current_proc(), NETAGENT_TRIGGER_FLAG_USER, NETAGENT_MESSAGE_TYPE_TRIGGER_UNASSERT); + } + LIST_REMOVE(search_assertion, assertion_chain); + FREE(search_assertion, M_NETAGENT); + } + lck_rw_done(&netagent_lock); + + FREE(session, M_NETAGENT); + } +} + +static int +netagent_packet_get_netagent_data_size(mbuf_t packet, int offset, int *err) +{ + int error = 0; + + struct netagent netagent_peek; + memset(&netagent_peek, 0, sizeof(netagent_peek)); + + *err = 0; + + error = mbuf_copydata(packet, offset, sizeof(netagent_peek), &netagent_peek); + if (error) { + *err = ENOENT; + return (-1); + } + + return (netagent_peek.netagent_data_size); +} + +static void +netagent_handle_register(struct netagent_session *session, u_int32_t message_id, + u_int32_t payload_length, mbuf_t packet, int offset) +{ + int error; + int data_size = 0; + struct netagent_wrapper *new_wrapper = NULL; + u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + uuid_t netagent_uuid; + uuid_clear(netagent_uuid); + + if (session == NULL) { + NETAGENTLOG0(LOG_ERR, 
"Failed to find session"); + response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + goto fail; + } + + if (session->wrapper != NULL) { + NETAGENTLOG0(LOG_ERR, "Session already has a registered agent"); + response_error = NETAGENT_MESSAGE_ERROR_ALREADY_REGISTERED; + goto fail; + } + + if (payload_length < sizeof(struct netagent)) { + NETAGENTLOG(LOG_ERR, "Register message size too small for agent: (%d < %d)", + payload_length, sizeof(struct netagent)); + response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA; + goto fail; + } + + data_size = netagent_packet_get_netagent_data_size(packet, offset, &error); + if (error || data_size < 0 || data_size > NETAGENT_MAX_DATA_SIZE) { + NETAGENTLOG(LOG_ERR, "Register message size could not be read, error %d data_size %d", + error, data_size); + response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA; + goto fail; + } + + MALLOC(new_wrapper, struct netagent_wrapper *, sizeof(*new_wrapper) + data_size, M_NETAGENT, M_WAITOK); + if (new_wrapper == NULL) { + NETAGENTLOG0(LOG_ERR, "Failed to allocate agent"); + response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + goto fail; + } + + memset(new_wrapper, 0, sizeof(*new_wrapper) + data_size); + + error = mbuf_copydata(packet, offset, sizeof(struct netagent) + data_size, + &new_wrapper->netagent); + if (error) { + NETAGENTLOG(LOG_ERR, "Failed to read data into agent structure: %d", error); + FREE(new_wrapper, M_NETAGENT); + response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + goto fail; + } + + lck_rw_lock_exclusive(&netagent_lock); + + new_wrapper->control_unit = session->control_unit; + + session->wrapper = new_wrapper; + LIST_INSERT_HEAD(&master_netagent_list, new_wrapper, master_chain); + + new_wrapper->netagent.netagent_flags |= NETAGENT_FLAG_REGISTERED; + netagent_registered_count++; + if (new_wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE) { + netagent_active_count++; + } + + lck_rw_done(&netagent_lock); + + NETAGENTLOG0(LOG_DEBUG, "Registered new agent"); + 
netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_REGISTER, message_id); + netagent_post_event(new_wrapper->netagent.netagent_uuid, KEV_NETAGENT_REGISTERED); + return; +fail: + netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_REGISTER, message_id, response_error); +} + +static void +netagent_handle_unregister(struct netagent_session *session, u_int32_t message_id, + u_int32_t payload_length, mbuf_t packet, int offset) +{ +#pragma unused(payload_length, packet, offset) + u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + + if (session == NULL) { + NETAGENTLOG0(LOG_ERR, "Failed to find session"); + response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + goto fail; + } + + netagent_unregister_session_wrapper(session); + + netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_UNREGISTER, message_id); + return; +fail: + netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_UNREGISTER, message_id, response_error); +} + +static void +netagent_handle_update(struct netagent_session *session, u_int32_t message_id, + u_int32_t payload_length, mbuf_t packet, int offset) +{ + int error; + int data_size = 0; + struct netagent_wrapper *new_wrapper = NULL; + u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + uuid_t netagent_uuid; + uuid_clear(netagent_uuid); + + if (session == NULL) { + NETAGENTLOG0(LOG_ERR, "Failed to find session"); + response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + goto fail; + } + + if (session->wrapper == NULL) { + NETAGENTLOG0(LOG_ERR, "Session has no agent to update"); + response_error = NETAGENT_MESSAGE_ERROR_NOT_REGISTERED; + goto fail; + } + + if (payload_length < sizeof(struct netagent)) { + NETAGENTLOG(LOG_ERR, "Update message size too small for agent: (%d < %d)", + payload_length, sizeof(struct netagent)); + response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA; + goto fail; + } + + data_size = netagent_packet_get_netagent_data_size(packet, offset, &error); + if (error || data_size < 0 || 
data_size > NETAGENT_MAX_DATA_SIZE) { + NETAGENTLOG(LOG_ERR, "Update message size could not be read, error %d data_size %d", + error, data_size); + response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA; + goto fail; + } + + MALLOC(new_wrapper, struct netagent_wrapper *, sizeof(*new_wrapper) + data_size, M_NETAGENT, M_WAITOK); + if (new_wrapper == NULL) { + NETAGENTLOG0(LOG_ERR, "Failed to allocate agent"); + response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + goto fail; + } + + memset(new_wrapper, 0, sizeof(*new_wrapper) + data_size); + + error = mbuf_copydata(packet, offset, sizeof(struct netagent) + data_size, &new_wrapper->netagent); + if (error) { + NETAGENTLOG(LOG_ERR, "Failed to read data into agent structure: %d", error); + FREE(new_wrapper, M_NETAGENT); + response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + goto fail; + } + + lck_rw_lock_exclusive(&netagent_lock); + + if (uuid_compare(session->wrapper->netagent.netagent_uuid, new_wrapper->netagent.netagent_uuid) != 0 || + memcmp(&session->wrapper->netagent.netagent_domain, &new_wrapper->netagent.netagent_domain, + sizeof(new_wrapper->netagent.netagent_domain)) != 0 || + memcmp(&session->wrapper->netagent.netagent_type, &new_wrapper->netagent.netagent_type, + sizeof(new_wrapper->netagent.netagent_type)) != 0) { + NETAGENTLOG0(LOG_ERR, "Basic agent parameters do not match, cannot update"); + FREE(new_wrapper, M_NETAGENT); + response_error = NETAGENT_MESSAGE_ERROR_CANNOT_UPDATE; + lck_rw_done(&netagent_lock); + goto fail; + } + + new_wrapper->netagent.netagent_flags |= NETAGENT_FLAG_REGISTERED; + if ((new_wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE) && + !(session->wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE)) { + netagent_active_count++; + } else if (!(new_wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE) && + (session->wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE) && + netagent_active_count > 0) { + netagent_active_count--; + } + + LIST_REMOVE(session->wrapper, 
master_chain); + FREE(session->wrapper, M_NETAGENT); + session->wrapper = new_wrapper; + new_wrapper->control_unit = session->control_unit; + LIST_INSERT_HEAD(&master_netagent_list, new_wrapper, master_chain); + + lck_rw_done(&netagent_lock); + + NETAGENTLOG0(LOG_DEBUG, "Updated agent"); + netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_UPDATE, message_id); + netagent_post_event(new_wrapper->netagent.netagent_uuid, KEV_NETAGENT_UPDATED); + return; +fail: + netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_UPDATE, message_id, response_error); +}
+
+/*
+ * Handle a NETAGENT_MESSAGE_TYPE_GET request.
+ *
+ * Replies on the session's control unit with a response header followed by a
+ * copy of the session's registered netagent: the fixed struct plus
+ * netagent_data_size bytes of trailing data. The agent is read while holding
+ * netagent_lock shared; the lock is dropped before the response is sent.
+ *
+ * payload_length, packet and offset are unused. If the session is missing,
+ * has no registered agent, or the response buffer cannot be allocated, an
+ * error response carrying response_error is sent instead.
+ */
+static void
+netagent_handle_get(struct netagent_session *session, u_int32_t message_id,
+					u_int32_t payload_length, mbuf_t packet, int offset)
+{
+#pragma unused(payload_length, packet, offset)
+	u_int8_t *response = NULL;
+	u_int8_t *cursor = NULL;
+	u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+
+	if (session == NULL) {
+		NETAGENTLOG0(LOG_ERR, "Failed to find session");
+		response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+		goto fail;
+	}
+
+	if (session->wrapper == NULL) {
+		NETAGENTLOG0(LOG_ERR, "Session has no agent to get");
+		response_error = NETAGENT_MESSAGE_ERROR_NOT_REGISTERED;
+		goto fail;
+	}
+
+	lck_rw_lock_shared(&netagent_lock);
+
+	size_t response_size = sizeof(struct netagent_message_header) + sizeof(session->wrapper->netagent)
+							+ session->wrapper->netagent.netagent_data_size;
+	MALLOC(response, u_int8_t *, response_size, M_NETAGENT, M_WAITOK);
+	if (response == NULL) {
+		/*
+		 * The fail path does not unlock, so release the shared lock here;
+		 * otherwise an allocation failure would leave netagent_lock held.
+		 */
+		lck_rw_done(&netagent_lock);
+		NETAGENTLOG0(LOG_ERR, "Failed to allocate response");
+		response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+		goto fail;
+	}
+
+	/* Header first; the payload length excludes the header itself. */
+	cursor = response;
+	cursor = netagent_buffer_write_message_header(cursor, NETAGENT_MESSAGE_TYPE_GET,
+												  NETAGENT_MESSAGE_FLAGS_RESPONSE, message_id, 0,
+												  response_size - sizeof(struct netagent_message_header));
+	memcpy(cursor, &session->wrapper->netagent, sizeof(session->wrapper->netagent)
+		   + session->wrapper->netagent.netagent_data_size);
+
+	lck_rw_done(&netagent_lock);
+
+	if (!netagent_send_ctl_data(session->control_unit, (u_int8_t *)response, response_size)) {
+		NETAGENTLOG0(LOG_ERR, "Failed to send response");
+	}
+	FREE(response, M_NETAGENT);
+	return;
+fail:
+	netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_GET, message_id, response_error);
+}
+
+static void +netagent_handle_assert(struct netagent_session *session, u_int32_t message_id, + u_int32_t payload_length, mbuf_t packet, int offset) +{ + int error; + struct netagent_assertion *new_assertion = NULL; + u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + uuid_t netagent_uuid; + uuid_clear(netagent_uuid); + + if (session == NULL) { + NETAGENTLOG0(LOG_ERR, "Failed to find session"); + response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + goto fail; + } + + if (payload_length < sizeof(uuid_t)) { + NETAGENTLOG(LOG_ERR, "Assert message size too small for uuid: (%d < %d)", + payload_length, sizeof(uuid_t)); + response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA; + goto fail; + } + + error = mbuf_copydata(packet, offset, sizeof(uuid_t), &netagent_uuid); + if (error) { + NETAGENTLOG(LOG_ERR, "Failed to read uuid: %d", error); + response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + goto fail; + } + + MALLOC(new_assertion, struct netagent_assertion *, sizeof(*new_assertion), M_NETAGENT, M_WAITOK); + if (new_assertion == NULL) { + NETAGENTLOG0(LOG_ERR, "Failed to allocate assertion"); + response_error = NETAGENT_MESSAGE_ERROR_INTERNAL; + goto fail; + } + + uuid_copy(new_assertion->asserted_uuid, netagent_uuid); + + lck_rw_lock_shared(&netagent_lock); + + struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(netagent_uuid); + if (wrapper == NULL) { + lck_rw_done(&netagent_lock); + response_error = NETAGENT_MESSAGE_ERROR_NOT_REGISTERED; + FREE(new_assertion, M_NETAGENT); + goto fail; + } + + error = netagent_send_trigger(wrapper, current_proc(), NETAGENT_TRIGGER_FLAG_USER, NETAGENT_MESSAGE_TYPE_TRIGGER_ASSERT); + if (error) { + lck_rw_done(&netagent_lock); + NETAGENTLOG(LOG_ERR, "Failed to trigger assert agent: %d", error); + response_error = 
NETAGENT_MESSAGE_ERROR_INTERNAL; + FREE(new_assertion, M_NETAGENT); + goto fail; + } + + LIST_INSERT_HEAD(&session->assertion_list, new_assertion, assertion_chain); + + lck_rw_done(&netagent_lock); + + NETAGENTLOG0(LOG_DEBUG, "Asserted agent"); + netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_ASSERT, message_id); + return; +fail: + netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_ASSERT, message_id, response_error); +}
+
+/*
+ * Handle a NETAGENT_MESSAGE_TYPE_UNASSERT request.
+ *
+ * Reads a uuid_t from the packet at offset, removes and frees the matching
+ * entry on the session's assertion_list, then looks up the agent with that
+ * UUID under the shared netagent_lock and sends it a
+ * NETAGENT_MESSAGE_TYPE_TRIGGER_UNASSERT.
+ *
+ * An error response is sent when the session is missing, the payload is
+ * shorter than a uuid_t, the UUID was not previously asserted by this
+ * session, the agent is no longer registered, or the trigger cannot be sent.
+ * Note that the assertion is removed before the agent lookup, so it stays
+ * removed even when one of the later steps fails.
+ */
+static void
+netagent_handle_unassert(struct netagent_session *session, u_int32_t message_id,
+						 u_int32_t payload_length, mbuf_t packet, int offset)
+{
+	int error;
+	u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+	uuid_t netagent_uuid;
+	uuid_clear(netagent_uuid);
+
+	if (session == NULL) {
+		NETAGENTLOG0(LOG_ERR, "Failed to find session");
+		response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+		goto fail;
+	}
+
+	if (payload_length < sizeof(uuid_t)) {
+		NETAGENTLOG(LOG_ERR, "Unassert message size too small for uuid: (%d < %d)",
+					payload_length, sizeof(uuid_t));
+		response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA;
+		goto fail;
+	}
+
+	error = mbuf_copydata(packet, offset, sizeof(uuid_t), &netagent_uuid);
+	if (error) {
+		NETAGENTLOG(LOG_ERR, "Failed to read uuid: %d", error);
+		response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+		goto fail;
+	}
+
+	/* Find this session's assertion for the requested UUID. */
+	struct netagent_assertion *found_assertion = NULL;
+	struct netagent_assertion *search_assertion = NULL;
+	LIST_FOREACH(search_assertion, &session->assertion_list, assertion_chain) {
+		if (uuid_compare(search_assertion->asserted_uuid, netagent_uuid) == 0) {
+			found_assertion = search_assertion;
+			break;
+		}
+	}
+
+	if (found_assertion == NULL) {
+		NETAGENTLOG0(LOG_ERR, "Netagent uuid not previously asserted");
+		response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA;
+		goto fail;
+	}
+
+	LIST_REMOVE(found_assertion, assertion_chain);
+	FREE(found_assertion, M_NETAGENT);
+	found_assertion = NULL;
+
+	lck_rw_lock_shared(&netagent_lock);
+
+	struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(netagent_uuid);
+	if (wrapper == NULL) {
+		lck_rw_done(&netagent_lock);
+		response_error = NETAGENT_MESSAGE_ERROR_NOT_REGISTERED;
+		goto fail;
+	}
+
+	error = netagent_send_trigger(wrapper, current_proc(), NETAGENT_TRIGGER_FLAG_USER, NETAGENT_MESSAGE_TYPE_TRIGGER_UNASSERT);
+	if (error) {
+		lck_rw_done(&netagent_lock);
+		/* Name the operation that actually failed (this is the unassert path). */
+		NETAGENTLOG(LOG_ERR, "Failed to trigger unassert agent: %d", error);
+		response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+		goto fail;
+	}
+
+	lck_rw_done(&netagent_lock);
+
+	NETAGENTLOG0(LOG_DEBUG, "Unasserted agent");
+	netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_UNASSERT, message_id);
+	return;
+fail:
+	netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_UNASSERT, message_id, response_error);
+}
+
+static struct netagent_wrapper * +netagent_find_agent_with_uuid(uuid_t uuid) +{ + struct netagent_wrapper *search_netagent = NULL; + + LIST_FOREACH(search_netagent, &master_netagent_list, master_chain) { + if (uuid_compare(search_netagent->netagent.netagent_uuid, uuid) == 0) { + return (search_netagent); + } + } + + return (NULL); +} + +void +netagent_post_updated_interfaces(uuid_t uuid) +{ + struct netagent_wrapper *wrapper = NULL; + lck_rw_lock_shared(&netagent_lock); + wrapper = netagent_find_agent_with_uuid(uuid); + lck_rw_done(&netagent_lock); + + if (wrapper != NULL) { + netagent_post_event(uuid, KEV_NETAGENT_UPDATED_INTERFACES); + } else { + NETAGENTLOG0(LOG_DEBUG, "Interface event with no associated agent"); + } + + return; +} + +int +netagent_ioctl(u_long cmd, caddr_t data) +{ + int error = 0; + + lck_rw_lock_shared(&netagent_lock); + switch (cmd) { + case SIOCGIFAGENTDATA32: { + struct netagent_req32 *ifsir32 = (struct netagent_req32 *)(void *)data; + struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(ifsir32->netagent_uuid); + if (wrapper == NULL) { + error = ENOENT; + break; + } + uuid_copy(ifsir32->netagent_uuid, wrapper->netagent.netagent_uuid); + 
memcpy(ifsir32->netagent_domain, wrapper->netagent.netagent_domain, sizeof(ifsir32->netagent_domain)); + memcpy(ifsir32->netagent_type, wrapper->netagent.netagent_type, sizeof(ifsir32->netagent_type)); + memcpy(ifsir32->netagent_desc, wrapper->netagent.netagent_desc, sizeof(ifsir32->netagent_desc)); + ifsir32->netagent_flags = wrapper->netagent.netagent_flags; + if (ifsir32->netagent_data_size == 0) { + // First pass, client wants data size + ifsir32->netagent_data_size = wrapper->netagent.netagent_data_size; + } else if (ifsir32->netagent_data != USER_ADDR_NULL && + ifsir32->netagent_data_size == wrapper->netagent.netagent_data_size) { + // Second pass, client wants data buffer filled out + error = copyout(wrapper->netagent.netagent_data, ifsir32->netagent_data, wrapper->netagent.netagent_data_size); + } else { + error = EINVAL; + } + break; + } + case SIOCGIFAGENTDATA64: { + struct netagent_req64 *ifsir64 = (struct netagent_req64 *)(void *)data; + struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(ifsir64->netagent_uuid); + if (wrapper == NULL) { + error = ENOENT; + break; + } + uuid_copy(ifsir64->netagent_uuid, wrapper->netagent.netagent_uuid); + memcpy(ifsir64->netagent_domain, wrapper->netagent.netagent_domain, sizeof(ifsir64->netagent_domain)); + memcpy(ifsir64->netagent_type, wrapper->netagent.netagent_type, sizeof(ifsir64->netagent_type)); + memcpy(ifsir64->netagent_desc, wrapper->netagent.netagent_desc, sizeof(ifsir64->netagent_desc)); + ifsir64->netagent_flags = wrapper->netagent.netagent_flags; + if (ifsir64->netagent_data_size == 0) { + // First pass, client wants data size + ifsir64->netagent_data_size = wrapper->netagent.netagent_data_size; + } else if (ifsir64->netagent_data != USER_ADDR_NULL && + ifsir64->netagent_data_size == wrapper->netagent.netagent_data_size) { + // Second pass, client wants data buffer filled out + error = copyout(wrapper->netagent.netagent_data, ifsir64->netagent_data, wrapper->netagent.netagent_data_size); + } 
else { + error = EINVAL; + } + break; + } + default: { + error = EINVAL; + break; + } + } + lck_rw_done(&netagent_lock); + return (error); +} + +u_int32_t +netagent_get_flags(uuid_t uuid) +{ + u_int32_t flags = 0; + lck_rw_lock_shared(&netagent_lock); + struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(uuid); + if (wrapper != NULL) { + flags = wrapper->netagent.netagent_flags; + } else { + NETAGENTLOG0(LOG_DEBUG, "Flags requested for invalid netagent"); + } + lck_rw_done(&netagent_lock); + + return (flags); +} + +int +netagent_kernel_trigger(uuid_t uuid) +{ + int error = 0; + + lck_rw_lock_shared(&netagent_lock); + struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(uuid); + if (wrapper == NULL) { + NETAGENTLOG0(LOG_ERR, "Requested netagent for kernel trigger could not be found"); + error = ENOENT; + goto done; + } + + if ((wrapper->netagent.netagent_flags & NETAGENT_FLAG_KERNEL_ACTIVATED) == 0) { + NETAGENTLOG0(LOG_ERR, "Requested netagent for kernel trigger is not kernel activated"); + // Agent does not accept kernel triggers + error = EINVAL; + goto done; + } + + if ((wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE)) { + // Agent already active + NETAGENTLOG0(LOG_INFO, "Requested netagent for kernel trigger is already active"); + error = 0; + goto done; + } + + error = netagent_send_trigger(wrapper, current_proc(), NETAGENT_TRIGGER_FLAG_KERNEL, NETAGENT_MESSAGE_TYPE_TRIGGER); + NETAGENTLOG((error ? 
LOG_ERR : LOG_INFO), "Triggered netagent from kernel (error %d)", error); +done: + lck_rw_done(&netagent_lock); + return (error); +} + +int +netagent_trigger(struct proc *p, struct netagent_trigger_args *uap, int32_t *retval) +{ +#pragma unused(p, retval) + uuid_t agent_uuid; + int error = 0; + + if (uap == NULL) { + NETAGENTLOG0(LOG_ERR, "uap == NULL"); + return (EINVAL); + } + + if (uap->agent_uuid) { + if (uap->agent_uuidlen != sizeof(uuid_t)) { + NETAGENTLOG(LOG_ERR, "Incorrect length (got %d, expected %d)", + uap->agent_uuidlen, sizeof(uuid_t)); + return (ERANGE); + } + + error = copyin(uap->agent_uuid, agent_uuid, sizeof(uuid_t)); + if (error) { + NETAGENTLOG(LOG_ERR, "copyin error (%d)", error); + return (error); + } + } + + if (uuid_is_null(agent_uuid)) { + NETAGENTLOG0(LOG_ERR, "Requested netagent UUID is empty"); + return (EINVAL); + } + + lck_rw_lock_shared(&netagent_lock); + struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(agent_uuid); + if (wrapper == NULL) { + NETAGENTLOG0(LOG_ERR, "Requested netagent UUID is not registered"); + error = ENOENT; + goto done; + } + + if ((wrapper->netagent.netagent_flags & NETAGENT_FLAG_USER_ACTIVATED) == 0) { + // Agent does not accept triggers + NETAGENTLOG0(LOG_ERR, "Requested netagent UUID is not eligible for triggering"); + error = EINVAL; + goto done; + } + + if ((wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE)) { + // Agent already active + NETAGENTLOG0(LOG_INFO, "Requested netagent UUID is already active"); + error = 0; + goto done; + } + + error = netagent_send_trigger(wrapper, p, NETAGENT_TRIGGER_FLAG_USER, NETAGENT_MESSAGE_TYPE_TRIGGER); + NETAGENTLOG((error ? 
LOG_ERR : LOG_INFO), "Triggered netagent (error %d)", error); +done: + lck_rw_done(&netagent_lock); + return (error); +} diff --git a/bsd/net/network_agent.h b/bsd/net/network_agent.h new file mode 100644 index 000000000..6fe55b96c --- /dev/null +++ b/bsd/net/network_agent.h @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2014, 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _NETAGENT_H_ +#define _NETAGENT_H_ + +#include +#include + +#ifdef BSD_KERNEL_PRIVATE +#include + +errno_t netagent_init(void); +#endif +#ifdef PRIVATE +/* + * Name registered by the Network Agent kernel control + */ +#define NETAGENT_CONTROL_NAME "com.apple.net.netagent" + +struct netagent_message_header { + u_int8_t message_type; + u_int8_t message_flags; + u_int32_t message_id; + u_int32_t message_error; + u_int32_t message_payload_length; +}; + +struct netagent_trigger_message { + u_int32_t trigger_flags; + pid_t trigger_pid; + uuid_t trigger_proc_uuid; +}; + +#define NETAGENT_MESSAGE_TYPE_REGISTER 1 // Pass netagent to set, no return value +#define NETAGENT_MESSAGE_TYPE_UNREGISTER 2 // No value, no return value +#define NETAGENT_MESSAGE_TYPE_UPDATE 3 // Pass netagent to update, no return value +#define NETAGENT_MESSAGE_TYPE_GET 4 // No value, return netagent +#define NETAGENT_MESSAGE_TYPE_TRIGGER 5 // Kernel initiated, no reply expected +#define NETAGENT_MESSAGE_TYPE_ASSERT 6 // Pass uuid of netagent to assert +#define NETAGENT_MESSAGE_TYPE_UNASSERT 7 // Pass uuid of netagent to unassert +#define NETAGENT_MESSAGE_TYPE_TRIGGER_ASSERT 8 // Kernel initiated, no reply expected +#define NETAGENT_MESSAGE_TYPE_TRIGGER_UNASSERT 9 // Kernel initiated, no reply expected + +#define NETAGENT_MESSAGE_FLAGS_RESPONSE 0x01 // Used for acks, errors, and query responses + +#define NETAGENT_MESSAGE_ERROR_NONE 0 +#define NETAGENT_MESSAGE_ERROR_INTERNAL 1 +#define NETAGENT_MESSAGE_ERROR_UNKNOWN_TYPE 2 +#define NETAGENT_MESSAGE_ERROR_INVALID_DATA 3 +#define NETAGENT_MESSAGE_ERROR_NOT_REGISTERED 4 +#define NETAGENT_MESSAGE_ERROR_ALREADY_REGISTERED 5 +#define NETAGENT_MESSAGE_ERROR_CANNOT_UPDATE 6 + +#define NETAGENT_DOMAINSIZE 32 +#define NETAGENT_TYPESIZE 32 +#define NETAGENT_DESCSIZE 128 + +#define NETAGENT_MAX_DATA_SIZE 1024 + +#define NETAGENT_FLAG_REGISTERED 0x0001 // Agent is registered +#define 
NETAGENT_FLAG_ACTIVE 0x0002 // Agent is active +#define NETAGENT_FLAG_KERNEL_ACTIVATED 0x0004 // Agent can be activated by kernel activity +#define NETAGENT_FLAG_USER_ACTIVATED 0x0008 // Agent can be activated by system call (netagent_trigger) +#define NETAGENT_FLAG_VOLUNTARY 0x0010 // Use of agent is optional +#define NETAGENT_FLAG_SPECIFIC_USE_ONLY 0x0020 // Agent should only be used and activated when specifically required + +#define NETAGENT_TRIGGER_FLAG_USER 0x0001 // Userspace triggered agent +#define NETAGENT_TRIGGER_FLAG_KERNEL 0x0002 // Kernel triggered agent + +#define KEV_NETAGENT_SUBCLASS 9 +#define KEV_NETAGENT_REGISTERED 1 +#define KEV_NETAGENT_UNREGISTERED 2 +#define KEV_NETAGENT_UPDATED 3 +#define KEV_NETAGENT_UPDATED_INTERFACES 4 + +struct kev_netagent_data { + uuid_t netagent_uuid; +}; + +// To be used with kernel control socket +struct netagent { + uuid_t netagent_uuid; + char netagent_domain[NETAGENT_DOMAINSIZE]; + char netagent_type[NETAGENT_TYPESIZE]; + char netagent_desc[NETAGENT_DESCSIZE]; + u_int32_t netagent_flags; + u_int32_t netagent_data_size; + u_int8_t netagent_data[0]; +}; + +// To be used with SIOCGIFAGENTDATA +struct netagent_req { + uuid_t netagent_uuid; + char netagent_domain[NETAGENT_DOMAINSIZE]; + char netagent_type[NETAGENT_TYPESIZE]; + char netagent_desc[NETAGENT_DESCSIZE]; + u_int32_t netagent_flags; + u_int32_t netagent_data_size; + u_int8_t *netagent_data; +}; +#ifdef BSD_KERNEL_PRIVATE +int netagent_ioctl(u_long cmd, caddr_t data); + +struct netagent_req32 { + uuid_t netagent_uuid; + char netagent_domain[NETAGENT_DOMAINSIZE]; + char netagent_type[NETAGENT_TYPESIZE]; + char netagent_desc[NETAGENT_DESCSIZE]; + u_int32_t netagent_flags; + u_int32_t netagent_data_size; + user32_addr_t netagent_data; +}; +struct netagent_req64 { + uuid_t netagent_uuid; + char netagent_domain[NETAGENT_DOMAINSIZE]; + char netagent_type[NETAGENT_TYPESIZE]; + char netagent_desc[NETAGENT_DESCSIZE]; + u_int32_t netagent_flags; + u_int32_t 
netagent_data_size; + user64_addr_t netagent_data __attribute__((aligned(8))); +}; + +// Kernel accessors +void netagent_post_updated_interfaces(uuid_t uuid); // To be called from interface ioctls + +u_int32_t netagent_get_flags(uuid_t uuid); + +int netagent_kernel_trigger(uuid_t uuid); +#endif /* BSD_KERNEL_PRIVATE */ + +#endif /* PRIVATE */ + +#ifndef KERNEL +int netagent_trigger(uuid_t agent_uuid, size_t agent_uuidlen); +#endif /* !KERNEL */ + +#endif /* _NETAGENT_H_ */ diff --git a/bsd/net/ntstat.c b/bsd/net/ntstat.c index 49380d884..8a295f887 100644 --- a/bsd/net/ntstat.c +++ b/bsd/net/ntstat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2014 Apple Inc. All rights reserved. + * Copyright (c) 2010-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -87,46 +87,21 @@ static int nstat_recvspace = 8192; SYSCTL_INT(_net_stats, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED, &nstat_recvspace, 0, ""); -static int nstat_successmsgfailures = 0; -SYSCTL_INT(_net_stats, OID_AUTO, successmsgfailures, CTLFLAG_RD| CTLFLAG_LOCKED, - &nstat_successmsgfailures, 0, ""); - -static int nstat_sendountfailures = 0; -SYSCTL_INT(_net_stats, OID_AUTO, sendountfailures, CTLFLAG_RD| CTLFLAG_LOCKED, - &nstat_sendountfailures, 0, ""); - -static int nstat_sysinfofailures = 0; -SYSCTL_INT(_net_stats, OID_AUTO, sysinfofalures, CTLFLAG_RD| CTLFLAG_LOCKED, - &nstat_sysinfofailures, 0, ""); - -static int nstat_srccountfailures = 0; -SYSCTL_INT(_net_stats, OID_AUTO, srccountfailures, CTLFLAG_RD| CTLFLAG_LOCKED, - &nstat_srccountfailures, 0, ""); - -static int nstat_descriptionfailures = 0; -SYSCTL_INT(_net_stats, OID_AUTO, descriptionfailures, CTLFLAG_RD| CTLFLAG_LOCKED, - &nstat_descriptionfailures, 0, ""); - -static int nstat_msgremovedfailures = 0; -SYSCTL_INT(_net_stats, OID_AUTO, msgremovedfailures , CTLFLAG_RD| CTLFLAG_LOCKED, - &nstat_msgremovedfailures, 0, ""); - -static int nstat_srcaddedfailures = 0; -SYSCTL_INT(_net_stats, OID_AUTO, srcaddedfailures , 
CTLFLAG_RD| CTLFLAG_LOCKED, - &nstat_srcaddedfailures, 0, ""); - -static int nstat_msgerrorfailures = 0; -SYSCTL_INT(_net_stats, OID_AUTO, msgerrorfailures , CTLFLAG_RD| CTLFLAG_LOCKED, - &nstat_msgerrorfailures, 0, ""); +static struct nstat_stats nstat_stats; +SYSCTL_STRUCT(_net_stats, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED, + &nstat_stats, nstat_stats, ""); enum { - NSTAT_FLAG_CLEANUP = (1 << 0), - NSTAT_FLAG_REQCOUNTS = (1 << 1), - NSTAT_FLAG_REQDESCS = (1 << 2) + NSTAT_FLAG_CLEANUP = (1 << 0), + NSTAT_FLAG_REQCOUNTS = (1 << 1), + NSTAT_FLAG_SUPPORTS_UPDATES = (1 << 2), + NSTAT_FLAG_SYSINFO_SUBSCRIBED = (1 << 3), }; +#define QUERY_CONTINUATION_SRC_COUNT 100 + typedef struct nstat_control_state { struct nstat_control_state *ncs_next; @@ -136,7 +111,12 @@ typedef struct nstat_control_state u_int32_t ncs_unit; nstat_src_ref_t ncs_next_srcref; struct nstat_src *ncs_srcs; + mbuf_t ncs_accumulated; u_int32_t ncs_flags; + u_int64_t ncs_provider_filters[NSTAT_PROVIDER_COUNT]; + /* state maintained for partial query requests */ + u_int64_t ncs_context; + u_int64_t ncs_seq; } nstat_control_state; typedef struct nstat_provider @@ -151,6 +131,7 @@ typedef struct nstat_provider void (*nstat_watcher_remove)(nstat_control_state *state); errno_t (*nstat_copy_descriptor)(nstat_provider_cookie_t cookie, void *data, u_int32_t len); void (*nstat_release)(nstat_provider_cookie_t cookie, boolean_t locked); + bool (*nstat_reporting_allowed)(nstat_provider_cookie_t cookie, uint64_t filter); } nstat_provider; @@ -161,14 +142,19 @@ typedef struct nstat_src nstat_provider *provider; nstat_provider_cookie_t cookie; uint32_t filter; + uint64_t seq; } nstat_src; static errno_t nstat_control_send_counts(nstat_control_state *, - nstat_src *, unsigned long long, int *); -static int nstat_control_send_description(nstat_control_state *state, nstat_src *src, u_int64_t context); + nstat_src *, unsigned long long, u_int16_t, int *); +static int 
nstat_control_send_description(nstat_control_state *state, nstat_src *src, u_int64_t context, u_int16_t hdr_flags); +static int nstat_control_send_update(nstat_control_state *state, nstat_src *src, u_int64_t context, u_int16_t hdr_flags, int *gone); static errno_t nstat_control_send_removed(nstat_control_state *, nstat_src *); -static void nstat_control_cleanup_source(nstat_control_state *state, nstat_src *src, - boolean_t); +static errno_t nstat_control_send_goodbye(nstat_control_state *state, nstat_src *src); +static void nstat_control_cleanup_source(nstat_control_state *state, nstat_src *src, boolean_t); +static bool nstat_control_reporting_allowed(nstat_control_state *state, nstat_src *src); +static boolean_t nstat_control_begin_query(nstat_control_state *state, const nstat_msg_hdr *hdrp); +static u_int16_t nstat_control_end_query(nstat_control_state *state, nstat_src *last_src, boolean_t partial); static u_int32_t nstat_udp_watchers = 0; static u_int32_t nstat_tcp_watchers = 0; @@ -250,6 +236,57 @@ nstat_ip6_to_sockaddr( } } +static u_int16_t +nstat_inpcb_to_flags( + const struct inpcb *inp) +{ + u_int16_t flags = 0; + + if ((inp != NULL ) && (inp->inp_last_outifp != NULL)) + { + struct ifnet *ifp = inp->inp_last_outifp; + + u_int32_t functional_type = if_functional_type(ifp); + + /* Panic if someone adds a functional type without updating ntstat. 
*/ + VERIFY(0 <= functional_type && functional_type <= IFRTYPE_FUNCTIONAL_LAST); + + switch (functional_type) + { + case IFRTYPE_FUNCTIONAL_UNKNOWN: + flags |= NSTAT_IFNET_IS_UNKNOWN_TYPE; + break; + case IFRTYPE_FUNCTIONAL_LOOPBACK: + flags |= NSTAT_IFNET_IS_LOOPBACK; + break; + case IFRTYPE_FUNCTIONAL_WIRED: + flags |= NSTAT_IFNET_IS_WIRED; + break; + case IFRTYPE_FUNCTIONAL_WIFI_INFRA: + flags |= NSTAT_IFNET_IS_WIFI; + break; + case IFRTYPE_FUNCTIONAL_WIFI_AWDL: + flags |= NSTAT_IFNET_IS_WIFI; + flags |= NSTAT_IFNET_IS_AWDL; + break; + case IFRTYPE_FUNCTIONAL_CELLULAR: + flags |= NSTAT_IFNET_IS_CELLULAR; + break; + } + + if (IFNET_IS_EXPENSIVE(ifp)) + { + flags |= NSTAT_IFNET_IS_EXPENSIVE; + } + } + else + { + flags = NSTAT_IFNET_IS_UNKNOWN_TYPE; + } + + return flags; +} + #pragma mark -- Network Statistic Providers -- static errno_t nstat_control_source_add(u_int64_t context, nstat_control_state *state, nstat_provider *provider, nstat_provider_cookie_t cookie); @@ -291,7 +328,6 @@ static void nstat_init_route_provider(void); static void nstat_init_tcp_provider(void); static void nstat_init_udp_provider(void); static void nstat_init_ifnet_provider(void); -static void nstat_init_sysinfo_provider(void); __private_extern__ void nstat_init(void) @@ -311,7 +347,6 @@ nstat_init(void) nstat_init_tcp_provider(); nstat_init_udp_provider(); nstat_init_ifnet_provider(); - nstat_init_sysinfo_provider(); nstat_control_register(); } } @@ -433,9 +468,9 @@ nstat_route_counts( struct rtentry *rt = (struct rtentry*)cookie; struct nstat_counts *rt_stats = rt->rt_stats; - *out_gone = 0; + if (out_gone) *out_gone = 0; - if ((rt->rt_flags & RTF_UP) == 0) *out_gone = 1; + if (out_gone && (rt->rt_flags & RTF_UP) == 0) *out_gone = 1; if (rt_stats) { @@ -454,7 +489,9 @@ nstat_route_counts( out_counts->nstat_cell_rxbytes = out_counts->nstat_cell_txbytes = 0; } else + { bzero(out_counts, sizeof(*out_counts)); + } return 0; } @@ -839,6 +876,7 @@ struct nstat_tucookie { struct sockaddr_in6 
v6; } remote; unsigned int if_index; + uint16_t ifnet_properties; }; static struct nstat_tucookie * @@ -1037,12 +1075,12 @@ nstat_tcp_counts( bzero(out_counts, sizeof(*out_counts)); - *out_gone = 0; + if (out_gone) *out_gone = 0; // if the pcb is in the dead state, we should stop using it if (nstat_tcp_gone(cookie)) { - *out_gone = 1; + if (out_gone) *out_gone = 1; if (!(inp = tucookie->inp) || !intotcpcb(inp)) return EINVAL; } @@ -1167,7 +1205,8 @@ nstat_pcb_detach(struct inpcb *inp) return; lck_mtx_lock(&nstat_mtx); - for (state = nstat_controls; state; state = state->ncs_next) { + for (state = nstat_controls; state; state = state->ncs_next) + { lck_mtx_lock(&state->mtx); for (prevsrc = NULL, src = state->ncs_srcs; src; prevsrc = src, src = src->next) @@ -1177,30 +1216,15 @@ nstat_pcb_detach(struct inpcb *inp) break; } - if (src) { - // send one last counts notification - result = nstat_control_send_counts(state, src, 0, NULL); - if (result != 0 && nstat_debug) - printf("%s - nstat_control_send_counts() %d\n", - __func__, result); - - // send a last description - result = nstat_control_send_description(state, src, 0); - if (result != 0 && nstat_debug) - printf("%s - nstat_control_send_description() %d\n", - __func__, result); - - // send the source removed notification - result = nstat_control_send_removed(state, src); - if (result != 0 && nstat_debug) - printf("%s - nstat_control_send_removed() %d\n", - __func__, result); - + if (src) + { + result = nstat_control_send_goodbye(state, src); + if (prevsrc) prevsrc->next = src->next; else state->ncs_srcs = src->next; - + src->next = dead_list; dead_list = src; } @@ -1260,6 +1284,8 @@ nstat_pcb_cache(struct inpcb *inp) if (inp->inp_last_outifp) tucookie->if_index = inp->inp_last_outifp->if_index; + + tucookie->ifnet_properties = nstat_inpcb_to_flags(inp); tucookie->cached = true; break; } @@ -1357,7 +1383,7 @@ nstat_tcp_copy_descriptor( desc->traffic_class = so->so_traffic_class; desc->traffic_mgt_flags = 
so->so_traffic_mgt_flags; proc_name(desc->pid, desc->pname, sizeof(desc->pname)); - if (desc->pname == NULL || desc->pname[0] == 0) + if (desc->pname[0] == 0) { strlcpy(desc->pname, tucookie->pname, sizeof(desc->pname)); @@ -1384,10 +1410,31 @@ nstat_tcp_copy_descriptor( desc->rcvbufsize = so->so_rcv.sb_hiwat; desc->rcvbufused = so->so_rcv.sb_cc; } - + + tcp_get_connectivity_status(tp, &desc->connstatus); + desc->ifnet_properties = nstat_inpcb_to_flags(inp); return 0; } +static bool +nstat_tcpudp_reporting_allowed(nstat_provider_cookie_t cookie, uint64_t filter) +{ + bool retval = true; + + /* Only apply interface filter if at least one is allowed. */ + if ((filter & NSTAT_FILTER_ACCEPT_ALL) != 0) + { + struct nstat_tucookie *tucookie = (struct nstat_tucookie *)cookie; + struct inpcb *inp = tucookie->inp; + + uint16_t interface_properties = nstat_inpcb_to_flags(inp); + + /* For now, just check on interface type. */ + retval = ((filter & interface_properties) != 0); + } + return retval; +} + static void nstat_init_tcp_provider(void) { @@ -1401,6 +1448,7 @@ nstat_init_tcp_provider(void) nstat_tcp_provider.nstat_watcher_add = nstat_tcp_add_watcher; nstat_tcp_provider.nstat_watcher_remove = nstat_tcp_remove_watcher; nstat_tcp_provider.nstat_copy_descriptor = nstat_tcp_copy_descriptor; + nstat_tcp_provider.nstat_reporting_allowed = nstat_tcpudp_reporting_allowed; nstat_tcp_provider.next = nstat_providers; nstat_providers = &nstat_tcp_provider; } @@ -1439,12 +1487,12 @@ nstat_udp_counts( struct nstat_tucookie *tucookie = (struct nstat_tucookie *)cookie; - *out_gone = 0; + if (out_gone) *out_gone = 0; // if the pcb is in the dead state, we should stop using it if (nstat_udp_gone(cookie)) { - *out_gone = 1; + if (out_gone) *out_gone = 1; if (!tucookie->inp) return EINVAL; } @@ -1570,34 +1618,36 @@ nstat_udp_copy_descriptor( if (inp->inp_vflag & INP_IPV6) { nstat_ip6_to_sockaddr(&inp->in6p_laddr, inp->inp_lport, - &desc->local.v6, sizeof(desc->local)); + &desc->local.v6, 
sizeof(desc->local.v6)); nstat_ip6_to_sockaddr(&inp->in6p_faddr, inp->inp_fport, - &desc->remote.v6, sizeof(desc->remote)); + &desc->remote.v6, sizeof(desc->remote.v6)); } else if (inp->inp_vflag & INP_IPV4) { nstat_ip_to_sockaddr(&inp->inp_laddr, inp->inp_lport, - &desc->local.v4, sizeof(desc->local)); + &desc->local.v4, sizeof(desc->local.v4)); nstat_ip_to_sockaddr(&inp->inp_faddr, inp->inp_fport, - &desc->remote.v4, sizeof(desc->remote)); + &desc->remote.v4, sizeof(desc->remote.v4)); } + desc->ifnet_properties = nstat_inpcb_to_flags(inp); } else { if (inp->inp_vflag & INP_IPV6) { memcpy(&desc->local.v6, &tucookie->local.v6, - sizeof(desc->local)); + sizeof(desc->local.v6)); memcpy(&desc->remote.v6, &tucookie->remote.v6, - sizeof(desc->remote)); + sizeof(desc->remote.v6)); } else if (inp->inp_vflag & INP_IPV4) { memcpy(&desc->local.v4, &tucookie->local.v4, - sizeof(desc->local)); + sizeof(desc->local.v4)); memcpy(&desc->remote.v4, &tucookie->remote.v4, - sizeof(desc->remote)); + sizeof(desc->remote.v4)); } + desc->ifnet_properties = tucookie->ifnet_properties; } if (inp->inp_last_outifp) @@ -1613,7 +1663,7 @@ nstat_udp_copy_descriptor( desc->upid = so->last_upid; desc->pid = so->last_pid; proc_name(desc->pid, desc->pname, sizeof(desc->pname)); - if (desc->pname == NULL || desc->pname[0] == 0) + if (desc->pname[0] == 0) { strlcpy(desc->pname, tucookie->pname, sizeof(desc->pname)); @@ -1639,7 +1689,7 @@ nstat_udp_copy_descriptor( desc->rcvbufused = so->so_rcv.sb_cc; desc->traffic_class = so->so_traffic_class; } - + return 0; } @@ -1656,6 +1706,7 @@ nstat_init_udp_provider(void) nstat_udp_provider.nstat_watcher_remove = nstat_udp_remove_watcher; nstat_udp_provider.nstat_copy_descriptor = nstat_udp_copy_descriptor; nstat_udp_provider.nstat_release = nstat_udp_release; + nstat_udp_provider.nstat_reporting_allowed = nstat_tcpudp_reporting_allowed; nstat_udp_provider.next = nstat_providers; nstat_providers = &nstat_udp_provider; } @@ -1680,7 +1731,7 @@ 
nstat_ifnet_lookup( u_int32_t length, nstat_provider_cookie_t *out_cookie) { - const nstat_ifnet_add_param *param = (nstat_ifnet_add_param *)data; + const nstat_ifnet_add_param *param = (const nstat_ifnet_add_param *)data; struct ifnet *ifp; boolean_t changed = FALSE; nstat_control_state *state; @@ -1739,7 +1790,7 @@ nstat_ifnet_lookup( { if (src->provider != &nstat_ifnet_provider) continue; - nstat_control_send_description(state, src, 0); + nstat_control_send_description(state, src, 0, 0); } lck_mtx_unlock(&state->mtx); } @@ -1780,12 +1831,12 @@ nstat_ifnet_counts( (struct nstat_ifnet_cookie *)cookie; struct ifnet *ifp = ifcookie->ifp; - *out_gone = 0; + if (out_gone) *out_gone = 0; // if the ifnet is gone, we should stop using it if (nstat_ifnet_gone(cookie)) { - *out_gone = 1; + if (out_gone) *out_gone = 1; return EINVAL; } @@ -1795,7 +1846,6 @@ nstat_ifnet_counts( out_counts->nstat_txpackets = ifp->if_opackets; out_counts->nstat_txbytes = ifp->if_obytes; out_counts->nstat_cell_rxbytes = out_counts->nstat_cell_txbytes = 0; - return 0; } @@ -1849,6 +1899,208 @@ nstat_ifnet_release( OSFree(ifcookie, sizeof(*ifcookie), nstat_malloc_tag); } +static void +nstat_ifnet_copy_link_status( + struct ifnet *ifp, + struct nstat_ifnet_descriptor *desc) +{ + struct if_link_status *ifsr = ifp->if_link_status; + nstat_ifnet_desc_link_status *link_status = &desc->link_status; + + link_status->link_status_type = NSTAT_IFNET_DESC_LINK_STATUS_TYPE_NONE; + if (ifsr == NULL) + return; + + lck_rw_lock_shared(&ifp->if_link_status_lock); + + if (ifp->if_type == IFT_CELLULAR) { + + nstat_ifnet_desc_cellular_status *cell_status = &link_status->u.cellular; + struct if_cellular_status_v1 *if_cell_sr = + &ifsr->ifsr_u.ifsr_cell.if_cell_u.if_status_v1; + + if (ifsr->ifsr_version != IF_CELLULAR_STATUS_REPORT_VERSION_1) + goto done; + + link_status->link_status_type = NSTAT_IFNET_DESC_LINK_STATUS_TYPE_CELLULAR; + + if (if_cell_sr->valid_bitmask & IF_CELL_LINK_QUALITY_METRIC_VALID) { + 
cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_LINK_QUALITY_METRIC_VALID; + cell_status->link_quality_metric = if_cell_sr->link_quality_metric; + } + if (if_cell_sr->valid_bitmask & IF_CELL_UL_EFFECTIVE_BANDWIDTH_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_EFFECTIVE_BANDWIDTH_VALID; + cell_status->ul_effective_bandwidth = if_cell_sr->ul_effective_bandwidth; + } + if (if_cell_sr->valid_bitmask & IF_CELL_UL_MAX_BANDWIDTH_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_MAX_BANDWIDTH_VALID; + cell_status->ul_max_bandwidth = if_cell_sr->ul_max_bandwidth; + } + if (if_cell_sr->valid_bitmask & IF_CELL_UL_MIN_LATENCY_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_MIN_LATENCY_VALID; + cell_status->ul_min_latency = if_cell_sr->ul_min_latency; + } + if (if_cell_sr->valid_bitmask & IF_CELL_UL_EFFECTIVE_LATENCY_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_EFFECTIVE_LATENCY_VALID; + cell_status->ul_effective_latency = if_cell_sr->ul_effective_latency; + } + if (if_cell_sr->valid_bitmask & IF_CELL_UL_MAX_LATENCY_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_MAX_LATENCY_VALID; + cell_status->ul_max_latency = if_cell_sr->ul_max_latency; + } + if (if_cell_sr->valid_bitmask & IF_CELL_UL_RETXT_LEVEL_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_VALID; + if (if_cell_sr->ul_retxt_level == IF_CELL_UL_RETXT_LEVEL_NONE) + cell_status->ul_retxt_level = NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_NONE; + else if (if_cell_sr->ul_retxt_level == IF_CELL_UL_RETXT_LEVEL_LOW) + cell_status->ul_retxt_level = NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_LOW; + else if (if_cell_sr->ul_retxt_level == IF_CELL_UL_RETXT_LEVEL_MEDIUM) + cell_status->ul_retxt_level = NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_MEDIUM; + else if (if_cell_sr->ul_retxt_level == IF_CELL_UL_RETXT_LEVEL_HIGH) + cell_status->ul_retxt_level = NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_HIGH; + else + 
cell_status->valid_bitmask &= ~NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_VALID; + } + if (if_cell_sr->valid_bitmask & IF_CELL_UL_BYTES_LOST_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_BYTES_LOST_VALID; + cell_status->ul_bytes_lost = if_cell_sr->ul_bytes_lost; + } + if (if_cell_sr->valid_bitmask & IF_CELL_UL_MIN_QUEUE_SIZE_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_MIN_QUEUE_SIZE_VALID; + cell_status->ul_min_queue_size = if_cell_sr->ul_min_queue_size; + } + if (if_cell_sr->valid_bitmask & IF_CELL_UL_AVG_QUEUE_SIZE_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_AVG_QUEUE_SIZE_VALID; + cell_status->ul_avg_queue_size = if_cell_sr->ul_avg_queue_size; + } + if (if_cell_sr->valid_bitmask & IF_CELL_UL_MAX_QUEUE_SIZE_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_MAX_QUEUE_SIZE_VALID; + cell_status->ul_max_queue_size = if_cell_sr->ul_max_queue_size; + } + if (if_cell_sr->valid_bitmask & IF_CELL_DL_EFFECTIVE_BANDWIDTH_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_DL_EFFECTIVE_BANDWIDTH_VALID; + cell_status->dl_effective_bandwidth = if_cell_sr->dl_effective_bandwidth; + } + if (if_cell_sr->valid_bitmask & IF_CELL_DL_MAX_BANDWIDTH_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_DL_MAX_BANDWIDTH_VALID; + cell_status->dl_max_bandwidth = if_cell_sr->dl_max_bandwidth; + } + if (if_cell_sr->valid_bitmask & IF_CELL_CONFIG_INACTIVITY_TIME_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_CONFIG_INACTIVITY_TIME_VALID; + cell_status->config_inactivity_time = if_cell_sr->config_inactivity_time; + } + if (if_cell_sr->valid_bitmask & IF_CELL_CONFIG_BACKOFF_TIME_VALID) { + cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_CONFIG_BACKOFF_TIME_VALID; + cell_status->config_backoff_time = if_cell_sr->config_backoff_time; + } + + } else if (ifp->if_subfamily == IFNET_SUBFAMILY_WIFI) { + + nstat_ifnet_desc_wifi_status *wifi_status = &link_status->u.wifi; + struct 
if_wifi_status_v1 *if_wifi_sr = + &ifsr->ifsr_u.ifsr_wifi.if_wifi_u.if_status_v1; + + if (ifsr->ifsr_version != IF_WIFI_STATUS_REPORT_VERSION_1) + goto done; + + link_status->link_status_type = NSTAT_IFNET_DESC_LINK_STATUS_TYPE_WIFI; + + if (if_wifi_sr->valid_bitmask & IF_WIFI_LINK_QUALITY_METRIC_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_LINK_QUALITY_METRIC_VALID; + wifi_status->link_quality_metric = if_wifi_sr->link_quality_metric; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID; + wifi_status->ul_effective_bandwidth = if_wifi_sr->ul_effective_bandwidth; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_MAX_BANDWIDTH_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_MAX_BANDWIDTH_VALID; + wifi_status->ul_max_bandwidth = if_wifi_sr->ul_max_bandwidth; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_MIN_LATENCY_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_MIN_LATENCY_VALID; + wifi_status->ul_min_latency = if_wifi_sr->ul_min_latency; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_EFFECTIVE_LATENCY_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_EFFECTIVE_LATENCY_VALID; + wifi_status->ul_effective_latency = if_wifi_sr->ul_effective_latency; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_MAX_LATENCY_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_MAX_LATENCY_VALID; + wifi_status->ul_max_latency = if_wifi_sr->ul_max_latency; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_RETXT_LEVEL_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_VALID; + if (if_wifi_sr->ul_retxt_level == IF_WIFI_UL_RETXT_LEVEL_NONE) + wifi_status->ul_retxt_level = NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_NONE; + else if (if_wifi_sr->ul_retxt_level == IF_WIFI_UL_RETXT_LEVEL_LOW) + wifi_status->ul_retxt_level = NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_LOW; + else if 
(if_wifi_sr->ul_retxt_level == IF_WIFI_UL_RETXT_LEVEL_MEDIUM) + wifi_status->ul_retxt_level = NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_MEDIUM; + else if (if_wifi_sr->ul_retxt_level == IF_WIFI_UL_RETXT_LEVEL_HIGH) + wifi_status->ul_retxt_level = NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_HIGH; + else + wifi_status->valid_bitmask &= ~NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_VALID; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_BYTES_LOST_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_BYTES_LOST_VALID; + wifi_status->ul_bytes_lost = if_wifi_sr->ul_bytes_lost; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_ERROR_RATE_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_ERROR_RATE_VALID; + wifi_status->ul_error_rate = if_wifi_sr->ul_error_rate; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID; + wifi_status->dl_effective_bandwidth = if_wifi_sr->dl_effective_bandwidth; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_DL_MAX_BANDWIDTH_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_DL_MAX_BANDWIDTH_VALID; + wifi_status->dl_max_bandwidth = if_wifi_sr->dl_max_bandwidth; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_DL_MIN_LATENCY_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_DL_MIN_LATENCY_VALID; + wifi_status->dl_min_latency = if_wifi_sr->dl_min_latency; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_DL_EFFECTIVE_LATENCY_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_DL_EFFECTIVE_LATENCY_VALID; + wifi_status->dl_effective_latency = if_wifi_sr->dl_effective_latency; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_DL_MAX_LATENCY_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_DL_MAX_LATENCY_VALID; + wifi_status->dl_max_latency = if_wifi_sr->dl_max_latency; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_DL_ERROR_RATE_VALID) { + wifi_status->valid_bitmask |= 
NSTAT_IFNET_DESC_WIFI_DL_ERROR_RATE_VALID; + wifi_status->dl_error_rate = if_wifi_sr->dl_error_rate; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_CONFIG_FREQUENCY_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_VALID; + if (if_wifi_sr->config_frequency == IF_WIFI_CONFIG_FREQUENCY_2_4_GHZ) + wifi_status->config_frequency = NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_2_4_GHZ; + else if (if_wifi_sr->config_frequency == IF_WIFI_CONFIG_FREQUENCY_5_0_GHZ) + wifi_status->config_frequency = NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_5_0_GHZ; + else + wifi_status->valid_bitmask &= ~NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_VALID; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_CONFIG_MULTICAST_RATE_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_CONFIG_MULTICAST_RATE_VALID; + wifi_status->config_multicast_rate = if_wifi_sr->config_multicast_rate; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_CONFIG_SCAN_COUNT_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_CONFIG_SCAN_COUNT_VALID; + wifi_status->scan_count = if_wifi_sr->scan_count; + } + if (if_wifi_sr->valid_bitmask & IF_WIFI_CONFIG_SCAN_DURATION_VALID) { + wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_CONFIG_SCAN_DURATION_VALID; + wifi_status->scan_duration = if_wifi_sr->scan_duration; + } + } + +done: + lck_rw_done(&ifp->if_link_status_lock); +} + static errno_t nstat_ifnet_copy_descriptor( nstat_provider_cookie_t cookie, @@ -1875,8 +2127,8 @@ nstat_ifnet_copy_descriptor( if (ifp->if_desc.ifd_len < sizeof(desc->description)) memcpy(desc->description, ifp->if_desc.ifd_desc, sizeof(desc->description)); + nstat_ifnet_copy_link_status(ifp, desc); ifnet_lock_done(ifp); - return 0; } @@ -1917,132 +2169,32 @@ nstat_ifnet_threshold_reached(unsigned int ifindex) ifp = ifcookie->ifp; if (ifp->if_index != ifindex) continue; - nstat_control_send_counts(state, src, 0, NULL); + nstat_control_send_counts(state, src, 0, 0, NULL); } lck_mtx_unlock(&state->mtx); } 
lck_mtx_unlock(&nstat_mtx); } -#pragma mark -- Sysinfo Provider -- - -static nstat_provider nstat_sysinfo_provider; - -/* We store the flags requested by the client */ -typedef struct nstat_sysinfo_cookie -{ - u_int32_t flags; -} nstat_sysinfo_cookie; - -static errno_t -nstat_sysinfo_lookup( - const void *data, - u_int32_t length, - nstat_provider_cookie_t *out_cookie) -{ - const nstat_sysinfo_add_param *param = (nstat_sysinfo_add_param *)data; - nstat_sysinfo_cookie *cookie; - - if (length < sizeof(*param)) - return (EINVAL); - - if (nstat_privcheck != 0) { - errno_t result = priv_check_cred(kauth_cred_get(), - PRIV_NET_PRIVILEGED_NETWORK_STATISTICS, 0); - if (result != 0) - return (result); - } - - cookie = OSMalloc(sizeof(*cookie), nstat_malloc_tag); - if (cookie == NULL) - return (ENOMEM); - cookie->flags = param->flags; - *out_cookie = cookie; - return (0); -} - -static int -nstat_sysinfo_gone( - __unused nstat_provider_cookie_t cookie) -{ - /* Sysinfo always exists */ - return (0); -} - -static errno_t -nstat_sysinfo_copy_descriptor( - nstat_provider_cookie_t cookie, - void *data, - u_int32_t len) -{ - nstat_sysinfo_descriptor *desc = (nstat_sysinfo_descriptor *)data; - struct nstat_sysinfo_cookie *syscookie = - (struct nstat_sysinfo_cookie *)cookie; - - if (len < sizeof(nstat_sysinfo_descriptor)) - return (EINVAL); - desc->flags = syscookie->flags; - return (0); -} - +#pragma mark -- Sysinfo -- static void -nstat_sysinfo_release( - nstat_provider_cookie_t cookie, - __unused boolean_t locked) -{ - struct nstat_sysinfo_cookie *syscookie = - (struct nstat_sysinfo_cookie *)cookie; - OSFree(syscookie, sizeof(*syscookie), nstat_malloc_tag); -} - -static errno_t -nstat_enqueue_success( - uint64_t context, - nstat_control_state *state) +nstat_set_keyval_scalar(nstat_sysinfo_keyval *kv, int key, u_int32_t val) { - nstat_msg_hdr success; - errno_t result; - - bzero(&success, sizeof(success)); - success.context = context; - success.type = NSTAT_MSG_TYPE_SUCCESS; - 
result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &success, - sizeof(success), CTL_DATA_EOR | CTL_DATA_CRIT); - if (result != 0) { - printf("%s: could not enqueue success message %d\n", - __func__, result); - nstat_successmsgfailures += 1; - } - return result; -} - -static void -nstat_init_sysinfo_provider(void) -{ - bzero(&nstat_sysinfo_provider, sizeof(nstat_sysinfo_provider)); - nstat_sysinfo_provider.nstat_provider_id = NSTAT_PROVIDER_SYSINFO; - nstat_sysinfo_provider.nstat_descriptor_length = sizeof(nstat_sysinfo_descriptor); - nstat_sysinfo_provider.nstat_lookup = nstat_sysinfo_lookup; - nstat_sysinfo_provider.nstat_gone = nstat_sysinfo_gone; - nstat_sysinfo_provider.nstat_counts = NULL; - nstat_sysinfo_provider.nstat_watcher_add = NULL; - nstat_sysinfo_provider.nstat_watcher_remove = NULL; - nstat_sysinfo_provider.nstat_copy_descriptor = nstat_sysinfo_copy_descriptor; - nstat_sysinfo_provider.nstat_release = nstat_sysinfo_release; - nstat_sysinfo_provider.next = nstat_providers; - nstat_providers = &nstat_sysinfo_provider; + kv->nstat_sysinfo_key = key; + kv->nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; + kv->u.nstat_sysinfo_scalar = val; } static void nstat_sysinfo_send_data_internal( nstat_control_state *control, - nstat_src *src, nstat_sysinfo_data *data) { nstat_msg_sysinfo_counts *syscnt = NULL; size_t allocsize = 0, countsize = 0, nkeyvals = 0; nstat_sysinfo_keyval *kv; errno_t result = 0; + size_t i = 0; allocsize = offsetof(nstat_msg_sysinfo_counts, counts); countsize = offsetof(nstat_sysinfo_counts, nstat_sysinfo_keyvals); @@ -2051,10 +2203,12 @@ nstat_sysinfo_send_data_internal( switch (data->flags) { case NSTAT_SYSINFO_MBUF_STATS: - nkeyvals = 5; + nkeyvals = sizeof(struct nstat_sysinfo_mbuf_stats) / + sizeof(u_int32_t); break; case NSTAT_SYSINFO_TCP_STATS: - nkeyvals = 6; + nkeyvals = sizeof(struct nstat_sysinfo_tcp_stats) / + sizeof(u_int32_t); break; default: return; @@ -2068,61 +2222,150 @@ nstat_sysinfo_send_data_internal( 
bzero(syscnt, allocsize); syscnt->hdr.type = NSTAT_MSG_TYPE_SYSINFO_COUNTS; + syscnt->hdr.length = allocsize; syscnt->counts.nstat_sysinfo_len = countsize; - syscnt->srcref = src->srcref; - + kv = (nstat_sysinfo_keyval *) &syscnt->counts.nstat_sysinfo_keyvals; switch (data->flags) { case NSTAT_SYSINFO_MBUF_STATS: { - kv[0].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_MBUF_256B_TOTAL; - kv[0].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; - kv[0].u.nstat_sysinfo_scalar = data->u.mb_stats.total_256b; - - kv[1].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_MBUF_2KB_TOTAL; - kv[1].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; - kv[1].u.nstat_sysinfo_scalar = data->u.mb_stats.total_2kb; - - kv[2].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_MBUF_4KB_TOTAL; - kv[2].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; - kv[2].u.nstat_sysinfo_scalar = data->u.mb_stats.total_4kb; - - kv[3].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SOCK_MBCNT; - kv[3].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; - kv[3].u.nstat_sysinfo_scalar = data->u.mb_stats.sbmb_total; - - - kv[4].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SOCK_ATMBLIMIT; - kv[4].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; - kv[4].u.nstat_sysinfo_scalar = data->u.mb_stats.sb_atmbuflimit; + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_KEY_MBUF_256B_TOTAL, + data->u.mb_stats.total_256b); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_KEY_MBUF_2KB_TOTAL, + data->u.mb_stats.total_2kb); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_KEY_MBUF_4KB_TOTAL, + data->u.mb_stats.total_4kb); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_MBUF_16KB_TOTAL, + data->u.mb_stats.total_16kb); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_KEY_SOCK_MBCNT, + data->u.mb_stats.sbmb_total); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_KEY_SOCK_ATMBLIMIT, + data->u.mb_stats.sb_atmbuflimit); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_MBUF_DRAIN_CNT, + data->u.mb_stats.draincnt); + nstat_set_keyval_scalar(&kv[i++], + 
NSTAT_SYSINFO_MBUF_MEM_RELEASED, + data->u.mb_stats.memreleased); + VERIFY(i == nkeyvals); break; } case NSTAT_SYSINFO_TCP_STATS: { - kv[0].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_IPV4_AVGRTT; - kv[0].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; - kv[0].u.nstat_sysinfo_scalar = data->u.tcp_stats.ipv4_avgrtt; - - kv[1].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_IPV6_AVGRTT; - kv[1].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; - kv[1].u.nstat_sysinfo_scalar = data->u.tcp_stats.ipv6_avgrtt; - - kv[2].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SEND_PLR; - kv[2].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; - kv[2].u.nstat_sysinfo_scalar = data->u.tcp_stats.send_plr; - - kv[3].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_RECV_PLR; - kv[3].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; - kv[3].u.nstat_sysinfo_scalar = data->u.tcp_stats.recv_plr; - - kv[4].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SEND_TLRTO; - kv[4].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; - kv[4].u.nstat_sysinfo_scalar = data->u.tcp_stats.send_tlrto_rate; - - kv[5].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SEND_REORDERRATE; - kv[5].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; - kv[5].u.nstat_sysinfo_scalar = data->u.tcp_stats.send_reorder_rate; + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_KEY_IPV4_AVGRTT, + data->u.tcp_stats.ipv4_avgrtt); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_KEY_IPV6_AVGRTT, + data->u.tcp_stats.ipv6_avgrtt); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_KEY_SEND_PLR, + data->u.tcp_stats.send_plr); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_KEY_RECV_PLR, + data->u.tcp_stats.recv_plr); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_KEY_SEND_TLRTO, + data->u.tcp_stats.send_tlrto_rate); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_KEY_SEND_REORDERRATE, + data->u.tcp_stats.send_reorder_rate); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_CONNECTION_ATTEMPTS, + data->u.tcp_stats.connection_attempts); + nstat_set_keyval_scalar(&kv[i++], + 
NSTAT_SYSINFO_CONNECTION_ACCEPTS, + data->u.tcp_stats.connection_accepts); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_CLIENT_ENABLED, + data->u.tcp_stats.ecn_client_enabled); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_SERVER_ENABLED, + data->u.tcp_stats.ecn_server_enabled); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_CLIENT_SETUP, + data->u.tcp_stats.ecn_client_setup); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_SERVER_SETUP, + data->u.tcp_stats.ecn_server_setup); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_CLIENT_SUCCESS, + data->u.tcp_stats.ecn_client_success); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_SERVER_SUCCESS, + data->u.tcp_stats.ecn_server_success); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_NOT_SUPPORTED, + data->u.tcp_stats.ecn_not_supported); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_LOST_SYN, + data->u.tcp_stats.ecn_lost_syn); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_LOST_SYNACK, + data->u.tcp_stats.ecn_lost_synack); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_RECV_CE, + data->u.tcp_stats.ecn_recv_ce); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_RECV_ECE, + data->u.tcp_stats.ecn_recv_ece); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_SENT_ECE, + data->u.tcp_stats.ecn_sent_ece); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_CONN_RECV_CE, + data->u.tcp_stats.ecn_conn_recv_ce); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_CONN_RECV_ECE, + data->u.tcp_stats.ecn_conn_recv_ece); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_CONN_PLNOCE, + data->u.tcp_stats.ecn_conn_plnoce); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_CONN_PL_CE, + data->u.tcp_stats.ecn_conn_pl_ce); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_ECN_CONN_NOPL_CE, + data->u.tcp_stats.ecn_conn_nopl_ce); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_TFO_SYN_DATA_RCV, + 
data->u.tcp_stats.tfo_syn_data_rcv); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_TFO_COOKIE_REQ_RCV, + data->u.tcp_stats.tfo_cookie_req_rcv); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_TFO_COOKIE_SENT, + data->u.tcp_stats.tfo_cookie_sent); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_TFO_COOKIE_INVALID, + data->u.tcp_stats.tfo_cookie_invalid); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_TFO_COOKIE_REQ, + data->u.tcp_stats.tfo_cookie_req); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_TFO_COOKIE_RCV, + data->u.tcp_stats.tfo_cookie_rcv); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_TFO_SYN_DATA_SENT, + data->u.tcp_stats.tfo_syn_data_sent); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_TFO_SYN_DATA_ACKED, + data->u.tcp_stats.tfo_syn_data_acked); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_TFO_SYN_LOSS, + data->u.tcp_stats.tfo_syn_loss); + nstat_set_keyval_scalar(&kv[i++], + NSTAT_SYSINFO_TFO_BLACKHOLE, + data->u.tcp_stats.tfo_blackhole); + + VERIFY(i == nkeyvals); break; } } @@ -2132,7 +2375,9 @@ nstat_sysinfo_send_data_internal( result = ctl_enqueuedata(control->ncs_kctl, control->ncs_unit, syscnt, allocsize, CTL_DATA_EOR); if (result != 0) - nstat_sysinfofailures += 1; + { + nstat_stats.nstat_sysinfofailures += 1; + } OSFree(syscnt, allocsize, nstat_malloc_tag); } return; @@ -2148,25 +2393,13 @@ nstat_sysinfo_send_data( for (control = nstat_controls; control; control = control->ncs_next) { lck_mtx_lock(&control->mtx); - nstat_src *src; - for (src = control->ncs_srcs; src; src = src->next) + if ((control->ncs_flags & NSTAT_FLAG_SYSINFO_SUBSCRIBED) != 0) { - if (src->provider->nstat_provider_id == - NSTAT_PROVIDER_SYSINFO) - { - struct nstat_sysinfo_cookie *syscookie; - syscookie = (struct nstat_sysinfo_cookie *) src->cookie; - if (syscookie->flags & data->flags) - { - nstat_sysinfo_send_data_internal(control, - src, data); - } - } - } + nstat_sysinfo_send_data_internal(control, data); + } 
lck_mtx_unlock(&control->mtx); } lck_mtx_unlock(&nstat_mtx); - } static void @@ -2185,73 +2418,209 @@ static errno_t nstat_control_connect(kern_ctl_ref kctl, struct sockaddr_ctl *sac static errno_t nstat_control_disconnect(kern_ctl_ref kctl, u_int32_t unit, void *uinfo); static errno_t nstat_control_send(kern_ctl_ref kctl, u_int32_t unit, void *uinfo, mbuf_t m, int flags); +static errno_t +nstat_enqueue_success( + uint64_t context, + nstat_control_state *state, + u_int16_t flags) +{ + nstat_msg_hdr success; + errno_t result; -static void* -nstat_idle_check( - __unused thread_call_param_t p0, - __unused thread_call_param_t p1) + bzero(&success, sizeof(success)); + success.context = context; + success.type = NSTAT_MSG_TYPE_SUCCESS; + success.length = sizeof(success); + success.flags = flags; + result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &success, + sizeof(success), CTL_DATA_EOR | CTL_DATA_CRIT); + if (result != 0) { + if (nstat_debug != 0) + printf("%s: could not enqueue success message %d\n", + __func__, result); + nstat_stats.nstat_successmsgfailures += 1; + } + return result; +} + +static errno_t +nstat_control_send_goodbye( + nstat_control_state *state, + nstat_src *src) { - lck_mtx_lock(&nstat_mtx); - - nstat_idle_time = 0; - - nstat_control_state *control; - nstat_src *dead = NULL; - nstat_src *dead_list = NULL; - for (control = nstat_controls; control; control = control->ncs_next) + errno_t result = 0; + int failed = 0; + + if (nstat_control_reporting_allowed(state, src)) { - lck_mtx_lock(&control->mtx); - nstat_src **srcpp = &control->ncs_srcs; - - if (!(control->ncs_flags & NSTAT_FLAG_REQCOUNTS)) + if ((state->ncs_flags & NSTAT_FLAG_SUPPORTS_UPDATES) != 0) { - while(*srcpp != NULL) + result = nstat_control_send_update(state, src, 0, NSTAT_MSG_HDR_FLAG_CLOSING, NULL); + if (result != 0) { - if ((*srcpp)->provider->nstat_gone((*srcpp)->cookie)) - { - errno_t result; - - // Pull it off the list - dead = *srcpp; - *srcpp = (*srcpp)->next; - - // 
send one last counts notification - result = nstat_control_send_counts(control, dead, - 0, NULL); - if (result != 0 && nstat_debug) - printf("%s - nstat_control_send_counts() %d\n", - __func__, result); - - // send a last description - result = nstat_control_send_description(control, dead, 0); - if (result != 0 && nstat_debug) - printf("%s - nstat_control_send_description() %d\n", - __func__, result); - - // send the source removed notification - result = nstat_control_send_removed(control, dead); - if (result != 0 && nstat_debug) - printf("%s - nstat_control_send_removed() %d\n", - __func__, result); - - // Put this on the list to release later - dead->next = dead_list; - dead_list = dead; - } - else - { - srcpp = &(*srcpp)->next; - } + failed = 1; + if (nstat_debug != 0) + printf("%s - nstat_control_send_update() %d\n", __func__, result); } } - control->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS; - lck_mtx_unlock(&control->mtx); - } - - if (nstat_controls) - { - clock_interval_to_deadline(60, NSEC_PER_SEC, &nstat_idle_time); - thread_call_func_delayed((thread_call_func_t)nstat_idle_check, NULL, nstat_idle_time); + else + { + // send one last counts notification + result = nstat_control_send_counts(state, src, 0, NSTAT_MSG_HDR_FLAG_CLOSING, NULL); + if (result != 0) + { + failed = 1; + if (nstat_debug != 0) + printf("%s - nstat_control_send_counts() %d\n", __func__, result); + } + + // send a last description + result = nstat_control_send_description(state, src, 0, NSTAT_MSG_HDR_FLAG_CLOSING); + if (result != 0) + { + failed = 1; + if (nstat_debug != 0) + printf("%s - nstat_control_send_description() %d\n", __func__, result); + } + } + } + + // send the source removed notification + result = nstat_control_send_removed(state, src); + if (result != 0 && nstat_debug) + { + failed = 1; + if (nstat_debug != 0) + printf("%s - nstat_control_send_removed() %d\n", __func__, result); + } + + if (failed != 0) + nstat_stats.nstat_control_send_goodbye_failures++; + + + return result; 
+} + +static errno_t +nstat_flush_accumulated_msgs( + nstat_control_state *state) +{ + errno_t result = 0; + if (state->ncs_accumulated && mbuf_len(state->ncs_accumulated)) + { + mbuf_pkthdr_setlen(state->ncs_accumulated, mbuf_len(state->ncs_accumulated)); + result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, state->ncs_accumulated, CTL_DATA_EOR); + if (result != 0 && nstat_debug) + { + nstat_stats.nstat_flush_accumulated_msgs_failures++; + if (nstat_debug != 0) + printf("%s - ctl_enqueuembuf failed: %d\n", __func__, result); + mbuf_freem(state->ncs_accumulated); + } + state->ncs_accumulated = NULL; + } + return result; +} + +static errno_t +nstat_accumulate_msg( + nstat_control_state *state, + nstat_msg_hdr *hdr, + size_t length) +{ + if (state->ncs_accumulated && mbuf_trailingspace(state->ncs_accumulated) < length) + { + // Will send the current mbuf + nstat_flush_accumulated_msgs(state); + } + + errno_t result = 0; + + if (state->ncs_accumulated == NULL) + { + unsigned int one = 1; + if (mbuf_allocpacket(MBUF_DONTWAIT, NSTAT_MAX_MSG_SIZE, &one, &state->ncs_accumulated) != 0) + { + if (nstat_debug != 0) + printf("%s - mbuf_allocpacket failed\n", __func__); + result = ENOMEM; + } + else + { + mbuf_setlen(state->ncs_accumulated, 0); + } + } + + if (result == 0) + { + hdr->length = length; + result = mbuf_copyback(state->ncs_accumulated, mbuf_len(state->ncs_accumulated), + length, hdr, MBUF_DONTWAIT); + } + + if (result != 0) + { + nstat_flush_accumulated_msgs(state); + if (nstat_debug != 0) + printf("%s - resorting to ctl_enqueuedata\n", __func__); + result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, hdr, length, CTL_DATA_EOR); + } + + if (result != 0) + nstat_stats.nstat_accumulate_msg_failures++; + + return result; +} + +static void* +nstat_idle_check( + __unused thread_call_param_t p0, + __unused thread_call_param_t p1) +{ + lck_mtx_lock(&nstat_mtx); + + nstat_idle_time = 0; + + nstat_control_state *control; + nstat_src *dead = NULL; + nstat_src 
*dead_list = NULL; + for (control = nstat_controls; control; control = control->ncs_next) + { + lck_mtx_lock(&control->mtx); + nstat_src **srcpp = &control->ncs_srcs; + + if (!(control->ncs_flags & NSTAT_FLAG_REQCOUNTS)) + { + while(*srcpp != NULL) + { + if ((*srcpp)->provider->nstat_gone((*srcpp)->cookie)) + { + errno_t result; + + // Pull it off the list + dead = *srcpp; + *srcpp = (*srcpp)->next; + + result = nstat_control_send_goodbye(control, dead); + + // Put this on the list to release later + dead->next = dead_list; + dead_list = dead; + } + else + { + srcpp = &(*srcpp)->next; + } + } + } + control->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS; + lck_mtx_unlock(&control->mtx); + } + + if (nstat_controls) + { + clock_interval_to_deadline(60, NSEC_PER_SEC, &nstat_idle_time); + thread_call_func_delayed((thread_call_func_t)nstat_idle_check, NULL, nstat_idle_time); } lck_mtx_unlock(&nstat_mtx); @@ -2304,17 +2673,38 @@ nstat_control_cleanup_source( { errno_t result; - if (state) { + if (state) + { result = nstat_control_send_removed(state, src); - if (result != 0 && nstat_debug) - printf("%s - nstat_control_send_removed() %d\n", - __func__, result); + if (result != 0) + { + nstat_stats.nstat_control_cleanup_source_failures++; + if (nstat_debug != 0) + printf("%s - nstat_control_send_removed() %d\n", + __func__, result); + } } // Cleanup the source if we found it. 
src->provider->nstat_release(src->cookie, locked); OSFree(src, sizeof(*src), nstat_malloc_tag); } + +static bool +nstat_control_reporting_allowed( + nstat_control_state *state, + nstat_src *src) +{ + if (src->provider->nstat_reporting_allowed == NULL) + return TRUE; + + return ( + src->provider->nstat_reporting_allowed( src->cookie, + state->ncs_provider_filters[src->provider->nstat_provider_id]) + ); +} + + static errno_t nstat_control_connect( kern_ctl_ref kctl, @@ -2350,7 +2740,7 @@ static errno_t nstat_control_disconnect( __unused kern_ctl_ref kctl, __unused u_int32_t unit, - void *uinfo) + void *uinfo) { u_int32_t watching; nstat_control_state *state = (nstat_control_state*)uinfo; @@ -2385,6 +2775,12 @@ nstat_control_disconnect( // set cleanup flags state->ncs_flags |= NSTAT_FLAG_CLEANUP; + if (state->ncs_accumulated) + { + mbuf_freem(state->ncs_accumulated); + state->ncs_accumulated = NULL; + } + // Copy out the list of sources nstat_src *srcs = state->ncs_srcs; state->ncs_srcs = NULL; @@ -2441,10 +2837,10 @@ nstat_control_send_counts( nstat_control_state *state, nstat_src *src, unsigned long long context, + u_int16_t hdr_flags, int *gone) -{ +{ nstat_msg_src_counts counts; - int localgone = 0; errno_t result = 0; /* Some providers may not have any counts to send */ @@ -2453,33 +2849,68 @@ nstat_control_send_counts( bzero(&counts, sizeof(counts)); counts.hdr.type = NSTAT_MSG_TYPE_SRC_COUNTS; + counts.hdr.length = sizeof(counts); + counts.hdr.flags = hdr_flags; counts.hdr.context = context; counts.srcref = src->srcref; - if (src->provider->nstat_counts(src->cookie, &counts.counts, - &localgone) == 0) { + if (src->provider->nstat_counts(src->cookie, &counts.counts, gone) == 0) + { if ((src->filter & NSTAT_FILTER_NOZEROBYTES) && counts.counts.nstat_rxbytes == 0 && - counts.counts.nstat_txbytes == 0) { + counts.counts.nstat_txbytes == 0) + { result = EAGAIN; - } else { + } + else + { result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &counts, 
sizeof(counts), CTL_DATA_EOR); if (result != 0) - nstat_srccountfailures += 1; + nstat_stats.nstat_sendcountfailures += 1; } } - if (gone) - *gone = localgone; return result; } +static errno_t +nstat_control_append_counts( + nstat_control_state *state, + nstat_src *src, + int *gone) +{ + /* Some providers may not have any counts to send */ + if (!src->provider->nstat_counts) return 0; + + nstat_msg_src_counts counts; + bzero(&counts, sizeof(counts)); + counts.hdr.type = NSTAT_MSG_TYPE_SRC_COUNTS; + counts.hdr.length = sizeof(counts); + counts.srcref = src->srcref; + + errno_t result = 0; + result = src->provider->nstat_counts(src->cookie, &counts.counts, gone); + if (result != 0) + { + return result; + } + + if ((src->filter & NSTAT_FILTER_NOZEROBYTES) == NSTAT_FILTER_NOZEROBYTES && + counts.counts.nstat_rxbytes == 0 && counts.counts.nstat_txbytes == 0) + { + return EAGAIN; + } + + return nstat_accumulate_msg(state, &counts.hdr, counts.hdr.length); +} + static int nstat_control_send_description( nstat_control_state *state, nstat_src *src, - u_int64_t context) + u_int64_t context, + u_int16_t hdr_flags) { // Provider doesn't support getting the descriptor? Done. 
if (src->provider->nstat_descriptor_length == 0 || @@ -2513,19 +2944,194 @@ nstat_control_send_description( desc->hdr.context = context; desc->hdr.type = NSTAT_MSG_TYPE_SRC_DESC; + desc->hdr.length = size; + desc->hdr.flags = hdr_flags; desc->srcref = src->srcref; desc->provider = src->provider->nstat_provider_id; result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg, CTL_DATA_EOR); if (result != 0) { - nstat_descriptionfailures += 1; + nstat_stats.nstat_descriptionfailures += 1; mbuf_freem(msg); } return result; } +static errno_t +nstat_control_append_description( + nstat_control_state *state, + nstat_src *src) +{ + size_t size = offsetof(nstat_msg_src_description, data) + src->provider->nstat_descriptor_length; + if (size > 512 || src->provider->nstat_descriptor_length == 0 || + src->provider->nstat_copy_descriptor == NULL) + { + return EOPNOTSUPP; + } + + // Fill out a buffer on the stack, we will copy to the mbuf later + u_int64_t buffer[size/sizeof(u_int64_t) + 1]; // u_int64_t to ensure alignment + bzero(buffer, size); + + nstat_msg_src_description *desc = (nstat_msg_src_description*)buffer; + desc->hdr.type = NSTAT_MSG_TYPE_SRC_DESC; + desc->hdr.length = size; + desc->srcref = src->srcref; + desc->provider = src->provider->nstat_provider_id; + + errno_t result = 0; + // Fill in the description + // Query the provider for the provider specific bits + result = src->provider->nstat_copy_descriptor(src->cookie, desc->data, + src->provider->nstat_descriptor_length); + if (result != 0) + { + return result; + } + + return nstat_accumulate_msg(state, &desc->hdr, size); +} + +static int +nstat_control_send_update( + nstat_control_state *state, + nstat_src *src, + u_int64_t context, + u_int16_t hdr_flags, + int *gone) +{ + // Provider doesn't support getting the descriptor or counts? Done. 
+ if ((src->provider->nstat_descriptor_length == 0 || + src->provider->nstat_copy_descriptor == NULL) && + src->provider->nstat_counts == NULL) + { + return EOPNOTSUPP; + } + + // Allocate storage for the descriptor message + mbuf_t msg; + unsigned int one = 1; + u_int32_t size = offsetof(nstat_msg_src_update, data) + + src->provider->nstat_descriptor_length; + if (mbuf_allocpacket(MBUF_DONTWAIT, size, &one, &msg) != 0) + { + return ENOMEM; + } + + nstat_msg_src_update *desc = (nstat_msg_src_update*)mbuf_data(msg); + bzero(desc, size); + desc->hdr.context = context; + desc->hdr.type = NSTAT_MSG_TYPE_SRC_UPDATE; + desc->hdr.length = size; + desc->hdr.flags = hdr_flags; + desc->srcref = src->srcref; + desc->provider = src->provider->nstat_provider_id; + + mbuf_setlen(msg, size); + mbuf_pkthdr_setlen(msg, mbuf_len(msg)); + + errno_t result = 0; + if (src->provider->nstat_descriptor_length != 0 && src->provider->nstat_copy_descriptor) + { + // Query the provider for the provider specific bits + result = src->provider->nstat_copy_descriptor(src->cookie, desc->data, + src->provider->nstat_descriptor_length); + if (result != 0) + { + mbuf_freem(msg); + return result; + } + } + + if (src->provider->nstat_counts) + { + result = src->provider->nstat_counts(src->cookie, &desc->counts, gone); + if (result == 0) + { + if ((src->filter & NSTAT_FILTER_NOZEROBYTES) == NSTAT_FILTER_NOZEROBYTES && + desc->counts.nstat_rxbytes == 0 && desc->counts.nstat_txbytes == 0) + { + result = EAGAIN; + } + else + { + result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg, CTL_DATA_EOR); + } + } + } + + if (result != 0) + { + nstat_stats.nstat_srcupatefailures += 1; + mbuf_freem(msg); + } + + return result; +} + +static errno_t +nstat_control_append_update( + nstat_control_state *state, + nstat_src *src, + int *gone) +{ + size_t size = offsetof(nstat_msg_src_update, data) + src->provider->nstat_descriptor_length; + if (size > 512 || ((src->provider->nstat_descriptor_length == 0 || + 
src->provider->nstat_copy_descriptor == NULL) && + src->provider->nstat_counts == NULL)) + { + return EOPNOTSUPP; + } + + // Fill out a buffer on the stack, we will copy to the mbuf later + u_int64_t buffer[size/sizeof(u_int64_t) + 1]; // u_int64_t to ensure alignment + bzero(buffer, size); + + nstat_msg_src_update *desc = (nstat_msg_src_update*)buffer; + desc->hdr.type = NSTAT_MSG_TYPE_SRC_UPDATE; + desc->hdr.length = size; + desc->srcref = src->srcref; + desc->provider = src->provider->nstat_provider_id; + + errno_t result = 0; + // Fill in the description + if (src->provider->nstat_descriptor_length != 0 && src->provider->nstat_copy_descriptor) + { + // Query the provider for the provider specific bits + result = src->provider->nstat_copy_descriptor(src->cookie, desc->data, + src->provider->nstat_descriptor_length); + if (result != 0) + { + nstat_stats.nstat_copy_descriptor_failures++; + if (nstat_debug != 0) + printf("%s: src->provider->nstat_copy_descriptor: %d\n", __func__, result); + return result; + } + } + + if (src->provider->nstat_counts) + { + result = src->provider->nstat_counts(src->cookie, &desc->counts, gone); + if (result != 0) + { + nstat_stats.nstat_provider_counts_failures++; + if (nstat_debug != 0) + printf("%s: src->provider->nstat_counts: %d\n", __func__, result); + return result; + } + + if ((src->filter & NSTAT_FILTER_NOZEROBYTES) == NSTAT_FILTER_NOZEROBYTES && + desc->counts.nstat_rxbytes == 0 && desc->counts.nstat_txbytes == 0) + { + return EAGAIN; + } + } + + return nstat_accumulate_msg(state, &desc->hdr, size); +} + static errno_t nstat_control_send_removed( nstat_control_state *state, @@ -2536,12 +3142,13 @@ nstat_control_send_removed( bzero(&removed, sizeof(removed)); removed.hdr.type = NSTAT_MSG_TYPE_SRC_REMOVED; + removed.hdr.length = sizeof(removed); removed.hdr.context = 0; removed.srcref = src->srcref; result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &removed, sizeof(removed), CTL_DATA_EOR | CTL_DATA_CRIT); if (result 
!= 0) - nstat_msgremovedfailures += 1; + nstat_stats.nstat_msgremovedfailures += 1; return result; } @@ -2610,9 +3217,13 @@ nstat_control_handle_add_all( return EINVAL; } + nstat_msg_add_all_srcs *req = mbuf_data(m); + if (req->provider > NSTAT_PROVIDER_LAST) return ENOENT; + nstat_provider *provider = nstat_find_provider_by_id(req->provider); - + u_int64_t filter = req->filter; + if (!provider) return ENOENT; if (provider->nstat_watcher_add == NULL) return ENOTSUP; @@ -2631,85 +3242,104 @@ nstat_control_handle_add_all( lck_mtx_unlock(&state->mtx); if (result != 0) return result; + state->ncs_provider_filters[req->provider] = filter; + result = provider->nstat_watcher_add(state); if (result != 0) { + state->ncs_provider_filters[req->provider] = 0; lck_mtx_lock(&state->mtx); state->ncs_watching &= ~(1 << provider->nstat_provider_id); lck_mtx_unlock(&state->mtx); } if (result == 0) - nstat_enqueue_success(req->hdr.context, state); + nstat_enqueue_success(req->hdr.context, state, 0); return result; } static errno_t nstat_control_source_add( - u_int64_t context, + u_int64_t context, nstat_control_state *state, nstat_provider *provider, - nstat_provider_cookie_t cookie) + nstat_provider_cookie_t cookie) { - // Fill out source added message - mbuf_t msg = NULL; - unsigned int one = 1; + // Fill out source added message if appropriate + mbuf_t msg = NULL; + nstat_src_ref_t *srcrefp = NULL; + + u_int64_t provider_filters = + state->ncs_provider_filters[provider->nstat_provider_id]; + boolean_t tell_user = + ((provider_filters & NSTAT_FILTER_SUPPRESS_SRC_ADDED) == 0); + u_int32_t src_filter = + (provider_filters & NSTAT_FILTER_PROVIDER_NOZEROBYTES) + ? 
NSTAT_FILTER_NOZEROBYTES : 0; + + if (tell_user) + { + unsigned int one = 1; - if (mbuf_allocpacket(MBUF_DONTWAIT, sizeof(nstat_msg_src_added), &one, - &msg) != 0) - return ENOMEM; + if (mbuf_allocpacket(MBUF_DONTWAIT, sizeof(nstat_msg_src_added), + &one, &msg) != 0) + return ENOMEM; - mbuf_setlen(msg, sizeof(nstat_msg_src_added)); - mbuf_pkthdr_setlen(msg, mbuf_len(msg)); - nstat_msg_src_added *add = mbuf_data(msg); - bzero(add, sizeof(*add)); - add->hdr.type = NSTAT_MSG_TYPE_SRC_ADDED; - add->hdr.context = context; - add->provider = provider->nstat_provider_id; + mbuf_setlen(msg, sizeof(nstat_msg_src_added)); + mbuf_pkthdr_setlen(msg, mbuf_len(msg)); + nstat_msg_src_added *add = mbuf_data(msg); + bzero(add, sizeof(*add)); + add->hdr.type = NSTAT_MSG_TYPE_SRC_ADDED; + add->hdr.length = mbuf_len(msg); + add->hdr.context = context; + add->provider = provider->nstat_provider_id; + srcrefp = &add->srcref; + } // Allocate storage for the source nstat_src *src = OSMalloc(sizeof(*src), nstat_malloc_tag); if (src == NULL) { - mbuf_freem(msg); + if (msg) mbuf_freem(msg); return ENOMEM; } // Fill in the source, including picking an unused source ref lck_mtx_lock(&state->mtx); - - add->srcref = src->srcref = nstat_control_next_src_ref(state); + + src->srcref = nstat_control_next_src_ref(state); + if (srcrefp) + *srcrefp = src->srcref; + if (state->ncs_flags & NSTAT_FLAG_CLEANUP || src->srcref == NSTAT_SRC_REF_INVALID) { lck_mtx_unlock(&state->mtx); OSFree(src, sizeof(*src), nstat_malloc_tag); - mbuf_freem(msg); + if (msg) mbuf_freem(msg); return EINVAL; } src->provider = provider; src->cookie = cookie; - src->filter = 0; - - // send the source added message - errno_t result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg, - CTL_DATA_EOR); - if (result != 0) + src->filter = src_filter; + + if (msg) { - nstat_srcaddedfailures += 1; - lck_mtx_unlock(&state->mtx); - OSFree(src, sizeof(*src), nstat_malloc_tag); - mbuf_freem(msg); - return result; + // send the source 
added message if appropriate + errno_t result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg, + CTL_DATA_EOR); + if (result != 0) + { + nstat_stats.nstat_srcaddedfailures += 1; + lck_mtx_unlock(&state->mtx); + OSFree(src, sizeof(*src), nstat_malloc_tag); + mbuf_freem(msg); + return result; + } } - - // Put the source in the list + // Put the source in the list src->next = state->ncs_srcs; state->ncs_srcs = src; - // send the description message - // not useful as the source is often not complete -// nstat_control_send_description(state, src, 0); - lck_mtx_unlock(&state->mtx); return 0; @@ -2770,64 +3400,121 @@ nstat_control_handle_query_request( { return EINVAL; } + + const boolean_t all_srcs = (req.srcref == NSTAT_SRC_REF_ALL); lck_mtx_lock(&state->mtx); - if (req.srcref == NSTAT_SRC_REF_ALL) + + if (all_srcs) + { state->ncs_flags |= NSTAT_FLAG_REQCOUNTS; + } nstat_src **srcpp = &state->ncs_srcs; - while (*srcpp != NULL) - { - int gone; + u_int64_t src_count = 0; + boolean_t partial = FALSE; + + /* + * Error handling policy and sequence number generation is folded into + * nstat_control_begin_query. + */ + partial = nstat_control_begin_query(state, &req.hdr); + while (*srcpp != NULL + && (!partial || src_count < QUERY_CONTINUATION_SRC_COUNT)) + { + nstat_src *src = NULL; + int gone; + + src = *srcpp; gone = 0; // XXX ignore IFACE types? - if (req.srcref == NSTAT_SRC_REF_ALL || - (*srcpp)->srcref == req.srcref) + if (all_srcs || src->srcref == req.srcref) { - gone = 0; - - result = nstat_control_send_counts(state, *srcpp, - req.hdr.context, &gone); - - // If the counts message failed to enqueue then we should clear our flag so - // that a client doesn't miss anything on idle cleanup. 
- if (result != 0) - state->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS; - - if (gone) + if (nstat_control_reporting_allowed(state, src) + && (!partial || !all_srcs || src->seq != state->ncs_seq)) { - // send one last descriptor message so client may see last state - // If we can't send the notification now, it - // will be sent in the idle cleanup. - result = nstat_control_send_description(state, *srcpp, 0); - if (result != 0 && nstat_debug) - printf("%s - nstat_control_send_description() %d\n", - __func__, result); - if (result != 0) { + if (all_srcs && + (req.hdr.flags & NSTAT_MSG_HDR_FLAG_SUPPORTS_AGGREGATE) != 0) + { + result = nstat_control_append_counts(state, src, &gone); + } + else + { + result = nstat_control_send_counts(state, src, req.hdr.context, 0, &gone); + } + + if (ENOMEM == result || ENOBUFS == result) + { + /* + * If the counts message failed to + * enqueue then we should clear our flag so + * that a client doesn't miss anything on + * idle cleanup. We skip the "gone" + * processing in the hope that we may + * catch it another time. + */ state->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS; break; - } - - // pull src out of the list - nstat_src *src = *srcpp; - *srcpp = src->next; - - src->next = dead_srcs; - dead_srcs = src; + } + if (partial) + { + /* + * We skip over hard errors and + * filtered sources. + */ + src->seq = state->ncs_seq; + src_count++; + } } - - if (req.srcref != NSTAT_SRC_REF_ALL) - break; } - if (!gone) + if (gone) + { + // send one last descriptor message so client may see last state + // If we can't send the notification now, it + // will be sent in the idle cleanup. 
+ result = nstat_control_send_description(state, *srcpp, 0, 0); + if (result != 0) + { + nstat_stats.nstat_control_send_description_failures++; + if (nstat_debug != 0) + printf("%s - nstat_control_send_description() %d\n", __func__, result); + state->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS; + break; + } + + // pull src out of the list + *srcpp = src->next; + + src->next = dead_srcs; + dead_srcs = src; + } + else + { srcpp = &(*srcpp)->next; + } + + if (!all_srcs && req.srcref == src->srcref) + { + break; + } } - lck_mtx_unlock(&state->mtx); - + nstat_flush_accumulated_msgs(state); + + u_int16_t flags = 0; if (req.srcref == NSTAT_SRC_REF_ALL) + flags = nstat_control_end_query(state, *srcpp, partial); + + lck_mtx_unlock(&state->mtx); + + /* + * If an error occurred enqueueing data, then allow the error to + * propagate to nstat_control_send. This way, the error is sent to + * user-level. + */ + if (all_srcs && ENOMEM != result && ENOBUFS != result) { - nstat_enqueue_success(req.hdr.context, state); + nstat_enqueue_success(req.hdr.context, state, flags); result = 0; } @@ -2848,37 +3535,86 @@ nstat_control_handle_query_request( static errno_t nstat_control_handle_get_src_description( nstat_control_state *state, - mbuf_t m) + mbuf_t m) { nstat_msg_get_src_description req; - errno_t result = 0; + errno_t result = ENOENT; nstat_src *src; if (mbuf_copydata(m, 0, sizeof(req), &req) != 0) { return EINVAL; } - + lck_mtx_lock(&state->mtx); - if (req.srcref == NSTAT_SRC_REF_ALL) - state->ncs_flags |= NSTAT_FLAG_REQDESCS; - for (src = state->ncs_srcs; src; src = src->next) - if (req.srcref == NSTAT_SRC_REF_ALL || - src->srcref == req.srcref) + u_int64_t src_count = 0; + boolean_t partial = FALSE; + const boolean_t all_srcs = (req.srcref == NSTAT_SRC_REF_ALL); + + /* + * Error handling policy and sequence number generation is folded into + * nstat_control_begin_query. 
+ */ + partial = nstat_control_begin_query(state, &req.hdr); + + for (src = state->ncs_srcs; + src && (!partial || src_count < QUERY_CONTINUATION_SRC_COUNT); + src = src->next) + { + if (all_srcs || src->srcref == req.srcref) { - result = nstat_control_send_description(state, src, - req.hdr.context); - if (result != 0) - state->ncs_flags &= ~NSTAT_FLAG_REQDESCS; - if (req.srcref != NSTAT_SRC_REF_ALL) + if (nstat_control_reporting_allowed(state, src) + && (!all_srcs || !partial || src->seq != state->ncs_seq)) + { + if ((req.hdr.flags & NSTAT_MSG_HDR_FLAG_SUPPORTS_AGGREGATE) != 0 && all_srcs) + { + result = nstat_control_append_description(state, src); + } + else + { + result = nstat_control_send_description(state, src, req.hdr.context, 0); + } + + if (ENOMEM == result || ENOBUFS == result) + { + /* + * If the description message failed to + * enqueue then we give up for now. + */ + break; + } + if (partial) + { + /* + * Note, we skip over hard errors and + * filtered sources. + */ + src->seq = state->ncs_seq; + src_count++; + } + } + + if (!all_srcs) + { break; + } } + } + nstat_flush_accumulated_msgs(state); + + u_int16_t flags = 0; + if (req.srcref == NSTAT_SRC_REF_ALL) + flags = nstat_control_end_query(state, src, partial); + lck_mtx_unlock(&state->mtx); - if (req.srcref != NSTAT_SRC_REF_ALL && src == NULL) - result = ENOENT; - else if (req.srcref == NSTAT_SRC_REF_ALL) + /* + * If an error occurred enqueueing data, then allow the error to + * propagate to nstat_control_send. This way, the error is sent to + * user-level. 
+ */ + if (all_srcs && ENOMEM != result && ENOBUFS != result) { - nstat_enqueue_success(req.hdr.context, state); + nstat_enqueue_success(req.hdr.context, state, flags); result = 0; } @@ -2911,7 +3647,231 @@ nstat_control_handle_set_filter( return ENOENT; return 0; +} + +static void +nstat_send_error( + nstat_control_state *state, + u_int64_t context, + u_int32_t error) +{ + errno_t result; + struct nstat_msg_error err; + + bzero(&err, sizeof(err)); + err.hdr.type = NSTAT_MSG_TYPE_ERROR; + err.hdr.length = sizeof(err); + err.hdr.context = context; + err.error = error; + + result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &err, + sizeof(err), CTL_DATA_EOR | CTL_DATA_CRIT); + if (result != 0) + nstat_stats.nstat_msgerrorfailures++; +} + +static boolean_t +nstat_control_begin_query( + nstat_control_state *state, + const nstat_msg_hdr *hdrp) +{ + boolean_t partial = FALSE; + + if (hdrp->flags & NSTAT_MSG_HDR_FLAG_CONTINUATION) + { + /* A partial query all has been requested. */ + partial = TRUE; + + if (state->ncs_context != hdrp->context) + { + if (state->ncs_context != 0) + nstat_send_error(state, state->ncs_context, EAGAIN); + + /* Initialize state for a partial query all. */ + state->ncs_context = hdrp->context; + state->ncs_seq++; + } + } + else if (state->ncs_context != 0) + { + /* + * A continuation of a paced-query was in progress. Send that + * context an error and reset the state. If the same context + * has changed its mind, just send the full query results. 
+ */ + if (state->ncs_context != hdrp->context) + nstat_send_error(state, state->ncs_context, EAGAIN); + } + + return partial; +} + +static u_int16_t +nstat_control_end_query( + nstat_control_state *state, + nstat_src *last_src, + boolean_t partial) +{ + u_int16_t flags = 0; + + if (last_src == NULL || !partial) + { + /* + * We iterated through the entire srcs list or exited early + * from the loop when a partial update was not requested (an + * error occurred), so clear context to indicate internally + * that the query is finished. + */ + state->ncs_context = 0; + } + else + { + /* + * Indicate to userlevel to make another partial request as + * there are still sources left to be reported. + */ + flags |= NSTAT_MSG_HDR_FLAG_CONTINUATION; + } + + return flags; +} + +static errno_t +nstat_control_handle_get_update( + nstat_control_state *state, + mbuf_t m) +{ + nstat_msg_query_src_req req; + + if (mbuf_copydata(m, 0, sizeof(req), &req) != 0) + { + return EINVAL; + } + + lck_mtx_lock(&state->mtx); + + state->ncs_flags |= NSTAT_FLAG_SUPPORTS_UPDATES; + + errno_t result = ENOENT; + nstat_src *src; + nstat_src *dead_srcs = NULL; + nstat_src **srcpp = &state->ncs_srcs; + u_int64_t src_count = 0; + boolean_t partial = FALSE; + + /* + * Error handling policy and sequence number generation is folded into + * nstat_control_begin_query. + */ + partial = nstat_control_begin_query(state, &req.hdr); + + while (*srcpp != NULL + && (FALSE == partial + || src_count < QUERY_CONTINUATION_SRC_COUNT)) + { + int gone; + + gone = 0; + src = *srcpp; + if (nstat_control_reporting_allowed(state, src)) + { + /* skip this source if it has the current state + * sequence number as it's already been reported in + * this query-all partial sequence. 
*/ + if (req.srcref == NSTAT_SRC_REF_ALL + && (FALSE == partial || src->seq != state->ncs_seq)) + { + result = nstat_control_append_update(state, src, &gone); + if (ENOMEM == result || ENOBUFS == result) + { + /* + * If the update message failed to + * enqueue then give up. + */ + break; + } + if (partial) + { + /* + * We skip over hard errors and + * filtered sources. + */ + src->seq = state->ncs_seq; + src_count++; + } + } + else if (src->srcref == req.srcref) + { + result = nstat_control_send_update(state, src, req.hdr.context, 0, &gone); + } + } + + if (gone) + { + // pull src out of the list + *srcpp = src->next; + + src->next = dead_srcs; + dead_srcs = src; + } + else + { + srcpp = &(*srcpp)->next; + } + + if (req.srcref != NSTAT_SRC_REF_ALL && req.srcref == src->srcref) + { + break; + } + } + + nstat_flush_accumulated_msgs(state); + + + u_int16_t flags = 0; + if (req.srcref == NSTAT_SRC_REF_ALL) + flags = nstat_control_end_query(state, *srcpp, partial); + + lck_mtx_unlock(&state->mtx); + /* + * If an error occurred enqueueing data, then allow the error to + * propagate to nstat_control_send. This way, the error is sent to + * user-level. 
+ */ + if (req.srcref == NSTAT_SRC_REF_ALL && ENOMEM != result && ENOBUFS != result) + { + nstat_enqueue_success(req.hdr.context, state, flags); + result = 0; + } + + while (dead_srcs) + { + src = dead_srcs; + dead_srcs = src->next; + + // release src and send notification + nstat_control_cleanup_source(state, src, FALSE); + } + + return result; +} +static errno_t +nstat_control_handle_subscribe_sysinfo( + nstat_control_state *state) +{ + errno_t result = priv_check_cred(kauth_cred_get(), PRIV_NET_PRIVILEGED_NETWORK_STATISTICS, 0); + + if (result != 0) + { + return result; + } + + lck_mtx_lock(&state->mtx); + state->ncs_flags |= NSTAT_FLAG_SYSINFO_SUBSCRIBED; + lck_mtx_unlock(&state->mtx); + + return 0; } static errno_t @@ -2927,7 +3887,7 @@ nstat_control_send( struct nstat_msg_hdr storage; errno_t result = 0; - if (mbuf_pkthdr_len(m) < sizeof(hdr)) + if (mbuf_pkthdr_len(m) < sizeof(*hdr)) { // Is this the right thing to do? mbuf_freem(m); @@ -2944,6 +3904,19 @@ nstat_control_send( hdr = &storage; } + // Legacy clients may not set the length + // Those clients are likely not setting the flags either + // Fix everything up so old clients continue to work + if (hdr->length != mbuf_pkthdr_len(m)) + { + hdr->flags = 0; + hdr->length = mbuf_pkthdr_len(m); + if (hdr == &storage) + { + mbuf_copyback(m, 0, sizeof(*hdr), hdr, MBUF_DONTWAIT); + } + } + switch (hdr->type) { case NSTAT_MSG_TYPE_ADD_SRC: @@ -2965,11 +3938,19 @@ nstat_control_send( case NSTAT_MSG_TYPE_GET_SRC_DESC: result = nstat_control_handle_get_src_description(state, m); break; - + case NSTAT_MSG_TYPE_SET_FILTER: result = nstat_control_handle_set_filter(state, m); break; - + + case NSTAT_MSG_TYPE_GET_UPDATE: + result = nstat_control_handle_get_update(state, m); + break; + + case NSTAT_MSG_TYPE_SUBSCRIBE_SYSINFO: + result = nstat_control_handle_subscribe_sysinfo(state); + break; + default: result = EINVAL; break; @@ -2981,16 +3962,34 @@ nstat_control_send( bzero(&err, sizeof(err)); err.hdr.type = 
NSTAT_MSG_TYPE_ERROR; + err.hdr.length = sizeof(err) + mbuf_pkthdr_len(m); err.hdr.context = hdr->context; err.error = result; - result = ctl_enqueuedata(kctl, unit, &err, sizeof(err), - CTL_DATA_EOR | CTL_DATA_CRIT); + if (mbuf_prepend(&m, sizeof(err), MBUF_DONTWAIT) == 0 && + mbuf_copyback(m, 0, sizeof(err), &err, MBUF_DONTWAIT) == 0) + { + result = ctl_enqueuembuf(kctl, unit, m, CTL_DATA_EOR | CTL_DATA_CRIT); + if (result != 0) + { + mbuf_freem(m); + } + m = NULL; + } + if (result != 0) - nstat_descriptionfailures += 1; + { + // Unable to prepend the error to the request - just send the error + err.hdr.length = sizeof(err); + result = ctl_enqueuedata(kctl, unit, &err, sizeof(err), + CTL_DATA_EOR | CTL_DATA_CRIT); + if (result != 0) + nstat_stats.nstat_msgerrorfailures += 1; + } + nstat_stats.nstat_handle_msg_failures += 1; } - mbuf_freem(m); + if (m) mbuf_freem(m); return result; } diff --git a/bsd/net/ntstat.h b/bsd/net/ntstat.h index 2aad07b65..a6bcec06b 100644 --- a/bsd/net/ntstat.h +++ b/bsd/net/ntstat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2014 Apple Inc. All rights reserved. + * Copyright (c) 2010-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,12 +30,13 @@ #include #include #include +#include #ifdef PRIVATE #pragma pack(push, 4) #pragma mark -- Common Data Structures -- -#define __NSTAT_REVISION__ 6 +#define __NSTAT_REVISION__ 7 typedef u_int32_t nstat_provider_id_t; typedef u_int32_t nstat_src_ref_t; @@ -75,18 +76,20 @@ typedef struct nstat_sysinfo_keyval int64_t nstat_sysinfo_scalar; double nstat_sysinfo_distribution; } u; -} nstat_sysinfo_keyval; +} __attribute__((packed)) nstat_sysinfo_keyval; #define NSTAT_SYSINFO_FLAG_SCALAR 0x0001 #define NSTAT_SYSINFO_FLAG_DISTRIBUTION 0x0002 +#define NSTAT_MAX_MSG_SIZE 4096 + typedef struct nstat_sysinfo_counts { /* Counters */ u_int32_t nstat_sysinfo_len; u_int32_t pad; u_int8_t nstat_sysinfo_keyvals[]; -} nstat_sysinfo_counts; +} __attribute__((packed)) nstat_sysinfo_counts; enum { @@ -101,19 +104,66 @@ enum ,NSTAT_SYSINFO_KEY_RECV_PLR = 9 ,NSTAT_SYSINFO_KEY_SEND_TLRTO = 10 ,NSTAT_SYSINFO_KEY_SEND_REORDERRATE = 11 - + ,NSTAT_SYSINFO_CONNECTION_ATTEMPTS = 12 + ,NSTAT_SYSINFO_CONNECTION_ACCEPTS = 13 + ,NSTAT_SYSINFO_ECN_CLIENT_SETUP = 14 + ,NSTAT_SYSINFO_ECN_SERVER_SETUP = 15 + ,NSTAT_SYSINFO_ECN_CLIENT_SUCCESS = 16 + ,NSTAT_SYSINFO_ECN_SERVER_SUCCESS = 17 + ,NSTAT_SYSINFO_ECN_NOT_SUPPORTED = 18 + ,NSTAT_SYSINFO_ECN_LOST_SYN = 19 + ,NSTAT_SYSINFO_ECN_LOST_SYNACK = 20 + ,NSTAT_SYSINFO_ECN_RECV_CE = 21 + ,NSTAT_SYSINFO_ECN_RECV_ECE = 22 + ,NSTAT_SYSINFO_ECN_SENT_ECE = 23 + ,NSTAT_SYSINFO_ECN_CONN_RECV_CE = 24 + ,NSTAT_SYSINFO_ECN_CONN_PLNOCE = 25 + ,NSTAT_SYSINFO_ECN_CONN_PL_CE = 26 + ,NSTAT_SYSINFO_ECN_CONN_NOPL_CE = 27 + ,NSTAT_SYSINFO_MBUF_16KB_TOTAL = 28 + ,NSTAT_SYSINFO_ECN_CLIENT_ENABLED = 29 + ,NSTAT_SYSINFO_ECN_SERVER_ENABLED = 30 + ,NSTAT_SYSINFO_ECN_CONN_RECV_ECE = 31 + ,NSTAT_SYSINFO_MBUF_MEM_RELEASED = 32 + ,NSTAT_SYSINFO_MBUF_DRAIN_CNT = 33 + ,NSTAT_SYSINFO_TFO_SYN_DATA_RCV = 34 + ,NSTAT_SYSINFO_TFO_COOKIE_REQ_RCV = 35 + ,NSTAT_SYSINFO_TFO_COOKIE_SENT = 36 + ,NSTAT_SYSINFO_TFO_COOKIE_INVALID = 37 + 
,NSTAT_SYSINFO_TFO_COOKIE_REQ = 38 + ,NSTAT_SYSINFO_TFO_COOKIE_RCV = 39 + ,NSTAT_SYSINFO_TFO_SYN_DATA_SENT = 40 + ,NSTAT_SYSINFO_TFO_SYN_DATA_ACKED = 41 + ,NSTAT_SYSINFO_TFO_SYN_LOSS = 42 + ,NSTAT_SYSINFO_TFO_BLACKHOLE = 43 }; #pragma mark -- Network Statistics Providers -- + +// Interface properties + +#define NSTAT_IFNET_IS_UNKNOWN_TYPE 0x01 +#define NSTAT_IFNET_IS_LOOPBACK 0x02 +#define NSTAT_IFNET_IS_CELLULAR 0x04 +#define NSTAT_IFNET_IS_WIFI 0x08 +#define NSTAT_IFNET_IS_WIRED 0x10 +#define NSTAT_IFNET_IS_AWDL 0x20 +#define NSTAT_IFNET_IS_EXPENSIVE 0x40 +#define NSTAT_IFNET_IS_VPN 0x80 + + enum { - NSTAT_PROVIDER_ROUTE = 1 + NSTAT_PROVIDER_NONE = 0 + ,NSTAT_PROVIDER_ROUTE = 1 ,NSTAT_PROVIDER_TCP = 2 ,NSTAT_PROVIDER_UDP = 3 ,NSTAT_PROVIDER_IFNET = 4 ,NSTAT_PROVIDER_SYSINFO = 5 }; +#define NSTAT_PROVIDER_LAST NSTAT_PROVIDER_SYSINFO +#define NSTAT_PROVIDER_COUNT (NSTAT_PROVIDER_LAST+1) typedef struct nstat_route_add_param { @@ -182,6 +232,8 @@ typedef struct nstat_tcp_descriptor uint8_t uuid[16]; uint8_t euuid[16]; uint8_t vuuid[16]; + struct tcp_conn_status connstatus; + uint16_t ifnet_properties __attribute__((aligned(4))); } nstat_tcp_descriptor; typedef struct nstat_tcp_add_param nstat_udp_add_param; @@ -215,6 +267,7 @@ typedef struct nstat_udp_descriptor uint8_t uuid[16]; uint8_t euuid[16]; uint8_t vuuid[16]; + uint16_t ifnet_properties; } nstat_udp_descriptor; typedef struct nstat_route_descriptor @@ -255,16 +308,146 @@ typedef struct nstat_ifnet_add_param u_int64_t threshold; } nstat_ifnet_add_param; +typedef struct nstat_ifnet_desc_cellular_status +{ + u_int32_t valid_bitmask; /* indicates which fields are valid */ +#define NSTAT_IFNET_DESC_CELL_LINK_QUALITY_METRIC_VALID 0x1 +#define NSTAT_IFNET_DESC_CELL_UL_EFFECTIVE_BANDWIDTH_VALID 0x2 +#define NSTAT_IFNET_DESC_CELL_UL_MAX_BANDWIDTH_VALID 0x4 +#define NSTAT_IFNET_DESC_CELL_UL_MIN_LATENCY_VALID 0x8 +#define NSTAT_IFNET_DESC_CELL_UL_EFFECTIVE_LATENCY_VALID 0x10 +#define 
NSTAT_IFNET_DESC_CELL_UL_MAX_LATENCY_VALID 0x20 +#define NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_VALID 0x40 +#define NSTAT_IFNET_DESC_CELL_UL_BYTES_LOST_VALID 0x80 +#define NSTAT_IFNET_DESC_CELL_UL_MIN_QUEUE_SIZE_VALID 0x100 +#define NSTAT_IFNET_DESC_CELL_UL_AVG_QUEUE_SIZE_VALID 0x200 +#define NSTAT_IFNET_DESC_CELL_UL_MAX_QUEUE_SIZE_VALID 0x400 +#define NSTAT_IFNET_DESC_CELL_DL_EFFECTIVE_BANDWIDTH_VALID 0x800 +#define NSTAT_IFNET_DESC_CELL_DL_MAX_BANDWIDTH_VALID 0x1000 +#define NSTAT_IFNET_DESC_CELL_CONFIG_INACTIVITY_TIME_VALID 0x2000 +#define NSTAT_IFNET_DESC_CELL_CONFIG_BACKOFF_TIME_VALID 0x4000 + u_int32_t link_quality_metric; + u_int32_t ul_effective_bandwidth; /* Measured uplink bandwidth based on + current activity (bps) */ + u_int32_t ul_max_bandwidth; /* Maximum supported uplink bandwidth + (bps) */ + u_int32_t ul_min_latency; /* min expected uplink latency for first hop + (ms) */ + u_int32_t ul_effective_latency; /* current expected uplink latency for + first hop (ms) */ + u_int32_t ul_max_latency; /* max expected uplink latency first hop + (ms) */ + u_int32_t ul_retxt_level; /* Retransmission metric */ +#define NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_NONE 1 +#define NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_LOW 2 +#define NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_MEDIUM 3 +#define NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_HIGH 4 + + u_int32_t ul_bytes_lost; /* % of total bytes lost on uplink in Q10 + format */ + u_int32_t ul_min_queue_size; /* minimum bytes in queue */ + u_int32_t ul_avg_queue_size; /* average bytes in queue */ + u_int32_t ul_max_queue_size; /* maximum bytes in queue */ + u_int32_t dl_effective_bandwidth; /* Measured downlink bandwidth based + on current activity (bps) */ + u_int32_t dl_max_bandwidth; /* Maximum supported downlink bandwidth + (bps) */ + u_int32_t config_inactivity_time; /* ms */ + u_int32_t config_backoff_time; /* new connections backoff time in ms */ +} nstat_ifnet_desc_cellular_status; + +typedef struct nstat_ifnet_desc_wifi_status { + 
u_int32_t valid_bitmask; +#define NSTAT_IFNET_DESC_WIFI_LINK_QUALITY_METRIC_VALID 0x1 +#define NSTAT_IFNET_DESC_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID 0x2 +#define NSTAT_IFNET_DESC_WIFI_UL_MAX_BANDWIDTH_VALID 0x4 +#define NSTAT_IFNET_DESC_WIFI_UL_MIN_LATENCY_VALID 0x8 +#define NSTAT_IFNET_DESC_WIFI_UL_EFFECTIVE_LATENCY_VALID 0x10 +#define NSTAT_IFNET_DESC_WIFI_UL_MAX_LATENCY_VALID 0x20 +#define NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_VALID 0x40 +#define NSTAT_IFNET_DESC_WIFI_UL_ERROR_RATE_VALID 0x80 +#define NSTAT_IFNET_DESC_WIFI_UL_BYTES_LOST_VALID 0x100 +#define NSTAT_IFNET_DESC_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID 0x200 +#define NSTAT_IFNET_DESC_WIFI_DL_MAX_BANDWIDTH_VALID 0x400 +#define NSTAT_IFNET_DESC_WIFI_DL_MIN_LATENCY_VALID 0x800 +#define NSTAT_IFNET_DESC_WIFI_DL_EFFECTIVE_LATENCY_VALID 0x1000 +#define NSTAT_IFNET_DESC_WIFI_DL_MAX_LATENCY_VALID 0x2000 +#define NSTAT_IFNET_DESC_WIFI_DL_ERROR_RATE_VALID 0x4000 +#define NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_VALID 0x8000 +#define NSTAT_IFNET_DESC_WIFI_CONFIG_MULTICAST_RATE_VALID 0x10000 +#define NSTAT_IFNET_DESC_WIFI_CONFIG_SCAN_COUNT_VALID 0x20000 +#define NSTAT_IFNET_DESC_WIFI_CONFIG_SCAN_DURATION_VALID 0x40000 + u_int32_t link_quality_metric; /* link quality metric */ + u_int32_t ul_effective_bandwidth; /* Measured uplink bandwidth based on + current activity (bps) */ + u_int32_t ul_max_bandwidth; /* Maximum supported uplink bandwidth + (bps) */ + u_int32_t ul_min_latency; /* min expected uplink latency for first hop + (ms) */ + u_int32_t ul_effective_latency; /* current expected uplink latency for + first hop (ms) */ + u_int32_t ul_max_latency; /* max expected uplink latency for first hop + (ms) */ + u_int32_t ul_retxt_level; /* Retransmission metric */ +#define NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_NONE 1 +#define NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_LOW 2 +#define NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_MEDIUM 3 +#define NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_HIGH 4 + + u_int32_t ul_bytes_lost; /* % of total bytes lost 
on uplink in Q10 + format */ + u_int32_t ul_error_rate; /* % of bytes dropped on uplink after many + retransmissions in Q10 format */ + u_int32_t dl_effective_bandwidth; /* Measured downlink bandwidth based + on current activity (bps) */ + u_int32_t dl_max_bandwidth; /* Maximum supported downlink bandwidth + (bps) */ + /* + * The download latency values indicate the time AP may have to wait + * for the driver to receive the packet. These values give the range + * of expected latency mainly due to co-existence events and channel + * hopping where the interface becomes unavailable. + */ + u_int32_t dl_min_latency; /* min expected latency for first hop in ms */ + u_int32_t dl_effective_latency; /* current expected latency for first + hop in ms */ + u_int32_t dl_max_latency; /* max expected latency for first hop in ms */ + u_int32_t dl_error_rate; /* % of CRC or other errors in Q10 format */ + u_int32_t config_frequency; /* 2.4 or 5 GHz */ +#define NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_2_4_GHZ 1 +#define NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_5_0_GHZ 2 + u_int32_t config_multicast_rate; /* bps */ + u_int32_t scan_count; /* scan count during the previous period */ + u_int32_t scan_duration; /* scan duration in ms */ +} nstat_ifnet_desc_wifi_status; + +enum +{ + NSTAT_IFNET_DESC_LINK_STATUS_TYPE_NONE = 0 + ,NSTAT_IFNET_DESC_LINK_STATUS_TYPE_CELLULAR = 1 + ,NSTAT_IFNET_DESC_LINK_STATUS_TYPE_WIFI = 2 +}; + +typedef struct nstat_ifnet_desc_link_status +{ + u_int32_t link_status_type; + union { + nstat_ifnet_desc_cellular_status cellular; + nstat_ifnet_desc_wifi_status wifi; + } u; +} nstat_ifnet_desc_link_status; + #ifndef IF_DESCSIZE #define IF_DESCSIZE 128 #endif typedef struct nstat_ifnet_descriptor { - char name[IFNAMSIZ+1]; - u_int32_t ifindex; - u_int64_t threshold; - unsigned int type; - char description[IF_DESCSIZE]; + char name[IFNAMSIZ+1]; + u_int32_t ifindex; + u_int64_t threshold; + unsigned int type; + char description[IF_DESCSIZE]; + 
nstat_ifnet_desc_link_status link_status; } nstat_ifnet_descriptor; typedef struct nstat_sysinfo_descriptor @@ -292,19 +475,22 @@ enum ,NSTAT_MSG_TYPE_ERROR = 1 // Requests - ,NSTAT_MSG_TYPE_ADD_SRC = 1001 + ,NSTAT_MSG_TYPE_ADD_SRC = 1001 ,NSTAT_MSG_TYPE_ADD_ALL_SRCS = 1002 - ,NSTAT_MSG_TYPE_REM_SRC = 1003 - ,NSTAT_MSG_TYPE_QUERY_SRC = 1004 + ,NSTAT_MSG_TYPE_REM_SRC = 1003 + ,NSTAT_MSG_TYPE_QUERY_SRC = 1004 ,NSTAT_MSG_TYPE_GET_SRC_DESC = 1005 - ,NSTAT_MSG_TYPE_SET_FILTER = 1006 + ,NSTAT_MSG_TYPE_SET_FILTER = 1006 + ,NSTAT_MSG_TYPE_GET_UPDATE = 1007 + ,NSTAT_MSG_TYPE_SUBSCRIBE_SYSINFO = 1008 // Responses/Notfications - ,NSTAT_MSG_TYPE_SRC_ADDED = 10001 - ,NSTAT_MSG_TYPE_SRC_REMOVED = 10002 - ,NSTAT_MSG_TYPE_SRC_DESC = 10003 - ,NSTAT_MSG_TYPE_SRC_COUNTS = 10004 - ,NSTAT_MSG_TYPE_SYSINFO_COUNTS = 10005 + ,NSTAT_MSG_TYPE_SRC_ADDED = 10001 + ,NSTAT_MSG_TYPE_SRC_REMOVED = 10002 + ,NSTAT_MSG_TYPE_SRC_DESC = 10003 + ,NSTAT_MSG_TYPE_SRC_COUNTS = 10004 + ,NSTAT_MSG_TYPE_SYSINFO_COUNTS = 10005 + ,NSTAT_MSG_TYPE_SRC_UPDATE = 10006 }; enum @@ -313,16 +499,50 @@ enum ,NSTAT_SRC_REF_INVALID = 0 }; +/* Source-level filters */ enum { - NSTAT_FILTER_NOZEROBYTES = 0x01, + NSTAT_FILTER_NOZEROBYTES = 0x00000001 +}; + +/* Provider-level filters */ +enum +{ + NSTAT_FILTER_ACCEPT_UNKNOWN = 0x00000001 + ,NSTAT_FILTER_ACCEPT_LOOPBACK = 0x00000002 + ,NSTAT_FILTER_ACCEPT_CELLULAR = 0x00000004 + ,NSTAT_FILTER_ACCEPT_WIFI = 0x00000008 + ,NSTAT_FILTER_ACCEPT_WIRED = 0x00000010 + ,NSTAT_FILTER_ACCEPT_ALL = 0x0000001F + ,NSTAT_FILTER_IFNET_FLAGS = 0x000000FF + + ,NSTAT_FILTER_PROVIDER_NOZEROBYTES = 0x00000100 + + ,NSTAT_FILTER_TCP_NO_LISTENER = 0x00001000 + ,NSTAT_FILTER_TCP_ONLY_LISTENER = 0x00002000 + ,NSTAT_FILTER_TCP_INTERFACE_ATTACH = 0x00004000 + ,NSTAT_FILTER_TCP_FLAGS = 0x0000F000 + + ,NSTAT_FILTER_UDP_INTERFACE_ATTACH = 0x00010000 + ,NSTAT_FILTER_UDP_FLAGS = 0x000F0000 + + ,NSTAT_FILTER_SUPPRESS_SRC_ADDED = 0x00100000 + ,NSTAT_FILTER_REQUIRE_SRC_ADDED = 0x00200000 +}; + +enum +{ + 
NSTAT_MSG_HDR_FLAG_SUPPORTS_AGGREGATE = 1 << 0, + NSTAT_MSG_HDR_FLAG_CONTINUATION = 1 << 1, + NSTAT_MSG_HDR_FLAG_CLOSING = 1 << 2, }; typedef struct nstat_msg_hdr { u_int64_t context; u_int32_t type; - u_int32_t pad; // unused for now + u_int16_t length; + u_int16_t flags; } nstat_msg_hdr; typedef struct nstat_msg_error @@ -342,6 +562,7 @@ typedef struct nstat_msg_add_all_srcs { nstat_msg_hdr hdr; nstat_provider_id_t provider; + u_int64_t filter; } nstat_msg_add_all_srcs; typedef struct nstat_msg_src_added @@ -391,6 +612,15 @@ typedef struct nstat_msg_src_counts nstat_counts counts; } nstat_msg_src_counts; +typedef struct nstat_msg_src_update +{ + nstat_msg_hdr hdr; + nstat_src_ref_t srcref; + nstat_counts counts; + nstat_provider_id_t provider; + u_int8_t data[]; +} nstat_msg_src_update; + typedef struct nstat_msg_src_removed { nstat_msg_hdr hdr; @@ -402,27 +632,87 @@ typedef struct nstat_msg_sysinfo_counts nstat_msg_hdr hdr; nstat_src_ref_t srcref; nstat_sysinfo_counts counts; -} nstat_msg_sysinfo_counts; +} __attribute__((packed)) nstat_msg_sysinfo_counts; + +#pragma pack(pop) + +#pragma mark -- Statitiscs about Network Statistics -- + +struct nstat_stats { + u_int32_t nstat_successmsgfailures; + u_int32_t nstat_sendcountfailures; + u_int32_t nstat_sysinfofailures; + u_int32_t nstat_srcupatefailures; + u_int32_t nstat_descriptionfailures; + u_int32_t nstat_msgremovedfailures; + u_int32_t nstat_srcaddedfailures; + u_int32_t nstat_msgerrorfailures; + u_int32_t nstat_copy_descriptor_failures; + u_int32_t nstat_provider_counts_failures; + u_int32_t nstat_control_send_description_failures; + u_int32_t nstat_control_send_goodbye_failures; + u_int32_t nstat_flush_accumulated_msgs_failures; + u_int32_t nstat_accumulate_msg_failures; + u_int32_t nstat_control_cleanup_source_failures; + u_int32_t nstat_handle_msg_failures; +}; + +#endif /* PRIVATE */ + +#ifdef XNU_KERNEL_PRIVATE +#include + +#pragma mark -- System Information Internal Support -- typedef struct 
nstat_sysinfo_mbuf_stats { - u_int32_t total_256b; - u_int32_t total_2kb; - u_int32_t total_4kb; - u_int32_t sbmb_total; - u_int32_t sb_atmbuflimit; - u_int32_t draincnt; - u_int32_t memreleased; + u_int32_t total_256b; /* Peak usage, 256B pool */ + u_int32_t total_2kb; /* Peak usage, 2KB pool */ + u_int32_t total_4kb; /* Peak usage, 4KB pool */ + u_int32_t total_16kb; /* Peak usage, 16KB pool */ + u_int32_t sbmb_total; /* Total mbufs in sock buffer pool */ + u_int32_t sb_atmbuflimit; /* Memory limit reached for socket buffer autoscaling */ + u_int32_t draincnt; /* Number of times mbuf pool has been drained under memory pressure */ + u_int32_t memreleased; /* Memory (bytes) released from mbuf pool to VM */ } nstat_sysinfo_mbuf_stats; typedef struct nstat_sysinfo_tcp_stats { - u_int32_t ipv4_avgrtt; - u_int32_t ipv6_avgrtt; - u_int32_t send_plr; - u_int32_t recv_plr; - u_int32_t send_tlrto_rate; - u_int32_t send_reorder_rate; + u_int32_t ipv4_avgrtt; /* Average RTT for IPv4 */ + u_int32_t ipv6_avgrtt; /* Average RTT for IPv6 */ + u_int32_t send_plr; /* Average uplink packet loss rate */ + u_int32_t recv_plr; /* Average downlink packet loss rate */ + u_int32_t send_tlrto_rate; /* Average rxt timeout after tail loss */ + u_int32_t send_reorder_rate; /* Average packet reordering rate */ + u_int32_t connection_attempts; /* TCP client connection attempts */ + u_int32_t connection_accepts; /* TCP server connection accepts */ + u_int32_t ecn_client_enabled; /* Global setting for ECN client side */ + u_int32_t ecn_server_enabled; /* Global setting for ECN server side */ + u_int32_t ecn_client_setup; /* Attempts to setup TCP client connection with ECN */ + u_int32_t ecn_server_setup; /* Attempts to setup TCP server connection with ECN */ + u_int32_t ecn_client_success; /* Number of successful negotiations of ECN for a client connection */ + u_int32_t ecn_server_success; /* Number of successful negotiations of ECN for a server connection */ + u_int32_t ecn_not_supported; /* 
Number of fallbacks to Non-ECN, no support from peer */ + u_int32_t ecn_lost_syn; /* Number of SYNs lost with ECN bits */ + u_int32_t ecn_lost_synack; /* Number of SYN-ACKs lost with ECN bits */ + u_int32_t ecn_recv_ce; /* Number of CEs received from network */ + u_int32_t ecn_recv_ece; /* Number of ECEs received from receiver */ + u_int32_t ecn_sent_ece; /* Number of ECEs sent in response to CE */ + u_int32_t ecn_conn_recv_ce; /* Number of connections using ECN received CE at least once */ + u_int32_t ecn_conn_recv_ece; /* Number of connections using ECN received ECE at least once */ + u_int32_t ecn_conn_plnoce; /* Number of connections using ECN seen packet loss but never received CE */ + u_int32_t ecn_conn_pl_ce; /* Number of connections using ECN seen packet loss and CE */ + u_int32_t ecn_conn_nopl_ce; /* Number of connections using ECN with no packet loss but received CE */ + u_int32_t tfo_syn_data_rcv; /* Number of SYN+data received with valid cookie */ + u_int32_t tfo_cookie_req_rcv;/* Number of TFO cookie-requests received */ + u_int32_t tfo_cookie_sent; /* Number of TFO-cookies offered to the client */ + u_int32_t tfo_cookie_invalid;/* Number of invalid TFO-cookies received */ + u_int32_t tfo_cookie_req; /* Number of SYNs with cookie request received*/ + u_int32_t tfo_cookie_rcv; /* Number of SYN/ACKs with Cookie received */ + u_int32_t tfo_syn_data_sent; /* Number of SYNs+data+cookie sent */ + u_int32_t tfo_syn_data_acked;/* Number of times our SYN+data has been acknowledged */ + u_int32_t tfo_syn_loss; /* Number of times SYN+TFO has been lost and we fallback */ + u_int32_t tfo_blackhole; /* Number of times SYN+TFO has been lost and we fallback */ } nstat_sysinfo_tcp_stats; typedef struct nstat_sysinfo_data @@ -434,13 +724,6 @@ typedef struct nstat_sysinfo_data } u; } nstat_sysinfo_data; -#pragma pack(pop) - -#endif /* PRIVATE */ - -#ifdef XNU_KERNEL_PRIVATE -#include - #pragma mark -- Generic Network Statistics Provider -- typedef void * 
nstat_provider_cookie_t; diff --git a/bsd/net/packet_mangler.c b/bsd/net/packet_mangler.c index b2666ee19..fbdc502e1 100644 --- a/bsd/net/packet_mangler.c +++ b/bsd/net/packet_mangler.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Apple Inc. All rights reserved. + * Copyright (c) 2015 Apple Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -138,6 +138,18 @@ static void pktmnglr_ipfilter_detach(void *cookie); static void chksm_update(mbuf_t data); +#define TCP_OPT_MULTIPATH_TCP 30 +#define MPTCP_SBT_VER_OFFSET 2 + +#define MPTCP_SUBTYPE_MPCAPABLE 0x0 +#define MPTCP_SUBTYPE_MPJOIN 0x1 +#define MPTCP_SUBTYPE_DSS 0x2 +#define MPTCP_SUBTYPE_ADD_ADDR 0x3 +#define MPTCP_SUBTYPE_REM_ADDR 0x4 +#define MPTCP_SUBTYPE_MP_PRIO 0x5 +#define MPTCP_SUBTYPE_MP_FAIL 0x6 +#define MPTCP_SUBTYPE_MP_FASTCLOSE 0x7 + /* * packet filter global read write lock */ @@ -755,57 +767,66 @@ pkt_mnglr_init(void) static errno_t pktmnglr_ipfilter_output(void *cookie, mbuf_t *data, ipf_pktopts_t options) { struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie; - unsigned char *ptr = (unsigned char *)mbuf_data(*data); - struct ip *ip = (struct ip *)(void *)ptr; - struct tcphdr *tcp; + struct ip ip; + struct tcphdr tcp; int optlen = 0; + errno_t error = 0; #pragma unused(tcp, optlen, options) - if (p_pkt_mnglr == NULL) { - return 0; + goto output_done; } if (!p_pkt_mnglr->activate) { - return 0; + goto output_done; + } + + if (p_pkt_mnglr->dir == IN) { + goto output_done; } if (data == NULL) { - PKT_MNGLR_LOG(LOG_INFO, "%s:%d Data pointer is NULL\n", __FILE__, __LINE__); - return 0; + PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL"); + goto output_done; } - if (p_pkt_mnglr->dir == IN) { - return 0; + /* Check for IP filter options */ + error = mbuf_copydata(*data, 0, sizeof(ip), &ip); + if (error) { + PKT_MNGLR_LOG(LOG_ERR, "Could not make local IP header copy"); + goto output_done; } - if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip->ip_v == 4)) { - return 0; + if 
((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip.ip_v == 4)) { + goto output_done; } - if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip->ip_v == 6)) { - return 0; + if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip.ip_v == 6)) { + goto output_done; } if (p_pkt_mnglr->lsaddr.ss_family == AF_INET) { struct sockaddr_in laddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->lsaddr)); - if (ip->ip_src.s_addr != laddr.sin_addr.s_addr) { - return 0; + if (ip.ip_src.s_addr != laddr.sin_addr.s_addr) { + goto output_done; } } if (p_pkt_mnglr->rsaddr.ss_family == AF_INET) { struct sockaddr_in raddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->rsaddr)); - if (ip->ip_dst.s_addr != raddr.sin_addr.s_addr) { - return 0; + if (ip.ip_dst.s_addr != raddr.sin_addr.s_addr) { + goto output_done; } } - if (ip->ip_v != 4) { - PKT_MNGLR_LOG(LOG_INFO, "%s:%d Not handling IP version %d\n", __FILE__, __LINE__, ip->ip_v); - return 0; + if (ip.ip_v != 4) { + PKT_MNGLR_LOG(LOG_INFO, + "%s:%d Not handling IP version %d\n", + __FILE__, __LINE__, ip.ip_v); + goto output_done; } +output_done: /* Not handling output flow */ return 0; } @@ -832,12 +853,12 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u goto input_done; } - if (data == NULL) { - PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL"); + if (p_pkt_mnglr->dir == OUT) { goto input_done; } - if (p_pkt_mnglr->dir == OUT) { + if (data == NULL) { + PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL"); goto input_done; } @@ -924,7 +945,7 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u /* Protocol actions */ switch (protocol) { case IPPROTO_TCP: - if (p_pkt_mnglr->proto_action_mask & PKT_MNGLR_TCP_ACT_NOP_MPTCP) { + if (p_pkt_mnglr->proto_action_mask) { int i = 0; tcp_optlen = (tcp.th_off << 2)-sizeof(struct tcphdr); PKT_MNGLR_LOG(LOG_INFO, "Packet from F5 is TCP\n"); @@ -944,18 +965,34 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u tcp_optlen--; i++; 
continue; - } else if ((tcp_opt_buf[i] != 0) && (tcp_opt_buf[i] != 0x1e)) { + } else if ((tcp_opt_buf[i] != 0) && (tcp_opt_buf[i] != TCP_OPT_MULTIPATH_TCP)) { PKT_MNGLR_LOG(LOG_INFO, "Skipping option %x\n", tcp_opt_buf[i]); tcp_optlen -= tcp_opt_buf[i+1]; i += tcp_opt_buf[i+1]; continue; - } else if (tcp_opt_buf[i] == 0x1e) { + } else if (tcp_opt_buf[i] == TCP_OPT_MULTIPATH_TCP) { int j = 0; int mptcpoptlen = tcp_opt_buf[i+1]; + uint8_t sbtver = tcp_opt_buf[i+MPTCP_SBT_VER_OFFSET]; + uint8_t subtype = sbtver >> 4; + + PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]); + PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP subtype %x\n", subtype); + if (subtype == MPTCP_SUBTYPE_DSS) { + PKT_MNGLR_LOG(LOG_INFO, "Got DSS option\n"); + PKT_MNGLR_LOG(LOG_INFO, "Protocol option mask: %d\n", p_pkt_mnglr->proto_action_mask); + if (p_pkt_mnglr->proto_action_mask & + PKT_MNGLR_TCP_ACT_DSS_DROP) { + goto drop_it; + } + } + PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]); - PKT_MNGLR_LOG(LOG_INFO, "Overwriting with NOP\n"); for (; j < mptcpoptlen; j++) { - tcp_opt_buf[i+j] = 0x1; + if (p_pkt_mnglr->proto_action_mask & + PKT_MNGLR_TCP_ACT_NOP_MPTCP) { + tcp_opt_buf[i+j] = 0x1; + } } tcp_optlen -= mptcpoptlen; i += mptcpoptlen; @@ -988,6 +1025,11 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u chksm_update(*data); input_done: return 0; + +drop_it: + PKT_MNGLR_LOG(LOG_INFO, "Dropping packet\n"); + mbuf_freem(*data); + return EJUSTRETURN; } static void pktmnglr_ipfilter_detach(void *cookie) diff --git a/bsd/net/packet_mangler.h b/bsd/net/packet_mangler.h index 7042fe0c4..b23849910 100644 --- a/bsd/net/packet_mangler.h +++ b/bsd/net/packet_mangler.h @@ -52,37 +52,38 @@ typedef enum { * to be set in the sc_id field of sockaddr_ctl for connect(2) * Note: the sc_unit is ephemeral */ -#define PACKET_MANGLER_CONTROL_NAME "com.apple.packet-mangler" - -#define PKT_MNGLR_OPT_PROTO_ACT_MASK 1 -#define PKT_MNGLR_OPT_IP_ACT_MASK 2 -#define 
PKT_MNGLR_OPT_LOCAL_IP 3 -#define PKT_MNGLR_OPT_REMOTE_IP 4 -#define PKT_MNGLR_OPT_LOCAL_PORT 5 -#define PKT_MNGLR_OPT_REMOTE_PORT 6 -#define PKT_MNGLR_OPT_DIRECTION 7 -#define PKT_MNGLR_OPT_PROTOCOL 8 -#define PKT_MNGLR_OPT_ACTIVATE 0xFFFFFFFF +#define PACKET_MANGLER_CONTROL_NAME "com.apple.packet-mangler" + +#define PKT_MNGLR_OPT_PROTO_ACT_MASK 1 +#define PKT_MNGLR_OPT_IP_ACT_MASK 2 +#define PKT_MNGLR_OPT_LOCAL_IP 3 +#define PKT_MNGLR_OPT_REMOTE_IP 4 +#define PKT_MNGLR_OPT_LOCAL_PORT 5 +#define PKT_MNGLR_OPT_REMOTE_PORT 6 +#define PKT_MNGLR_OPT_DIRECTION 7 +#define PKT_MNGLR_OPT_PROTOCOL 8 +#define PKT_MNGLR_OPT_ACTIVATE 0xFFFFFFFF /* Packet mangler action masks */ /* Packet Mangler TCP action mask */ -#define PKT_MNGLR_TCP_ACT_NOP_MPTCP 0x00000001 -#define PKT_MNGLR_TCP_ACT_SWAP_L_PORT 0x00000002 -#define PKT_MNGLR_TCP_ACT_SWAP_R_PORT 0x00000004 -#define PKT_MNGLR_TCP_ACT_CHK_EXTENDED 0x80000000 +#define PKT_MNGLR_TCP_ACT_NOP_MPTCP 0x00000001 +#define PKT_MNGLR_TCP_ACT_SWAP_L_PORT 0x00000002 +#define PKT_MNGLR_TCP_ACT_SWAP_R_PORT 0x00000004 +#define PKT_MNGLR_TCP_ACT_DSS_DROP 0x00000008 +#define PKT_MNGLR_TCP_ACT_CHK_EXTENDED 0x80000000 /* Packet Mangler IP action mask */ -#define PKT_MNGLR_IP_ACT_FLT_L_IP 0x00000001 -#define PKT_MNGLR_IP_ACT_FLT_R_IP 0x00000002 -#define PKT_MNGLR_IP_ACT_SWAP_L_IP 0x00000004 -#define PKT_MNGLR_IP_ACT_SWAP_R_IP 0x00000008 -#define PKT_MNGLR_IP_ACT_DROP_PACKET 0x00000010 -#define PKT_MNGLR_IP_ACT_CHK_EXTENDED 0x80000000 +#define PKT_MNGLR_IP_ACT_FLT_L_IP 0x00000001 +#define PKT_MNGLR_IP_ACT_FLT_R_IP 0x00000002 +#define PKT_MNGLR_IP_ACT_SWAP_L_IP 0x00000004 +#define PKT_MNGLR_IP_ACT_SWAP_R_IP 0x00000008 +#define PKT_MNGLR_IP_ACT_DROP_PACKET 0x00000010 +#define PKT_MNGLR_IP_ACT_CHK_EXTENDED 0x80000000 /* * How many filter may be active simultaneously */ -#define PKT_MNGLR_MAX_FILTER_COUNT 1 +#define PKT_MNGLR_MAX_FILTER_COUNT 1 #define PKT_MNGLR_VERSION_CURRENT 1 diff --git a/bsd/net/pf.c b/bsd/net/pf.c index 0a74fe5d2..58a20fe4f 
100644 --- a/bsd/net/pf.c +++ b/bsd/net/pf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Apple Inc. All rights reserved. + * Copyright (c) 2007-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -31,7 +31,8 @@ /* * Copyright (c) 2001 Daniel Hartmeier - * Copyright (c) 2002,2003 Henning Brauer + * Copyright (c) 2002 - 2013 Henning Brauer + * NAT64 - Copyright (c) 2010 Viagenie Inc. (http://www.viagenie.ca) * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -219,12 +220,16 @@ static int pf_check_threshold(struct pf_threshold *); static void pf_change_ap(int, struct mbuf *, struct pf_addr *, u_int16_t *, u_int16_t *, u_int16_t *, - struct pf_addr *, u_int16_t, u_int8_t, sa_family_t); + struct pf_addr *, u_int16_t, u_int8_t, sa_family_t, + sa_family_t, int); static int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *); #if INET6 static void pf_change_a6(struct pf_addr *, u_int16_t *, struct pf_addr *, u_int8_t); +void pf_change_addr(struct pf_addr *a, u_int16_t *c, + struct pf_addr *an, u_int8_t u, + sa_family_t af, sa_family_t afn); #endif /* INET6 */ static void pf_change_icmp(struct pf_addr *, u_int16_t *, struct pf_addr *, struct pf_addr *, u_int16_t, @@ -245,8 +250,7 @@ static struct pf_rule *pf_get_translation_aux(struct pf_pdesc *, struct mbuf *, int, int, struct pfi_kif *, struct pf_src_node **, struct pf_addr *, union pf_state_xport *, struct pf_addr *, - union pf_state_xport *, struct pf_addr *, - union pf_state_xport *); + union pf_state_xport *, union pf_state_xport *); static void pf_attach_state(struct pf_state_key *, struct pf_state *, int); static void pf_detach_state(struct pf_state *, int); @@ -302,7 +306,7 @@ static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, u_int16_t); static void pf_set_rt_ifp(struct pf_state *, - struct pf_addr *); + struct pf_addr 
*, sa_family_t af); static int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); static int pf_addr_wrap_neq(struct pf_addr_wrap *, @@ -421,13 +425,14 @@ pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif, #define STATE_ADDR_TRANSLATE(sk) \ (sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] || \ - ((sk)->af == AF_INET6 && \ + ((sk)->af_lan == AF_INET6 && \ ((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] || \ (sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] || \ (sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3])) #define STATE_TRANSLATE(sk) \ - (STATE_ADDR_TRANSLATE(sk) || \ + ((sk)->af_lan != (sk)->af_gwy || \ + STATE_ADDR_TRANSLATE(sk) || \ (sk)->lan.xport.port != (sk)->gwy.xport.port) #define STATE_GRE_TRANSLATE(sk) \ @@ -773,42 +778,34 @@ struct pf_esp_hdr { }; static __inline int -pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) +pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af) { - int diff; - - if (a->rule.ptr > b->rule.ptr) - return (1); - if (a->rule.ptr < b->rule.ptr) - return (-1); - if ((diff = a->af - b->af) != 0) - return (diff); - switch (a->af) { -#if INET + switch (af) { +#ifdef INET case AF_INET: - if (a->addr.addr32[0] > b->addr.addr32[0]) + if (a->addr32[0] > b->addr32[0]) return (1); - if (a->addr.addr32[0] < b->addr.addr32[0]) + if (a->addr32[0] < b->addr32[0]) return (-1); break; #endif /* INET */ -#if INET6 +#ifdef INET6 case AF_INET6: - if (a->addr.addr32[3] > b->addr.addr32[3]) + if (a->addr32[3] > b->addr32[3]) return (1); - if (a->addr.addr32[3] < b->addr.addr32[3]) + if (a->addr32[3] < b->addr32[3]) return (-1); - if (a->addr.addr32[2] > b->addr.addr32[2]) + if (a->addr32[2] > b->addr32[2]) return (1); - if (a->addr.addr32[2] < b->addr.addr32[2]) + if (a->addr32[2] < b->addr32[2]) return (-1); - if (a->addr.addr32[1] > b->addr.addr32[1]) + if (a->addr32[1] > b->addr32[1]) return (1); - if (a->addr.addr32[1] < b->addr.addr32[1]) + if (a->addr32[1] < 
b->addr32[1]) return (-1); - if (a->addr.addr32[0] > b->addr.addr32[0]) + if (a->addr32[0] > b->addr32[0]) return (1); - if (a->addr.addr32[0] < b->addr.addr32[0]) + if (a->addr32[0] < b->addr32[0]) return (-1); break; #endif /* INET6 */ @@ -816,6 +813,22 @@ pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) return (0); } +static __inline int +pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) +{ + int diff; + + if (a->rule.ptr > b->rule.ptr) + return (1); + if (a->rule.ptr < b->rule.ptr) + return (-1); + if ((diff = a->af - b->af) != 0) + return (diff); + if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0) + return (diff); + return (0); +} + static __inline int pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b) { @@ -824,7 +837,7 @@ pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b) if ((diff = a->proto - b->proto) != 0) return (diff); - if ((diff = a->af - b->af) != 0) + if ((diff = a->af_lan - b->af_lan) != 0) return (diff); extfilter = PF_EXTFILTER_APD; @@ -839,7 +852,7 @@ pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b) case IPPROTO_TCP: if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) return (diff); - if ((diff = a->ext.xport.port - b->ext.xport.port) != 0) + if ((diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) return (diff); break; @@ -850,21 +863,21 @@ pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b) if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) return (diff); if ((extfilter < PF_EXTFILTER_AD) && - (diff = a->ext.xport.port - b->ext.xport.port) != 0) + (diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) return (diff); break; case IPPROTO_GRE: if (a->proto_variant == PF_GRE_PPTP_VARIANT && a->proto_variant == b->proto_variant) { - if (!!(diff = a->ext.xport.call_id - - b->ext.xport.call_id)) + if (!!(diff = a->ext_lan.xport.call_id - + b->ext_lan.xport.call_id)) return (diff); } break; case 
IPPROTO_ESP: - if (!!(diff = a->ext.xport.spi - b->ext.xport.spi)) + if (!!(diff = a->ext_lan.xport.spi - b->ext_lan.xport.spi)) return (diff); break; @@ -872,57 +885,33 @@ pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b) break; } - switch (a->af) { + switch (a->af_lan) { #if INET case AF_INET: - if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) - return (1); - if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) - return (-1); + if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr, + a->af_lan)) != 0) + return (diff); + if (extfilter < PF_EXTFILTER_EI) { - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); + if ((diff = pf_addr_compare(&a->ext_lan.addr, + &b->ext_lan.addr, + a->af_lan)) != 0) + return (diff); } break; #endif /* INET */ #if INET6 case AF_INET6: - if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3]) - return (1); - if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3]) - return (-1); - if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2]) - return (1); - if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2]) - return (-1); - if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1]) - return (1); - if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1]) - return (-1); - if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) - return (1); - if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) - return (-1); + if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr, + a->af_lan)) != 0) + return (diff); + if (extfilter < PF_EXTFILTER_EI || - !PF_AZERO(&b->ext.addr, AF_INET6)) { - if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) - return (1); - if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) - return (-1); - if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) - return (1); - if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) - return (-1); - if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) - return (1); - if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) - return (-1); - if 
(a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); + !PF_AZERO(&b->ext_lan.addr, AF_INET6)) { + if ((diff = pf_addr_compare(&a->ext_lan.addr, + &b->ext_lan.addr, + a->af_lan)) != 0) + return (diff); } break; #endif /* INET6 */ @@ -954,7 +943,7 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b) if ((diff = a->proto - b->proto) != 0) return (diff); - if ((diff = a->af - b->af) != 0) + if ((diff = a->af_gwy - b->af_gwy) != 0) return (diff); extfilter = PF_EXTFILTER_APD; @@ -967,7 +956,7 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b) break; case IPPROTO_TCP: - if ((diff = a->ext.xport.port - b->ext.xport.port) != 0) + if ((diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) return (diff); if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) return (diff); @@ -980,7 +969,7 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b) if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) return (diff); if ((extfilter < PF_EXTFILTER_AD) && - (diff = a->ext.xport.port - b->ext.xport.port) != 0) + (diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) return (diff); break; @@ -1002,57 +991,31 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b) break; } - switch (a->af) { + switch (a->af_gwy) { #if INET case AF_INET: - if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) - return (1); - if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) - return (-1); + if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr, + a->af_gwy)) != 0) + return (diff); + if (extfilter < PF_EXTFILTER_EI) { - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); + if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr, + a->af_gwy)) != 0) + return (diff); } break; #endif /* INET */ #if INET6 case AF_INET6: - if 
(a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3]) - return (1); - if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3]) - return (-1); - if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2]) - return (1); - if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2]) - return (-1); - if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1]) - return (1); - if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1]) - return (-1); - if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) - return (1); - if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) - return (-1); + if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr, + a->af_gwy)) != 0) + return (diff); + if (extfilter < PF_EXTFILTER_EI || - !PF_AZERO(&b->ext.addr, AF_INET6)) { - if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) - return (1); - if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) - return (-1); - if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) - return (1); - if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) - return (-1); - if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) - return (1); - if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) - return (-1); - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); + !PF_AZERO(&b->ext_gwy.addr, AF_INET6)) { + if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr, + a->af_gwy)) != 0) + return (diff); } break; #endif /* INET6 */ @@ -1135,6 +1098,17 @@ pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir) case PF_IN: sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy, (struct pf_state_key *)key); + /* + * NAT64 is done only on input, for packets coming in from + * the LAN side, need to lookup the lan_ext tree. 
+ */ + if (sk == NULL) { + sk = RB_FIND(pf_state_tree_lan_ext, + &pf_statetbl_lan_ext, + (struct pf_state_key *)key); + if (sk && sk->af_lan == sk->af_gwy) + sk = NULL; + } break; default: panic("pf_find_state"); @@ -1165,6 +1139,17 @@ pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) case PF_IN: sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy, (struct pf_state_key *)key); + /* + * NAT64 is done only on input, for packets coming in from + * the LAN side, need to lookup the lan_ext tree. + */ + if ((sk == NULL) && pf_nat64_configured) { + sk = RB_FIND(pf_state_tree_lan_ext, + &pf_statetbl_lan_ext, + (struct pf_state_key *)key); + if (sk && sk->af_lan == sk->af_gwy) + sk = NULL; + } break; default: panic("pf_find_state_all"); @@ -1216,7 +1201,6 @@ static int pf_src_connlimit(struct pf_state **state) { int bad = 0; - (*state)->src_node->conn++; VERIFY((*state)->src_node->conn != 0); (*state)->src.tcp_est = 1; @@ -1246,12 +1230,12 @@ pf_src_connlimit(struct pf_state **state) if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf_src_connlimit: blocking address "); pf_print_host(&(*state)->src_node->addr, 0, - (*state)->state_key->af); + (*state)->state_key->af_lan); } bzero(&p, sizeof (p)); - p.pfra_af = (*state)->state_key->af; - switch ((*state)->state_key->af) { + p.pfra_af = (*state)->state_key->af_lan; + switch ((*state)->state_key->af_lan) { #if INET case AF_INET: p.pfra_net = 32; @@ -1282,15 +1266,15 @@ pf_src_connlimit(struct pf_state **state) * from the same rule if PF_FLUSH_GLOBAL is not * set) */ - if (sk->af == - (*state)->state_key->af && + if (sk->af_lan == + (*state)->state_key->af_lan && (((*state)->state_key->direction == PF_OUT && PF_AEQ(&(*state)->src_node->addr, - &sk->lan.addr, sk->af)) || + &sk->lan.addr, sk->af_lan)) || ((*state)->state_key->direction == PF_IN && PF_AEQ(&(*state)->src_node->addr, - &sk->ext.addr, sk->af))) && + &sk->ext_lan.addr, sk->af_lan))) && ((*state)->rule.ptr->flush & PF_FLUSH_GLOBAL || 
(*state)->rule.ptr == st->rule.ptr)) { @@ -1402,13 +1386,16 @@ pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif) break; } printf(" lan: "); - pf_print_sk_host(&sk->lan, sk->af, sk->proto, + pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto, sk->proto_variant); printf(" gwy: "); - pf_print_sk_host(&sk->gwy, sk->af, sk->proto, + pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto, + sk->proto_variant); + printf(" ext_lan: "); + pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto, sk->proto_variant); - printf(" ext: "); - pf_print_sk_host(&sk->ext, sk->af, sk->proto, + printf(" ext_gwy: "); + pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto, sk->proto_variant); if (s->sync_flags & PFSTATE_FROMSYNC) printf(" (from sync)"); @@ -1655,9 +1642,9 @@ pf_unlink_state(struct pf_state *cur) lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); if (cur->src.state == PF_TCPS_PROXY_DST) { - pf_send_tcp(cur->rule.ptr, cur->state_key->af, - &cur->state_key->ext.addr, &cur->state_key->lan.addr, - cur->state_key->ext.xport.port, + pf_send_tcp(cur->rule.ptr, cur->state_key->af_lan, + &cur->state_key->ext_lan.addr, &cur->state_key->lan.addr, + cur->state_key->ext_lan.xport.port, cur->state_key->lan.xport.port, cur->src.seqhi, cur->src.seqlo + 1, TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); @@ -1900,11 +1887,15 @@ pf_print_state(struct pf_state *s) printf("%u ", sk->proto); break; } - pf_print_sk_host(&sk->lan, sk->af, sk->proto, sk->proto_variant); + pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto, sk->proto_variant); + printf(" "); + pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto, sk->proto_variant); printf(" "); - pf_print_sk_host(&sk->gwy, sk->af, sk->proto, sk->proto_variant); + pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto, + sk->proto_variant); printf(" "); - pf_print_sk_host(&sk->ext, sk->af, sk->proto, sk->proto_variant); + pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto, + sk->proto_variant); printf(" [lo=%u high=%u win=%u modulator=%u", 
s->src.seqlo, s->src.seqhi, s->src.max_win, s->src.seqdiff); if (s->src.wscale && s->dst.wscale) @@ -2034,21 +2025,21 @@ pf_calc_state_key_flowhash(struct pf_state_key *sk) uint32_t flowhash = 0; bzero(&fh, sizeof (fh)); - if (PF_ALEQ(&sk->lan.addr, &sk->ext.addr, sk->af)) { + if (PF_ALEQ(&sk->lan.addr, &sk->ext_lan.addr, sk->af_lan)) { bcopy(&sk->lan.addr, &fh.ap1.addr, sizeof (fh.ap1.addr)); - bcopy(&sk->ext.addr, &fh.ap2.addr, sizeof (fh.ap2.addr)); + bcopy(&sk->ext_lan.addr, &fh.ap2.addr, sizeof (fh.ap2.addr)); } else { - bcopy(&sk->ext.addr, &fh.ap1.addr, sizeof (fh.ap1.addr)); + bcopy(&sk->ext_lan.addr, &fh.ap1.addr, sizeof (fh.ap1.addr)); bcopy(&sk->lan.addr, &fh.ap2.addr, sizeof (fh.ap2.addr)); } - if (sk->lan.xport.spi <= sk->ext.xport.spi) { + if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) { fh.ap1.xport.spi = sk->lan.xport.spi; - fh.ap2.xport.spi = sk->ext.xport.spi; + fh.ap2.xport.spi = sk->ext_lan.xport.spi; } else { - fh.ap1.xport.spi = sk->ext.xport.spi; + fh.ap1.xport.spi = sk->ext_lan.xport.spi; fh.ap2.xport.spi = sk->lan.xport.spi; } - fh.af = sk->af; + fh.af = sk->af_lan; fh.proto = sk->proto; try_again: @@ -2106,84 +2097,152 @@ pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) return (l); } +/* + * change ip address & port + * dir : packet direction + * a : address to be changed + * p : port to be changed + * ic : ip header checksum + * pc : protocol checksum + * an : new ip address + * pn : new port + * u : should be 1 if UDP packet else 0 + * af : address family of the packet + * afn : address family of the new address + * ua : should be 1 if ip address needs to be updated in the packet else + * only the checksum is recalculated & updated. 
+ */ static void pf_change_ap(int dir, struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn, - u_int8_t u, sa_family_t af) + u_int8_t u, sa_family_t af, sa_family_t afn, int ua) { struct pf_addr ao; u_int16_t po = *p; PF_ACPY(&ao, a, af); - PF_ACPY(a, an, af); + if (ua) + PF_ACPY(a, an, afn); *p = pn; switch (af) { #if INET case AF_INET: - *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, - ao.addr16[0], an->addr16[0], 0), - ao.addr16[1], an->addr16[1], 0); - *p = pn; + switch (afn) { + case AF_INET: + *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, + ao.addr16[0], an->addr16[0], 0), + ao.addr16[1], an->addr16[1], 0); + *p = pn; /* * If the packet is originated from an ALG on the NAT gateway * (source address is loopback or local), in which case the * TCP/UDP checksum field contains the pseudo header checksum - * that's not yet complemented. + * that's not yet complemented. A packet generated locally + * will have UDP/TCP CSUM flag set (gets set in protocol + * output). 
*/ - if (dir == PF_OUT && m != NULL && - (m->m_flags & M_PKTHDR) && - (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))) { + if (dir == PF_OUT && m != NULL && + (m->m_flags & M_PKTHDR) && + (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))) { /* Pseudo-header checksum does not include ports */ - *pc = ~pf_cksum_fixup(pf_cksum_fixup(~*pc, - ao.addr16[0], an->addr16[0], u), - ao.addr16[1], an->addr16[1], u); - } else { - *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, + *pc = ~pf_cksum_fixup(pf_cksum_fixup(~*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u); + } else { + *pc = + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + *pc, ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + po, pn, u); + } + break; +#ifdef INET6 + case AF_INET6: + *p = pn; + *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u), + 0, an->addr16[2], u), + 0, an->addr16[3], u), + 0, an->addr16[4], u), + 0, an->addr16[5], u), + 0, an->addr16[6], u), + 0, an->addr16[7], u), po, pn, u); + break; +#endif /* INET6 */ } break; #endif /* INET */ #if INET6 case AF_INET6: + switch (afn) { + case AF_INET6: /* * If the packet is originated from an ALG on the NAT gateway * (source address is loopback or local), in which case the * TCP/UDP checksum field contains the pseudo header checksum * that's not yet complemented. + * A packet generated locally + * will have UDP/TCP CSUM flag set (gets set in protocol + * output). 
*/ - if (dir == PF_OUT && m != NULL && - (m->m_flags & M_PKTHDR) && - (m->m_pkthdr.csum_flags & (CSUM_TCPIPV6 | CSUM_UDPIPV6))) { + if (dir == PF_OUT && m != NULL && + (m->m_flags & M_PKTHDR) && + (m->m_pkthdr.csum_flags & (CSUM_TCPIPV6 | + CSUM_UDPIPV6))) { /* Pseudo-header checksum does not include ports */ - *pc = ~pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( - pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( - pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(~*pc, - ao.addr16[0], an->addr16[0], u), - ao.addr16[1], an->addr16[1], u), - ao.addr16[2], an->addr16[2], u), - ao.addr16[3], an->addr16[3], u), - ao.addr16[4], an->addr16[4], u), - ao.addr16[5], an->addr16[5], u), - ao.addr16[6], an->addr16[6], u), - ao.addr16[7], an->addr16[7], u), - po, pn, u); - } else { + *pc = + ~pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + ~*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], an->addr16[2], u), + ao.addr16[3], an->addr16[3], u), + ao.addr16[4], an->addr16[4], u), + ao.addr16[5], an->addr16[5], u), + ao.addr16[6], an->addr16[6], u), + ao.addr16[7], an->addr16[7], u); + } else { + *pc = + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + *pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], an->addr16[2], u), + ao.addr16[3], an->addr16[3], u), + ao.addr16[4], an->addr16[4], u), + ao.addr16[5], an->addr16[5], u), + ao.addr16[6], an->addr16[6], u), + ao.addr16[7], an->addr16[7], u), + po, pn, u); + } + break; +#ifdef INET + case AF_INET: *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( - pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( - pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, - ao.addr16[0], an->addr16[0], u), - ao.addr16[1], an->addr16[1], u), - ao.addr16[2], an->addr16[2], u), - ao.addr16[3], an->addr16[3], u), - 
ao.addr16[4], an->addr16[4], u), - ao.addr16[5], an->addr16[5], u), - ao.addr16[6], an->addr16[6], u), - ao.addr16[7], an->addr16[7], u), - po, pn, u); + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], 0, u), + ao.addr16[3], 0, u), + ao.addr16[4], 0, u), + ao.addr16[5], 0, u), + ao.addr16[6], 0, u), + ao.addr16[7], 0, u), + po, pn, u); + break; +#endif /* INET */ } break; #endif /* INET6 */ @@ -2224,6 +2283,60 @@ pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) ao.addr16[6], an->addr16[6], u), ao.addr16[7], an->addr16[7], u); } + +void +pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u, + sa_family_t af, sa_family_t afn) +{ + struct pf_addr ao; + + PF_ACPY(&ao, a, af); + PF_ACPY(a, an, afn); + + switch (af) { + case AF_INET: + switch (afn) { + case AF_INET: + pf_change_a(a, c, an->v4.s_addr, u); + break; + case AF_INET6: + *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*c, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + 0, an->addr16[2], u), + 0, an->addr16[3], u), + 0, an->addr16[4], u), + 0, an->addr16[5], u), + 0, an->addr16[6], u), + 0, an->addr16[7], u); + break; + } + break; + case AF_INET6: + switch (afn) { + case AF_INET: + *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*c, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], 0, u), + ao.addr16[3], 0, u), + ao.addr16[4], 0, u), + ao.addr16[5], 0, u), + ao.addr16[6], 0, u), + ao.addr16[7], 0, u); + break; + case AF_INET6: + pf_change_a6(a, c, an, u); + break; + } + break; + } +} + #endif /* INET6 */ static void @@ -3036,13 +3149,13 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr 
*saddr, k.rule.ptr = NULL; pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); - if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { - PF_ACPY(naddr, &(*sn)->raddr, af); + if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, rpool->af)) { + PF_ACPY(naddr, &(*sn)->raddr, rpool->af); if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf_map_addr: src tracking maps "); pf_print_host(&k.addr, 0, af); printf(" to "); - pf_print_host(naddr, 0, af); + pf_print_host(naddr, 0, rpool->af); printf("\n"); } return (0); @@ -3054,7 +3167,7 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (rpool->cur->addr.p.dyn == NULL) return (1); - switch (af) { + switch (rpool->af) { #if INET case AF_INET: if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && @@ -3086,13 +3199,14 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, switch (rpool->opts & PF_POOL_TYPEMASK) { case PF_POOL_NONE: - PF_ACPY(naddr, raddr, af); + PF_ACPY(naddr, raddr, rpool->af); break; case PF_POOL_BITMASK: + ASSERT(af == rpool->af); PF_POOLMASK(naddr, raddr, rmask, saddr, af); break; case PF_POOL_RANDOM: - if (init_addr != NULL && PF_AZERO(init_addr, af)) { + if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) { switch (af) { #if INET case AF_INET: @@ -3122,15 +3236,19 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, break; #endif /* INET6 */ } - PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); - PF_ACPY(init_addr, naddr, af); + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, + rpool->af); + PF_ACPY(init_addr, naddr, rpool->af); } else { - PF_AINC(&rpool->counter, af); - PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + PF_AINC(&rpool->counter, rpool->af); + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, + rpool->af); } break; case PF_POOL_SRCHASH: + ASSERT(af == rpool->af); + PF_POOLMASK(naddr, raddr, rmask, saddr, af); pf_hash(saddr, (struct pf_addr 
*)(void *)&hash, &rpool->key, af); PF_POOLMASK(naddr, raddr, rmask, @@ -3140,7 +3258,7 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) + &raddr, &rmask, rpool->af)) goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (rpool->cur->addr.p.dyn != NULL && @@ -3148,7 +3266,8 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, &rpool->tblidx, &rpool->counter, &raddr, &rmask, af)) goto get_addr; - } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) + } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, + rpool->af)) goto get_addr; try_next: @@ -3158,8 +3277,9 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, rpool->tblidx = -1; if (pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) { - /* table contains no address of type 'af' */ + &raddr, &rmask, rpool->af)) { + /* table contains no address of type + * 'rpool->af' */ if (rpool->cur != acur) goto try_next; return (1); @@ -3170,8 +3290,9 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, return (1); if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) { - /* table contains no address of type 'af' */ + &raddr, &rmask, rpool->af)) { + /* table contains no address of type + * 'rpool->af' */ if (rpool->cur != acur) goto try_next; return (1); @@ -3179,23 +3300,23 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, } else { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; - PF_ACPY(&rpool->counter, raddr, af); + PF_ACPY(&rpool->counter, raddr, rpool->af); } get_addr: - PF_ACPY(naddr, &rpool->counter, af); - if (init_addr != NULL && PF_AZERO(init_addr, af)) - PF_ACPY(init_addr, naddr, af); - PF_AINC(&rpool->counter, af); + 
PF_ACPY(naddr, &rpool->counter, rpool->af); + if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) + PF_ACPY(init_addr, naddr, rpool->af); + PF_AINC(&rpool->counter, rpool->af); break; } if (*sn != NULL) - PF_ACPY(&(*sn)->raddr, naddr, af); + PF_ACPY(&(*sn)->raddr, naddr, rpool->af); if (pf_status.debug >= PF_DEBUG_MISC && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { printf("pf_map_addr: selected address "); - pf_print_host(naddr, 0, af); + pf_print_host(naddr, 0, rpool->af); printf("\n"); } @@ -3248,14 +3369,15 @@ pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r, continue; if (s->nat_rule.ptr != r) continue; - if (sk->proto != IPPROTO_UDP || sk->af != af) + if (sk->proto != IPPROTO_UDP || + sk->af_lan != af) continue; if (sk->lan.xport.port != sxport->port) continue; if (PF_ANEQ(&sk->lan.addr, saddr, af)) continue; if (r->extmap < PF_EXTMAP_EI && - PF_ANEQ(&sk->ext.addr, daddr, af)) + PF_ANEQ(&sk->ext_lan.addr, daddr, af)) continue; nxport->port = sk->gwy.xport.port; @@ -3275,7 +3397,7 @@ pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r, continue; if (s->nat_rule.ptr != r) continue; - if (sk->proto != IPPROTO_TCP || sk->af != af) + if (sk->proto != IPPROTO_TCP || sk->af_lan != af) continue; if (sk->lan.xport.port != sxport->port) continue; @@ -3286,10 +3408,10 @@ pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r, } } do { - key.af = af; + key.af_gwy = af; key.proto = proto; - PF_ACPY(&key.ext.addr, daddr, key.af); - PF_ACPY(&key.gwy.addr, naddr, key.af); + PF_ACPY(&key.ext_gwy.addr, daddr, key.af_gwy); + PF_ACPY(&key.gwy.addr, naddr, key.af_gwy); switch (proto) { case IPPROTO_UDP: key.proto_variant = r->extfilter; @@ -3299,9 +3421,10 @@ pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r, break; } if (dxport) - key.ext.xport = *dxport; + key.ext_gwy.xport = *dxport; else - memset(&key.ext.xport, 0, sizeof (key.ext.xport)); + memset(&key.ext_gwy.xport, 0, + sizeof 
(key.ext_gwy.xport)); /* * port search; start random, step; * similar 2 portloop in in_pcbbind @@ -3312,7 +3435,7 @@ pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r, key.gwy.xport = *dxport; else memset(&key.gwy.xport, 0, - sizeof (key.ext.xport)); + sizeof (key.gwy.xport)); if (pf_find_state_all(&key, PF_IN, NULL) == NULL) return (0); } else if (low == 0 && high == 0) { @@ -3462,19 +3585,37 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid, NULL)) return (NULL); if (rm != NULL && (rm->action == PF_NONAT || - rm->action == PF_NORDR || rm->action == PF_NOBINAT)) + rm->action == PF_NORDR || rm->action == PF_NOBINAT || + rm->action == PF_NONAT64)) return (NULL); return (rm); } +/* + * Get address translation information for NAT/BINAT/RDR + * pd : pf packet descriptor + * m : mbuf holding the packet + * off : offset to protocol header + * direction : direction of packet + * kif : pf interface info obtained from the packet's recv interface + * sn : source node pointer (output) + * saddr : packet source address + * sxport : packet source port + * daddr : packet destination address + * dxport : packet destination port + * nsxport : translated source port (output) + * + * Translated source & destination address are updated in pd->nsaddr & + * pd->ndaddr + */ static struct pf_rule * pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, struct pfi_kif *kif, struct pf_src_node **sn, struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr, - union pf_state_xport *dxport, struct pf_addr *naddr, - union pf_state_xport *nxport) + union pf_state_xport *dxport, union pf_state_xport *nsxport) { struct pf_rule *r = NULL; + pd->naf = pd->af; if (direction == PF_OUT) { r = pf_match_translation(pd, m, off, direction, kif, saddr, @@ -3494,14 +3635,32 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, } if (r != NULL) { + struct 
pf_addr *nsaddr = &pd->naddr; + struct pf_addr *ndaddr = &pd->ndaddr; + + *nsaddr = *saddr; + *ndaddr = *daddr; + switch (r->action) { case PF_NONAT: + case PF_NONAT64: case PF_NOBINAT: case PF_NORDR: return (NULL); case PF_NAT: + case PF_NAT64: + /* + * we do NAT64 on incoming path and we call ip_input + * which asserts receive interface to be not NULL. + * The below check is to prevent NAT64 action on any + * packet generated by local entity using synthesized + * IPv6 address. + */ + if ((r->action == PF_NAT64) && (direction == PF_OUT)) + return (NULL); + if (pf_get_sport(pd, kif, r, saddr, sxport, daddr, - dxport, naddr, nxport, sn)) { + dxport, nsaddr, nsxport, sn)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: NAT proxy port allocation " "(%u-%u) failed\n", @@ -3509,6 +3668,14 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, r->rpool.proxy_port[1])); return (NULL); } + /* + * For NAT64 the destination IPv4 address is derived + * from the last 32 bits of synthesized IPv6 address + */ + if (r->action == PF_NAT64) { + ndaddr->v4.s_addr = daddr->addr32[3]; + pd->naf = AF_INET; + } break; case PF_BINAT: switch (direction) { @@ -3523,7 +3690,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, if (r->rpool.cur->addr.p.dyn-> pfid_acnt4 < 1) return (NULL); - PF_POOLMASK(naddr, + PF_POOLMASK(nsaddr, &r->rpool.cur->addr.p.dyn-> pfid_addr4, &r->rpool.cur->addr.p.dyn-> @@ -3536,7 +3703,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, if (r->rpool.cur->addr.p.dyn-> pfid_acnt6 < 1) return (NULL); - PF_POOLMASK(naddr, + PF_POOLMASK(nsaddr, &r->rpool.cur->addr.p.dyn-> pfid_addr6, &r->rpool.cur->addr.p.dyn-> @@ -3546,7 +3713,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, #endif /* INET6 */ } } else { - PF_POOLMASK(naddr, + PF_POOLMASK(nsaddr, &r->rpool.cur->addr.v.a.addr, &r->rpool.cur->addr.v.a.mask, saddr, pd->af); @@ -3562,7 +3729,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf 
*m, int off, if (r->src.addr.p.dyn-> pfid_acnt4 < 1) return (NULL); - PF_POOLMASK(naddr, + PF_POOLMASK(ndaddr, &r->src.addr.p.dyn-> pfid_addr4, &r->src.addr.p.dyn-> @@ -3575,7 +3742,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, if (r->src.addr.p.dyn-> pfid_acnt6 < 1) return (NULL); - PF_POOLMASK(naddr, + PF_POOLMASK(ndaddr, &r->src.addr.p.dyn-> pfid_addr6, &r->src.addr.p.dyn-> @@ -3585,7 +3752,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, #endif /* INET6 */ } } else - PF_POOLMASK(naddr, + PF_POOLMASK(ndaddr, &r->src.addr.v.a.addr, &r->src.addr.v.a.mask, daddr, pd->af); @@ -3604,7 +3771,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, if (r->dst.addr.p.dyn-> pfid_acnt4 < 1) return (NULL); - PF_POOLMASK(naddr, + PF_POOLMASK(nsaddr, &r->dst.addr.p.dyn-> pfid_addr4, &r->dst.addr.p.dyn-> @@ -3617,7 +3784,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, if (r->dst.addr.p.dyn-> pfid_acnt6 < 1) return (NULL); - PF_POOLMASK(naddr, + PF_POOLMASK(nsaddr, &r->dst.addr.p.dyn-> pfid_addr6, &r->dst.addr.p.dyn-> @@ -3627,26 +3794,26 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, #endif /* INET6 */ } } else { - PF_POOLMASK(naddr, + PF_POOLMASK(nsaddr, &r->dst.addr.v.a.addr, &r->dst.addr.v.a.mask, daddr, pd->af); } - if (nxport && r->dst.xport.range.port[0]) - nxport->port = + if (nsxport && r->dst.xport.range.port[0]) + nsxport->port = r->dst.xport.range.port[0]; break; case PF_IN: if (pf_map_addr(pd->af, r, saddr, - naddr, NULL, sn)) + ndaddr, NULL, sn)) return (NULL); if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) - PF_POOLMASK(naddr, naddr, + PF_POOLMASK(ndaddr, ndaddr, &r->rpool.cur->addr.v.a.mask, daddr, pd->af); - if (nxport && dxport) { + if (nsxport && dxport) { if (r->rpool.proxy_port[1]) { u_int32_t tmp_nport; @@ -3661,10 +3828,10 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, /* wrap around if necessary */ 
if (tmp_nport > 65535) tmp_nport -= 65535; - nxport->port = + nsxport->port = htons((u_int16_t)tmp_nport); } else if (r->rpool.proxy_port[0]) { - nxport->port = htons(r->rpool. + nsxport->port = htons(r->rpool. proxy_port[0]); } } @@ -3925,29 +4092,21 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) } static void -pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) +pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af) { struct pf_rule *r = s->rule.ptr; s->rt_kif = NULL; + if (!r->rt || r->rt == PF_FASTROUTE) return; - switch (s->state_key->af) { -#if INET - case AF_INET: - pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, - &s->nat_src_node); - s->rt_kif = r->rpool.cur->kif; - break; -#endif /* INET */ -#if INET6 - case AF_INET6: - pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, + if ((af == AF_INET) || (af == AF_INET6)) { + pf_map_addr(af, r, saddr, &s->rt_addr, NULL, &s->nat_src_node); s->rt_kif = r->rpool.cur->kif; - break; -#endif /* INET6 */ } + + return; } static void @@ -4001,8 +4160,10 @@ pf_alloc_state_key(struct pf_state *s, struct pf_state_key *psk) if (psk != NULL) { bcopy(&psk->lan, &sk->lan, sizeof (sk->lan)); bcopy(&psk->gwy, &sk->gwy, sizeof (sk->gwy)); - bcopy(&psk->ext, &sk->ext, sizeof (sk->ext)); - sk->af = psk->af; + bcopy(&psk->ext_lan, &sk->ext_lan, sizeof (sk->ext_lan)); + bcopy(&psk->ext_gwy, &sk->ext_gwy, sizeof (sk->ext_gwy)); + sk->af_lan = psk->af_lan; + sk->af_gwy = psk->af_gwy; sk->proto = psk->proto; sk->direction = psk->direction; sk->proto_variant = psk->proto_variant; @@ -4044,6 +4205,383 @@ pf_tcp_iss(struct pf_pdesc *pd) return (digest[0] + random() + pf_tcp_iss_off); } +/* + * This routine is called to perform address family translation on the + * inner IP header (that may come as payload) of an ICMP(v4/6) error + * response. 
+ */ +static int +pf_change_icmp_af(struct mbuf *m, int off, + struct pf_pdesc *pd, struct pf_pdesc *pd2, struct pf_addr *src, + struct pf_addr *dst, sa_family_t af, sa_family_t naf) +{ + struct mbuf *n = NULL; + struct ip *ip4 = NULL; + struct ip6_hdr *ip6 = NULL; + int hlen, olen, mlen; + + if (af == naf || (af != AF_INET && af != AF_INET6) || + (naf != AF_INET && naf != AF_INET6)) + return (-1); + + /* split the mbuf chain on the inner ip/ip6 header boundary */ + if ((n = m_split(m, off, M_DONTWAIT)) == NULL) + return (-1); + + /* old header */ + olen = pd2->off - off; + /* new header */ + hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6); + + /* trim old header */ + m_adj(n, olen); + + /* prepend a new one */ + if (M_PREPEND(n, hlen, M_DONTWAIT, 0) == NULL) + return (-1); + + /* translate inner ip/ip6 header */ + switch (naf) { + case AF_INET: + ip4 = mtod(n, struct ip *); + bzero(ip4, sizeof(*ip4)); + ip4->ip_v = IPVERSION; + ip4->ip_hl = sizeof(*ip4) >> 2; + ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen); + ip4->ip_id = htons(ip_randomid()); + ip4->ip_off = htons(IP_DF); + ip4->ip_ttl = pd2->ttl; + if (pd2->proto == IPPROTO_ICMPV6) + ip4->ip_p = IPPROTO_ICMP; + else + ip4->ip_p = pd2->proto; + ip4->ip_src = src->v4; + ip4->ip_dst = dst->v4; + ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2); + break; + case AF_INET6: + ip6 = mtod(n, struct ip6_hdr *); + bzero(ip6, sizeof(*ip6)); + ip6->ip6_vfc = IPV6_VERSION; + ip6->ip6_plen = htons(pd2->tot_len - olen); + if (pd2->proto == IPPROTO_ICMP) + ip6->ip6_nxt = IPPROTO_ICMPV6; + else + ip6->ip6_nxt = pd2->proto; + if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) + ip6->ip6_hlim = IPV6_DEFHLIM; + else + ip6->ip6_hlim = pd2->ttl; + ip6->ip6_src = src->v6; + ip6->ip6_dst = dst->v6; + break; + } + + /* adjust payload offset and total packet length */ + pd2->off += hlen - olen; + pd->tot_len += hlen - olen; + + /* merge modified inner packet with the original header */ + mlen = n->m_pkthdr.len; + m_cat(m, n); + 
m->m_pkthdr.len += mlen; + + return (0); +} + +#define PTR_IP(field) ((int32_t)offsetof(struct ip, field)) +#define PTR_IP6(field) ((int32_t)offsetof(struct ip6_hdr, field)) + +static int +pf_translate_icmp_af(int af, void *arg) +{ + struct icmp *icmp4; + struct icmp6_hdr *icmp6; + u_int32_t mtu; + int32_t ptr = -1; + u_int8_t type; + u_int8_t code; + + switch (af) { + case AF_INET: + icmp6 = arg; + type = icmp6->icmp6_type; + code = icmp6->icmp6_code; + mtu = ntohl(icmp6->icmp6_mtu); + + switch (type) { + case ICMP6_ECHO_REQUEST: + type = ICMP_ECHO; + break; + case ICMP6_ECHO_REPLY: + type = ICMP_ECHOREPLY; + break; + case ICMP6_DST_UNREACH: + type = ICMP_UNREACH; + switch (code) { + case ICMP6_DST_UNREACH_NOROUTE: + case ICMP6_DST_UNREACH_BEYONDSCOPE: + case ICMP6_DST_UNREACH_ADDR: + code = ICMP_UNREACH_HOST; + break; + case ICMP6_DST_UNREACH_ADMIN: + code = ICMP_UNREACH_HOST_PROHIB; + break; + case ICMP6_DST_UNREACH_NOPORT: + code = ICMP_UNREACH_PORT; + break; + default: + return (-1); + } + break; + case ICMP6_PACKET_TOO_BIG: + type = ICMP_UNREACH; + code = ICMP_UNREACH_NEEDFRAG; + mtu -= 20; + break; + case ICMP6_TIME_EXCEEDED: + type = ICMP_TIMXCEED; + break; + case ICMP6_PARAM_PROB: + switch (code) { + case ICMP6_PARAMPROB_HEADER: + type = ICMP_PARAMPROB; + code = ICMP_PARAMPROB_ERRATPTR; + ptr = ntohl(icmp6->icmp6_pptr); + + if (ptr == PTR_IP6(ip6_vfc)) + ; /* preserve */ + else if (ptr == PTR_IP6(ip6_vfc) + 1) + ptr = PTR_IP(ip_tos); + else if (ptr == PTR_IP6(ip6_plen) || + ptr == PTR_IP6(ip6_plen) + 1) + ptr = PTR_IP(ip_len); + else if (ptr == PTR_IP6(ip6_nxt)) + ptr = PTR_IP(ip_p); + else if (ptr == PTR_IP6(ip6_hlim)) + ptr = PTR_IP(ip_ttl); + else if (ptr >= PTR_IP6(ip6_src) && + ptr < PTR_IP6(ip6_dst)) + ptr = PTR_IP(ip_src); + else if (ptr >= PTR_IP6(ip6_dst) && + ptr < (int32_t)sizeof(struct ip6_hdr)) + ptr = PTR_IP(ip_dst); + else { + return (-1); + } + break; + case ICMP6_PARAMPROB_NEXTHEADER: + type = ICMP_UNREACH; + code = ICMP_UNREACH_PROTOCOL; 
+ break; + default: + return (-1); + } + break; + default: + return (-1); + } + icmp6->icmp6_type = type; + icmp6->icmp6_code = code; + /* aligns well with a icmpv4 nextmtu */ + icmp6->icmp6_mtu = htonl(mtu); + /* icmpv4 pptr is a one most significant byte */ + if (ptr >= 0) + icmp6->icmp6_pptr = htonl(ptr << 24); + break; + + case AF_INET6: + icmp4 = arg; + type = icmp4->icmp_type; + code = icmp4->icmp_code; + mtu = ntohs(icmp4->icmp_nextmtu); + + switch (type) { + case ICMP_ECHO: + type = ICMP6_ECHO_REQUEST; + break; + case ICMP_ECHOREPLY: + type = ICMP6_ECHO_REPLY; + break; + case ICMP_UNREACH: + type = ICMP6_DST_UNREACH; + switch (code) { + case ICMP_UNREACH_NET: + case ICMP_UNREACH_HOST: + case ICMP_UNREACH_NET_UNKNOWN: + case ICMP_UNREACH_HOST_UNKNOWN: + case ICMP_UNREACH_ISOLATED: + case ICMP_UNREACH_TOSNET: + case ICMP_UNREACH_TOSHOST: + code = ICMP6_DST_UNREACH_NOROUTE; + break; + case ICMP_UNREACH_PORT: + code = ICMP6_DST_UNREACH_NOPORT; + break; + case ICMP_UNREACH_NET_PROHIB: + case ICMP_UNREACH_HOST_PROHIB: + case ICMP_UNREACH_FILTER_PROHIB: + case ICMP_UNREACH_PRECEDENCE_CUTOFF: + code = ICMP6_DST_UNREACH_ADMIN; + break; + case ICMP_UNREACH_PROTOCOL: + type = ICMP6_PARAM_PROB; + code = ICMP6_PARAMPROB_NEXTHEADER; + ptr = offsetof(struct ip6_hdr, ip6_nxt); + break; + case ICMP_UNREACH_NEEDFRAG: + type = ICMP6_PACKET_TOO_BIG; + code = 0; + mtu += 20; + break; + default: + return (-1); + } + break; + case ICMP_TIMXCEED: + type = ICMP6_TIME_EXCEEDED; + break; + case ICMP_PARAMPROB: + type = ICMP6_PARAM_PROB; + switch (code) { + case ICMP_PARAMPROB_ERRATPTR: + code = ICMP6_PARAMPROB_HEADER; + break; + case ICMP_PARAMPROB_LENGTH: + code = ICMP6_PARAMPROB_HEADER; + break; + default: + return (-1); + } + + ptr = icmp4->icmp_pptr; + if (ptr == 0 || ptr == PTR_IP(ip_tos)) + ; /* preserve */ + else if (ptr == PTR_IP(ip_len) || + ptr == PTR_IP(ip_len) + 1) + ptr = PTR_IP6(ip6_plen); + else if (ptr == PTR_IP(ip_ttl)) + ptr = PTR_IP6(ip6_hlim); + else if (ptr == 
PTR_IP(ip_p)) + ptr = PTR_IP6(ip6_nxt); + else if (ptr >= PTR_IP(ip_src) && + ptr < PTR_IP(ip_dst)) + ptr = PTR_IP6(ip6_src); + else if (ptr >= PTR_IP(ip_dst) && + ptr < (int32_t)sizeof(struct ip)) + ptr = PTR_IP6(ip6_dst); + else { + return (-1); + } + break; + default: + return (-1); + } + icmp4->icmp_type = type; + icmp4->icmp_code = code; + icmp4->icmp_nextmtu = htons(mtu); + if (ptr >= 0) + icmp4->icmp_void = htonl(ptr); + break; + } + + return (0); +} + +static int +pf_nat64_ipv6(struct mbuf *m, int off, struct pf_pdesc *pd) +{ + struct ip *ip4; + + /* + * ip_input asserts for rcvif to be not NULL + * That may not be true for two corner cases + * 1. If for some reason a local app sends DNS + * AAAA query to local host + * 2. If IPv6 stack in kernel internally generates a + * message destined for a synthesized IPv6 end-point. + */ + if (m->m_pkthdr.rcvif == NULL) + return (PF_DROP); + + /* trim the old header */ + m_adj(m, off); + + /* prepend the new one */ + if (M_PREPEND(m, sizeof(*ip4), M_DONTWAIT, 0) == NULL) + return (PF_DROP); + + ip4 = mtod(m, struct ip *); + ip4->ip_v = 4; + ip4->ip_hl = 5; + ip4->ip_tos = pd->tos & htonl(0x0ff00000); + ip4->ip_len = htons(sizeof(*ip4) + (pd->tot_len - off)); + ip4->ip_id = 0; + ip4->ip_off = htons(IP_DF); + ip4->ip_ttl = pd->ttl; + ip4->ip_p = pd->proto; + ip4->ip_sum = 0; + ip4->ip_src = pd->naddr.v4; + ip4->ip_dst = pd->ndaddr.v4; + ip4->ip_sum = in_cksum(m, ip4->ip_hl << 2); + + /* recalculate icmp checksums */ + if (pd->proto == IPPROTO_ICMP) { + struct mbuf *mp; + struct icmp *icmp; + int moff, hlen = sizeof(*ip4); + + if ((mp = m_pulldown(m, hlen, ICMP_MINLEN, &moff)) == NULL) + return (PF_NAT64); + + icmp = (struct icmp *)(void *)(mtod(mp, char *) + moff); + icmp->icmp_cksum = 0; + icmp->icmp_cksum = inet_cksum(m, 0, hlen, + ntohs(ip4->ip_len) - hlen); + } + + ip_input(m); + return (PF_NAT64); +} + +static int +pf_nat64_ipv4(struct mbuf *m, int off, struct pf_pdesc *pd) +{ + struct ip6_hdr *ip6; + + if 
(m->m_pkthdr.rcvif == NULL) + return (PF_DROP); + + m_adj(m, off); + if (M_PREPEND(m, sizeof(*ip6), M_DONTWAIT, 0) == NULL) + return (PF_DROP); + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_vfc = htonl((6 << 28) | (pd->tos << 20)); + ip6->ip6_plen = htons(pd->tot_len - off); + ip6->ip6_nxt = pd->proto; + ip6->ip6_hlim = pd->ttl; + ip6->ip6_src = pd->naddr.v6; + ip6->ip6_dst = pd->ndaddr.v6; + + /* recalculate icmp6 checksums */ + if (pd->proto == IPPROTO_ICMPV6) { + struct mbuf *mp; + struct icmp6_hdr *icmp6; + int moff, hlen = sizeof(*ip6); + + if ((mp = m_pulldown(m, hlen, sizeof(*icmp6), &moff)) == NULL) + return (PF_NAT64); + + icmp6 = (struct icmp6_hdr *)(void *)(mtod(mp, char *) + moff); + icmp6->icmp6_cksum = 0; + icmp6->icmp6_cksum = inet6_cksum(m, IPPROTO_ICMPV6, hlen, + ntohs(ip6->ip6_plen)); + } + ip6_input(m); + return (PF_NAT64); +} + static int pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, @@ -4058,6 +4596,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; struct tcphdr *th = pd->hdr.tcp; + struct udphdr *uh = pd->hdr.udp; u_short reason; int rewrite = 0, hdrlen = 0; int tag = -1; @@ -4069,7 +4608,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, u_int8_t icmptype = 0, icmpcode = 0; struct pf_grev1_hdr *grev1 = pd->hdr.grev1; - union pf_state_xport bxport, nxport, sxport, dxport; + union pf_state_xport bxport, bdxport, nxport, sxport, dxport; struct pf_state_key psk; lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); @@ -4091,9 +4630,9 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, hdrlen = sizeof (*th); break; case IPPROTO_UDP: - sxport.port = pd->hdr.udp->uh_sport; - dxport.port = pd->hdr.udp->uh_dport; - hdrlen = sizeof (*pd->hdr.udp); + sxport.port = uh->uh_sport; + dxport.port = uh->uh_dport; + hdrlen = sizeof (*uh); break; #if 
INET case IPPROTO_ICMP: @@ -4144,199 +4683,229 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); - if (direction == PF_OUT) { - bxport = nxport = sxport; - /* check outgoing packet for BINAT/NAT */ - if ((nr = pf_get_translation_aux(pd, m, off, PF_OUT, kif, &nsn, - saddr, &sxport, daddr, &dxport, &pd->naddr, &nxport)) != + bxport = sxport; + bdxport = dxport; + + if (direction == PF_OUT) + nxport = sxport; + else + nxport = dxport; + + /* check packet for BINAT/NAT/RDR */ + if ((nr = pf_get_translation_aux(pd, m, off, direction, kif, &nsn, + saddr, &sxport, daddr, &dxport, &nxport)) != NULL) { - PF_ACPY(&pd->baddr, saddr, af); - switch (pd->proto) { - case IPPROTO_TCP: + int ua; + u_int16_t dport; + + if (pd->af != pd->naf) + ua = 0; + else + ua = 1; + + PF_ACPY(&pd->baddr, saddr, af); + PF_ACPY(&pd->bdaddr, daddr, af); + + switch (pd->proto) { + case IPPROTO_TCP: + if (pd->af != pd->naf || + PF_ANEQ(saddr, &pd->naddr, pd->af)) { pf_change_ap(direction, pd->mp, saddr, - &th->th_sport, pd->ip_sum, &th->th_sum, - &pd->naddr, nxport.port, 0, af); + &th->th_sport, pd->ip_sum, &th->th_sum, + &pd->naddr, nxport.port, 0, af, + pd->naf, ua); sxport.port = th->th_sport; - rewrite++; - break; - case IPPROTO_UDP: + } + + if (pd->af != pd->naf || + PF_ANEQ(daddr, &pd->ndaddr, pd->af) || + (nr && (nr->action == PF_RDR) && + (th->th_dport != nxport.port))) { + if (nr && nr->action == PF_RDR) + dport = nxport.port; + else + dport = th->th_dport; + pf_change_ap(direction, pd->mp, daddr, + &th->th_dport, pd->ip_sum, + &th->th_sum, &pd->ndaddr, + dport, 0, af, pd->naf, ua); + dxport.port = th->th_dport; + } + rewrite++; + break; + + case IPPROTO_UDP: + if (pd->af != pd->naf || + PF_ANEQ(saddr, &pd->naddr, pd->af)) { pf_change_ap(direction, pd->mp, saddr, - &pd->hdr.udp->uh_sport, pd->ip_sum, - &pd->hdr.udp->uh_sum, &pd->naddr, - nxport.port, 1, af); - sxport.port = pd->hdr.udp->uh_sport; - 
rewrite++; - break; -#if INET - case IPPROTO_ICMP: - if (pd->af == AF_INET) { - pf_change_a(&saddr->v4.s_addr, pd->ip_sum, - pd->naddr.v4.s_addr, 0); - pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( - pd->hdr.icmp->icmp_cksum, sxport.port, - nxport.port, 0); - pd->hdr.icmp->icmp_id = nxport.port; - ++rewrite; - } - break; -#endif /* INET */ -#if INET6 - case IPPROTO_ICMPV6: - if (pd->af == AF_INET6) { - pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, - &pd->naddr, 0); - rewrite++; - } - break; -#endif /* INET */ - case IPPROTO_GRE: - switch (af) { -#if INET - case AF_INET: - pf_change_a(&saddr->v4.s_addr, - pd->ip_sum, pd->naddr.v4.s_addr, 0); - break; -#endif /* INET */ -#if INET6 - case AF_INET6: - PF_ACPY(saddr, &pd->naddr, AF_INET6); - break; -#endif /* INET6 */ - } - ++rewrite; - break; - case IPPROTO_ESP: - bxport.spi = 0; - switch (af) { + &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &pd->naddr, + nxport.port, 1, af, pd->naf, ua); + sxport.port = uh->uh_sport; + } + + if (pd->af != pd->naf || + PF_ANEQ(daddr, &pd->ndaddr, pd->af) || + (nr && (nr->action == PF_RDR) && + (uh->uh_dport != nxport.port))) { + if (nr && nr->action == PF_RDR) + dport = nxport.port; + else + dport = uh->uh_dport; + pf_change_ap(direction, pd->mp, daddr, + &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &pd->ndaddr, + dport, 0, af, pd->naf, ua); + dxport.port = uh->uh_dport; + } + rewrite++; + break; #if INET - case AF_INET: - pf_change_a(&saddr->v4.s_addr, - pd->ip_sum, pd->naddr.v4.s_addr, 0); - break; -#endif /* INET */ -#if INET6 - case AF_INET6: - PF_ACPY(saddr, &pd->naddr, AF_INET6); - break; -#endif /* INET6 */ - } + case IPPROTO_ICMP: + if (pd->af != AF_INET) break; - default: - switch (af) { -#if INET - case AF_INET: - pf_change_a(&saddr->v4.s_addr, - pd->ip_sum, pd->naddr.v4.s_addr, 0); - break; + /* + * TODO: + * pd->af != pd->naf not handled yet here and would be + * needed for NAT46 needed to support XLAT. + * Will cross the bridge when it comes. 
+ */ + if (PF_ANEQ(saddr, &pd->naddr, pd->af)) { + pf_change_a(&saddr->v4.s_addr, pd->ip_sum, + pd->naddr.v4.s_addr, 0); + pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, sxport.port, + nxport.port, 0); + pd->hdr.icmp->icmp_id = nxport.port; + } + + if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) { + pf_change_a(&daddr->v4.s_addr, pd->ip_sum, + pd->ndaddr.v4.s_addr, 0); + } + ++rewrite; + break; #endif /* INET */ #if INET6 - case AF_INET6: - PF_ACPY(saddr, &pd->naddr, af); - break; -#endif /* INET */ - } + case IPPROTO_ICMPV6: + if (pd->af != AF_INET6) break; + + if (pd->af != pd->naf || + PF_ANEQ(saddr, &pd->naddr, pd->af)) { + pf_change_addr(saddr, + &pd->hdr.icmp6->icmp6_cksum, + &pd->naddr, 0, pd->af, pd->naf); } - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } else { - bxport.port = nxport.port = dxport.port; - /* check incoming packet for BINAT/RDR */ - if ((nr = pf_get_translation_aux(pd, m, off, PF_IN, kif, &nsn, - saddr, &sxport, daddr, &dxport, &pd->naddr, &nxport)) != - NULL) { - PF_ACPY(&pd->baddr, daddr, af); - switch (pd->proto) { - case IPPROTO_TCP: - pf_change_ap(direction, pd->mp, daddr, - &th->th_dport, pd->ip_sum, &th->th_sum, - &pd->naddr, nxport.port, 0, af); - dxport.port = th->th_dport; - rewrite++; - break; - case IPPROTO_UDP: - pf_change_ap(direction, pd->mp, daddr, - &pd->hdr.udp->uh_dport, pd->ip_sum, - &pd->hdr.udp->uh_sum, &pd->naddr, - nxport.port, 1, af); - dxport.port = pd->hdr.udp->uh_dport; - rewrite++; - break; + if (pd->af != pd->naf || + PF_ANEQ(daddr, &pd->ndaddr, pd->af)) { + pf_change_addr(daddr, + &pd->hdr.icmp6->icmp6_cksum, + &pd->ndaddr, 0, pd->af, pd->naf); + } + + if (pd->af != pd->naf) { + if (pf_translate_icmp_af(AF_INET, + pd->hdr.icmp6)) + return (PF_DROP); + pd->proto = IPPROTO_ICMP; + } + rewrite++; + break; +#endif /* INET */ + case IPPROTO_GRE: + if ((direction == PF_IN) && + (pd->proto_variant == PF_GRE_PPTP_VARIANT)) + grev1->call_id = nxport.call_id; + + switch (pd->af) { #if INET - 
case IPPROTO_ICMP: - if (pd->af == AF_INET) { - pf_change_a(&daddr->v4.s_addr, pd->ip_sum, - pd->naddr.v4.s_addr, 0); + case AF_INET: + if (PF_ANEQ(saddr, &pd->naddr, pd->af)) { + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + pd->naddr.v4.s_addr, 0); + } + if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) { + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + pd->ndaddr.v4.s_addr, 0); } break; #endif /* INET */ #if INET6 - case IPPROTO_ICMPV6: - if (pd->af == AF_INET6) { - pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, - &pd->naddr, 0); - rewrite++; - } + case AF_INET6: + if (PF_ANEQ(saddr, &pd->naddr, pd->af)) + PF_ACPY(saddr, &pd->naddr, AF_INET6); + if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) + PF_ACPY(daddr, &pd->ndaddr, AF_INET6); break; #endif /* INET6 */ - case IPPROTO_GRE: - if (pd->proto_variant == PF_GRE_PPTP_VARIANT) - grev1->call_id = nxport.call_id; + } + ++rewrite; + break; + case IPPROTO_ESP: + if (direction == PF_OUT) + bxport.spi = 0; - switch (af) { + switch (pd->af) { #if INET - case AF_INET: + case AF_INET: + if (PF_ANEQ(saddr, &pd->naddr, pd->af)) { + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, pd->naddr.v4.s_addr, 0); + } + if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) { pf_change_a(&daddr->v4.s_addr, - pd->ip_sum, pd->naddr.v4.s_addr, 0); - break; -#endif /* INET */ -#if INET6 - case AF_INET6: - PF_ACPY(daddr, &pd->naddr, AF_INET6); - break; -#endif /* INET6 */ + pd->ip_sum, + pd->ndaddr.v4.s_addr, 0); } - ++rewrite; break; - case IPPROTO_ESP: - switch (af) { -#if INET - case AF_INET: - pf_change_a(&daddr->v4.s_addr, - pd->ip_sum, pd->naddr.v4.s_addr, 0); - break; #endif /* INET */ #if INET6 - case AF_INET6: - PF_ACPY(daddr, &pd->naddr, AF_INET6); - break; + case AF_INET6: + if (PF_ANEQ(saddr, &pd->naddr, pd->af)) + PF_ACPY(saddr, &pd->naddr, AF_INET6); + if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) + PF_ACPY(daddr, &pd->ndaddr, AF_INET6); + break; #endif /* INET6 */ + } + break; + default: + switch (pd->af) { +#if INET + case AF_INET: + if ((pd->naf != AF_INET) 
|| + (PF_ANEQ(saddr, &pd->naddr, pd->af))) { + pf_change_addr(saddr, pd->ip_sum, + &pd->naddr, 0, af, pd->naf); + } + + if ((pd->naf != AF_INET) || + (PF_ANEQ(daddr, &pd->ndaddr, pd->af))) { + pf_change_addr(daddr, pd->ip_sum, + &pd->ndaddr, 0, af, pd->naf); } break; - default: - switch (af) { -#if INET - case AF_INET: - pf_change_a(&daddr->v4.s_addr, - pd->ip_sum, pd->naddr.v4.s_addr, 0); - break; #endif /* INET */ #if INET6 - case AF_INET6: - PF_ACPY(daddr, &pd->naddr, af); - break; -#endif /* INET */ - } + case AF_INET6: + if (PF_ANEQ(saddr, &pd->naddr, pd->af)) + PF_ACPY(saddr, &pd->naddr, af); + if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) + PF_ACPY(daddr, &pd->ndaddr, af); break; +#endif /* INET */ } - - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; + break; } + + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + pd->af = pd->naf; } if (nr && nr->tag > 0) @@ -4348,11 +4917,11 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; - else if (r->af && r->af != af) + else if (r->af && r->af != pd->af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + else if (PF_MISMATCHAW(&r->src.addr, saddr, pd->af, r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; /* tcp/udp only. port_op always 0 in other cases */ @@ -4363,7 +4932,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, r->src.xport.range.port[0], r->src.xport.range.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + else if (PF_MISMATCHAW(&r->dst.addr, daddr, pd->af, r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; /* tcp/udp only. 
port_op always 0 in other cases */ @@ -4455,8 +5024,8 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, m_copyback(m, off, hdrlen, pd->hdr.any); } - PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, - a, ruleset, pd); + PFLOG_PACKET(kif, h, m, pd->af, direction, reason, + r->log ? r : nr, a, ruleset, pd); } if ((r->action == PF_DROP) && @@ -4464,14 +5033,16 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, (r->rule_flag & PFRULE_RETURNICMP) || (r->rule_flag & PFRULE_RETURN))) { /* undo NAT changes, if they have taken place */ - if (nr != NULL) { + /* XXX For NAT64 we are not reverting the changes */ + if (nr != NULL && nr->action != PF_NAT64) { if (direction == PF_OUT) { + pd->af = af; switch (pd->proto) { case IPPROTO_TCP: pf_change_ap(direction, pd->mp, saddr, &th->th_sport, pd->ip_sum, &th->th_sum, &pd->baddr, - bxport.port, 0, af); + bxport.port, 0, af, pd->af, 1); sxport.port = th->th_sport; rewrite++; break; @@ -4479,7 +5050,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, pf_change_ap(direction, pd->mp, saddr, &pd->hdr.udp->uh_sport, pd->ip_sum, &pd->hdr.udp->uh_sum, &pd->baddr, - bxport.port, 1, af); + bxport.port, 1, af, pd->af, 1); sxport.port = pd->hdr.udp->uh_sport; rewrite++; break; @@ -4543,16 +5114,16 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, case IPPROTO_TCP: pf_change_ap(direction, pd->mp, daddr, &th->th_dport, pd->ip_sum, - &th->th_sum, &pd->baddr, - bxport.port, 0, af); + &th->th_sum, &pd->bdaddr, + bdxport.port, 0, af, pd->af, 1); dxport.port = th->th_dport; rewrite++; break; case IPPROTO_UDP: pf_change_ap(direction, pd->mp, daddr, &pd->hdr.udp->uh_dport, pd->ip_sum, - &pd->hdr.udp->uh_sum, &pd->baddr, - bxport.port, 1, af); + &pd->hdr.udp->uh_sum, &pd->bdaddr, + bdxport.port, 1, af, pd->af, 1); dxport.port = pd->hdr.udp->uh_dport; rewrite++; break; @@ -4565,19 +5136,20 @@ pf_test_rule(struct pf_rule **rm, struct pf_state 
**sm, int direction, case IPPROTO_GRE: if (pd->proto_variant == PF_GRE_PPTP_VARIANT) - grev1->call_id = bxport.call_id; + grev1->call_id = + bdxport.call_id; ++rewrite; switch (af) { #if INET case AF_INET: pf_change_a(&daddr->v4.s_addr, pd->ip_sum, - pd->baddr.v4.s_addr, 0); + pd->bdaddr.v4.s_addr, 0); break; #endif /* INET */ #if INET6 case AF_INET6: - PF_ACPY(daddr, &pd->baddr, + PF_ACPY(daddr, &pd->bdaddr, AF_INET6); break; #endif /* INET6 */ @@ -4589,12 +5161,12 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, case AF_INET: pf_change_a(&daddr->v4.s_addr, pd->ip_sum, - pd->baddr.v4.s_addr, 0); + pd->bdaddr.v4.s_addr, 0); break; #endif /* INET */ #if INET6 case AF_INET6: - PF_ACPY(daddr, &pd->baddr, + PF_ACPY(daddr, &pd->bdaddr, AF_INET6); break; #endif /* INET6 */ @@ -4605,11 +5177,11 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, case AF_INET: pf_change_a(&daddr->v4.s_addr, pd->ip_sum, - pd->baddr.v4.s_addr, 0); + pd->bdaddr.v4.s_addr, 0); break; #if INET6 case AF_INET6: - PF_ACPY(daddr, &pd->baddr, af); + PF_ACPY(daddr, &pd->bdaddr, af); break; #endif /* INET6 */ } @@ -4627,7 +5199,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, struct ip6_hdr *h6; #endif /* INET6 */ - switch (af) { + switch (pd->af) { case AF_INET: h4 = mtod(m, struct ip *); len = ntohs(h4->ip_len) - off; @@ -4641,28 +5213,29 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, #endif /* INET6 */ } - if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af)) + if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, + pd->af)) REASON_SET(&reason, PFRES_PROTCKSUM); else { if (th->th_flags & TH_SYN) ack++; if (th->th_flags & TH_FIN) ack++; - pf_send_tcp(r, af, pd->dst, + pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); } - } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && + } else if 
(pd->proto != IPPROTO_ICMP && pd->af == AF_INET && pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH && r->return_icmp) pf_send_icmp(m, r->return_icmp >> 8, - r->return_icmp & 255, af, r); + r->return_icmp & 255, pd->af, r); else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 && pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH && r->return_icmp6) pf_send_icmp(m, r->return_icmp6 >> 8, - r->return_icmp6 & 255, af, r); + r->return_icmp6 & 255, pd->af, r); } if (r->action == PF_DROP) @@ -4672,7 +5245,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, bzero(&psk, sizeof (psk)); psk.proto = pd->proto; psk.direction = direction; - psk.af = af; if (pd->proto == IPPROTO_UDP) { if (ntohs(pd->hdr.udp->uh_sport) == PF_IKE_PORT && ntohs(pd->hdr.udp->uh_dport) == PF_IKE_PORT) { @@ -4686,62 +5258,120 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, psk.proto_variant = pd->proto_variant; } if (direction == PF_OUT) { + psk.af_gwy = af; PF_ACPY(&psk.gwy.addr, saddr, af); - PF_ACPY(&psk.ext.addr, daddr, af); + PF_ACPY(&psk.ext_gwy.addr, daddr, af); switch (pd->proto) { - case IPPROTO_UDP: - psk.gwy.xport = sxport; - psk.ext.xport = dxport; - break; case IPPROTO_ESP: psk.gwy.xport.spi = 0; - psk.ext.xport.spi = pd->hdr.esp->spi; + psk.ext_gwy.xport.spi = pd->hdr.esp->spi; break; case IPPROTO_ICMP: #if INET6 case IPPROTO_ICMPV6: #endif + /* + * NAT64 requires protocol translation between ICMPv4 + * and ICMPv6. TCP and UDP do not require protocol + * translation. 
To avoid adding complexity just to + * handle ICMP(v4/v6), we always lookup for + * proto = IPPROTO_ICMP on both LAN and WAN side + */ + psk.proto = IPPROTO_ICMP; psk.gwy.xport.port = nxport.port; - psk.ext.xport.spi = 0; + psk.ext_gwy.xport.spi = 0; break; default: psk.gwy.xport = sxport; - psk.ext.xport = dxport; + psk.ext_gwy.xport = dxport; break; } + psk.af_lan = af; if (nr != NULL) { PF_ACPY(&psk.lan.addr, &pd->baddr, af); psk.lan.xport = bxport; + PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af); + psk.ext_lan.xport = bdxport; } else { PF_ACPY(&psk.lan.addr, &psk.gwy.addr, af); psk.lan.xport = psk.gwy.xport; + PF_ACPY(&psk.ext_lan.addr, &psk.ext_gwy.addr, af); + psk.ext_lan.xport = psk.ext_gwy.xport; } } else { - PF_ACPY(&psk.lan.addr, daddr, af); - PF_ACPY(&psk.ext.addr, saddr, af); + psk.af_lan = af; + if (nr && nr->action == PF_NAT64) { + PF_ACPY(&psk.lan.addr, &pd->baddr, af); + PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af); + } else { + PF_ACPY(&psk.lan.addr, daddr, af); + PF_ACPY(&psk.ext_lan.addr, saddr, af); + } switch (pd->proto) { case IPPROTO_ICMP: #if INET6 case IPPROTO_ICMPV6: #endif - psk.lan.xport = nxport; - psk.ext.xport.spi = 0; + /* + * NAT64 requires protocol translation between ICMPv4 + * and ICMPv6. TCP and UDP do not require protocol + * translation. 
To avoid adding complexity just to + * handle ICMP(v4/v6), we always lookup for + * proto = IPPROTO_ICMP on both LAN and WAN side + */ + psk.proto = IPPROTO_ICMP; + if (nr && nr->action == PF_NAT64) { + psk.lan.xport = bxport; + psk.ext_lan.xport = bxport; + } else { + psk.lan.xport = nxport; + psk.ext_lan.xport.spi = 0; + } break; case IPPROTO_ESP: - psk.ext.xport.spi = 0; + psk.ext_lan.xport.spi = 0; psk.lan.xport.spi = pd->hdr.esp->spi; break; default: - psk.lan.xport = dxport; - psk.ext.xport = sxport; + if (nr != NULL) { + if (nr->action == PF_NAT64) { + psk.lan.xport = bxport; + psk.ext_lan.xport = bdxport; + } else { + psk.lan.xport = dxport; + psk.ext_lan.xport = sxport; + } + } else { + psk.lan.xport = dxport; + psk.ext_lan.xport = sxport; + } break; } + psk.af_gwy = pd->naf; if (nr != NULL) { - PF_ACPY(&psk.gwy.addr, &pd->baddr, af); - psk.gwy.xport = bxport; + if (nr->action == PF_NAT64) { + PF_ACPY(&psk.gwy.addr, &pd->naddr, pd->naf); + PF_ACPY(&psk.ext_gwy.addr, &pd->ndaddr, + pd->naf); + if ((pd->proto == IPPROTO_ICMPV6) || + (pd->proto == IPPROTO_ICMP)) { + psk.gwy.xport = nxport; + psk.ext_gwy.xport = nxport; + } else { + psk.gwy.xport = sxport; + psk.ext_gwy.xport = dxport; + } + } else { + PF_ACPY(&psk.gwy.addr, &pd->bdaddr, af); + psk.gwy.xport = bdxport; + PF_ACPY(&psk.ext_gwy.addr, saddr, af); + psk.ext_gwy.xport = sxport; + } } else { PF_ACPY(&psk.gwy.addr, &psk.lan.addr, af); psk.gwy.xport = psk.lan.xport; + PF_ACPY(&psk.ext_gwy.addr, &psk.ext_lan.addr, af); + psk.ext_gwy.xport = psk.ext_lan.xport; } } if (pd->pktflags & PKTF_FLOW_ID) { @@ -4772,7 +5402,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_ike_hdr ike; if (pd->proto == IPPROTO_UDP) { - struct udphdr *uh = pd->hdr.udp; size_t plen = m->m_pkthdr.len - off - sizeof (*uh); if (ntohs(uh->uh_sport) == PF_IKE_PORT && @@ -4797,10 +5426,10 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, * partial state is allowed for each 
external address. */ memset(&sk0, 0, sizeof (sk0)); - sk0.af = pd->af; + sk0.af_gwy = pd->af; sk0.proto = IPPROTO_ESP; - PF_ACPY(&sk0.gwy.addr, saddr, sk0.af); - PF_ACPY(&sk0.ext.addr, daddr, sk0.af); + PF_ACPY(&sk0.gwy.addr, saddr, sk0.af_gwy); + PF_ACPY(&sk0.ext_gwy.addr, daddr, sk0.af_gwy); s0 = pf_find_state(kif, &sk0, PF_IN); if (s0 && PF_ANEQ(&s0->state_key->lan.addr, @@ -4975,7 +5604,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, goto cleanup; } - pf_set_rt_ifp(s, saddr); /* needs s->state_key set */ + pf_set_rt_ifp(s, saddr, af); /* needs s->state_key set */ m = pd->mp; @@ -4983,7 +5612,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, switch (pd->proto) { case IPPROTO_TCP: { u_int16_t dport = (direction == PF_OUT) ? - sk->ext.xport.port : sk->gwy.xport.port; + sk->ext_gwy.xport.port : sk->gwy.xport.port; if (nr != NULL && ntohs(dport) == PF_PPTP_PORT) { @@ -5010,8 +5639,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, } case IPPROTO_UDP: { - struct udphdr *uh = pd->hdr.udp; - if (nr != NULL && ntohs(uh->uh_sport) == PF_IKE_PORT && ntohs(uh->uh_dport) == PF_IKE_PORT) { @@ -5056,19 +5683,20 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { + int ua = (sk->af_lan == sk->af_gwy) ? 
1 : 0; s->src.state = PF_TCPS_PROXY_SRC; if (nr != NULL) { if (direction == PF_OUT) { pf_change_ap(direction, pd->mp, saddr, &th->th_sport, pd->ip_sum, &th->th_sum, &pd->baddr, - bxport.port, 0, af); + bxport.port, 0, af, pd->af, ua); sxport.port = th->th_sport; } else { pf_change_ap(direction, pd->mp, daddr, &th->th_dport, pd->ip_sum, &th->th_sum, &pd->baddr, - bxport.port, 0, af); + bxport.port, 0, af, pd->af, ua); sxport.port = th->th_dport; } } @@ -5124,6 +5752,11 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, } m_copyback(m, off, hdrlen, pd->hdr.any); + if (af == AF_INET6 && pd->naf == AF_INET) + return pf_nat64_ipv6(m, off, pd); + else if (af == AF_INET && pd->naf == AF_INET6) + return pf_nat64_ipv4(m, off, pd); + } return (PF_PASS); @@ -5375,7 +6008,7 @@ pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif, NTOHS(iphdr->ip_len); NTOHS(iphdr->ip_off); } - /* + /* * Don't need to unlock pf_lock as NET_THREAD_HELD_PF * allows for recursive behavior */ @@ -5571,14 +6204,17 @@ pf_pptp_handler(struct pf_state *s, int direction, int off, memcpy(&gsk->lan, &sk->lan, sizeof (gsk->lan)); memcpy(&gsk->gwy, &sk->gwy, sizeof (gsk->gwy)); - memcpy(&gsk->ext, &sk->ext, sizeof (gsk->ext)); - gsk->af = sk->af; + memcpy(&gsk->ext_lan, &sk->ext_lan, sizeof (gsk->ext_lan)); + memcpy(&gsk->ext_gwy, &sk->ext_gwy, sizeof (gsk->ext_gwy)); + gsk->af_lan = sk->af_lan; + gsk->af_gwy = sk->af_gwy; gsk->proto = IPPROTO_GRE; gsk->proto_variant = PF_GRE_PPTP_VARIANT; gsk->app_state = gas; gsk->lan.xport.call_id = 0; gsk->gwy.xport.call_id = 0; - gsk->ext.xport.call_id = 0; + gsk->ext_lan.xport.call_id = 0; + gsk->ext_gwy.xport.call_id = 0; gsk->flowsrc = FLOWSRC_PF; gsk->flowhash = pf_calc_state_key_flowhash(gsk); memset(gas, 0, sizeof (*gas)); @@ -5593,7 +6229,7 @@ pf_pptp_handler(struct pf_state *s, int direction, int off, switch (sk->direction) { case PF_IN: - pns_call_id = &gsk->ext.xport.call_id; + pns_call_id = 
&gsk->ext_lan.xport.call_id; pns_state = &gs->dst.state; pac_call_id = &gsk->lan.xport.call_id; pac_state = &gs->src.state; @@ -5602,7 +6238,7 @@ pf_pptp_handler(struct pf_state *s, int direction, int off, case PF_OUT: pns_call_id = &gsk->lan.xport.call_id; pns_state = &gs->src.state; - pac_call_id = &gsk->ext.xport.call_id; + pac_call_id = &gsk->ext_lan.xport.call_id; pac_state = &gs->dst.state; break; @@ -5697,13 +6333,13 @@ pf_pptp_handler(struct pf_state *s, int direction, int off, int n = 0; struct pf_state_key_cmp key; - key.af = gsk->af; + key.af_gwy = gsk->af_gwy; key.proto = IPPROTO_GRE; key.proto_variant = PF_GRE_PPTP_VARIANT; - PF_ACPY(&key.gwy.addr, &gsk->gwy.addr, key.af); - PF_ACPY(&key.ext.addr, &gsk->ext.addr, key.af); + PF_ACPY(&key.gwy.addr, &gsk->gwy.addr, key.af_gwy); + PF_ACPY(&key.ext_gwy.addr, &gsk->ext_gwy.addr, key.af_gwy); key.gwy.xport.call_id = gsk->gwy.xport.call_id; - key.ext.xport.call_id = gsk->ext.xport.call_id; + key.ext_gwy.xport.call_id = gsk->ext_gwy.xport.call_id; do { call_id = htonl(random()); } while (!call_id); @@ -5758,7 +6394,8 @@ pf_pptp_handler(struct pf_state *s, int direction, int off, gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC; gsk->lan.xport.call_id = 0; gsk->gwy.xport.call_id = 0; - gsk->ext.xport.call_id = 0; + gsk->ext_lan.xport.call_id = 0; + gsk->ext_gwy.xport.call_id = 0; gs->id = gs->creatorid = 0; break; @@ -5774,7 +6411,7 @@ pf_pptp_handler(struct pf_state *s, int direction, int off, ++gs->nat_src_node->states; VERIFY(gs->nat_src_node->states != 0); } - pf_set_rt_ifp(gs, &sk->lan.addr); + pf_set_rt_ifp(gs, &sk->lan.addr, sk->af_lan); if (pf_insert_state(BOUND_IFACE(s->rule.ptr, kif), gs)) { /* @@ -5840,6 +6477,33 @@ pf_ike_compare(struct pf_app_state *a, struct pf_app_state *b) return ((d > 0) ? 1 : ((d < 0) ? 
-1 : 0)); } +static int +pf_do_nat64(struct pf_state_key *sk, struct pf_pdesc *pd, struct mbuf *m, + int off) +{ + if (pd->af == AF_INET) { + if (pd->af != sk->af_lan) { + pd->ndaddr = sk->lan.addr; + pd->naddr = sk->ext_lan.addr; + } else { + pd->naddr = sk->gwy.addr; + pd->ndaddr = sk->ext_gwy.addr; + } + return (pf_nat64_ipv4(m, off, pd)); + } + else if (pd->af == AF_INET6) { + if (pd->af != sk->af_lan) { + pd->ndaddr = sk->lan.addr; + pd->naddr = sk->ext_lan.addr; + } else { + pd->naddr = sk->gwy.addr; + pd->ndaddr = sk->ext_gwy.addr; + } + return (pf_nat64_ipv6(m, off, pd)); + } + return (PF_DROP); +} + static int pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, @@ -5854,25 +6518,44 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, int ackskew; int copyback = 0; struct pf_state_peer *src, *dst; + struct pf_state_key *sk; key.app_state = 0; - key.af = pd->af; key.proto = IPPROTO_TCP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.xport.port = th->th_sport; - key.gwy.xport.port = th->th_dport; - } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.xport.port = th->th_sport; - key.ext.xport.port = th->th_dport; - } + key.af_lan = key.af_gwy = pd->af; + + /* + * For NAT64 the first time rule search and state creation + * is done on the incoming side only. + * Once the state gets created, NAT64's LAN side (ipv6) will + * not be able to find the state in ext-gwy tree as that normally + * is intended to be looked up for incoming traffic from the + * WAN side. + * Therefore to handle NAT64 case we init keys here for both + * lan-ext as well as ext-gwy trees. 
+ * In the state lookup we attempt a lookup on both trees if + * first one does not return any result and return a match if + * the match state's was created by NAT64 rule. + */ + PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy); + PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy); + key.ext_gwy.xport.port = th->th_sport; + key.gwy.xport.port = th->th_dport; + + PF_ACPY(&key.lan.addr, pd->src, key.af_lan); + PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan); + key.lan.xport.port = th->th_sport; + key.ext_lan.xport.port = th->th_dport; STATE_LOOKUP(); - if (direction == (*state)->state_key->direction) { + sk = (*state)->state_key; + /* + * In case of NAT64 the translation is first applied on the LAN + * side. Therefore for stack's address family comparison + * we use sk->af_lan. + */ + if ((direction == sk->direction) && (pd->af == sk->af_lan)) { src = &(*state)->src; dst = &(*state)->dst; } else { @@ -5880,26 +6563,26 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, dst = &(*state)->src; } - if ((*state)->src.state == PF_TCPS_PROXY_SRC) { - if (direction != (*state)->state_key->direction) { + if (src->state == PF_TCPS_PROXY_SRC) { + if (direction != sk->direction) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } if (th->th_flags & TH_SYN) { - if (ntohl(th->th_seq) != (*state)->src.seqlo) { + if (ntohl(th->th_seq) != src->seqlo) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, - (*state)->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, + src->seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, src->mss, 0, 1, 0, NULL, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (!(th->th_flags & TH_ACK) || - (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || - (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + (ntohl(th->th_ack) != src->seqhi + 1) || + (ntohl(th->th_seq) != src->seqlo 
+ 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } else if ((*state)->src_node != NULL && @@ -5907,62 +6590,62 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } else - (*state)->src.state = PF_TCPS_PROXY_DST; + src->state = PF_TCPS_PROXY_DST; } - if ((*state)->src.state == PF_TCPS_PROXY_DST) { + if (src->state == PF_TCPS_PROXY_DST) { struct pf_state_host *psrc, *pdst; if (direction == PF_OUT) { - psrc = &(*state)->state_key->gwy; - pdst = &(*state)->state_key->ext; + psrc = &sk->gwy; + pdst = &sk->ext_gwy; } else { - psrc = &(*state)->state_key->ext; - pdst = &(*state)->state_key->lan; + psrc = &sk->ext_lan; + pdst = &sk->lan; } - if (direction == (*state)->state_key->direction) { + if (direction == sk->direction) { if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || - (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || - (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + (ntohl(th->th_ack) != src->seqhi + 1) || + (ntohl(th->th_seq) != src->seqlo + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } - (*state)->src.max_win = MAX(ntohs(th->th_win), 1); - if ((*state)->dst.seqhi == 1) - (*state)->dst.seqhi = htonl(random()); + src->max_win = MAX(ntohs(th->th_win), 1); + if (dst->seqhi == 1) + dst->seqhi = htonl(random()); pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr, &pdst->addr, psrc->xport.port, pdst->xport.port, - (*state)->dst.seqhi, 0, TH_SYN, 0, - (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); + dst->seqhi, 0, TH_SYN, 0, + src->mss, 0, 0, (*state)->tag, NULL, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (((th->th_flags & (TH_SYN|TH_ACK)) != (TH_SYN|TH_ACK)) || - (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { + (ntohl(th->th_ack) != dst->seqhi + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } else { - (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); - (*state)->dst.seqlo = 
ntohl(th->th_seq); + dst->max_win = MAX(ntohs(th->th_win), 1); + dst->seqlo = ntohl(th->th_seq); pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, - TH_ACK, (*state)->src.max_win, 0, 0, 0, + TH_ACK, src->max_win, 0, 0, 0, (*state)->tag, NULL, NULL); pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr, &pdst->addr, psrc->xport.port, pdst->xport.port, - (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, - TH_ACK, (*state)->dst.max_win, 0, 0, 1, + src->seqhi + 1, src->seqlo + 1, + TH_ACK, dst->max_win, 0, 0, 1, 0, NULL, NULL); - (*state)->src.seqdiff = (*state)->dst.seqhi - - (*state)->src.seqlo; - (*state)->dst.seqdiff = (*state)->src.seqhi - - (*state)->dst.seqlo; - (*state)->src.seqhi = (*state)->src.seqlo + - (*state)->dst.max_win; - (*state)->dst.seqhi = (*state)->dst.seqlo + - (*state)->src.max_win; - (*state)->src.wscale = (*state)->dst.wscale = 0; - (*state)->src.state = (*state)->dst.state = + src->seqdiff = dst->seqhi - + src->seqlo; + dst->seqdiff = src->seqhi - + dst->seqlo; + src->seqhi = src->seqlo + + dst->max_win; + dst->seqhi = dst->seqlo + + src->max_win; + src->wscale = dst->wscale = 0; + src->state = dst->state = TCPS_ESTABLISHED; REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); @@ -5979,7 +6662,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, printf("\n"); } /* XXX make sure it's the same direction ?? */ - (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; + src->state = dst->state = TCPS_CLOSED; pf_unlink_state(*state); *state = NULL; return (PF_DROP); @@ -6252,7 +6935,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, pd->p_len, ackskew, (*state)->packets[0], (*state)->packets[1], direction == PF_IN ? "in" : "out", - direction == (*state)->state_key->direction ? + direction == sk->direction ? 
"fwd" : "rev"); } @@ -6287,8 +6970,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, /* Fall through to PASS packet */ } else { - if ((*state)->dst.state == TCPS_SYN_SENT && - (*state)->src.state == TCPS_SYN_SENT) { + if (dst->state == TCPS_SYN_SENT && + src->state == TCPS_SYN_SENT) { /* Send RST for state mismatches during handshake */ if (!(th->th_flags & TH_RST)) pf_send_tcp((*state)->rule.ptr, pd->af, @@ -6310,7 +6993,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (unsigned int)sws, (unsigned int)dws, (*state)->packets[0], (*state)->packets[1], direction == PF_IN ? "in" : "out", - direction == (*state)->state_key->direction ? + direction == sk->direction ? "fwd" : "rev"); printf("pf: State failure on: %c %c %c %c | %c %c\n", SEQ_GEQ(src->seqhi, end) ? ' ' : '1', @@ -6328,9 +7011,9 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, /* Any packets which have gotten here are to be passed */ - if ((*state)->state_key->app_state && - (*state)->state_key->app_state->handler) { - (*state)->state_key->app_state->handler(*state, direction, + if (sk->app_state && + sk->app_state->handler) { + sk->app_state->handler(*state, direction, off + (th->th_off << 2), pd, kif); if (pd->lmw < 0) { REASON_SET(reason, PFRES_MEMORY); @@ -6340,17 +7023,50 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, } /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE((*state)->state_key)) { - if (direction == PF_OUT) + if (STATE_TRANSLATE(sk)) { + pd->naf = (pd->af == sk->af_lan) ? 
sk->af_gwy : sk->af_lan; + + if (direction == PF_OUT) { pf_change_ap(direction, pd->mp, pd->src, &th->th_sport, - pd->ip_sum, &th->th_sum, - &(*state)->state_key->gwy.addr, - (*state)->state_key->gwy.xport.port, 0, pd->af); - else - pf_change_ap(direction, pd->mp, pd->dst, &th->th_dport, - pd->ip_sum, &th->th_sum, - &(*state)->state_key->lan.addr, - (*state)->state_key->lan.xport.port, 0, pd->af); + pd->ip_sum, &th->th_sum, &sk->gwy.addr, + sk->gwy.xport.port, 0, pd->af, pd->naf, 1); + } else { + if (pd->af != pd->naf) { + if (pd->af == sk->af_gwy) { + pf_change_ap(direction, pd->mp, pd->dst, + &th->th_dport, pd->ip_sum, + &th->th_sum, &sk->lan.addr, + sk->lan.xport.port, 0, + pd->af, pd->naf, 0); + + pf_change_ap(direction, pd->mp, pd->src, + &th->th_sport, pd->ip_sum, + &th->th_sum, &sk->ext_lan.addr, + th->th_sport, 0, pd->af, + pd->naf, 0); + + } else { + pf_change_ap(direction, pd->mp, pd->dst, + &th->th_dport, pd->ip_sum, + &th->th_sum, &sk->ext_gwy.addr, + th->th_dport, 0, pd->af, + pd->naf, 0); + + pf_change_ap(direction, pd->mp, pd->src, + &th->th_sport, pd->ip_sum, + &th->th_sum, &sk->gwy.addr, + sk->gwy.xport.port, 0, pd->af, + pd->naf, 0); + } + } else { + pf_change_ap(direction, pd->mp, pd->dst, + &th->th_dport, pd->ip_sum, + &th->th_sum, &sk->lan.addr, + sk->lan.xport.port, 0, pd->af, + pd->naf, 1); + } + } + copyback = off + sizeof (*th); } @@ -6363,8 +7079,10 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, /* Copyback sequence modulation or stateful scrub changes */ m_copyback(m, off, sizeof (*th), th); - } + if (sk->af_lan != sk->af_gwy) + return (pf_do_nat64(sk, pd, m, off)); + } return (PF_PASS); } @@ -6375,27 +7093,38 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, #pragma unused(h) struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; + struct pf_state_key *sk; struct udphdr *uh = pd->hdr.udp; struct pf_app_state as; - int dx, action, extfilter; + int action, extfilter; 
key.app_state = 0; key.proto_variant = PF_EXTFILTER_APD; - key.af = pd->af; key.proto = IPPROTO_UDP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.xport.port = uh->uh_sport; - key.gwy.xport.port = uh->uh_dport; - dx = PF_IN; - } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.xport.port = uh->uh_sport; - key.ext.xport.port = uh->uh_dport; - dx = PF_OUT; - } + key.af_lan = key.af_gwy = pd->af; + + /* + * For NAT64 the first time rule search and state creation + * is done on the incoming side only. + * Once the state gets created, NAT64's LAN side (ipv6) will + * not be able to find the state in ext-gwy tree as that normally + * is intended to be looked up for incoming traffic from the + * WAN side. + * Therefore to handle NAT64 case we init keys here for both + * lan-ext as well as ext-gwy trees. + * In the state lookup we attempt a lookup on both trees if + * the first one does not return any result and return a match if + * the matched state was created by a NAT64 rule. 
+ */ + PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy); + PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy); + key.ext_gwy.xport.port = uh->uh_sport; + key.gwy.xport.port = uh->uh_dport; + + PF_ACPY(&key.lan.addr, pd->src, key.af_lan); + PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan); + key.lan.xport.port = uh->uh_sport; + key.ext_lan.xport.port = uh->uh_dport; if (ntohs(uh->uh_sport) == PF_IKE_PORT && ntohs(uh->uh_dport) == PF_IKE_PORT) { @@ -6429,16 +7158,16 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, } } - *state = pf_find_state(kif, &key, dx); + *state = pf_find_state(kif, &key, direction); if (!key.app_state && *state == 0) { key.proto_variant = PF_EXTFILTER_AD; - *state = pf_find_state(kif, &key, dx); + *state = pf_find_state(kif, &key, direction); } if (!key.app_state && *state == 0) { key.proto_variant = PF_EXTFILTER_EI; - *state = pf_find_state(kif, &key, dx); + *state = pf_find_state(kif, &key, direction); } /* similar to STATE_LOOKUP() */ @@ -6454,7 +7183,14 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, if (pf_state_lookup_aux(state, kif, direction, &action)) return (action); - if (direction == (*state)->state_key->direction) { + sk = (*state)->state_key; + + /* + * In case of NAT64 the translation is first applied on the LAN + * side. Therefore for stack's address family comparison + * we use sk->af_lan. 
+ */ + if ((direction == sk->direction) && (pd->af == sk->af_lan)) { src = &(*state)->src; dst = &(*state)->dst; } else { @@ -6475,18 +7211,24 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, else (*state)->timeout = PFTM_UDP_SINGLE; - extfilter = (*state)->state_key->proto_variant; + extfilter = sk->proto_variant; if (extfilter > PF_EXTFILTER_APD) { - (*state)->state_key->ext.xport.port = key.ext.xport.port; - if (extfilter > PF_EXTFILTER_AD) - PF_ACPY(&(*state)->state_key->ext.addr, - &key.ext.addr, key.af); + if (direction == PF_OUT) { + sk->ext_lan.xport.port = key.ext_lan.xport.port; + if (extfilter > PF_EXTFILTER_AD) + PF_ACPY(&sk->ext_lan.addr, &key.ext_lan.addr, + key.af_lan); + } else { + sk->ext_gwy.xport.port = key.ext_gwy.xport.port; + if (extfilter > PF_EXTFILTER_AD) + PF_ACPY(&sk->ext_gwy.addr, &key.ext_gwy.addr, + key.af_gwy); + } } - if ((*state)->state_key->app_state && - (*state)->state_key->app_state->handler) { - (*state)->state_key->app_state->handler(*state, direction, - off + uh->uh_ulen, pd, kif); + if (sk->app_state && sk->app_state->handler) { + sk->app_state->handler(*state, direction, off + uh->uh_ulen, + pd, kif); if (pd->lmw < 0) { REASON_SET(reason, PFRES_MEMORY); return (PF_DROP); @@ -6495,26 +7237,61 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, } /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE((*state)->state_key)) { + if (STATE_TRANSLATE(sk)) { m = pf_lazy_makewritable(pd, m, off + sizeof (*uh)); if (!m) { REASON_SET(reason, PFRES_MEMORY); return (PF_DROP); } - if (direction == PF_OUT) + pd->naf = (pd->af == sk->af_lan) ? 
sk->af_gwy : sk->af_lan; + + if (direction == PF_OUT) { pf_change_ap(direction, pd->mp, pd->src, &uh->uh_sport, - pd->ip_sum, &uh->uh_sum, - &(*state)->state_key->gwy.addr, - (*state)->state_key->gwy.xport.port, 1, pd->af); - else - pf_change_ap(direction, pd->mp, pd->dst, &uh->uh_dport, - pd->ip_sum, &uh->uh_sum, - &(*state)->state_key->lan.addr, - (*state)->state_key->lan.xport.port, 1, pd->af); + pd->ip_sum, &uh->uh_sum, &sk->gwy.addr, + sk->gwy.xport.port, 1, pd->af, pd->naf, 1); + } else { + if (pd->af != pd->naf) { + + if (pd->af == sk->af_gwy) { + pf_change_ap(direction, pd->mp, pd->dst, + &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &sk->lan.addr, + sk->lan.xport.port, 1, + pd->af, pd->naf, 0); + + pf_change_ap(direction, pd->mp, pd->src, + &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &sk->ext_lan.addr, + uh->uh_sport, 1, pd->af, + pd->naf, 0); + + } else { + pf_change_ap(direction, pd->mp, pd->dst, + &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &sk->ext_gwy.addr, + uh->uh_dport, 1, pd->af, + pd->naf, 0); + + pf_change_ap(direction, pd->mp, pd->src, + &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &sk->gwy.addr, + sk->gwy.xport.port, 1, pd->af, + pd->naf, 0); + } + } else { + pf_change_ap(direction, pd->mp, pd->dst, + &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &sk->lan.addr, + sk->lan.xport.port, 1, + pd->af, pd->naf, 1); + } + } + m_copyback(m, off, sizeof (*uh), uh); + if (sk->af_lan != sk->af_gwy) + return (pf_do_nat64(sk, pd, m, off)); } - return (PF_PASS); } @@ -6524,14 +7301,18 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, { #pragma unused(h) struct pf_addr *saddr = pd->src, *daddr = pd->dst; + struct in_addr srcv4_inaddr = saddr->v4; u_int16_t icmpid = 0, *icmpsum; u_int8_t icmptype; int state_icmp = 0; struct pf_state_key_cmp key; + struct pf_state_key *sk; struct pf_app_state as; key.app_state = 0; + pd->off = off; + switch (pd->proto) { #if INET case IPPROTO_ICMP: @@ -6568,42 +7349,51 @@ pf_test_state_icmp(struct pf_state 
**state, int direction, struct pfi_kif *kif, * ICMP query/reply message not related to a TCP/UDP packet. * Search for an ICMP state. */ - key.af = pd->af; - key.proto = pd->proto; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.xport.port = 0; - key.gwy.xport.port = icmpid; - } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.xport.port = icmpid; - key.ext.xport.port = 0; - } + /* + * NAT64 requires protocol translation between ICMPv4 + * and ICMPv6. TCP and UDP do not require protocol + * translation. To avoid adding complexity just to + * handle ICMP(v4/v6), we always look up with + * proto = IPPROTO_ICMP on both the LAN and WAN sides + */ + key.proto = IPPROTO_ICMP; + key.af_lan = key.af_gwy = pd->af; + + PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy); + PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy); + key.ext_gwy.xport.port = 0; + key.gwy.xport.port = icmpid; + + PF_ACPY(&key.lan.addr, pd->src, key.af_lan); + PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan); + key.lan.xport.port = icmpid; + key.ext_lan.xport.port = 0; STATE_LOOKUP(); + sk = (*state)->state_key; (*state)->expire = pf_time_second(); (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE((*state)->state_key)) { + if (STATE_TRANSLATE(sk)) { + pd->naf = (pd->af == sk->af_lan) ? 
+ sk->af_gwy : sk->af_lan; if (direction == PF_OUT) { switch (pd->af) { #if INET case AF_INET: pf_change_a(&saddr->v4.s_addr, pd->ip_sum, - (*state)->state_key->gwy.addr.v4.s_addr, 0); + sk->gwy.addr.v4.s_addr, 0); pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( pd->hdr.icmp->icmp_cksum, icmpid, - (*state)->state_key->gwy.xport.port, 0); + sk->gwy.xport.port, 0); pd->hdr.icmp->icmp_id = - (*state)->state_key->gwy.xport.port; + sk->gwy.xport.port; m = pf_lazy_makewritable(pd, m, - off + ICMP_MINLEN); + off + ICMP_MINLEN); if (!m) return (PF_DROP); m_copyback(m, off, ICMP_MINLEN, @@ -6614,7 +7404,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, case AF_INET6: pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, - &(*state)->state_key->gwy.addr, 0); + &sk->gwy.addr, 0); m = pf_lazy_makewritable(pd, m, off + sizeof (struct icmp6_hdr)); if (!m) @@ -6629,35 +7419,62 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, switch (pd->af) { #if INET case AF_INET: - pf_change_a(&daddr->v4.s_addr, - pd->ip_sum, - (*state)->state_key->lan.addr.v4.s_addr, 0); - pd->hdr.icmp->icmp_cksum = - pf_cksum_fixup( - pd->hdr.icmp->icmp_cksum, icmpid, - (*state)->state_key->lan.xport.port, 0); - pd->hdr.icmp->icmp_id = - (*state)->state_key->lan.xport.port; + if (pd->naf != AF_INET) { + if (pf_translate_icmp_af( + AF_INET6, pd->hdr.icmp)) + return (PF_DROP); + + pd->proto = IPPROTO_ICMPV6; + + } else { + + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + sk->lan.addr.v4.s_addr, 0); + + pd->hdr.icmp->icmp_cksum = + pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, + icmpid, sk->lan.xport.port, 0); + + pd->hdr.icmp->icmp_id = + sk->lan.xport.port; + } + m = pf_lazy_makewritable(pd, m, off + ICMP_MINLEN); if (!m) return (PF_DROP); m_copyback(m, off, ICMP_MINLEN, - pd->hdr.icmp); + pd->hdr.icmp); + if (sk->af_lan != sk->af_gwy) + return (pf_do_nat64(sk, pd, m, + off)); break; #endif /* INET */ #if INET6 case AF_INET6: - pf_change_a6(daddr, - 
&pd->hdr.icmp6->icmp6_cksum, - &(*state)->state_key->lan.addr, 0); + if (pd->naf != AF_INET6) { + if (pf_translate_icmp_af( + AF_INET, pd->hdr.icmp6)) + return (PF_DROP); + + pd->proto = IPPROTO_ICMP; + } else { + pf_change_a6(daddr, + &pd->hdr.icmp6->icmp6_cksum, + &sk->lan.addr, 0); + } m = pf_lazy_makewritable(pd, m, off + sizeof (struct icmp6_hdr)); if (!m) return (PF_DROP); m_copyback(m, off, - sizeof (struct icmp6_hdr), - pd->hdr.icmp6); + sizeof (struct icmp6_hdr), + pd->hdr.icmp6); + if (sk->af_lan != sk->af_gwy) + return (pf_do_nat64(sk, pd, m, + off)); break; #endif /* INET6 */ } @@ -6671,8 +7488,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, * ICMP error message in response to a TCP/UDP packet. * Extract the inner TCP/UDP header and search for that state. */ - - struct pf_pdesc pd2; + struct pf_pdesc pd2; /* For inner (original) header */ #if INET struct ip h2; #endif /* INET */ @@ -6710,6 +7526,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, /* offset of protocol header that follows h2 */ off2 = ipoff2 + (h2.ip_hl << 2); + /* TODO */ + pd2.off = ipoff2 + (h2.ip_hl << 2); pd2.proto = h2.ip_p; pd2.src = (struct pf_addr *)&h2.ip_src; @@ -6769,6 +7587,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, break; } } while (!terminal); + /* TODO */ + pd2.off = ipoff2; break; #endif /* INET6 */ } @@ -6794,23 +7614,25 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, return (PF_DROP); } - key.af = pd2.af; key.proto = IPPROTO_TCP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.xport.port = th.th_dport; - key.gwy.xport.port = th.th_sport; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.xport.port = th.th_dport; - key.ext.xport.port = th.th_sport; - } + key.af_gwy = pd2.af; + 
PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy); + PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy); + key.ext_gwy.xport.port = th.th_dport; + key.gwy.xport.port = th.th_sport; + + key.af_lan = pd2.af; + PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan); + PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan); + key.lan.xport.port = th.th_dport; + key.ext_lan.xport.port = th.th_sport; STATE_LOOKUP(); - if (direction == (*state)->state_key->direction) { + sk = (*state)->state_key; + if ((direction == sk->direction) && + ((sk->af_lan == sk->af_gwy) || + (pd2.af == sk->af_lan))) { src = &(*state)->dst; dst = &(*state)->src; } else { @@ -6848,17 +7670,93 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, return (PF_DROP); } - if (STATE_TRANSLATE((*state)->state_key)) { + pd->naf = pd2.naf = (pd2.af == sk->af_lan) ? + sk->af_gwy : sk->af_lan; + + if (STATE_TRANSLATE(sk)) { + /* NAT64 case */ + if (sk->af_lan != sk->af_gwy) { + struct pf_state_host *saddr2, *daddr2; + + if (pd2.naf == sk->af_lan) { + saddr2 = &sk->lan; + daddr2 = &sk->ext_lan; + } else { + saddr2 = &sk->ext_gwy; + daddr2 = &sk->gwy; + } + + /* translate ICMP message types and codes */ + if (pf_translate_icmp_af(pd->naf, + pd->hdr.icmp)) + return (PF_DROP); + m = + pf_lazy_makewritable(pd, m, off2 + 8); + if (!m) + return (PF_DROP); + + m_copyback(m, pd->off, + sizeof(struct icmp6_hdr), + pd->hdr.icmp6); + + /* + * translate inner ip header within the + * ICMP message + */ + if (pf_change_icmp_af(m, ipoff2, pd, + &pd2, &saddr2->addr, &daddr2->addr, + pd->af, pd->naf)) + return (PF_DROP); + + if (pd->naf == AF_INET) + pd->proto = IPPROTO_ICMP; + else + pd->proto = IPPROTO_ICMPV6; + + /* + * translate inner tcp header within + * the ICMP message + */ + pf_change_ap(direction, NULL, pd2.src, + &th.th_sport, pd2.ip_sum, + &th.th_sum, &daddr2->addr, + saddr2->xport.port, 0, pd2.af, + pd2.naf, 0); + + pf_change_ap(direction, NULL, pd2.dst, + &th.th_dport, pd2.ip_sum, + &th.th_sum, &saddr2->addr, 
+ daddr2->xport.port, 0, pd2.af, + pd2.naf, 0); + + m_copyback(m, pd2.off, 8, &th); + + /* translate outer ip header */ + PF_ACPY(&pd->naddr, &daddr2->addr, + pd->naf); + PF_ACPY(&pd->ndaddr, &saddr2->addr, + pd->naf); + if (pd->af == AF_INET) { + memcpy(&pd->naddr.addr32[3], + &srcv4_inaddr, + sizeof(pd->naddr.addr32[3])); + return (pf_nat64_ipv4(m, off, + pd)); + } else { + return (pf_nat64_ipv6(m, off, + pd)); + } + } if (direction == PF_IN) { pf_change_icmp(pd2.src, &th.th_sport, - daddr, &(*state)->state_key->lan.addr, - (*state)->state_key->lan.xport.port, NULL, + daddr, &sk->lan.addr, + sk->lan.xport.port, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); } else { pf_change_icmp(pd2.dst, &th.th_dport, - saddr, &(*state)->state_key->gwy.addr, - (*state)->state_key->gwy.xport.port, NULL, + saddr, &sk->gwy.addr, + sk->gwy.xport.port, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); } @@ -6895,7 +7793,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, break; } case IPPROTO_UDP: { - struct udphdr uh; + struct udphdr uh; int dx, action; if (!pf_pull_hdr(m, off2, &uh, sizeof (uh), NULL, reason, pd2.af)) { @@ -6905,23 +7803,21 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, return (PF_DROP); } - key.af = pd2.af; - key.proto = IPPROTO_UDP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.xport.port = uh.uh_dport; - key.gwy.xport.port = uh.uh_sport; - dx = PF_IN; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.xport.port = uh.uh_dport; - key.ext.xport.port = uh.uh_sport; - dx = PF_OUT; - } + key.af_gwy = pd2.af; + PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy); + PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy); + key.ext_gwy.xport.port = uh.uh_dport; + key.gwy.xport.port = uh.uh_sport; + key.af_lan = pd2.af; + PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan); + 
PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan); + key.lan.xport.port = uh.uh_dport; + key.ext_lan.xport.port = uh.uh_sport; + + key.proto = IPPROTO_UDP; key.proto_variant = PF_EXTFILTER_APD; + dx = direction; if (ntohs(uh.uh_sport) == PF_IKE_PORT && ntohs(uh.uh_dport) == PF_IKE_PORT) { @@ -6977,17 +7873,94 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, if (pf_state_lookup_aux(state, kif, direction, &action)) return (action); - if (STATE_TRANSLATE((*state)->state_key)) { + sk = (*state)->state_key; + pd->naf = pd2.naf = (pd2.af == sk->af_lan) ? + sk->af_gwy : sk->af_lan; + + if (STATE_TRANSLATE(sk)) { + /* NAT64 case */ + if (sk->af_lan != sk->af_gwy) { + struct pf_state_host *saddr2, *daddr2; + + if (pd2.naf == sk->af_lan) { + saddr2 = &sk->lan; + daddr2 = &sk->ext_lan; + } else { + saddr2 = &sk->ext_gwy; + daddr2 = &sk->gwy; + } + + /* translate ICMP message */ + if (pf_translate_icmp_af(pd->naf, + pd->hdr.icmp)) + return (PF_DROP); + m = + pf_lazy_makewritable(pd, m, off2 + 8); + if (!m) + return (PF_DROP); + + m_copyback(m, pd->off, + sizeof(struct icmp6_hdr), + pd->hdr.icmp6); + + /* + * translate inner ip header within the + * ICMP message + */ + if (pf_change_icmp_af(m, ipoff2, pd, + &pd2, &saddr2->addr, &daddr2->addr, + pd->af, pd->naf)) + return (PF_DROP); + + if (pd->naf == AF_INET) + pd->proto = IPPROTO_ICMP; + else + pd->proto = IPPROTO_ICMPV6; + + /* + * translate inner udp header within + * the ICMP message + */ + pf_change_ap(direction, NULL, pd2.src, + &uh.uh_sport, pd2.ip_sum, + &uh.uh_sum, &daddr2->addr, + saddr2->xport.port, 0, pd2.af, + pd2.naf, 0); + + pf_change_ap(direction, NULL, pd2.dst, + &uh.uh_dport, pd2.ip_sum, + &uh.uh_sum, &saddr2->addr, + daddr2->xport.port, 0, pd2.af, + pd2.naf, 0); + + m_copyback(m, pd2.off, sizeof(uh), &uh); + + /* translate outer ip header */ + PF_ACPY(&pd->naddr, &daddr2->addr, + pd->naf); + PF_ACPY(&pd->ndaddr, &saddr2->addr, + pd->naf); + if (pd->af == AF_INET) { + 
memcpy(&pd->naddr.addr32[3], + &srcv4_inaddr, + sizeof(pd->naddr.addr32[3])); + return (pf_nat64_ipv4(m, off, + pd)); + } else { + return (pf_nat64_ipv6(m, off, + pd)); + } + } if (direction == PF_IN) { pf_change_icmp(pd2.src, &uh.uh_sport, - daddr, &(*state)->state_key->lan.addr, - (*state)->state_key->lan.xport.port, &uh.uh_sum, + daddr, &sk->lan.addr, + sk->lan.xport.port, &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); } else { pf_change_icmp(pd2.dst, &uh.uh_dport, - saddr, &(*state)->state_key->gwy.addr, - (*state)->state_key->gwy.xport.port, &uh.uh_sum, + saddr, &sk->gwy.addr, + sk->gwy.xport.port, &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); } @@ -7031,37 +8004,40 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, return (PF_DROP); } - key.af = pd2.af; key.proto = IPPROTO_ICMP; if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.xport.port = 0; + key.af_gwy = pd2.af; + PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy); + PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy); + key.ext_gwy.xport.port = 0; key.gwy.xport.port = iih.icmp_id; } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); + key.af_lan = pd2.af; + PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan); + PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan); key.lan.xport.port = iih.icmp_id; - key.ext.xport.port = 0; + key.ext_lan.xport.port = 0; } STATE_LOOKUP(); - if (STATE_TRANSLATE((*state)->state_key)) { + sk = (*state)->state_key; + if (STATE_TRANSLATE(sk)) { if (direction == PF_IN) { pf_change_icmp(pd2.src, &iih.icmp_id, - daddr, &(*state)->state_key->lan.addr, - (*state)->state_key->lan.xport.port, NULL, + daddr, &sk->lan.addr, + sk->lan.xport.port, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); } else { pf_change_icmp(pd2.dst, &iih.icmp_id, - saddr, &(*state)->state_key->gwy.addr, - (*state)->state_key->gwy.xport.port, NULL, + saddr, 
&sk->gwy.addr, + sk->gwy.xport.port, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); } - m = pf_lazy_makewritable(pd, m, off2 + ICMP_MINLEN); + m = pf_lazy_makewritable(pd, m, + off2 + ICMP_MINLEN); if (!m) return (PF_DROP); m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); @@ -7085,33 +8061,35 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, return (PF_DROP); } - key.af = pd2.af; key.proto = IPPROTO_ICMPV6; if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.xport.port = 0; + key.af_gwy = pd2.af; + PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy); + PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy); + key.ext_gwy.xport.port = 0; key.gwy.xport.port = iih.icmp6_id; } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); + key.af_lan = pd2.af; + PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan); + PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan); key.lan.xport.port = iih.icmp6_id; - key.ext.xport.port = 0; + key.ext_lan.xport.port = 0; } STATE_LOOKUP(); - if (STATE_TRANSLATE((*state)->state_key)) { + sk = (*state)->state_key; + if (STATE_TRANSLATE(sk)) { if (direction == PF_IN) { pf_change_icmp(pd2.src, &iih.icmp6_id, - daddr, &(*state)->state_key->lan.addr, - (*state)->state_key->lan.xport.port, NULL, + daddr, &sk->lan.addr, + sk->lan.xport.port, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); } else { pf_change_icmp(pd2.dst, &iih.icmp6_id, - saddr, &(*state)->state_key->gwy.addr, - (*state)->state_key->gwy.xport.port, NULL, + saddr, &sk->gwy.addr, + sk->gwy.xport.port, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); } @@ -7131,35 +8109,35 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } #endif /* INET6 */ default: { - key.af = pd2.af; key.proto = pd2.proto; if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.xport.port = 0; 
+ key.af_gwy = pd2.af; + PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy); + PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy); + key.ext_gwy.xport.port = 0; key.gwy.xport.port = 0; } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); + key.af_lan = pd2.af; + PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan); + PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan); key.lan.xport.port = 0; - key.ext.xport.port = 0; + key.ext_lan.xport.port = 0; } STATE_LOOKUP(); - if (STATE_TRANSLATE((*state)->state_key)) { + sk = (*state)->state_key; + if (STATE_TRANSLATE(sk)) { if (direction == PF_IN) { - pf_change_icmp(pd2.src, NULL, - daddr, &(*state)->state_key->lan.addr, - 0, NULL, - pd2.ip_sum, icmpsum, - pd->ip_sum, 0, pd2.af); + pf_change_icmp(pd2.src, NULL, daddr, + &sk->lan.addr, 0, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); } else { - pf_change_icmp(pd2.dst, NULL, - saddr, &(*state)->state_key->gwy.addr, - 0, NULL, - pd2.ip_sum, icmpsum, - pd->ip_sum, 0, pd2.af); + pf_change_icmp(pd2.dst, NULL, saddr, + &sk->gwy.addr, 0, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); } switch (pd2.af) { #if INET @@ -7172,14 +8150,14 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #if INET6 case AF_INET6: m = pf_lazy_makewritable(pd, m, - ipoff2 + sizeof (h2_6)); + ipoff2 + sizeof (h2_6)); if (!m) return (PF_DROP); m_copyback(m, off, - sizeof (struct icmp6_hdr), - pd->hdr.icmp6); + sizeof (struct icmp6_hdr), + pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof (h2_6), - &h2_6); + &h2_6); break; #endif /* INET6 */ } @@ -7203,17 +8181,18 @@ pf_test_state_grev1(struct pf_state **state, int direction, struct mbuf *m; key.app_state = 0; - key.af = pd->af; key.proto = IPPROTO_GRE; key.proto_variant = PF_GRE_PPTP_VARIANT; if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); + key.af_gwy = pd->af; + PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy); + 
PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy); key.gwy.xport.call_id = grev1->call_id; } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.ext.xport.call_id = grev1->call_id; + key.af_lan = pd->af; + PF_ACPY(&key.lan.addr, pd->src, key.af_lan); + PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan); + key.ext_lan.xport.call_id = grev1->call_id; } STATE_LOOKUP(); @@ -7305,16 +8284,17 @@ pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif, int action; memset(&key, 0, sizeof (key)); - key.af = pd->af; key.proto = IPPROTO_ESP; if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); + key.af_gwy = pd->af; + PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy); + PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy); key.gwy.xport.spi = esp->spi; } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.ext.xport.spi = esp->spi; + key.af_lan = pd->af; + PF_ACPY(&key.lan.addr, pd->src, key.af_lan); + PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan); + key.ext_lan.xport.spi = esp->spi; } *state = pf_find_state(kif, &key, direction); @@ -7347,7 +8327,7 @@ pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif, *state = s; } } else { - key.ext.xport.spi = 0; + key.ext_lan.xport.spi = 0; s = pf_find_state(kif, &key, direction); if (s) { @@ -7355,7 +8335,7 @@ pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif, RB_REMOVE(pf_state_tree_lan_ext, &pf_statetbl_lan_ext, sk); - sk->ext.xport.spi = esp->spi; + sk->ext_lan.xport.spi = esp->spi; if (RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext, sk)) @@ -7464,18 +8444,19 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_state_key_cmp key; key.app_state = 0; - key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - 
PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.xport.port = 0; + key.af_gwy = pd->af; + PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy); + PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy); + key.ext_gwy.xport.port = 0; key.gwy.xport.port = 0; } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); + key.af_lan = pd->af; + PF_ACPY(&key.lan.addr, pd->src, key.af_lan); + PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan); key.lan.xport.port = 0; - key.ext.xport.port = 0; + key.ext_lan.xport.port = 0; } STATE_LOOKUP(); @@ -8130,12 +9111,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, pd.pf_mtag = pf_get_mtag(m); pd.src = (struct pf_addr *)&h->ip_src; pd.dst = (struct pf_addr *)&h->ip_dst; - PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET); + PF_ACPY(&pd.baddr, pd.src, AF_INET); + PF_ACPY(&pd.bdaddr, pd.dst, AF_INET); pd.ip_sum = &h->ip_sum; pd.proto = h->ip_p; pd.proto_variant = 0; pd.af = AF_INET; pd.tos = h->ip_tos; + pd.ttl = h->ip_ttl; pd.tot_len = ntohs(h->ip_len); pd.eh = eh; @@ -8175,7 +9158,8 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, pd.src = (struct pf_addr *)&h->ip_src; pd.dst = (struct pf_addr *)&h->ip_dst; - PF_ACPY(&pd.baddr, dir == PF_OUT ? 
pd.src : pd.dst, AF_INET); + PF_ACPY(&pd.baddr, pd.src, AF_INET); + PF_ACPY(&pd.bdaddr, pd.dst, AF_INET); pd.ip_sum = &h->ip_sum; pd.proto = h->ip_p; pd.proto_variant = 0; @@ -8184,6 +9168,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, pd.pf_mtag = pf_get_mtag(m); pd.af = AF_INET; pd.tos = h->ip_tos; + pd.ttl = h->ip_ttl; pd.sc = MBUF_SCIDX(mbuf_get_service_class(m)); pd.tot_len = ntohs(h->ip_len); pd.eh = eh; @@ -8239,6 +9224,8 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, goto done; action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); + if (action == PF_NAT64) + goto done; if (pd.lmw < 0) goto done; PF_APPLE_UPDATE_PDESC_IPv4(); @@ -8281,6 +9268,8 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, #endif /* DUMMYNET */ action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd, &reason); + if (action == PF_NAT64) + goto done; if (pd.lmw < 0) goto done; PF_APPLE_UPDATE_PDESC_IPv4(); @@ -8316,6 +9305,8 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, #endif /* DUMMYNET */ action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); + if (action == PF_NAT64) + goto done; if (pd.lmw < 0) goto done; PF_APPLE_UPDATE_PDESC_IPv4(); @@ -8326,7 +9317,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, r = s->rule.ptr; a = s->anchor.ptr; log = s->log; - } else if (s == NULL) + } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, NULL); break; @@ -8440,6 +9431,11 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, } done: + if (action == PF_NAT64) { + *m0 = NULL; + return (action); + } + *m0 = pd.mp; PF_APPLE_UPDATE_PDESC_IPv4(); @@ -8664,12 +9660,14 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, pd.pf_mtag = pf_get_mtag(m); pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; - PF_ACPY(&pd.baddr, dir == PF_OUT ? 
pd.src : pd.dst, AF_INET6); + PF_ACPY(&pd.baddr, pd.src, AF_INET6); + PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6); pd.ip_sum = NULL; pd.af = AF_INET6; pd.proto = nxt; pd.proto_variant = 0; pd.tos = 0; + pd.ttl = h->ip6_hlim; pd.sc = MBUF_SCIDX(mbuf_get_service_class(m)); pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); pd.eh = eh; @@ -8719,10 +9717,12 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; - PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6); + PF_ACPY(&pd.baddr, pd.src, AF_INET6); + PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6); pd.ip_sum = NULL; pd.af = AF_INET6; pd.tos = 0; + pd.ttl = h->ip6_hlim; pd.tot_len = ntohs(h->ip6_plen) + sizeof (struct ip6_hdr); pd.eh = eh; @@ -8829,6 +9829,8 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, goto done; action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); + if (action == PF_NAT64) + goto done; if (pd.lmw < 0) goto done; PF_APPLE_UPDATE_PDESC_IPv6(); @@ -8839,7 +9841,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, r = s->rule.ptr; a = s->anchor.ptr; log = s->log; - } else if (s == NULL) + } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, NULL); break; @@ -8871,6 +9873,8 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, #endif /* DUMMYNET */ action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd, &reason); + if (action == PF_NAT64) + goto done; if (pd.lmw < 0) goto done; PF_APPLE_UPDATE_PDESC_IPv6(); @@ -8881,7 +9885,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, r = s->rule.ptr; a = s->anchor.ptr; log = s->log; - } else if (s == NULL) + } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, NULL); break; @@ -8906,6 +9910,8 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, #endif /* DUMMYNET */ action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); + if (action == 
PF_NAT64) + goto done; if (pd.lmw < 0) goto done; PF_APPLE_UPDATE_PDESC_IPv6(); @@ -9031,6 +10037,11 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, } done: + if (action == PF_NAT64) { + *m0 = NULL; + return (action); + } + *m0 = pd.mp; PF_APPLE_UPDATE_PDESC_IPv6(); diff --git a/bsd/net/pf_ioctl.c b/bsd/net/pf_ioctl.c index bfad2e2ae..15bfb0724 100644 --- a/bsd/net/pf_ioctl.c +++ b/bsd/net/pf_ioctl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Apple Inc. All rights reserved. + * Copyright (c) 2007-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -240,6 +240,7 @@ u_int32_t altq_allowed = 0; #endif /* PF_ALTQ */ u_int32_t pf_hash_seed; +int16_t pf_nat64_configured = 0; /* * These are the pf enabled reference counting variables @@ -1420,12 +1421,15 @@ pf_state_export(struct pfsync_state *sp, struct pf_state_key *sk, sp->lan.xport = sk->lan.xport; sp->gwy.addr = sk->gwy.addr; sp->gwy.xport = sk->gwy.xport; - sp->ext.addr = sk->ext.addr; - sp->ext.xport = sk->ext.xport; + sp->ext_lan.addr = sk->ext_lan.addr; + sp->ext_lan.xport = sk->ext_lan.xport; + sp->ext_gwy.addr = sk->ext_gwy.addr; + sp->ext_gwy.xport = sk->ext_gwy.xport; sp->proto_variant = sk->proto_variant; sp->tag = s->tag; sp->proto = sk->proto; - sp->af = sk->af; + sp->af_lan = sk->af_lan; + sp->af_gwy = sk->af_gwy; sp->direction = sk->direction; sp->flowhash = sk->flowhash; @@ -1473,12 +1477,15 @@ pf_state_import(struct pfsync_state *sp, struct pf_state_key *sk, sk->lan.xport = sp->lan.xport; sk->gwy.addr = sp->gwy.addr; sk->gwy.xport = sp->gwy.xport; - sk->ext.addr = sp->ext.addr; - sk->ext.xport = sp->ext.xport; + sk->ext_lan.addr = sp->ext_lan.addr; + sk->ext_lan.xport = sp->ext_lan.xport; + sk->ext_gwy.addr = sp->ext_gwy.addr; + sk->ext_gwy.xport = sp->ext_gwy.xport; sk->proto_variant = sp->proto_variant; s->tag = sp->tag; sk->proto = sp->proto; - sk->af = sp->af; + sk->af_lan = sp->af_lan; + sk->af_gwy = sp->af_gwy; sk->direction = sp->direction; 
sk->flowhash = pf_calc_state_key_flowhash(sk); @@ -3087,8 +3094,10 @@ pf_rule_setup(struct pfioc_rule *pr, struct pf_rule *rule, } pf_mv_pool(&pf_pabuf, &rule->rpool.list); + if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || - (rule->action == PF_BINAT)) && rule->anchor == NULL) || + (rule->action == PF_BINAT) || (rule->action == PF_NAT64)) && + rule->anchor == NULL) || (rule->rt > PF_FASTROUTE)) && (TAILQ_FIRST(&rule->rpool.list) == NULL)) error = EINVAL; @@ -3097,6 +3106,10 @@ pf_rule_setup(struct pfioc_rule *pr, struct pf_rule *rule, pf_rm_rule(NULL, rule); return (error); } + /* For a NAT64 rule the rule's address family is AF_INET6 whereas + * the address pool's family will be AF_INET + */ + rule->rpool.af = (rule->action == PF_NAT64) ? AF_INET: rule->af; rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); rule->evaluations = rule->packets[0] = rule->packets[1] = rule->bytes[0] = rule->bytes[1] = 0; @@ -3175,6 +3188,9 @@ pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr, struct proc *p ruleset->rules[rs_num].inactive.rcount++; if (rule->rule_flag & PFRULE_PFM) pffwrules++; + + if (rule->action == PF_NAT64) + atomic_add_16(&pf_nat64_configured, 1); break; } @@ -3575,6 +3591,8 @@ pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr, struct proc *p pf_rule_copyout(rule, &pr->rule); if (rule->rule_flag & PFRULE_PFM) pffwrules++; + if (rule->action == PF_NAT64) + atomic_add_16(&pf_nat64_configured, 1); break; } @@ -3597,6 +3615,8 @@ pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr, struct proc *p } else pf_delete_rule_by_owner(pr->rule.owner, req_dev); pr->nr = pffwrules; + if (pr->rule.action == PF_NAT64) + atomic_add_16(&pf_nat64_configured, -1); break; } @@ -3691,21 +3711,21 @@ pfioctl_ioc_state_kill(u_long cmd, struct pfioc_state_kill *psk, struct proc *p) if (sk->direction == PF_OUT) { src = &sk->lan; - dst = &sk->ext; + dst = &sk->ext_lan; } else { - src = &sk->ext; + src = &sk->ext_lan; dst = &sk->lan; } 
- if ((!psk->psk_af || sk->af == psk->psk_af) && + if ((!psk->psk_af || sk->af_lan == psk->psk_af) && (!psk->psk_proto || psk->psk_proto == sk->proto) && PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, - &src->addr, sk->af) && + &src->addr, sk->af_lan) && PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, - &dst->addr, sk->af) && + &dst->addr, sk->af_lan) && (pf_match_xport(psk->psk_proto, psk->psk_proto_variant, &psk->psk_src.xport, &src->xport)) && @@ -3891,7 +3911,6 @@ pfioctl_ioc_natlook(u_long cmd, struct pfioc_natlook *pnl, struct proc *p) struct pf_state_key_cmp key; int m = 0, direction = pnl->direction; - key.af = pnl->af; key.proto = pnl->proto; key.proto_variant = pnl->proto_variant; @@ -3910,20 +3929,24 @@ pfioctl_ioc_natlook(u_long cmd, struct pfioc_natlook *pnl, struct proc *p) * state tree. */ if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, &pnl->daddr, pnl->af); - memcpy(&key.ext.xport, &pnl->dxport, - sizeof (key.ext.xport)); + key.af_gwy = pnl->af; + PF_ACPY(&key.ext_gwy.addr, &pnl->daddr, + pnl->af); + memcpy(&key.ext_gwy.xport, &pnl->dxport, + sizeof (key.ext_gwy.xport)); PF_ACPY(&key.gwy.addr, &pnl->saddr, pnl->af); memcpy(&key.gwy.xport, &pnl->sxport, sizeof (key.gwy.xport)); state = pf_find_state_all(&key, PF_IN, &m); } else { + key.af_lan = pnl->af; PF_ACPY(&key.lan.addr, &pnl->daddr, pnl->af); memcpy(&key.lan.xport, &pnl->dxport, sizeof (key.lan.xport)); - PF_ACPY(&key.ext.addr, &pnl->saddr, pnl->af); - memcpy(&key.ext.xport, &pnl->sxport, - sizeof (key.ext.xport)); + PF_ACPY(&key.ext_lan.addr, &pnl->saddr, + pnl->af); + memcpy(&key.ext_lan.xport, &pnl->sxport, + sizeof (key.ext_lan.xport)); state = pf_find_state_all(&key, PF_OUT, &m); } if (m > 1) @@ -3932,7 +3955,7 @@ pfioctl_ioc_natlook(u_long cmd, struct pfioc_natlook *pnl, struct proc *p) sk = state->state_key; if (direction == PF_IN) { PF_ACPY(&pnl->rsaddr, &sk->lan.addr, - sk->af); + sk->af_lan); 
memcpy(&pnl->rsxport, &sk->lan.xport, sizeof (pnl->rsxport)); PF_ACPY(&pnl->rdaddr, &pnl->daddr, @@ -3941,7 +3964,7 @@ pfioctl_ioc_natlook(u_long cmd, struct pfioc_natlook *pnl, struct proc *p) sizeof (pnl->rdxport)); } else { PF_ACPY(&pnl->rdaddr, &sk->gwy.addr, - sk->af); + sk->af_gwy); memcpy(&pnl->rdxport, &sk->gwy.xport, sizeof (pnl->rdxport)); PF_ACPY(&pnl->rsaddr, &pnl->saddr, diff --git a/bsd/net/pf_ruleset.c b/bsd/net/pf_ruleset.c index 27121f779..dc78423c3 100644 --- a/bsd/net/pf_ruleset.c +++ b/bsd/net/pf_ruleset.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011 Apple Inc. All rights reserved. + * Copyright (c) 2007-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,6 +32,7 @@ /* * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2002,2003 Henning Brauer + * NAT64 - Copyright (c) 2010 Viagenie Inc. (http://www.viagenie.ca) * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -166,6 +167,8 @@ pf_get_ruleset_number(u_int8_t action) break; case PF_RDR: case PF_NORDR: + case PF_NAT64: + case PF_NONAT64: return (PF_RULESET_RDR); break; #if DUMMYNET @@ -237,7 +240,7 @@ pf_find_ruleset_with_owner(const char *path, const char *owner, int is_anchor, *error = EINVAL; return (NULL); } else { - if ((owner && anchor->owner && (!strcmp(owner, anchor->owner))) + if ((owner && (!strcmp(owner, anchor->owner))) || (is_anchor && !strcmp(anchor->owner, ""))) return (&anchor->ruleset); *error = EPERM; diff --git a/bsd/net/pf_table.c b/bsd/net/pf_table.c index 427cc6567..17ff0ab1a 100644 --- a/bsd/net/pf_table.c +++ b/bsd/net/pf_table.c @@ -1096,6 +1096,11 @@ pfr_walktree(struct radix_node *rn, void *arg) pfr_copyout_addr(&as.pfras_a, ke); +#if !defined(__LP64__) + /* Initialized to avoid potential info leak to + * userspace */ + as._pad = 0; +#endif bcopy(ke->pfrke_packets, as.pfras_packets, sizeof (as.pfras_packets)); bcopy(ke->pfrke_bytes, as.pfras_bytes, diff --git a/bsd/net/pfkeyv2.h 
b/bsd/net/pfkeyv2.h index 880eb1ecb..97d6280fa 100644 --- a/bsd/net/pfkeyv2.h +++ b/bsd/net/pfkeyv2.h @@ -106,7 +106,8 @@ you leave this credit intact on any copies of this file. #define SADB_GETSASTAT 23 #define SADB_X_SPDENABLE 24 /* by policy id */ #define SADB_X_SPDDISABLE 25 /* by policy id */ -#define SADB_MAX 25 +#define SADB_MIGRATE 26 +#define SADB_MAX 26 struct sadb_msg { u_int8_t sadb_msg_version; @@ -143,7 +144,11 @@ struct sadb_sa_2 { u_int16_t sadb_reserved0; u_int16_t sadb_sa_natt_interval; }; - u_int32_t sadb_reserved1; + + union { + u_int32_t sadb_reserved1; + u_int16_t sadb_sa_natt_offload_interval; + }; }; #endif /* PRIVATE */ @@ -376,7 +381,10 @@ struct sadb_sastat { #define SADB_X_EXT_ADDR_RANGE_SRC_END 24 #define SADB_X_EXT_ADDR_RANGE_DST_START 25 #define SADB_X_EXT_ADDR_RANGE_DST_END 26 -#define SADB_EXT_MAX 26 +#define SADB_EXT_MIGRATE_ADDRESS_SRC 27 +#define SADB_EXT_MIGRATE_ADDRESS_DST 28 +#define SADB_X_EXT_MIGRATE_IPSECIF 29 +#define SADB_EXT_MAX 29 #define SADB_SATYPE_UNSPEC 0 #define SADB_SATYPE_AH 2 @@ -423,6 +431,7 @@ struct sadb_sastat { #define SADB_X_EALG_RIJNDAELCBC 12 #define SADB_X_EALG_AESCBC 12 #define SADB_X_EALG_AES 12 +#define SADB_X_EALG_AES_GCM 13 /* private allocations should use 249-255 (RFC2407) */ #if 1 /*nonstandard */ @@ -468,6 +477,10 @@ struct sadb_sastat { #define SADB_X_EXT_NATT_KEEPALIVE_OFFLOAD 0x8000 #endif /* PRIVATE */ +#ifdef PRIVATE +#define NATT_KEEPALIVE_OFFLOAD_INTERVAL 0x1 +#endif + #if 1 #define SADB_X_EXT_RAWCPI 0x0080 /* use well known CPI (IPComp) */ #endif diff --git a/bsd/net/pfvar.h b/bsd/net/pfvar.h index 29a7716ab..11e771bf5 100644 --- a/bsd/net/pfvar.h +++ b/bsd/net/pfvar.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Apple Inc. All rights reserved. + * Copyright (c) 2007-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -31,6 +31,7 @@ /* * Copyright (c) 2001 Daniel Hartmeier + * NAT64 - Copyright (c) 2010 Viagenie Inc. 
(http://www.viagenie.ca) * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -82,7 +83,7 @@ extern "C" { #include #include - +#include #ifdef KERNEL #include #include @@ -155,7 +156,7 @@ struct pf_esp_hdr; enum { PF_INOUT, PF_IN, PF_OUT }; enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, - PF_DUMMYNET, PF_NODUMMYNET }; + PF_DUMMYNET, PF_NODUMMYNET, PF_NAT64, PF_NONAT64 }; enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_DUMMYNET, PF_RULESET_MAX }; @@ -543,6 +544,7 @@ struct pf_pool { u_int16_t proxy_port[2]; u_int8_t port_op; u_int8_t opts; + sa_family_t af; }; @@ -974,8 +976,10 @@ struct pf_app_state { struct pf_state_key_cmp { struct pf_state_host lan; struct pf_state_host gwy; - struct pf_state_host ext; - sa_family_t af; + struct pf_state_host ext_lan; + struct pf_state_host ext_gwy; + sa_family_t af_lan; + sa_family_t af_gwy; u_int8_t proto; u_int8_t direction; u_int8_t proto_variant; @@ -987,8 +991,10 @@ TAILQ_HEAD(pf_statelist, pf_state); struct pf_state_key { struct pf_state_host lan; struct pf_state_host gwy; - struct pf_state_host ext; - sa_family_t af; + struct pf_state_host ext_lan; + struct pf_state_host ext_gwy; + sa_family_t af_lan; + sa_family_t af_gwy; u_int8_t proto; u_int8_t direction; u_int8_t proto_variant; @@ -1097,7 +1103,8 @@ struct pfsync_state { char ifname[IFNAMSIZ]; struct pfsync_state_host lan; struct pfsync_state_host gwy; - struct pfsync_state_host ext; + struct pfsync_state_host ext_lan; + struct pfsync_state_host ext_gwy; struct pfsync_state_peer src; struct pfsync_state_peer dst; struct pf_addr rt_addr; @@ -1114,7 +1121,8 @@ struct pfsync_state { u_int32_t bytes[2][2]; u_int32_t creatorid; u_int16_t tag; - sa_family_t af; + sa_family_t af_lan; + sa_family_t af_gwy; u_int8_t proto; u_int8_t direction; u_int8_t log; @@ -1412,8 +1420,12 @@ struct pf_pdesc { 
struct pf_esp_hdr *esp; void *any; } hdr; - struct pf_addr baddr; /* address before translation */ - struct pf_addr naddr; /* address after translation */ + + /* XXX TODO: Change baddr and naddr to *saddr */ + struct pf_addr baddr; /* src address before translation */ + struct pf_addr bdaddr; /* dst address before translation */ + struct pf_addr naddr; /* src address after translation */ + struct pf_addr ndaddr; /* dst address after translation */ struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ struct pf_addr *src; struct pf_addr *dst; @@ -1423,6 +1435,8 @@ struct pf_pdesc { int lmw; /* lazy writable offset */ struct pf_mtag *pf_mtag; u_int16_t *ip_sum; + u_int32_t off; /* protocol header offset */ + u_int32_t hdrlen; /* protocol header length */ u_int32_t p_len; /* total length of payload */ u_int16_t flags; /* Let SCRUB trigger behavior in */ /* state code. Easier than tags */ @@ -1430,8 +1444,10 @@ struct pf_pdesc { #define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */ #define PFDESC_IP_FRAG 0x0004 /* This is a fragment */ sa_family_t af; + sa_family_t naf; /* address family after translation */ u_int8_t proto; u_int8_t tos; + u_int8_t ttl; u_int8_t proto_variant; mbuf_svc_class_t sc; /* mbuf service class (MBUF_SVC) */ u_int32_t pktflags; /* mbuf packet flags (PKTF) */ @@ -2364,6 +2380,7 @@ extern struct pf_anchor pf_main_anchor; #define pf_main_ruleset pf_main_anchor.ruleset extern int pf_is_enabled; +extern int16_t pf_nat64_configured; #define PF_IS_ENABLED (pf_is_enabled != 0) extern u_int32_t pf_hash_seed; diff --git a/bsd/net/pktap.c b/bsd/net/pktap.c index 81d0c35fb..e02810ac4 100644 --- a/bsd/net/pktap.c +++ b/bsd/net/pktap.c @@ -531,8 +531,7 @@ pktap_setdrvspec(ifnet_t ifp, struct ifdrv64 *ifd) break; case PKTAP_FILTER_PARAM_IF_NAME: - if (x_filter->filter_param_if_name == 0 || - strncmp(x_filter->filter_param_if_name, PKTAP_IFNAME, + if (strncmp(x_filter->filter_param_if_name, PKTAP_IFNAME, strlen(PKTAP_IFNAME)) == 0) { 
error = EINVAL; break; @@ -758,12 +757,11 @@ pktap_set_procinfo(struct pktap_header *hdr, struct so_procinfo *soprocinfo) /* * When not delegated, the effective pid is the same as the real pid */ - if (soprocinfo->spi_epid != soprocinfo->spi_pid) { + if (soprocinfo->spi_delegated != 0) { hdr->pth_flags |= PTH_FLAG_PROC_DELEGATED; hdr->pth_epid = soprocinfo->spi_epid; proc_name(soprocinfo->spi_epid, hdr->pth_ecomm, MAXCOMLEN); - if (soprocinfo->spi_epid != 0) - uuid_copy(hdr->pth_uuid, soprocinfo->spi_euuid); + uuid_copy(hdr->pth_euuid, soprocinfo->spi_euuid); } } @@ -807,25 +805,19 @@ pktap_fill_proc_info(struct pktap_header *hdr, protocol_family_t proto, * For outgoing, do the lookup only if there's an * associated socket as indicated by the flowhash */ - if (outgoing != 0 && (m->m_pkthdr.pkt_flags & - (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC)) == (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) && - m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) { + if (outgoing != 0 && m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) { /* * To avoid lock ordering issues we delay the process lookup * to the BPF read as we cannot * assume the socket lock is unlocked on output */ - if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_RAWSOCK) || - m->m_pkthdr.pkt_proto == IPPROTO_TCP || - m->m_pkthdr.pkt_proto == IPPROTO_UDP) { - found = 0; - hdr->pth_flags |= PTH_FLAG_DELAY_PKTAP; - hdr->pth_flowid = m->m_pkthdr.pkt_flowid; - if (m->m_pkthdr.pkt_flags & PKTF_FLOW_RAWSOCK) - hdr->pth_ipproto = IPPROTO_RAW; - else - hdr->pth_ipproto = m->m_pkthdr.pkt_proto; - } + found = 0; + hdr->pth_flags |= PTH_FLAG_DELAY_PKTAP; + hdr->pth_flowid = m->m_pkthdr.pkt_flowid; + if (m->m_pkthdr.pkt_flags & PKTF_FLOW_RAWSOCK) + hdr->pth_ipproto = IPPROTO_RAW; + else + hdr->pth_ipproto = m->m_pkthdr.pkt_proto; } else if (outgoing == 0) { struct inpcb *inp = NULL; diff --git a/bsd/net/pktsched/Makefile b/bsd/net/pktsched/Makefile index a5d9cba8e..f091673a3 100644 --- a/bsd/net/pktsched/Makefile +++ b/bsd/net/pktsched/Makefile @@ -24,9 +24,9 @@ 
EXPORT_MI_LIST = ${INSTALL_MI_LIST} ${KERNELFILES} EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} -INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES} +INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/net/pktsched/pktsched.c b/bsd/net/pktsched/pktsched.c index eda1ae420..4f7d32a75 100644 --- a/bsd/net/pktsched/pktsched.c +++ b/bsd/net/pktsched/pktsched.c @@ -231,7 +231,6 @@ pktsched_teardown(struct ifclassq *ifq) error = ENXIO; break; } - return (error); } diff --git a/bsd/net/pktsched/pktsched_cbq.c b/bsd/net/pktsched/pktsched_cbq.c index 0c8663899..41b1f8ede 100644 --- a/bsd/net/pktsched/pktsched_cbq.c +++ b/bsd/net/pktsched/pktsched_cbq.c @@ -570,6 +570,7 @@ cbq_enqueue(cbq_state_t *cbqp, struct rm_class *cl, struct mbuf *m, /* successfully queued. */ ++cbqp->cbq_qlen; IFCQ_INC_LEN(ifq); + IFCQ_INC_BYTES(ifq, len); return (ret); } @@ -587,6 +588,7 @@ cbq_dequeue(cbq_state_t *cbqp, cqdq_op_t op) if (m && op == CLASSQDQ_REMOVE) { --cbqp->cbq_qlen; /* decrement # of packets in cbq */ IFCQ_DEC_LEN(ifq); + IFCQ_DEC_BYTES(ifq, m_pktlen(m)); IFCQ_XMIT_ADD(ifq, 1, m_pktlen(m)); /* Update the class. */ diff --git a/bsd/net/pktsched/pktsched_fairq.c b/bsd/net/pktsched/pktsched_fairq.c index 28d7363e8..7e61e04c1 100644 --- a/bsd/net/pktsched/pktsched_fairq.c +++ b/bsd/net/pktsched/pktsched_fairq.c @@ -709,6 +709,7 @@ fairq_enqueue(struct fairq_if *fif, struct fairq_class *cl, struct mbuf *m, } } IFCQ_INC_LEN(ifq); + IFCQ_INC_BYTES(ifq, len); /* successfully queued. 
*/ return (ret); @@ -747,6 +748,7 @@ fairq_dequeue(struct fairq_if *fif, cqdq_op_t op) fif->fif_poll_cache = NULL; if (m != NULL) { IFCQ_DEC_LEN(ifq); + IFCQ_DEC_BYTES(ifq, m_pktlen(m)); IFCQ_XMIT_ADD(ifq, 1, m_pktlen(m)); PKTCNTR_ADD(&best_cl->cl_xmitcnt, 1, m_pktlen(m)); } @@ -797,6 +799,7 @@ fairq_dequeue(struct fairq_if *fif, cqdq_op_t op) m = fairq_getq(best_cl, cur_time); if (m != NULL) { IFCQ_DEC_LEN(ifq); + IFCQ_DEC_BYTES(ifq, m_pktlen(m)); IFCQ_XMIT_ADD(ifq, 1, m_pktlen(m)); PKTCNTR_ADD(&best_cl->cl_xmitcnt, 1, m_pktlen(m)); diff --git a/bsd/net/pktsched/pktsched_hfsc.c b/bsd/net/pktsched/pktsched_hfsc.c index 894cf7e28..365d16f01 100644 --- a/bsd/net/pktsched/pktsched_hfsc.c +++ b/bsd/net/pktsched/pktsched_hfsc.c @@ -808,6 +808,7 @@ hfsc_enqueue(struct hfsc_if *hif, struct hfsc_class *cl, struct mbuf *m, } } IFCQ_INC_LEN(ifq); + IFCQ_INC_BYTES(ifq, len); cl->cl_hif->hif_packets++; /* successfully queued. */ @@ -897,6 +898,7 @@ hfsc_dequeue(struct hfsc_if *hif, cqdq_op_t op) len = m_pktlen(m); cl->cl_hif->hif_packets--; IFCQ_DEC_LEN(ifq); + IFCQ_DEC_BYTES(ifq, len); IFCQ_XMIT_ADD(ifq, 1, len); PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, 1, len); diff --git a/bsd/net/pktsched/pktsched_priq.c b/bsd/net/pktsched/pktsched_priq.c index 23fa87fc2..78da2f1b1 100644 --- a/bsd/net/pktsched/pktsched_priq.c +++ b/bsd/net/pktsched/pktsched_priq.c @@ -613,6 +613,7 @@ priq_enqueue(struct priq_if *pif, struct priq_class *cl, struct mbuf *m, } } IFCQ_INC_LEN(ifq); + IFCQ_INC_BYTES(ifq, len); /* class is now active; indicate it as such */ if (!pktsched_bit_tst(pri, &pif->pif_bitmap)) @@ -657,6 +658,7 @@ priq_dequeue(struct priq_if *pif, cqdq_op_t op) len = m_pktlen(m); IFCQ_DEC_LEN(ifq); + IFCQ_DEC_BYTES(ifq, len); if (qempty(&cl->cl_q)) { cl->cl_period++; /* class is now inactive; indicate it as such */ diff --git a/bsd/net/pktsched/pktsched_qfq.c b/bsd/net/pktsched/pktsched_qfq.c index 2e0428233..bc7cc2215 100644 --- a/bsd/net/pktsched/pktsched_qfq.c +++ 
b/bsd/net/pktsched/pktsched_qfq.c @@ -977,6 +977,7 @@ qfq_dequeue(struct qfq_if *qif, cqdq_op_t op) #endif /* QFQ_DEBUG */ IFCQ_DEC_LEN(ifq); + IFCQ_DEC_BYTES(ifq, len); if (qempty(&cl->cl_q)) cl->cl_period++; PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len); @@ -1122,6 +1123,7 @@ qfq_enqueue(struct qfq_if *qif, struct qfq_class *cl, struct mbuf *m, } } IFCQ_INC_LEN(ifq); + IFCQ_INC_BYTES(ifq, len); #if QFQ_DEBUG qif->qif_queued++; diff --git a/bsd/net/pktsched/pktsched_tcq.c b/bsd/net/pktsched/pktsched_tcq.c index ecadb8bbd..5a57824e6 100644 --- a/bsd/net/pktsched/pktsched_tcq.c +++ b/bsd/net/pktsched/pktsched_tcq.c @@ -582,6 +582,7 @@ tcq_enqueue(struct tcq_if *tif, struct tcq_class *cl, struct mbuf *m, } } IFCQ_INC_LEN(ifq); + IFCQ_INC_BYTES(ifq, len); /* successfully queued. */ return (ret); @@ -625,6 +626,7 @@ tcq_dequeue_cl(struct tcq_if *tif, struct tcq_class *cl, m = tcq_getq(cl); if (m != NULL) { IFCQ_DEC_LEN(ifq); + IFCQ_DEC_BYTES(ifq, m_pktlen(m)); if (qempty(&cl->cl_q)) cl->cl_period++; PKTCNTR_ADD(&cl->cl_xmitcnt, 1, m_pktlen(m)); diff --git a/bsd/net/radix.h b/bsd/net/radix.h index 464ef2349..d48399aae 100644 --- a/bsd/net/radix.h +++ b/bsd/net/radix.h @@ -101,6 +101,7 @@ struct radix_node { struct radix_node *rn_twin; struct radix_node *rn_ybro; #endif + }; #define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey diff --git a/bsd/net/raw_usrreq.c b/bsd/net/raw_usrreq.c index 79dcda95a..657f47d06 100644 --- a/bsd/net/raw_usrreq.c +++ b/bsd/net/raw_usrreq.c @@ -178,7 +178,7 @@ void raw_ctlinput(int cmd, __unused struct sockaddr *arg, __unused void *dummy) { - if (cmd < 0 || cmd > PRC_NCMDS) + if (cmd < 0 || cmd >= PRC_NCMDS) return; /* INCOMPLETE */ } diff --git a/bsd/net/route.c b/bsd/net/route.c index 8f077340b..d13a1994f 100644 --- a/bsd/net/route.c +++ b/bsd/net/route.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -237,6 +237,8 @@ static struct zone *rte_zone; /* special zone for rtentry */ #define RTD_INUSE 0xFEEDFACE /* entry is in use */ #define RTD_FREED 0xDEADBEEF /* entry is freed */ +#define MAX_SCOPE_ADDR_STR_LEN (MAX_IPv6_STR_LEN + 6) + /* For gdb */ __private_extern__ unsigned int ctrace_stack_size = CTRACE_STACK_SIZE; __private_extern__ unsigned int ctrace_hist_size = CTRACE_HIST_SIZE; @@ -282,8 +284,8 @@ static inline struct rtentry *rte_alloc_debug(void); static inline void rte_free_debug(struct rtentry *); static inline void rte_lock_debug(struct rtentry_dbg *); static inline void rte_unlock_debug(struct rtentry_dbg *); -static void rt_maskedcopy(struct sockaddr *, - struct sockaddr *, struct sockaddr *); +static void rt_maskedcopy(const struct sockaddr *, + struct sockaddr *, const struct sockaddr *); static void rtable_init(void **); static inline void rtref_audit(struct rtentry_dbg *); static inline void rtunref_audit(struct rtentry_dbg *); @@ -297,8 +299,6 @@ static void rtalloc_ign_common_locked(struct route *, uint32_t, unsigned int); static inline void sin6_set_ifscope(struct sockaddr *, unsigned int); static inline void sin6_set_embedded_ifscope(struct sockaddr *, unsigned int); static inline unsigned int sin6_get_embedded_ifscope(struct sockaddr *); -static struct sockaddr *sa_copy(struct sockaddr *, struct sockaddr_storage *, - unsigned int *); static struct sockaddr *ma_copy(int, struct sockaddr *, struct sockaddr_storage *, unsigned int); static struct sockaddr *sa_trim(struct sockaddr *, int); @@ -560,7 +560,7 @@ sin6_get_embedded_ifscope(struct sockaddr *sa) * In any case, the effective scope ID value is returned to the caller via * pifscope, if it is non-NULL. 
*/ -static struct sockaddr * +struct sockaddr * sa_copy(struct sockaddr *src, struct sockaddr_storage *dst, unsigned int *pifscope) { @@ -589,7 +589,13 @@ sa_copy(struct sockaddr *src, struct sockaddr_storage *dst, eifscope = sin6_get_embedded_ifscope(SA(dst)); if (eifscope != IFSCOPE_NONE && ifscope == IFSCOPE_NONE) ifscope = eifscope; - sin6_set_ifscope(SA(dst), ifscope); + if (ifscope != IFSCOPE_NONE) { + /* Set ifscope from pifscope or eifscope */ + sin6_set_ifscope(SA(dst), ifscope); + } else { + /* If sin6_scope_id has a value, use that one */ + ifscope = sin6_get_ifscope(SA(dst)); + } /* * If sin6_scope_id is set but the address doesn't * contain the equivalent embedded value, set it. @@ -952,6 +958,38 @@ rtalloc1_common_locked(struct sockaddr *dst, int report, uint32_t ignflags, * reference held during rtrequest. */ rtfree_locked(rt); + + /* + * If the newly created cloned route is a direct host route + * then also check if it is to a router or not. + * If it is, then set the RTF_ROUTER flag on the host route + * for the gateway. + * + * XXX It is possible for the default route to be created post + * cloned route creation of router's IP. + * We can handle that corner case by special handing for RTM_ADD + * of default route. 
+ */ + if ((newrt->rt_flags & (RTF_HOST | RTF_LLINFO)) == + (RTF_HOST | RTF_LLINFO)) { + struct rtentry *defrt = NULL; + struct sockaddr_storage def_key; + + bzero(&def_key, sizeof(def_key)); + def_key.ss_len = rt_key(newrt)->sa_len; + def_key.ss_family = rt_key(newrt)->sa_family; + + defrt = rtalloc1_scoped_locked((struct sockaddr *)&def_key, + 0, 0, newrt->rt_ifp->if_index); + + if (defrt) { + if (equal(rt_key(newrt), defrt->rt_gateway)) { + newrt->rt_flags |= RTF_ROUTER; + } + rtfree_locked(defrt); + } + } + if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) { /* * If the new route specifies it be @@ -2659,23 +2697,23 @@ rt_set_gwroute(struct rtentry *rt, struct sockaddr *dst, struct rtentry *gwrt) } static void -rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, - struct sockaddr *netmask) +rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, + const struct sockaddr *netmask) { - u_char *cp1 = (u_char *)src; - u_char *cp2 = (u_char *)dst; - u_char *cp3 = (u_char *)netmask; - u_char *cplim = cp2 + *cp3; - u_char *cplim2 = cp2 + *cp1; + const char *netmaskp = &netmask->sa_data[0]; + const char *srcp = &src->sa_data[0]; + char *dstp = &dst->sa_data[0]; + const char *maskend = (char *)dst + + MIN(netmask->sa_len, src->sa_len); + const char *srcend = (char *)dst + src->sa_len; + + dst->sa_len = src->sa_len; + dst->sa_family = src->sa_family; - *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ - cp3 += 2; - if (cplim > cplim2) - cplim = cplim2; - while (cp2 < cplim) - *cp2++ = *cp1++ & *cp3++; - if (cp2 < cplim2) - bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); + while (dstp < maskend) + *dstp++ = *srcp++ & *netmaskp++; + if (dstp < srcend) + memset(dstp, 0, (size_t)(srcend - dstp)); } /* @@ -2734,6 +2772,29 @@ node_lookup_default(int af) rnh->rnh_lookup(&sin6_def, NULL, rnh)); } +boolean_t +rt_ifa_is_dst(struct sockaddr *dst, struct ifaddr *ifa) +{ + boolean_t result = FALSE; + + if (ifa == NULL || ifa->ifa_addr == NULL) + return 
(result); + + IFA_LOCK_SPIN(ifa); + + if (dst->sa_family == ifa->ifa_addr->sa_family && + ((dst->sa_family == AF_INET && + SIN(dst)->sin_addr.s_addr == + SIN(ifa->ifa_addr)->sin_addr.s_addr) || + (dst->sa_family == AF_INET6 && + SA6_ARE_ADDR_EQUAL(SIN6(dst), SIN6(ifa->ifa_addr))))) + result = TRUE; + + IFA_UNLOCK(ifa); + + return (result); +} + /* * Common routine to lookup/match a route. It invokes the lookup/matchaddr * callback which could be address family-specific. The main difference @@ -2765,6 +2826,8 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst, boolean_t dontcare; int af = dst->sa_family; struct sockaddr_storage dst_ss, mask_ss; + char s_dst[MAX_IPv6_STR_LEN], s_netmask[MAX_IPv6_STR_LEN]; + char dbuf[MAX_SCOPE_ADDR_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; VERIFY(!coarse || ifscope == IFSCOPE_NONE); @@ -2818,6 +2881,26 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst, netmask = ma_copy(af, netmask, &mask_ss, ifscope); dontcare = (ifscope == IFSCOPE_NONE); + if (rt_verbose) { + if (af == AF_INET) + (void) inet_ntop(af, &SIN(dst)->sin_addr.s_addr, + s_dst, sizeof (s_dst)); + else + (void) inet_ntop(af, &SIN6(dst)->sin6_addr, + s_dst, sizeof (s_dst)); + + if (netmask != NULL && af == AF_INET) + (void) inet_ntop(af, &SIN(netmask)->sin_addr.s_addr, + s_netmask, sizeof (s_netmask)); + else if (netmask != NULL && af == AF_INET6) + (void) inet_ntop(af, &SIN6(netmask)->sin6_addr, + s_netmask, sizeof (s_netmask)); + else + *s_netmask = '\0'; + printf("%s (%d, %d, %s, %s, %u)\n", + __func__, lookup_only, coarse, s_dst, s_netmask, ifscope); + } + /* * Scoped route lookup: * @@ -2852,6 +2935,16 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst, */ if (rn != NULL) { struct rtentry *rt = RT(rn); + + if (rt_verbose) { + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + printf("%s unscoped search %p to %s->%s->%s ifa_ifp %s\n", + __func__, rt, + dbuf, gbuf, + (rt->rt_ifp != NULL) 
? rt->rt_ifp->if_xname : "", + (rt->rt_ifa->ifa_ifp != NULL) ? + rt->rt_ifa->ifa_ifp->if_xname : ""); + } if (!(rt->rt_ifp->if_flags & IFF_LOOPBACK)) { if (rt->rt_ifp->if_index != ifscope) { /* @@ -2860,11 +2953,15 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst, * and do a more specific scoped search using * the scope of the found route. Otherwise, * start again from scratch. + * + * For loopback scope we keep the unscoped + * route for local addresses */ rn = NULL; if (dontcare) ifscope = rt->rt_ifp->if_index; - else + else if (ifscope != lo_ifp->if_index || + rt_ifa_is_dst(dst, rt->rt_ifa) == FALSE) rn0 = NULL; } else if (!(rt->rt_flags & RTF_IFSCOPE)) { /* @@ -2884,9 +2981,21 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst, * interface scope as the one requested. The following will result * in searching for the longest prefix scoped match. */ - if (rn == NULL) + if (rn == NULL) { rn = node_lookup(dst, netmask, ifscope); + if (rt_verbose && rn != NULL) { + struct rtentry *rt = RT(rn); + + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + printf("%s scoped search %p to %s->%s->%s ifa %s\n", + __func__, rt, + dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + (rt->rt_ifa->ifa_ifp != NULL) ? + rt->rt_ifa->ifa_ifp->if_xname : ""); + } + } /* * Use the original result if either of the following is true: * @@ -2909,8 +3018,9 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst, * route as long as the interface portion satistifes the scope. 
*/ if (rn == NULL && (rn = node_lookup_default(af)) != NULL && - RT(rn)->rt_ifp->if_index != ifscope) + RT(rn)->rt_ifp->if_index != ifscope) { rn = NULL; + } if (rn != NULL) { /* @@ -2930,6 +3040,23 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst, } } + if (rt_verbose) { + if (rn == NULL) + printf("%s %u return NULL\n", __func__, ifscope); + else { + struct rtentry *rt = RT(rn); + + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + + printf("%s %u return %p to %s->%s->%s ifa_ifp %s\n", + __func__, ifscope, rt, + dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + (rt->rt_ifa->ifa_ifp != NULL) ? + rt->rt_ifa->ifa_ifp->if_xname : ""); + } + } + return (RT(rn)); } @@ -3418,6 +3545,7 @@ rte_free(struct rtentry *p) panic("rte_free: rte=%p refcnt=%d non-zero\n", p, p->rt_refcnt); /* NOTREACHED */ } + zfree(rte_zone, p); } @@ -3845,9 +3973,20 @@ rt_str4(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen) { VERIFY(rt_key(rt)->sa_family == AF_INET); - if (ds != NULL) + if (ds != NULL) { (void) inet_ntop(AF_INET, &SIN(rt_key(rt))->sin_addr.s_addr, ds, dslen); + if (dslen >= MAX_SCOPE_ADDR_STR_LEN && + SINIFSCOPE(rt_key(rt))->sin_scope_id != IFSCOPE_NONE) { + char scpstr[16]; + + snprintf(scpstr, sizeof(scpstr), "@%u", + SINIFSCOPE(rt_key(rt))->sin_scope_id); + + strlcat(ds, scpstr, dslen); + } + } + if (gs != NULL) { if (rt->rt_flags & RTF_GATEWAY) { (void) inet_ntop(AF_INET, @@ -3866,9 +4005,20 @@ rt_str6(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen) { VERIFY(rt_key(rt)->sa_family == AF_INET6); - if (ds != NULL) + if (ds != NULL) { (void) inet_ntop(AF_INET6, &SIN6(rt_key(rt))->sin6_addr, ds, dslen); + if (dslen >= MAX_SCOPE_ADDR_STR_LEN && + SIN6IFSCOPE(rt_key(rt))->sin6_scope_id != IFSCOPE_NONE) { + char scpstr[16]; + + snprintf(scpstr, sizeof(scpstr), "@%u", + SIN6IFSCOPE(rt_key(rt))->sin6_scope_id); + + strlcat(ds, scpstr, dslen); + } + } + if (gs != NULL) { if 
(rt->rt_flags & RTF_GATEWAY) { (void) inet_ntop(AF_INET6, diff --git a/bsd/net/route.h b/bsd/net/route.h index d382013da..8ddef833d 100644 --- a/bsd/net/route.h +++ b/bsd/net/route.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -193,6 +193,7 @@ struct rtentry { (struct rtentry *, struct ifnet_llreach_info *); void (*rt_llinfo_purge)(struct rtentry *); /* llinfo purge fn */ void (*rt_llinfo_free)(void *); /* link level info free function */ + void (*rt_llinfo_refresh) (struct rtentry *); /* expedite llinfo refresh */ struct rt_metrics rt_rmx; /* metrics used by rx'ing protocols */ #define rt_use rt_rmx.rmx_pksent struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ @@ -350,7 +351,8 @@ struct rt_msghdr_ext { #define RTM_DELETE 0x2 /* Delete Route */ #define RTM_CHANGE 0x3 /* Change Metrics or flags */ #define RTM_GET 0x4 /* Report Metrics */ -#define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */ +#define RTM_LOSING 0x5 /* RTM_LOSING is no longer generated by xnu + and is deprecated */ #define RTM_REDIRECT 0x6 /* Told to use different route */ #define RTM_MISS 0x7 /* Lookup failed on this address */ #define RTM_LOCK 0x8 /* fix specified metrics */ @@ -383,6 +385,9 @@ struct rt_msghdr_ext { #define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ #define RTV_RTT 0x40 /* init or lock _rtt */ #define RTV_RTTVAR 0x80 /* init or lock _rttvar */ +#ifdef PRIVATE +#define RTV_REFRESH_HOST 0x100 /* init host route to expedite refresh */ +#endif /* * Bitmask values for rtm_addrs. 
@@ -575,5 +580,8 @@ extern void rt_str(struct rtentry *, char *, uint32_t, char *, uint32_t); extern const char *rtm2str(int); extern void route_copyin(struct route *, struct route *, size_t); extern void route_copyout(struct route *, const struct route *, size_t); +extern boolean_t rt_ifa_is_dst(struct sockaddr *, struct ifaddr *); +extern struct sockaddr *sa_copy(struct sockaddr *, struct sockaddr_storage *, + unsigned int *); #endif /* BSD_KERNEL_PRIVATE */ #endif /* _NET_ROUTE_H_ */ diff --git a/bsd/net/rtsock.c b/bsd/net/rtsock.c index 2c2ae2dcd..a7a9f16db 100644 --- a/bsd/net/rtsock.c +++ b/bsd/net/rtsock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -124,7 +124,7 @@ static int rts_shutdown(struct socket *); static int rts_sockaddr(struct socket *, struct sockaddr **); static int route_output(struct mbuf *, struct socket *); -static void rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *); +static int rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *); static void rt_getmetrics(struct rtentry *, struct rt_metrics *); static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *, struct sockaddr *, unsigned int); @@ -481,7 +481,7 @@ route_output(struct mbuf *m, struct socket *so) rt_setif(saved_nrt, info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA], info.rti_info[RTAX_GATEWAY], ifscope); - rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, saved_nrt); + (void)rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, saved_nrt); saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); saved_nrt->rt_rmx.rmx_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); @@ -613,7 +613,12 @@ route_output(struct mbuf *m, struct socket *so) info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA], info.rti_info[RTAX_GATEWAY], ifscope); - rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, rt); + if ((error = rt_setmetrics(rtm->rtm_inits, 
+ &rtm->rtm_rmx, rt))) { + int tmp = error; + RT_UNLOCK(rt); + senderr(tmp); + } if (info.rti_info[RTAX_GENMASK]) rt->rt_genmask = info.rti_info[RTAX_GENMASK]; /* FALLTHRU */ @@ -705,41 +710,54 @@ rt_setexpire(struct rtentry *rt, uint64_t expiry) } } -static void +static int rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out) { - struct timeval caltime; - - getmicrotime(&caltime); - + if (!(which & RTV_REFRESH_HOST)) { + struct timeval caltime; + getmicrotime(&caltime); #define metric(f, e) if (which & (f)) out->rt_rmx.e = in->e; - metric(RTV_RPIPE, rmx_recvpipe); - metric(RTV_SPIPE, rmx_sendpipe); - metric(RTV_SSTHRESH, rmx_ssthresh); - metric(RTV_RTT, rmx_rtt); - metric(RTV_RTTVAR, rmx_rttvar); - metric(RTV_HOPCOUNT, rmx_hopcount); - metric(RTV_MTU, rmx_mtu); - metric(RTV_EXPIRE, rmx_expire); + metric(RTV_RPIPE, rmx_recvpipe); + metric(RTV_SPIPE, rmx_sendpipe); + metric(RTV_SSTHRESH, rmx_ssthresh); + metric(RTV_RTT, rmx_rtt); + metric(RTV_RTTVAR, rmx_rttvar); + metric(RTV_HOPCOUNT, rmx_hopcount); + metric(RTV_MTU, rmx_mtu); + metric(RTV_EXPIRE, rmx_expire); #undef metric + if (out->rt_rmx.rmx_expire > 0) { + /* account for system time change */ + getmicrotime(&caltime); + out->base_calendartime += + NET_CALCULATE_CLOCKSKEW(caltime, + out->base_calendartime, + net_uptime(), out->base_uptime); + rt_setexpire(out, + out->rt_rmx.rmx_expire - + out->base_calendartime + + out->base_uptime); + } else { + rt_setexpire(out, 0); + } - if (out->rt_rmx.rmx_expire > 0) { - /* account for system time change */ - getmicrotime(&caltime); - out->base_calendartime += - NET_CALCULATE_CLOCKSKEW(caltime, - out->base_calendartime, - net_uptime(), out->base_uptime); - rt_setexpire(out, - out->rt_rmx.rmx_expire - - out->base_calendartime + - out->base_uptime); + VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0); + VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0); } else { - rt_setexpire(out, 0); - } + /* Only RTV_REFRESH_HOST must be set */ + if 
((which & ~RTV_REFRESH_HOST) || + (out->rt_flags & RTF_STATIC) || + !(out->rt_flags & RTF_LLINFO)) { + return (EINVAL); + } - VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0); - VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0); + if (out->rt_llinfo_refresh == NULL) { + return (ENOTSUP); + } + + out->rt_llinfo_refresh(out); + } + return (0); } static void @@ -983,7 +1001,7 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo) struct rt_msghdr *rtm; struct mbuf *m; int i; - int len, dlen; + int len, dlen, off; switch (type) { @@ -1004,8 +1022,6 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo) default: len = sizeof (struct rt_msghdr); } - if (len > MCLBYTES) - panic("rt_msg1"); m = m_gethdr(M_DONTWAIT, MT_DATA); if (m && len > MHLEN) { MCLGET(m, M_DONTWAIT); @@ -1020,6 +1036,7 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo) m->m_pkthdr.rcvif = NULL; rtm = mtod(m, struct rt_msghdr *); bzero((caddr_t)rtm, len); + off = len; for (i = 0; i < RTAX_MAX; i++) { struct sockaddr *sa, *hint; uint8_t ssbuf[SOCK_MAXADDRLEN + 1]; @@ -1048,9 +1065,10 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo) } rtinfo->rti_addrs |= (1 << i); - dlen = ROUNDUP32(sa->sa_len); - m_copyback(m, len, dlen, (caddr_t)sa); - len += dlen; + dlen = sa->sa_len; + m_copyback(m, off, dlen, (caddr_t)sa); + len = off + dlen; + off += ROUNDUP32(dlen); } if (m->m_pkthdr.len != len) { m_freem(m); @@ -1067,7 +1085,7 @@ rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w, kauth_cred_t* credp) { int i; - int len, dlen, second_time = 0; + int len, dlen, rlen, second_time = 0; caddr_t cp0; rtinfo->rti_addrs = 0; @@ -1143,12 +1161,15 @@ rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w, } rtinfo->rti_addrs |= (1 << i); - dlen = ROUNDUP32(sa->sa_len); + dlen = sa->sa_len; + rlen = ROUNDUP32(dlen); if (cp) { - bcopy((caddr_t)sa, cp, (unsigned)dlen); - cp += dlen; + bcopy((caddr_t)sa, cp, (size_t)dlen); + if (dlen != rlen) + bzero(cp + dlen, rlen - dlen); 
+ cp += rlen; } - len += dlen; + len += rlen; } if (cp == NULL && w != NULL && !second_time) { struct walkarg *rw = w; diff --git a/bsd/netinet/Makefile b/bsd/netinet/Makefile index 03bb36728..9ab2a3192 100644 --- a/bsd/netinet/Makefile +++ b/bsd/netinet/Makefile @@ -19,27 +19,37 @@ KERNELFILES = \ kpi_ipfilter.h in_arp.h PRIVATE_DATAFILES = \ + flow_divert_proto.h \ + igmp_var.h \ + in.h \ + in_gif.h \ + in_pcb.h \ + ip.h \ + ip_compat.h \ ip_dummynet.h \ ip_flowid.h \ - ip_fw.h ip_fw2.h \ + ip_fw.h \ + ip_fw2.h \ + mptcp_var.h \ + tcp.h \ tcp_debug.h \ - in_gif.h ip_compat.h \ - flow_divert_proto.h \ - mptcp_var.h + tcp_var.h \ + tcp_cache.h \ + udp.h PRIVATE_KERNELFILES = ${KERNELFILES} \ - ip_ecn.h ip_encap.h + ip_ecn.h ip_encap.h INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = netinet -EXPORT_MI_LIST = ${DATAFILES} ${KERNELFILES} +EXPORT_MI_LIST = ${DATAFILES} ${KERNELFILES} EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} -INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES} +INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} +INSTALL_KF_MI_LCL_LIST = $(sort ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES}) include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/netinet/flow_divert.c b/bsd/netinet/flow_divert.c index 697016c49..76e29f8d6 100644 --- a/bsd/netinet/flow_divert.c +++ b/bsd/netinet/flow_divert.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014 Apple Inc. All rights reserved. + * Copyright (c) 2012-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -57,6 +57,7 @@ #include #include #if INET6 +#include #include #endif /* INET6 */ #include @@ -149,13 +150,40 @@ static kern_ctl_ref g_flow_divert_kctl_ref = NULL; static struct protosw g_flow_divert_in_protosw; static struct pr_usrreqs g_flow_divert_in_usrreqs; +static struct protosw g_flow_divert_in_udp_protosw; +static struct pr_usrreqs g_flow_divert_in_udp_usrreqs; #if INET6 static struct ip6protosw g_flow_divert_in6_protosw; static struct pr_usrreqs g_flow_divert_in6_usrreqs; +static struct ip6protosw g_flow_divert_in6_udp_protosw; +static struct pr_usrreqs g_flow_divert_in6_udp_usrreqs; #endif /* INET6 */ static struct protosw *g_tcp_protosw = NULL; static struct ip6protosw *g_tcp6_protosw = NULL; +static struct protosw *g_udp_protosw = NULL; +static struct ip6protosw *g_udp6_protosw = NULL; + +static errno_t +flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup); + +static errno_t +flow_divert_inp_to_sockaddr(const struct inpcb *inp, struct sockaddr **local_socket); + +static boolean_t +flow_divert_is_sockaddr_valid(struct sockaddr *addr); + +static int +flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr); + +struct sockaddr * +flow_divert_get_buffered_target_address(mbuf_t buffer); + +static boolean_t +flow_divert_has_pcb_local_address(const struct inpcb *inp); + +static void +flow_divert_disconnect_socket(struct socket *so); static inline int flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b) @@ -210,12 +238,11 @@ flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group) static errno_t flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit) { - int error = 0; + errno_t error = 0; struct flow_divert_pcb *exist = NULL; struct flow_divert_group *group; static uint32_t g_nextkey = 1; static uint32_t g_hash_seed = 0; - errno_t result = 0; int try_count = 0; if (ctl_unit == 0 
|| ctl_unit >= GROUP_COUNT_MAX) { @@ -277,7 +304,7 @@ flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit) FDRETAIN(fd_cb); /* The group now has a reference */ } else { fd_cb->hash = 0; - result = EEXIST; + error = EEXIST; } socket_unlock(fd_cb->so, 0); @@ -286,7 +313,7 @@ flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit) lck_rw_done(&g_flow_divert_group_lck); socket_lock(fd_cb->so, 0); - return result; + return error; } static struct flow_divert_pcb * @@ -371,10 +398,10 @@ flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf } static int -flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, size_t length, const void *value) +flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, uint32_t length, const void *value) { - size_t net_length = htonl(length); - int error = 0; + uint32_t net_length = htonl(length); + int error = 0; error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT); if (error) { @@ -400,10 +427,10 @@ flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, size_t length, const static int flow_divert_packet_find_tlv(mbuf_t packet, int offset, uint8_t type, int *err, int next) { - size_t cursor = offset; - int error = 0; - size_t curr_length; - uint8_t curr_type; + size_t cursor = offset; + int error = 0; + uint32_t curr_length; + uint8_t curr_type; *err = 0; @@ -435,11 +462,11 @@ flow_divert_packet_find_tlv(mbuf_t packet, int offset, uint8_t type, int *err, i } static int -flow_divert_packet_get_tlv(mbuf_t packet, int offset, uint8_t type, size_t buff_len, void *buff, size_t *val_size) +flow_divert_packet_get_tlv(mbuf_t packet, int offset, uint8_t type, size_t buff_len, void *buff, uint32_t *val_size) { - int error = 0; - size_t length; - int tlv_offset; + int error = 0; + uint32_t length; + int tlv_offset; tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0); if (tlv_offset < 0) { @@ -778,8 +805,9 @@ 
flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, si return current; } +#define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp" static uint16_t -flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes) +flow_divert_trie_search(struct flow_divert_trie *trie, uint8_t *string_bytes) { uint16_t current = trie->root; uint16_t string_idx = 0; @@ -796,6 +824,10 @@ flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_byt if (node_idx == node_end) { if (string_bytes[string_idx] == '\0') { return current; /* Got an exact match */ + } else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) && + 0 == strncmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) { + string_bytes[string_idx] = '\0'; + return current; /* Got an apple webclip id prefix match */ } else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) { next = TRIE_CHILD(trie, current, string_bytes[string_idx]); } @@ -841,7 +873,7 @@ flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_t packet, Boolean en if (fd_cb->group == NULL) { fd_cb->so->so_error = ECONNABORTED; - soisdisconnected(fd_cb->so); + flow_divert_disconnect_socket(fd_cb->so); return ECONNABORTED; } @@ -873,6 +905,7 @@ static int flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, mbuf_t connect_packet) { int error = 0; + int flow_type = 0; error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, @@ -882,6 +915,23 @@ flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, mbu goto done; } + if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) { + flow_type = FLOW_DIVERT_FLOW_TYPE_TCP; + } else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) { + flow_type = FLOW_DIVERT_FLOW_TYPE_UDP; + } else { + error = EINVAL; + goto done; + } + error = flow_divert_packet_append_tlv(connect_packet, + FLOW_DIVERT_TLV_FLOW_TYPE, + sizeof(flow_type), + &flow_type); + + if (error) { + goto done; + } + if 
(fd_cb->so->so_flags & SOF_DELEGATED) { error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_PID, @@ -923,33 +973,27 @@ flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, mbu fd_cb->connect_token = NULL; } else { uint32_t ctl_unit = htonl(fd_cb->control_group_unit); - int port; error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit); if (error) { goto done; } - error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, to->sa_len, to); - if (error) { - goto done; - } - - if (to->sa_family == AF_INET) { - port = ntohs((satosin(to))->sin_port); - } -#if INET6 - else { - port = ntohs((satosin6(to))->sin6_port); - } -#endif - - error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port); + error = flow_divert_append_target_endpoint_tlv(connect_packet, to); if (error) { goto done; } } + if (fd_cb->local_address != NULL) { + /* socket is bound. 
*/ + error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR, + sizeof(struct sockaddr_storage), fd_cb->local_address); + if (error) { + goto done; + } + } + error = flow_divert_send_packet(fd_cb, connect_packet, TRUE); if (error) { goto done; @@ -972,7 +1016,7 @@ flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb) goto done; } - rbuff_space = sbspace(&fd_cb->so->so_rcv); + rbuff_space = fd_cb->so->so_rcv.sb_hiwat; if (rbuff_space < 0) { rbuff_space = 0; } @@ -992,7 +1036,7 @@ flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb) done: if (error && packet != NULL) { - mbuf_free(packet); + mbuf_freem(packet); } return error; @@ -1092,12 +1136,12 @@ flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb) } if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) { - soisdisconnected(fd_cb->so); + flow_divert_disconnect_socket(fd_cb->so); } } static errno_t -flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len, Boolean force) +flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len, struct sockaddr *toaddr, Boolean force) { mbuf_t packet; mbuf_t last; @@ -1109,15 +1153,22 @@ flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t return error; } + if (toaddr != NULL) { + error = flow_divert_append_target_endpoint_tlv(packet, toaddr); + if (error) { + FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error); + return error; + } + } + last = m_last(packet); mbuf_setnext(last, data); mbuf_pkthdr_adjustlen(packet, data_len); - error = flow_divert_send_packet(fd_cb, packet, force); if (error) { mbuf_setnext(last, NULL); - mbuf_free(packet); + mbuf_freem(packet); } else { fd_cb->bytes_sent += data_len; flow_divert_add_data_statistics(fd_cb, data_len, TRUE); @@ -1147,28 +1198,72 @@ flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force) to_send = fd_cb->send_window; } - while (sent < 
to_send) { - mbuf_t data; - size_t data_len; + if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) { + while (sent < to_send) { + mbuf_t data; + size_t data_len; - data_len = to_send - sent; - if (data_len > FLOW_DIVERT_CHUNK_SIZE) { - data_len = FLOW_DIVERT_CHUNK_SIZE; - } + data_len = to_send - sent; + if (data_len > FLOW_DIVERT_CHUNK_SIZE) { + data_len = FLOW_DIVERT_CHUNK_SIZE; + } - error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data); - if (error) { - FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error); - break; - } + error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data); + if (error) { + FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error); + break; + } - error = flow_divert_send_data_packet(fd_cb, data, data_len, force); - if (error) { - mbuf_free(data); - break; - } + error = flow_divert_send_data_packet(fd_cb, data, data_len, NULL, force); + if (error) { + mbuf_freem(data); + break; + } - sent += data_len; + sent += data_len; + } + sbdrop(&fd_cb->so->so_snd, sent); + sowwakeup(fd_cb->so); + } else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) { + mbuf_t data; + mbuf_t m; + size_t data_len; + + while(buffer) { + struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer); + + m = buffer; + if (toaddr != NULL) { + /* look for data in the chain */ + do { + m = m->m_next; + if (m != NULL && m->m_type == MT_DATA) { + break; + } + } while(m); + if (m == NULL) { + /* unexpected */ + FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain."); + goto move_on; + } + } + data_len = mbuf_pkthdr_len(m); + FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %u", data_len); + error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data); + if (error) { + FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error); + break; + } + error = flow_divert_send_data_packet(fd_cb, data, data_len, toaddr, force); + if (error) { + mbuf_freem(data); + break; + } + sent += data_len; +move_on: + buffer = buffer->m_nextpkt; + (void) 
sbdroprecord(&(fd_cb->so->so_snd)); + } } if (sent > 0) { @@ -1178,19 +1273,14 @@ flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force) } else { fd_cb->send_window = 0; } - sbdrop(&fd_cb->so->so_snd, sent); - sowwakeup(fd_cb->so); } } static int -flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data) +flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data, struct sockaddr *toaddr) { size_t to_send = mbuf_pkthdr_len(data); - size_t sent = 0; - int error = 0; - mbuf_t remaining_data = data; - mbuf_t pkt_data = NULL; + int error = 0; if (to_send > fd_cb->send_window) { to_send = fd_cb->send_window; @@ -1200,57 +1290,94 @@ flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data) to_send = 0; /* If the send buffer is non-empty, then we can't send anything */ } - while (sent < to_send) { - size_t pkt_data_len; + if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) { + size_t sent = 0; + mbuf_t remaining_data = data; + mbuf_t pkt_data = NULL; + while (sent < to_send) { + size_t pkt_data_len; + + pkt_data = remaining_data; - pkt_data = remaining_data; + if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) { + pkt_data_len = FLOW_DIVERT_CHUNK_SIZE; + } else { + pkt_data_len = to_send - sent; + } + + if (pkt_data_len < mbuf_pkthdr_len(pkt_data)) { + error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data); + if (error) { + FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error); + pkt_data = NULL; + break; + } + } else { + remaining_data = NULL; + } + + error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len, NULL, FALSE); - if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) { - pkt_data_len = FLOW_DIVERT_CHUNK_SIZE; - error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data); if (error) { - FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error); - pkt_data = NULL; break; } - } else { - pkt_data_len = to_send - sent; - remaining_data = NULL; - } - error = flow_divert_send_data_packet(fd_cb, 
pkt_data, pkt_data_len, FALSE); - - if (error) { - break; + pkt_data = NULL; + sent += pkt_data_len; } - pkt_data = NULL; - sent += pkt_data_len; - } + fd_cb->send_window -= sent; - fd_cb->send_window -= sent; + error = 0; - error = 0; - - if (pkt_data != NULL) { - if (sbspace(&fd_cb->so->so_snd) > 0) { - if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) { - FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n", - fd_cb->so->so_snd.sb_cc, fd_cb->send_window); + if (pkt_data != NULL) { + if (sbspace(&fd_cb->so->so_snd) > 0) { + if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) { + FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n", + fd_cb->so->so_snd.sb_cc, fd_cb->send_window); + } + } else { + error = ENOBUFS; } - } else { - error = ENOBUFS; } - } - if (remaining_data != NULL) { - if (sbspace(&fd_cb->so->so_snd) > 0) { - if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) { - FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n", - fd_cb->so->so_snd.sb_cc, fd_cb->send_window); + if (remaining_data != NULL) { + if (sbspace(&fd_cb->so->so_snd) > 0) { + if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) { + FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n", + fd_cb->so->so_snd.sb_cc, fd_cb->send_window); + } + } else { + error = ENOBUFS; + } + } + } else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) { + if (to_send) { + error = flow_divert_send_data_packet(fd_cb, data, to_send, toaddr, FALSE); + if (error) { + FDLOG(LOG_ERR, fd_cb, "flow_divert_send_data_packet failed. 
send data size = %u", to_send); + } else { + fd_cb->send_window -= to_send; } } else { - error = ENOBUFS; + /* buffer it */ + if (sbspace(&fd_cb->so->so_snd) >= (int)mbuf_pkthdr_len(data)) { + if (toaddr != NULL) { + if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &error)) { + FDLOG(LOG_ERR, fd_cb, + "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d\n", + fd_cb->so->so_snd.sb_cc, fd_cb->send_window, error); + } + } else { + if (!sbappendrecord(&fd_cb->so->so_snd, data)) { + FDLOG(LOG_ERR, fd_cb, + "sbappendrecord failed. send buffer size = %u, send_window = %u, error = %d\n", + fd_cb->so->so_snd.sb_cc, fd_cb->send_window, error); + } + } + } else { + error = ENOBUFS; + } } } @@ -1408,14 +1535,15 @@ flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_t packet, goto set_socket_state; } - if (local_address.ss_family != 0) { + if (local_address.ss_family == 0 && fd_cb->local_address == NULL) { + error = EINVAL; + goto set_socket_state; + } + if (local_address.ss_family != 0 && fd_cb->local_address == NULL) { if (local_address.ss_len > sizeof(local_address)) { local_address.ss_len = sizeof(local_address); } fd_cb->local_address = dup_sockaddr((struct sockaddr *)&local_address, 1); - } else { - error = EINVAL; - goto set_socket_state; } if (remote_address.ss_family != 0) { @@ -1482,7 +1610,7 @@ flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_t packet, flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE); fd_cb->so->so_error = connect_error; } - soisdisconnected(fd_cb->so); + flow_divert_disconnect_socket(fd_cb->so); } else { soisconnected(fd_cb->so); } @@ -1528,7 +1656,7 @@ flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offse how = flow_divert_tunnel_how_closed(fd_cb); if (how == SHUT_RDWR) { - soisdisconnected(fd_cb->so); + flow_divert_disconnect_socket(fd_cb->so); } else if (how == SHUT_RD) { socantrcvmore(fd_cb->so); } else if (how == SHUT_WR) { @@ -1540,49 
+1668,119 @@ flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offse FDUNLOCK(fd_cb); } -static void -flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t offset) +static mbuf_t +flow_divert_get_control_mbuf(struct flow_divert_pcb *fd_cb) { - int error = 0; - mbuf_t data = NULL; - size_t data_size; + struct inpcb *inp = sotoinpcb(fd_cb->so); + if (inp->inp_vflag & INP_IPV4 && inp->inp_flags & INP_RECVDSTADDR) { + struct sockaddr_in *sin = (struct sockaddr_in *)(void *)fd_cb->local_address; - data_size = (mbuf_pkthdr_len(packet) - offset); + return sbcreatecontrol((caddr_t) &sin->sin_addr, sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); + } else if (inp->inp_vflag & INP_IPV6 && (inp->inp_flags & IN6P_PKTINFO) != 0) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)fd_cb->local_address; + struct in6_pktinfo pi6; - FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size); - - error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data); - if (error || data == NULL) { - FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error); - return; + bcopy(&sin6->sin6_addr, &pi6.ipi6_addr, sizeof (struct in6_addr)); + pi6.ipi6_ifindex = 0; + return sbcreatecontrol((caddr_t)&pi6, sizeof (struct in6_pktinfo), IPV6_PKTINFO, IPPROTO_IPV6); } + return (NULL); +} +static void +flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t offset) +{ FDLOCK(fd_cb); if (fd_cb->so != NULL) { + int error = 0; + mbuf_t data = NULL; + size_t data_size; + struct sockaddr_storage remote_address; + boolean_t got_remote_sa = FALSE; + socket_lock(fd_cb->so, 0); - if (flow_divert_check_no_cellular(fd_cb) || - flow_divert_check_no_expensive(fd_cb)) { - flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE); - flow_divert_send_close(fd_cb, SHUT_RDWR); - soisdisconnected(fd_cb->so); - } else if (!(fd_cb->so->so_state & SS_CANTRCVMORE)) { - if (sbappendstream(&fd_cb->so->so_rcv, data)) { - fd_cb->bytes_received += 
data_size; - flow_divert_add_data_statistics(fd_cb, data_size, FALSE); - fd_cb->sb_size = fd_cb->so->so_rcv.sb_cc; - sorwakeup(fd_cb->so); - data = NULL; + + if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) { + uint32_t val_size = 0; + + /* check if we got remote address with data */ + memset(&remote_address, 0, sizeof(remote_address)); + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size); + if (error || val_size > sizeof(remote_address)) { + FDLOG0(LOG_INFO, fd_cb, "No remote address provided"); + error = 0; } else { - FDLOG0(LOG_ERR, fd_cb, "received data, but appendstream failed"); + /* validate the address */ + if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) { + got_remote_sa = TRUE; + } + offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size); + } + } + + data_size = (mbuf_pkthdr_len(packet) - offset); + + FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size); + + error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data); + if (error || data == NULL) { + FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error); + } else { + if (flow_divert_check_no_cellular(fd_cb) || + flow_divert_check_no_expensive(fd_cb)) + { + flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE); + flow_divert_send_close(fd_cb, SHUT_RDWR); + flow_divert_disconnect_socket(fd_cb->so); + } else if (!(fd_cb->so->so_state & SS_CANTRCVMORE)) { + if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) { + if (sbappendstream(&fd_cb->so->so_rcv, data)) { + fd_cb->bytes_received += data_size; + flow_divert_add_data_statistics(fd_cb, data_size, FALSE); + fd_cb->sb_size = fd_cb->so->so_rcv.sb_cc; + sorwakeup(fd_cb->so); + data = NULL; + } else { + FDLOG0(LOG_ERR, fd_cb, "received data, but appendstream failed"); + } + } else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) { + struct sockaddr *append_sa; + mbuf_t mctl; + + if (got_remote_sa == TRUE) { + error = 
flow_divert_dup_addr(fd_cb->so->so_proto->pr_domain->dom_family, + (struct sockaddr *)&remote_address, &append_sa); + } else { + error = flow_divert_dup_addr(fd_cb->so->so_proto->pr_domain->dom_family, + fd_cb->remote_address, &append_sa); + } + if (error) { + FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address."); + } + + mctl = flow_divert_get_control_mbuf(fd_cb); + if (sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, NULL)) { + fd_cb->bytes_received += data_size; + flow_divert_add_data_statistics(fd_cb, data_size, FALSE); + fd_cb->sb_size = fd_cb->so->so_rcv.sb_cc; + sorwakeup(fd_cb->so); + data = NULL; + } else { + FDLOG0(LOG_ERR, fd_cb, "received data, but sbappendaddr failed"); + } + if (!error) { + FREE(append_sa, M_TEMP); + } + } } } socket_unlock(fd_cb->so, 0); - } - FDUNLOCK(fd_cb); - if (data != NULL) { - mbuf_free(data); + if (data != NULL) { + mbuf_freem(data); + } } + FDUNLOCK(fd_cb); } static void @@ -1597,7 +1795,7 @@ flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_t packe return; } - FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", read_count); + FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count)); FDLOCK(fd_cb); if (fd_cb->so != NULL) { @@ -1613,7 +1811,7 @@ static void flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_t packet, int offset) { int error = 0; - size_t key_size = 0; + uint32_t key_size = 0; int log_level; error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size); @@ -1747,7 +1945,7 @@ flow_divert_handle_app_map_create(mbuf_t packet, int offset) cursor >= 0; cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) { - size_t sid_size = 0; + uint32_t sid_size = 0; flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size); new_trie.bytes_count += sid_size; signing_id_count++; @@ -1795,7 +1993,7 @@ 
flow_divert_handle_app_map_create(mbuf_t packet, int offset) cursor >= 0; cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) { - size_t sid_size = 0; + uint32_t sid_size = 0; flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size); if (new_trie.bytes_free_next + sid_size <= new_trie.bytes_count) { boolean_t is_dns; @@ -1848,7 +2046,7 @@ flow_divert_handle_app_map_update(struct flow_divert_group *group, mbuf_t packet cursor >= 0; cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) { - size_t sid_size = 0; + uint32_t sid_size = 0; flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size); if (sid_size > max_size) { max_size = sid_size; @@ -1865,7 +2063,7 @@ flow_divert_handle_app_map_update(struct flow_divert_group *group, mbuf_t packet cursor >= 0; cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) { - size_t signing_id_len = 0; + uint32_t signing_id_len = 0; uint16_t node; flow_divert_packet_get_tlv(packet, @@ -1904,6 +2102,12 @@ flow_divert_input(mbuf_t packet, struct flow_divert_group *group) goto done; } + if (mbuf_pkthdr_len(packet) > FD_CTL_RCVBUFF_SIZE) { + FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) > %lu", mbuf_pkthdr_len(packet), FD_CTL_RCVBUFF_SIZE); + error = EINVAL; + goto done; + } + error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr); if (error) { FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error); @@ -1963,7 +2167,7 @@ flow_divert_input(mbuf_t packet, struct flow_divert_group *group) FDRELEASE(fd_cb); done: - mbuf_free(packet); + mbuf_freem(packet); return error; } @@ -2018,6 +2222,8 @@ flow_divert_detach(struct socket *so) /* Last-ditch effort to send any buffered data */ flow_divert_send_buffered_data(fd_cb, TRUE); + flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE); + flow_divert_send_close_if_needed(fd_cb); /* 
Remove from the group */ flow_divert_pcb_remove(fd_cb); } @@ -2040,8 +2246,10 @@ flow_divert_close(struct socket *so) FDLOG0(LOG_INFO, fd_cb, "Closing"); - soisdisconnecting(so); - sbflush(&so->so_rcv); + if (SOCK_TYPE(so) == SOCK_STREAM) { + soisdisconnecting(so); + sbflush(&so->so_rcv); + } flow_divert_send_buffered_data(fd_cb, TRUE); flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE); @@ -2054,9 +2262,10 @@ flow_divert_close(struct socket *so) } static int -flow_divert_disconnectx(struct socket *so, associd_t aid, connid_t cid __unused) +flow_divert_disconnectx(struct socket *so, sae_associd_t aid, + sae_connid_t cid __unused) { - if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) { + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) { return (EINVAL); } @@ -2108,6 +2317,106 @@ flow_divert_rcvd(struct socket *so, int flags __unused) return 0; } +static int +flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr) +{ + int error = 0; + int port = 0; + + error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, toaddr); + if (error) { + goto done; + } + + if (toaddr->sa_family == AF_INET) { + port = ntohs((satosin(toaddr))->sin_port); + } +#if INET6 + else { + port = ntohs((satosin6(toaddr))->sin6_port); + } +#endif + + error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port); + if (error) { + goto done; + } + +done: + return error; +} + +struct sockaddr * +flow_divert_get_buffered_target_address(mbuf_t buffer) +{ + if (buffer != NULL && buffer->m_type == MT_SONAME) { + struct sockaddr *toaddr = mtod(buffer, struct sockaddr *); + if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) { + return toaddr; + } + } + return NULL; +} + +static boolean_t +flow_divert_is_sockaddr_valid(struct sockaddr *addr) +{ + switch(addr->sa_family) + { + case AF_INET: + if (addr->sa_len != sizeof(struct sockaddr_in)) { + return FALSE; + } + break; +#if 
INET6 + case AF_INET6: + if (addr->sa_len != sizeof(struct sockaddr_in6)) { + return FALSE; + } + break; +#endif /* INET6 */ + default: + return FALSE; + } + return TRUE; +} + +static errno_t +flow_divert_inp_to_sockaddr(const struct inpcb *inp, struct sockaddr **local_socket) +{ + int error = 0; + union sockaddr_in_4_6 sin46; + + bzero(&sin46, sizeof(sin46)); + if (inp->inp_vflag & INP_IPV4) { + struct sockaddr_in *sin = &sin46.sin; + + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_port = inp->inp_lport; + sin->sin_addr = inp->inp_laddr; + } else if (inp->inp_vflag & INP_IPV6) { + struct sockaddr_in6 *sin6 = &sin46.sin6; + + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = inp->inp_lport; + sin6->sin6_addr = inp->in6p_laddr; + } + *local_socket = dup_sockaddr((struct sockaddr *)&sin46, 1); + if (*local_socket == NULL) { + error = ENOBUFS; + } + return (error); +} + +static boolean_t +flow_divert_has_pcb_local_address(const struct inpcb *inp) +{ + return (inp->inp_lport != 0 + && (inp->inp_laddr.s_addr != INADDR_ANY || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))); +} + static errno_t flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup) @@ -2145,6 +2454,25 @@ flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, return error; } +static void +flow_divert_disconnect_socket(struct socket *so) +{ + soisdisconnected(so); + if (SOCK_TYPE(so) == SOCK_DGRAM) { + struct inpcb *inp = NULL; + + inp = sotoinpcb(so); + if (inp != NULL) { +#if INET6 + if (SOCK_CHECK_DOM(so, PF_INET6)) + in6_pcbdetach(inp); + else +#endif /* INET6 */ + in_pcbdetach(inp); + } + } +} + static errno_t flow_divert_getpeername(struct socket *so, struct sockaddr **sa) { @@ -2244,6 +2572,20 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p) } } + if (fd_cb->local_address != NULL) { + error = EALREADY; + goto done; + } else { + if (flow_divert_has_pcb_local_address(inp)) { + 
error = flow_divert_inp_to_sockaddr(inp, &fd_cb->local_address); + if (error) { + FDLOG0(LOG_ERR, fd_cb, "failed to get the local socket address."); + goto done; + } + } + } + + error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet); if (error) { goto done; @@ -2252,7 +2594,7 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p) error = EPERM; if (fd_cb->connect_token != NULL) { - size_t sid_size = 0; + uint32_t sid_size = 0; int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size); if (find_error == 0 && sid_size > 0) { MALLOC(signing_id, char *, sid_size + 1, M_TEMP, M_WAITOK | M_ZERO); @@ -2274,7 +2616,9 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p) if (src_proc != PROC_NULL) { proc_lock(src_proc); if (src_proc->p_csflags & CS_VALID) { - signing_id = (char *)cs_identity_get(src_proc); + const char * cs_id; + cs_id = cs_identity_get(src_proc); + signing_id = __DECONST(char *, cs_id); } else { FDLOG0(LOG_WARNING, fd_cb, "Signature is invalid"); } @@ -2288,7 +2632,7 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p) if (signing_id != NULL) { uint16_t result = NULL_TRIE_IDX; lck_rw_lock_shared(&g_flow_divert_group_lck); - result = flow_divert_trie_search(&g_signing_id_trie, (const uint8_t *)signing_id); + result = flow_divert_trie_search(&g_signing_id_trie, (uint8_t *)signing_id); lck_rw_done(&g_flow_divert_group_lck); if (result != NULL_TRIE_IDX) { error = 0; @@ -2350,7 +2694,7 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p) done: if (error && connect_packet != NULL) { - mbuf_free(connect_packet); + mbuf_freem(connect_packet); } return error; } @@ -2358,8 +2702,8 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p) static int flow_divert_connectx_out_common(struct socket *so, int af, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, - 
struct proc *p, uint32_t ifscope __unused, associd_t aid __unused, - connid_t *pcid, uint32_t flags __unused, void *arg __unused, + struct proc *p, uint32_t ifscope __unused, sae_associd_t aid __unused, + sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused, uint32_t arglen __unused) { struct sockaddr_entry *src_se = NULL, *dst_se = NULL; @@ -2395,9 +2739,10 @@ flow_divert_connectx_out_common(struct socket *so, int af, static int flow_divert_connectx_out(struct socket *so, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid, uint32_t flags, void *arg, - uint32_t arglen) + sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written) { +#pragma unused(uio, bytes_written) return (flow_divert_connectx_out_common(so, AF_INET, src_sl, dst_sl, p, ifscope, aid, pcid, flags, arg, arglen)); } @@ -2406,16 +2751,17 @@ flow_divert_connectx_out(struct socket *so, struct sockaddr_list **src_sl, static int flow_divert_connectx6_out(struct socket *so, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid, uint32_t flags, void *arg, - uint32_t arglen) + sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written) { +#pragma unused(uio, bytes_written) return (flow_divert_connectx_out_common(so, AF_INET6, src_sl, dst_sl, p, ifscope, aid, pcid, flags, arg, arglen)); } #endif /* INET6 */ static int -flow_divert_getconninfo(struct socket *so, connid_t cid, uint32_t *flags, +flow_divert_getconninfo(struct socket *so, sae_connid_t cid, uint32_t *flags, uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len, user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type, user_addr_t aux_data __unused, uint32_t *aux_len) @@ -2432,7 +2778,7 @@ flow_divert_getconninfo(struct socket *so, 
connid_t cid, uint32_t *flags, goto out; } - if (cid != CONNID_ANY && cid != CONNID_ALL && cid != 1) { + if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL && cid != 1) { error = EINVAL; goto out; } @@ -2605,7 +2951,7 @@ flow_divert_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", mbuf_pkthdr_len(data)); fd_cb->bytes_written_by_app += mbuf_pkthdr_len(data); - error = flow_divert_send_app_data(fd_cb, data); + error = flow_divert_send_app_data(fd_cb, data, to); if (error) { goto done; } @@ -2618,7 +2964,7 @@ flow_divert_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr done: if (data) { - mbuf_free(data); + mbuf_freem(data); } if (control) { mbuf_free(control); @@ -2640,6 +2986,20 @@ flow_divert_set_protosw(struct socket *so) #endif /* INET6 */ } +static void +flow_divert_set_udp_protosw(struct socket *so) +{ + so->so_flags |= SOF_FLOW_DIVERT; + if (SOCK_DOM(so) == PF_INET) { + so->so_proto = &g_flow_divert_in_udp_protosw; + } +#if INET6 + else { + so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw; + } +#endif /* INET6 */ +} + static errno_t flow_divert_attach(struct socket *so, uint32_t flow_id, uint32_t ctl_unit) { @@ -2679,10 +3039,14 @@ flow_divert_attach(struct socket *so, uint32_t flow_id, uint32_t ctl_unit) VERIFY(inp != NULL); socket_lock(old_so, 0); - soisdisconnected(old_so); + flow_divert_disconnect_socket(old_so); old_so->so_flags &= ~SOF_FLOW_DIVERT; old_so->so_fd_pcb = NULL; - old_so->so_proto = pffindproto(SOCK_DOM(old_so), IPPROTO_TCP, SOCK_STREAM); + if (SOCK_TYPE(old_so) == SOCK_STREAM) { + old_so->so_proto = pffindproto(SOCK_DOM(old_so), IPPROTO_TCP, SOCK_STREAM); + } else if (SOCK_TYPE(old_so) == SOCK_DGRAM) { + old_so->so_proto = pffindproto(SOCK_DOM(old_so), IPPROTO_UDP, SOCK_DGRAM); + } fd_cb->so = NULL; /* Save the output interface */ ifp = inp->inp_last_outifp; @@ -2720,6 +3084,44 @@ flow_divert_attach(struct socket *so, uint32_t flow_id, 
uint32_t ctl_unit) return error; } +errno_t +flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p) +{ + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + struct inpcb *inp; + int error = 0; + + inp = sotoinpcb(so); + if (inp == NULL) { + return (EINVAL); + } + + if (fd_cb == NULL) { + uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp); + if (fd_ctl_unit > 0) { + error = flow_divert_pcb_init(so, fd_ctl_unit); + fd_cb = so->so_fd_pcb; + if (error != 0 || fd_cb == NULL) { + goto done; + } + } else { + error = ENETDOWN; + goto done; + } + } + return flow_divert_data_out(so, flags, data, to, control, p); + +done: + if (data) { + mbuf_freem(data); + } + if (control) { + mbuf_free(control); + } + + return error; +} + errno_t flow_divert_pcb_init(struct socket *so, uint32_t ctl_unit) { @@ -2737,11 +3139,14 @@ flow_divert_pcb_init(struct socket *so, uint32_t ctl_unit) FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error); FDRELEASE(fd_cb); } else { - fd_cb->log_level = LOG_NOTICE; fd_cb->control_group_unit = ctl_unit; so->so_fd_pcb = fd_cb; - flow_divert_set_protosw(so); + if (SOCK_TYPE(so) == SOCK_STREAM) { + flow_divert_set_protosw(so); + } else if (SOCK_TYPE(so) == SOCK_DGRAM) { + flow_divert_set_udp_protosw(so); + } FDLOG0(LOG_INFO, fd_cb, "Created"); } @@ -2772,8 +3177,8 @@ flow_divert_token_set(struct socket *so, struct sockopt *sopt) goto done; } - if (SOCK_TYPE(so) != SOCK_STREAM || - SOCK_PROTO(so) != IPPROTO_TCP || + if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) || + (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) || (SOCK_DOM(so) != PF_INET #if INET6 && SOCK_DOM(so) != PF_INET6 @@ -2783,10 +3188,12 @@ flow_divert_token_set(struct socket *so, struct sockopt *sopt) error = EINVAL; goto done; } else { - struct tcpcb *tp = sototcpcb(so); - if (tp == NULL || tp->t_state != TCPS_CLOSED) { - error = EINVAL; - goto done; + if (SOCK_TYPE(so) == 
SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) { + struct tcpcb *tp = sototcpcb(so); + if (tp == NULL || tp->t_state != TCPS_CLOSED) { + error = EINVAL; + goto done; + } } } @@ -2957,7 +3364,7 @@ flow_divert_token_get(struct socket *so, struct sockopt *sopt) static errno_t flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo) { - struct flow_divert_group *new_group; + struct flow_divert_group *new_group = NULL; int error = 0; if (sac->sc_unit >= GROUP_COUNT_MAX) { @@ -3193,6 +3600,39 @@ flow_divert_init(void) g_flow_divert_in_protosw.pr_filter_head.tqh_last = (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef; + /* UDP */ + g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM); + VERIFY(g_udp_protosw != NULL); + + memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw)); + memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs)); + + g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out; + g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out; + g_flow_divert_in_udp_usrreqs.pru_control = flow_divert_in_control; + g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close; + g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx; + g_flow_divert_in_udp_usrreqs.pru_peeraddr = flow_divert_getpeername; + g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd; + g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out; + g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown; + g_flow_divert_in_udp_usrreqs.pru_sockaddr = flow_divert_getsockaddr; + g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp; + g_flow_divert_in_udp_usrreqs.pru_soreceive_list = pru_soreceive_list_notsupp; + + g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_udp_usrreqs; + g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput; + + /* + * Socket filters shouldn't
attach/detach to/from this protosw + * since pr_protosw is to be used instead, which points to the + * real protocol; if they do, it is a bug and we should panic. + */ + g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first = + (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef; + g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last = + (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef; + #if INET6 g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM); @@ -3223,6 +3663,39 @@ flow_divert_init(void) (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef; g_flow_divert_in6_protosw.pr_filter_head.tqh_last = (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef; + + /* UDP6 */ + g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM); + + VERIFY(g_udp6_protosw != NULL); + + memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw)); + memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs)); + + g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out; + g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out; + g_flow_divert_in6_udp_usrreqs.pru_control = flow_divert_in6_control; + g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close; + g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx; + g_flow_divert_in6_udp_usrreqs.pru_peeraddr = flow_divert_getpeername; + g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd; + g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out; + g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown; + g_flow_divert_in6_udp_usrreqs.pru_sockaddr = flow_divert_getsockaddr; + g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp; + g_flow_divert_in6_udp_usrreqs.pru_soreceive_list = pru_soreceive_list_notsupp; + + g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs; + 
g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput; + /* + * Socket filters shouldn't attach/detach to/from this protosw + * since pr_protosw is to be used instead, which points to the + * real protocol; if they do, it is a bug and we should panic. + */ + g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first = + (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef; + g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last = + (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef; #endif /* INET6 */ flow_divert_grp_attr = lck_grp_attr_alloc_init(); diff --git a/bsd/netinet/flow_divert.h b/bsd/netinet/flow_divert.h index 0d1f6255b..0968d9ad0 100644 --- a/bsd/netinet/flow_divert.h +++ b/bsd/netinet/flow_divert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * Copyright (c) 2012-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -73,5 +73,6 @@ errno_t flow_divert_token_set(struct socket *so, struct sockopt *sopt); errno_t flow_divert_token_get(struct socket *so, struct sockopt *sopt); errno_t flow_divert_pcb_init(struct socket *so, uint32_t ctl_unit); errno_t flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p); +errno_t flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p); #endif /* __FLOW_DIVERT_H__ */ diff --git a/bsd/netinet/flow_divert_proto.h b/bsd/netinet/flow_divert_proto.h index d3bf02c4f..a3b025eb7 100644 --- a/bsd/netinet/flow_divert_proto.h +++ b/bsd/netinet/flow_divert_proto.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * Copyright (c) 2012-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -31,6 +31,8 @@ #define FLOW_DIVERT_CONTROL_NAME "com.apple.flow-divert" +#define FLOW_DIVERT_TLV_LENGTH_UINT32 1 + #define FLOW_DIVERT_PKT_CONNECT 1 #define FLOW_DIVERT_PKT_CONNECT_RESULT 2 #define FLOW_DIVERT_PKT_DATA 3 @@ -66,6 +68,10 @@ #define FLOW_DIVERT_TLV_UUID 27 #define FLOW_DIVERT_TLV_PREFIX_COUNT 28 #define FLOW_DIVERT_TLV_FLAGS 29 +#define FLOW_DIVERT_TLV_FLOW_TYPE 30 + +#define FLOW_DIVERT_FLOW_TYPE_TCP 1 +#define FLOW_DIVERT_FLOW_TYPE_UDP 3 #define FLOW_DIVERT_CHUNK_SIZE 4096 diff --git a/bsd/netinet/icmp6.h b/bsd/netinet/icmp6.h index da271fa32..0dc3dda2f 100644 --- a/bsd/netinet/icmp6.h +++ b/bsd/netinet/icmp6.h @@ -714,6 +714,10 @@ struct in6_multi; struct ip6protosw; void icmp6_init(struct ip6protosw *, struct domain *); void icmp6_paramerror(struct mbuf *, int); + +void icmp6_error_flag(struct mbuf *, int, int, int, int); +#define ICMP6_ERROR_RST_MRCVIF 0x1 + void icmp6_error(struct mbuf *, int, int, int); void icmp6_error2(struct mbuf *, int, int, int, struct ifnet *); int icmp6_input(struct mbuf **, int *, int); diff --git a/bsd/netinet/igmp.c b/bsd/netinet/igmp.c index 572a083ec..2b1859270 100644 --- a/bsd/netinet/igmp.c +++ b/bsd/netinet/igmp.c @@ -3902,7 +3902,7 @@ igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m) if (m->m_flags & M_IGMPV3_HDR) { igmpreclen -= hdrlen; } else { - M_PREPEND(m, hdrlen, M_DONTWAIT); + M_PREPEND(m, hdrlen, M_DONTWAIT, 1); if (m == NULL) return (NULL); m->m_flags |= M_IGMPV3_HDR; diff --git a/bsd/netinet/in.c b/bsd/netinet/in.c index 83d2a024a..f25e77c05 100644 --- a/bsd/netinet/in.c +++ b/bsd/netinet/in.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -134,8 +134,8 @@ static void in_ifaddr_free(struct ifaddr *); static void in_ifaddr_trace(struct ifaddr *, int); static int in_getassocids(struct socket *, uint32_t *, user_addr_t); -static int in_getconnids(struct socket *, associd_t, uint32_t *, user_addr_t); -static int in_getconninfo(struct socket *, connid_t, uint32_t *, +static int in_getconnids(struct socket *, sae_associd_t, uint32_t *, user_addr_t); +static int in_getconninfo(struct socket *, sae_connid_t, uint32_t *, uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *, uint32_t *, user_addr_t, uint32_t *); @@ -187,6 +187,10 @@ static struct zone *inifa_zone; /* zone for in_ifaddr */ #define INIFA_ZONE_MAX 64 /* maximum elements in zone */ #define INIFA_ZONE_NAME "in_ifaddr" /* zone name */ +static const unsigned int in_extra_size = sizeof (struct in_ifextra); +static const unsigned int in_extra_bufsize = in_extra_size + + sizeof (void *) + sizeof (uint64_t); + /* * Return 1 if the address is * - loopback @@ -316,10 +320,47 @@ in_domifattach(struct ifnet *ifp) VERIFY(ifp != NULL); - if ((error = proto_plumb(PF_INET, ifp)) && error != EEXIST) + if ((error = proto_plumb(PF_INET, ifp)) && error != EEXIST) { log(LOG_ERR, "%s: proto_plumb returned %d if=%s\n", __func__, error, if_name(ifp)); + } else if (error == 0 && ifp->if_inetdata == NULL) { + void **pbuf, *base; + struct in_ifextra *ext; + int errorx; + + if ((ext = (struct in_ifextra *)_MALLOC(in_extra_bufsize, + M_IFADDR, M_WAITOK|M_ZERO)) == NULL) { + error = ENOMEM; + errorx = proto_unplumb(PF_INET, ifp); + if (errorx != 0) { + log(LOG_ERR, + "%s: proto_unplumb returned %d if=%s%d\n", + __func__, errorx, ifp->if_name, + ifp->if_unit); + } + goto done; + } + /* Align on 64-bit boundary */ + base = (void *)P2ROUNDUP((intptr_t)ext + sizeof (uint64_t), + sizeof (uint64_t)); + VERIFY(((intptr_t)base + in_extra_size) <= + ((intptr_t)ext + in_extra_bufsize)); + pbuf = (void 
**)((intptr_t)base - sizeof (void *)); + *pbuf = ext; + ifp->if_inetdata = base; + VERIFY(IS_P2ALIGNED(ifp->if_inetdata, sizeof (uint64_t))); + } +done: + if (error == 0 && ifp->if_inetdata != NULL) { + /* + * Since the structure is never freed, we need to + * zero out its contents to avoid reusing stale data. + * A little redundant with allocation above, but it + * keeps the code simpler for all cases. + */ + bzero(ifp->if_inetdata, in_extra_size); + } return (error); } @@ -2137,13 +2178,13 @@ static int in_getassocids(struct socket *so, uint32_t *cnt, user_addr_t aidp) { struct inpcb *inp = sotoinpcb(so); - associd_t aid; + sae_associd_t aid; if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) return (EINVAL); /* INPCB has no concept of association */ - aid = ASSOCID_ANY; + aid = SAE_ASSOCID_ANY; *cnt = 0; /* just asking how many there are? */ @@ -2157,16 +2198,16 @@ in_getassocids(struct socket *so, uint32_t *cnt, user_addr_t aidp) * Handle SIOCGCONNIDS ioctl for PF_INET domain. */ static int -in_getconnids(struct socket *so, associd_t aid, uint32_t *cnt, +in_getconnids(struct socket *so, sae_associd_t aid, uint32_t *cnt, user_addr_t cidp) { struct inpcb *inp = sotoinpcb(so); - connid_t cid; + sae_connid_t cid; if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) return (EINVAL); - if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) return (EINVAL); /* if connected, return 1 connection count */ @@ -2177,7 +2218,7 @@ in_getconnids(struct socket *so, associd_t aid, uint32_t *cnt, return (0); /* if INPCB is connected, assign it connid 1 */ - cid = ((*cnt != 0) ? 1 : CONNID_ANY); + cid = ((*cnt != 0) ? 1 : SAE_CONNID_ANY); return (copyout(&cid, cidp, sizeof (cid))); } @@ -2186,7 +2227,7 @@ in_getconnids(struct socket *so, associd_t aid, uint32_t *cnt, * Handle SIOCGCONNINFO ioctl for PF_INET domain. 
*/ static int -in_getconninfo(struct socket *so, connid_t cid, uint32_t *flags, +in_getconninfo(struct socket *so, sae_connid_t cid, uint32_t *flags, uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len, user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type, user_addr_t aux_data, uint32_t *aux_len) @@ -2207,7 +2248,7 @@ in_getconninfo(struct socket *so, connid_t cid, uint32_t *flags, goto out; } - if (cid != CONNID_ANY && cid != CONNID_ALL && cid != 1) { + if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL && cid != 1) { error = EINVAL; goto out; } diff --git a/bsd/netinet/in.h b/bsd/netinet/in.h index 53c436517..497a03cea 100644 --- a/bsd/netinet/in.h +++ b/bsd/netinet/in.h @@ -855,17 +855,18 @@ extern uint16_t b_sum16(const void *buf, int len); /* exported for ApplicationFirewall */ extern int in_localaddr(struct in_addr); extern int inaddr_local(struct in_addr); + +extern char *inet_ntoa(struct in_addr); +extern char *inet_ntoa_r(struct in_addr ina, char *buf, + size_t buflen); +extern int inet_pton(int af, const char *, void *); #endif /* KERNEL_PRIVATE */ #define MAX_IPv4_STR_LEN 16 #define MAX_IPv6_STR_LEN 64 extern int inet_aton(const char *, struct in_addr *); /* in libkern */ -extern char *inet_ntoa(struct in_addr); /* in libkern */ -extern char *inet_ntoa_r(struct in_addr ina, char *buf, - size_t buflen); /* in libkern */ extern const char *inet_ntop(int, const void *, char *, socklen_t); /* in libkern*/ -extern int inet_pton(int af, const char *, void *); /* in libkern */ #endif /* KERNEL */ #ifndef KERNEL diff --git a/bsd/netinet/in_arp.c b/bsd/netinet/in_arp.c index 418f2d26c..b889a8aaf 100644 --- a/bsd/netinet/in_arp.c +++ b/bsd/netinet/in_arp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2014 Apple Inc. All rights reserved. + * Copyright (c) 2004-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -146,6 +146,7 @@ static void arp_llinfo_free(void *); static void arp_llinfo_purge(struct rtentry *); static void arp_llinfo_get_ri(struct rtentry *, struct rt_reach_info *); static void arp_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *); +static void arp_llinfo_refresh(struct rtentry *); static __inline void arp_llreach_use(struct llinfo_arp *); static __inline int arp_llreach_reachable(struct llinfo_arp *); @@ -355,6 +356,27 @@ arp_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri) } } +static void +arp_llinfo_refresh(struct rtentry *rt) +{ + uint64_t timenow = net_uptime(); + /* + * If route entry is permanent or if expiry is less + * than timenow and extra time taken for unicast probe + * we can't expedite the refresh + */ + if ((rt->rt_expire == 0) || + (rt->rt_flags & RTF_STATIC) || + !(rt->rt_flags & RTF_LLINFO)) { + return; + } + + if (rt->rt_expire > timenow + arp_unicast_lim) { + rt->rt_expire = timenow + arp_unicast_lim; + } + return; +} + void arp_llreach_set_reachable(struct ifnet *ifp, void *addr, unsigned int alen) { @@ -753,6 +775,7 @@ arp_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa) rt->rt_llinfo_get_iflri = arp_llinfo_get_iflri; rt->rt_llinfo_purge = arp_llinfo_purge; rt->rt_llinfo_free = arp_llinfo_free; + rt->rt_llinfo_refresh = arp_llinfo_refresh; rt->rt_flags |= RTF_LLINFO; la->la_rt = rt; LIST_INSERT_HEAD(&llinfo_arp, la, la_le); @@ -1695,7 +1718,7 @@ arp_ip_handle_input(ifnet_t ifp, u_short arpop, route->rt_flags & RTF_ROUTER && llinfo->la_flags & LLINFO_RTRFAIL_EVTSENT) { struct kev_msg ev_msg; - struct kev_in_arpfailure in_arpalive; + struct kev_in_arpalive in_arpalive; llinfo->la_flags &= ~LLINFO_RTRFAIL_EVTSENT; RT_UNLOCK(route); diff --git a/bsd/netinet/in_cksum.c b/bsd/netinet/in_cksum.c index 4e6d453cf..bc302ae30 100644 --- a/bsd/netinet/in_cksum.c +++ b/bsd/netinet/in_cksum.c @@ -117,7 +117,7 @@ in_cksumdata(const void *buf, int 
mlen) { uint32_t sum, partial; unsigned int final_acc; - uint8_t *data = (void *)buf; + const uint8_t *data = (const uint8_t *)buf; boolean_t needs_swap, started_on_odd; VERIFY(mlen >= 0); @@ -142,22 +142,22 @@ in_cksumdata(const void *buf, int mlen) needs_swap = started_on_odd; while (mlen >= 32) { __builtin_prefetch(data + 32); - partial += *(uint16_t *)(void *)data; - partial += *(uint16_t *)(void *)(data + 2); - partial += *(uint16_t *)(void *)(data + 4); - partial += *(uint16_t *)(void *)(data + 6); - partial += *(uint16_t *)(void *)(data + 8); - partial += *(uint16_t *)(void *)(data + 10); - partial += *(uint16_t *)(void *)(data + 12); - partial += *(uint16_t *)(void *)(data + 14); - partial += *(uint16_t *)(void *)(data + 16); - partial += *(uint16_t *)(void *)(data + 18); - partial += *(uint16_t *)(void *)(data + 20); - partial += *(uint16_t *)(void *)(data + 22); - partial += *(uint16_t *)(void *)(data + 24); - partial += *(uint16_t *)(void *)(data + 26); - partial += *(uint16_t *)(void *)(data + 28); - partial += *(uint16_t *)(void *)(data + 30); + partial += *(const uint16_t *)(const void *)data; + partial += *(const uint16_t *)(const void *)(data + 2); + partial += *(const uint16_t *)(const void *)(data + 4); + partial += *(const uint16_t *)(const void *)(data + 6); + partial += *(const uint16_t *)(const void *)(data + 8); + partial += *(const uint16_t *)(const void *)(data + 10); + partial += *(const uint16_t *)(const void *)(data + 12); + partial += *(const uint16_t *)(const void *)(data + 14); + partial += *(const uint16_t *)(const void *)(data + 16); + partial += *(const uint16_t *)(const void *)(data + 18); + partial += *(const uint16_t *)(const void *)(data + 20); + partial += *(const uint16_t *)(const void *)(data + 22); + partial += *(const uint16_t *)(const void *)(data + 24); + partial += *(const uint16_t *)(const void *)(data + 26); + partial += *(const uint16_t *)(const void *)(data + 28); + partial += *(const uint16_t *)(const void *)(data 
+ 30); data += 32; mlen -= 32; if (PREDICT_FALSE(partial & 0xc0000000)) { @@ -170,14 +170,14 @@ in_cksumdata(const void *buf, int mlen) } } if (mlen & 16) { - partial += *(uint16_t *)(void *)data; - partial += *(uint16_t *)(void *)(data + 2); - partial += *(uint16_t *)(void *)(data + 4); - partial += *(uint16_t *)(void *)(data + 6); - partial += *(uint16_t *)(void *)(data + 8); - partial += *(uint16_t *)(void *)(data + 10); - partial += *(uint16_t *)(void *)(data + 12); - partial += *(uint16_t *)(void *)(data + 14); + partial += *(const uint16_t *)(const void *)data; + partial += *(const uint16_t *)(const void *)(data + 2); + partial += *(const uint16_t *)(const void *)(data + 4); + partial += *(const uint16_t *)(const void *)(data + 6); + partial += *(const uint16_t *)(const void *)(data + 8); + partial += *(const uint16_t *)(const void *)(data + 10); + partial += *(const uint16_t *)(const void *)(data + 12); + partial += *(const uint16_t *)(const void *)(data + 14); data += 16; mlen -= 16; } @@ -186,19 +186,19 @@ in_cksumdata(const void *buf, int mlen) * are using bit masks, which are not affected. 
*/ if (mlen & 8) { - partial += *(uint16_t *)(void *)data; - partial += *(uint16_t *)(void *)(data + 2); - partial += *(uint16_t *)(void *)(data + 4); - partial += *(uint16_t *)(void *)(data + 6); + partial += *(const uint16_t *)(const void *)data; + partial += *(const uint16_t *)(const void *)(data + 2); + partial += *(const uint16_t *)(const void *)(data + 4); + partial += *(const uint16_t *)(const void *)(data + 6); data += 8; } if (mlen & 4) { - partial += *(uint16_t *)(void *)data; - partial += *(uint16_t *)(void *)(data + 2); + partial += *(const uint16_t *)(const void *)data; + partial += *(const uint16_t *)(const void *)(data + 2); data += 4; } if (mlen & 2) { - partial += *(uint16_t *)(void *)data; + partial += *(const uint16_t *)(const void *)data; data += 2; } if (mlen & 1) { @@ -228,7 +228,7 @@ in_cksumdata(const void *buf, int mlen) { uint64_t sum, partial; unsigned int final_acc; - uint8_t *data = (void *)buf; + const uint8_t *data = (const uint8_t *)buf; boolean_t needs_swap, started_on_odd; VERIFY(mlen >= 0); @@ -254,29 +254,29 @@ in_cksumdata(const void *buf, int mlen) if ((uintptr_t)data & 2) { if (mlen < 2) goto trailing_bytes; - partial += *(uint16_t *)(void *)data; + partial += *(const uint16_t *)(const void *)data; data += 2; mlen -= 2; } while (mlen >= 64) { __builtin_prefetch(data + 32); __builtin_prefetch(data + 64); - partial += *(uint32_t *)(void *)data; - partial += *(uint32_t *)(void *)(data + 4); - partial += *(uint32_t *)(void *)(data + 8); - partial += *(uint32_t *)(void *)(data + 12); - partial += *(uint32_t *)(void *)(data + 16); - partial += *(uint32_t *)(void *)(data + 20); - partial += *(uint32_t *)(void *)(data + 24); - partial += *(uint32_t *)(void *)(data + 28); - partial += *(uint32_t *)(void *)(data + 32); - partial += *(uint32_t *)(void *)(data + 36); - partial += *(uint32_t *)(void *)(data + 40); - partial += *(uint32_t *)(void *)(data + 44); - partial += *(uint32_t *)(void *)(data + 48); - partial += *(uint32_t *)(void 
*)(data + 52); - partial += *(uint32_t *)(void *)(data + 56); - partial += *(uint32_t *)(void *)(data + 60); + partial += *(const uint32_t *)(const void *)data; + partial += *(const uint32_t *)(const void *)(data + 4); + partial += *(const uint32_t *)(const void *)(data + 8); + partial += *(const uint32_t *)(const void *)(data + 12); + partial += *(const uint32_t *)(const void *)(data + 16); + partial += *(const uint32_t *)(const void *)(data + 20); + partial += *(const uint32_t *)(const void *)(data + 24); + partial += *(const uint32_t *)(const void *)(data + 28); + partial += *(const uint32_t *)(const void *)(data + 32); + partial += *(const uint32_t *)(const void *)(data + 36); + partial += *(const uint32_t *)(const void *)(data + 40); + partial += *(const uint32_t *)(const void *)(data + 44); + partial += *(const uint32_t *)(const void *)(data + 48); + partial += *(const uint32_t *)(const void *)(data + 52); + partial += *(const uint32_t *)(const void *)(data + 56); + partial += *(const uint32_t *)(const void *)(data + 60); data += 64; mlen -= 64; if (PREDICT_FALSE(partial & (3ULL << 62))) { @@ -293,34 +293,34 @@ in_cksumdata(const void *buf, int mlen) * are using bit masks, which are not affected. 
*/ if (mlen & 32) { - partial += *(uint32_t *)(void *)data; - partial += *(uint32_t *)(void *)(data + 4); - partial += *(uint32_t *)(void *)(data + 8); - partial += *(uint32_t *)(void *)(data + 12); - partial += *(uint32_t *)(void *)(data + 16); - partial += *(uint32_t *)(void *)(data + 20); - partial += *(uint32_t *)(void *)(data + 24); - partial += *(uint32_t *)(void *)(data + 28); + partial += *(const uint32_t *)(const void *)data; + partial += *(const uint32_t *)(const void *)(data + 4); + partial += *(const uint32_t *)(const void *)(data + 8); + partial += *(const uint32_t *)(const void *)(data + 12); + partial += *(const uint32_t *)(const void *)(data + 16); + partial += *(const uint32_t *)(const void *)(data + 20); + partial += *(const uint32_t *)(const void *)(data + 24); + partial += *(const uint32_t *)(const void *)(data + 28); data += 32; } if (mlen & 16) { - partial += *(uint32_t *)(void *)data; - partial += *(uint32_t *)(void *)(data + 4); - partial += *(uint32_t *)(void *)(data + 8); - partial += *(uint32_t *)(void *)(data + 12); + partial += *(const uint32_t *)(const void *)data; + partial += *(const uint32_t *)(const void *)(data + 4); + partial += *(const uint32_t *)(const void *)(data + 8); + partial += *(const uint32_t *)(const void *)(data + 12); data += 16; } if (mlen & 8) { - partial += *(uint32_t *)(void *)data; - partial += *(uint32_t *)(void *)(data + 4); + partial += *(const uint32_t *)(const void *)data; + partial += *(const uint32_t *)(const void *)(data + 4); data += 8; } if (mlen & 4) { - partial += *(uint32_t *)(void *)data; + partial += *(const uint32_t *)(const void *)data; data += 4; } if (mlen & 2) { - partial += *(uint16_t *)(void *)data; + partial += *(const uint16_t *)(const void *)data; data += 2; } trailing_bytes: diff --git a/bsd/netinet/in_dhcp.c b/bsd/netinet/in_dhcp.c deleted file mode 100644 index e7f8ab6a8..000000000 --- a/bsd/netinet/in_dhcp.c +++ /dev/null @@ -1,935 +0,0 @@ -/* - * Copyright (c) 1988-2013 Apple Inc. 
All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * in_dhcp.c - * - use DHCP to allocate an IP address and get the subnet mask and router - */ - -/* - * Modification History - * - * April 17, 2007 Dieter Siegmund (dieter@apple.com) - * - created based on in_bootp.c - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#ifdef DHCP_DEBUG -#define dprintf(x) printf x; -#else /* !DHCP_DEBUG */ -#define dprintf(x) -#endif /* DHCP_DEBUG */ - -#define INITIAL_WAIT_SECS 2 -#define MAX_WAIT_SECS 64 -#define GATHER_TIME_SECS 4 -#define RAND_TICKS (hz) /* one second */ - -const struct sockaddr_in blank_sin = { - sizeof(struct sockaddr_in), - AF_INET, - 0, - { 0 }, - { 0, 0, 0, 0, 0, 0, 0, 0 } -}; - -__private_extern__ int -inet_aifaddr(struct socket * so, const char * name, - const struct in_addr * addr, - const struct in_addr * mask, - const struct in_addr * broadcast) -{ - struct ifaliasreq ifra; - - bzero(&ifra, sizeof(ifra)); - strlcpy(ifra.ifra_name, name, sizeof(ifra.ifra_name)); - if (addr) { - *((struct sockaddr_in *)(void *)&ifra.ifra_addr) = blank_sin; - ((struct sockaddr_in *)(void *)&ifra.ifra_addr)->sin_addr = *addr; - } - if (mask) { - *((struct sockaddr_in *)(void *)&ifra.ifra_mask) = blank_sin; - ((struct sockaddr_in *)(void *)&ifra.ifra_mask)->sin_addr = *mask; - } - if (broadcast) { - *((struct sockaddr_in *)(void *)&ifra.ifra_broadaddr) = blank_sin; - ((struct sockaddr_in *)(void *)&ifra.ifra_broadaddr)->sin_addr = *broadcast; - } - return (ifioctl(so, SIOCAIFADDR, (caddr_t)&ifra, current_proc())); -} - - -struct dhcp_context { - struct ifnet * ifp; - struct sockaddr_dl * dl_p; - struct ifreq ifr; - struct socket * so; - uint8_t 
request[DHCP_PACKET_MIN]; - dhcpoa_t request_options; - uint8_t reply[DHCP_PAYLOAD_MIN]; - struct timeval start_time; - uint32_t xid; - int max_try; - struct in_addr iaddr; - struct in_addr netmask; - struct in_addr router; - struct in_addr server_id; -}; - -static __inline__ struct dhcp_packet * -dhcp_context_request(struct dhcp_context * context) -{ - return ((struct dhcp_packet *)(void *)context->request); -} - -static __inline__ struct dhcp * -dhcp_context_reply(struct dhcp_context * context) -{ - return ((struct dhcp *)(void *)context->reply); -} - -struct mbuf * ip_pkt_to_mbuf(caddr_t pkt, int pktsize); - -static int -receive_packet(struct socket * so, void * pp, int psize, - int * actual_size); - -/* ip address formatting macros */ -#define IP_FORMAT "%d.%d.%d.%d" -#define IP_CH(ip) ((const uint8_t *)ip) -#define IP_LIST(ip) IP_CH(ip)[0],IP_CH(ip)[1],IP_CH(ip)[2],IP_CH(ip)[3] - -#define SUGGESTED_LEASE_LENGTH (60 * 60 * 24 * 30 * 3) /* 3 months */ - -static const uint8_t dhcp_params[] = { - dhcptag_subnet_mask_e, - dhcptag_router_e, -}; - -#define N_DHCP_PARAMS (sizeof(dhcp_params) / sizeof(dhcp_params[0])) - -static __inline__ long -random_range(long bottom, long top) -{ - long number = top - bottom + 1; - long range_size = LONG_MAX / number; - return (((long)random()) / range_size + bottom); -} - -static void -init_dhcp_packet_header(struct dhcp_packet * pkt, int pkt_size) -{ - bzero(&pkt->ip, sizeof(pkt->ip)); - bzero(&pkt->udp, sizeof(pkt->udp)); - pkt->ip.ip_v = IPVERSION; - pkt->ip.ip_hl = sizeof(struct ip) >> 2; - pkt->ip.ip_ttl = MAXTTL; - pkt->ip.ip_p = IPPROTO_UDP; - pkt->ip.ip_src.s_addr = 0; - pkt->ip.ip_dst.s_addr = htonl(INADDR_BROADCAST); - pkt->ip.ip_len = htons(pkt_size); - pkt->ip.ip_sum = 0; - pkt->udp.uh_sport = htons(IPPORT_BOOTPC); - pkt->udp.uh_dport = htons(IPPORT_BOOTPS); - pkt->udp.uh_sum = 0; - pkt->udp.uh_ulen = htons(pkt_size - sizeof(pkt->ip)); - return; -} - -/* - * Function: make_dhcp_request - * Purpose: - * Initialize the 
DHCP-specific parts of the message. - */ -static void -make_dhcp_request(struct dhcp * request, int request_size, - dhcp_msgtype_t msg, - const uint8_t * hwaddr, uint8_t hwtype, int hwlen, - dhcpoa_t * options_p) -{ - uint8_t cid[ETHER_ADDR_LEN + 1]; - uint8_t rfc_magic[RFC_MAGIC_SIZE] = RFC_OPTIONS_MAGIC; - - if (hwlen >= (int)sizeof(cid)) { - printf("dhcp: hwlen is %d (> %d), truncating\n", hwlen, - (int)sizeof(cid)); - hwlen = sizeof(cid) - 1; - } - bzero(request, request_size); - request->dp_op = BOOTREQUEST; - request->dp_htype = hwtype; - request->dp_hlen = hwlen; - bcopy(hwaddr, request->dp_chaddr, hwlen); - bcopy(rfc_magic, request->dp_options, RFC_MAGIC_SIZE); - dhcpoa_init(options_p, request->dp_options + RFC_MAGIC_SIZE, - request_size - sizeof(struct dhcp) - RFC_MAGIC_SIZE); - /* make the request a dhcp packet */ - dhcpoa_add_dhcpmsg(options_p, msg); - - /* add the list of required parameters */ - dhcpoa_add(options_p, dhcptag_parameter_request_list_e, - N_DHCP_PARAMS, dhcp_params); - - /* add the DHCP client identifier */ - cid[0] = hwtype; - bcopy(hwaddr, cid + 1, hwlen); - dhcpoa_add(options_p, dhcptag_client_identifier_e, hwlen + 1, cid); - - return; -} - -/* - * Function: ip_pkt_to_mbuf - * Purpose: - * Put the given IP packet into an mbuf, calculate the - * IP checksum. 
- */ -struct mbuf * -ip_pkt_to_mbuf(caddr_t pkt, int pktsize) -{ - struct ip * ip; - struct mbuf * m; - - m = (struct mbuf *)m_devget(pkt, pktsize, 0, NULL, NULL); - if (m == 0) { - printf("dhcp: ip_pkt_to_mbuf: m_devget failed\n"); - return NULL; - } - m->m_flags |= M_BCAST; - /* Compute the checksum */ - ip = mtod(m, struct ip *); - ip->ip_sum = 0; - ip->ip_sum = in_cksum(m, sizeof(struct ip)); - return (m); -} - -static __inline__ u_char * -link_address(struct sockaddr_dl * dl_p) -{ - return (u_char *)(dl_p->sdl_data + dl_p->sdl_nlen); -} - -static __inline__ int -link_address_length(struct sockaddr_dl * dl_p) -{ - return (dl_p->sdl_alen); -} - -static __inline__ void -link_print(struct sockaddr_dl * dl_p) -{ - int i; - - for (i = 0; i < dl_p->sdl_alen; i++) - printf("%s%x", i ? ":" : "", - (link_address(dl_p))[i]); - printf("\n"); - return; -} - -static struct sockaddr_dl * -link_from_ifnet(struct ifnet * ifp) -{ - return ((struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr); -} - -/* - * Function: send_packet - * Purpose: - * Send the request directly on the interface, bypassing the routing code. - */ -static int -send_packet(struct ifnet * ifp, struct dhcp_packet * pkt, int pkt_size) -{ - struct mbuf * m; - struct sockaddr_in dest; - - dest = blank_sin; - dest.sin_port = htons(IPPORT_BOOTPS); - dest.sin_addr.s_addr = INADDR_BROADCAST; - m = ip_pkt_to_mbuf((caddr_t)pkt, pkt_size); - return dlil_output(ifp, PF_INET, m, 0, (struct sockaddr *)&dest, 0, NULL); -} - -/* - * Function: receive_packet - * Purpose: - * Return a received packet or an error if none available. 
- */ -static int -receive_packet(struct socket * so, void * pp, int psize, int * actual_size) -{ - uio_t auio; - int error; - int rcvflg; - char uio_buf[ UIO_SIZEOF(1) ]; - - auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, - &uio_buf[0], sizeof(uio_buf)); - uio_addiov(auio, CAST_USER_ADDR_T(pp), psize); - rcvflg = MSG_WAITALL; - - error = soreceive(so, (struct sockaddr **) 0, auio, 0, 0, &rcvflg); - *actual_size = psize - uio_resid(auio); - return (error); -} - -/* - * Function: dhcp_timeout - * Purpose: - * Wakeup the process waiting for something on a socket. - */ -static void -dhcp_timeout(void * arg) -{ - struct socket * * timer_arg = (struct socket * *)arg; - struct socket * so = *timer_arg; - - dprintf(("dhcp: timeout\n")); - - *timer_arg = NULL; - socket_lock(so, 1); - sowakeup(so, &so->so_rcv); - socket_unlock(so, 1); - return; -} - -/* - * Function: rate_packet - * Purpose: - * Return an integer point rating value for the given dhcp packet. - * If yiaddr non-zero, the packet gets a rating of 1. - * Another point is given if the packet contains the subnet mask, - * and another if the router is present. 
- */ -#define GOOD_RATING 3 -static __inline__ int -rate_packet(dhcpol_t * options_p) -{ - int len; - int rating = 1; - - if (dhcpol_find(options_p, dhcptag_subnet_mask_e, &len, NULL) != NULL) { - rating++; - } - if (dhcpol_find(options_p, dhcptag_router_e, &len, NULL) != NULL) { - rating++; - } - return (rating); -} - -static dhcp_msgtype_t -get_dhcp_msgtype(dhcpol_t * options_p) -{ - int len; - const uint8_t * opt; - - opt = dhcpol_find(options_p, dhcptag_dhcp_message_type_e, &len, NULL); - if (opt != NULL && len == 1) { - return (*opt); - } - return (dhcp_msgtype_none_e); -} - -static int -dhcp_get_ack(struct dhcp_context * context, int wait_ticks) -{ - int error = 0; - const struct in_addr * ip; - int len; - int n; - struct dhcp * reply; - struct in_addr server_id; - struct socket * timer_arg; - - timer_arg = context->so; - reply = dhcp_context_reply(context); - timeout((timeout_fcn_t)dhcp_timeout, &timer_arg, wait_ticks); - while (1) { - error = receive_packet(context->so, context->reply, - sizeof(context->reply), &n); - if (error == 0) { - dhcp_msgtype_t msg; - dhcpol_t options; - - dprintf(("\ndhcp: received packet length %d\n", n)); - if (n < (int)sizeof(struct dhcp)) { - dprintf(("dhcp: packet is too short %d < %d\n", - n, (int)sizeof(struct dhcp))); - continue; - } - if (ntohl(reply->dp_xid) != context->xid - || bcmp(reply->dp_chaddr, link_address(context->dl_p), - link_address_length(context->dl_p)) != 0) { - /* not for us */ - continue; - } - (void)dhcpol_parse_packet(&options, reply, n); - server_id.s_addr = 0; - ip = (const struct in_addr *) - dhcpol_find(&options, - dhcptag_server_identifier_e, &len, NULL); - if (ip != NULL && len >= (int)sizeof(*ip)) { - server_id = *ip; - } - msg = get_dhcp_msgtype(&options); - if (msg == dhcp_msgtype_nak_e - && server_id.s_addr == context->server_id.s_addr) { - /* server NAK'd us, start over */ - dhcpol_free(&options); - error = EPROTO; - untimeout((timeout_fcn_t)dhcp_timeout, &timer_arg); - break; - } - if (msg 
!= dhcp_msgtype_ack_e - || reply->dp_yiaddr.s_addr == 0 - || reply->dp_yiaddr.s_addr == INADDR_BROADCAST) { - /* ignore the packet */ - goto next_packet; - } - printf("dhcp: received ACK: server " IP_FORMAT - " IP address " IP_FORMAT "\n", - IP_LIST(&server_id), IP_LIST(&reply->dp_yiaddr)); - context->iaddr = reply->dp_yiaddr; - ip = (const struct in_addr *) - dhcpol_find(&options, - dhcptag_subnet_mask_e, &len, NULL); - if (ip != NULL && len >= (int)sizeof(*ip)) { - context->netmask = *ip; - } - ip = (const struct in_addr *) - dhcpol_find(&options, dhcptag_router_e, &len, NULL); - if (ip != NULL && len >= (int)sizeof(*ip)) { - context->router = *ip; - } - dhcpol_free(&options); - untimeout((timeout_fcn_t)dhcp_timeout, &timer_arg); - break; - - next_packet: - dhcpol_free(&options); - } - else if ((error != EWOULDBLOCK)) { - /* if some other error occurred, we're done */ - untimeout((timeout_fcn_t)dhcp_timeout, &timer_arg); - break; - } - else if (timer_arg == NULL) { - /* timed out */ - break; - } - else { - /* wait for a wait to arrive, or a timeout to occur */ - socket_lock(context->so, 1); - error = sbwait(&context->so->so_rcv); - socket_unlock(context->so, 1); - } - } - return (error); -} - -static int -dhcp_select(struct dhcp_context * context) -{ - struct timeval current_time; - int error = 0; - dhcpoa_t * options_p; - struct dhcp_packet * request; - int request_size; - int retry; - int wait_ticks; - - /* format a DHCP Request packet */ - request = dhcp_context_request(context); - options_p = &context->request_options; - - make_dhcp_request(&request->dhcp, DHCP_PAYLOAD_MIN, - dhcp_msgtype_request_e, - link_address(context->dl_p), ARPHRD_ETHER, - link_address_length(context->dl_p), - options_p); - /* insert server identifier and requested ip address */ - dhcpoa_add(options_p, dhcptag_requested_ip_address_e, - sizeof(context->iaddr), &context->iaddr); - dhcpoa_add(options_p, dhcptag_server_identifier_e, - sizeof(context->server_id), &context->server_id); - 
dhcpoa_add(options_p, dhcptag_end_e, 0, 0); - request_size = sizeof(*request) + RFC_MAGIC_SIZE - + dhcpoa_used(options_p); - if (request_size < (int)sizeof(struct bootp_packet)) { - /* pad out to BOOTP-sized packet */ - request_size = sizeof(struct bootp_packet); - } - init_dhcp_packet_header(request, request_size); - - wait_ticks = INITIAL_WAIT_SECS * hz; -#define SELECT_RETRY_COUNT 3 - for (retry = 0; retry < SELECT_RETRY_COUNT; retry++) { - /* Send the request */ - printf("dhcp: sending REQUEST: server " IP_FORMAT - " IP address " IP_FORMAT "\n", - IP_LIST(&context->server_id), - IP_LIST(&context->iaddr)); - microtime(&current_time); - request->dhcp.dp_secs - = htons((u_short) - (current_time.tv_sec - context->start_time.tv_sec)); - request->dhcp.dp_xid = htonl(context->xid); - request->ip.ip_id = ip_randomid(); - error = send_packet(context->ifp, request, request_size); - if (error != 0) { - printf("dhcp: send_packet failed with %d\n", error); - goto failed; - } - - wait_ticks += random_range(-RAND_TICKS, RAND_TICKS); - dprintf(("dhcp: waiting %d ticks\n", wait_ticks)); - error = dhcp_get_ack(context, wait_ticks); - switch (error) { - case 0: - /* we're done */ - goto done; - case EPROTO: - printf("dhcp: server " IP_FORMAT " send us a NAK\n", - IP_LIST(&context->server_id)); - goto failed; - case EWOULDBLOCK: - break; - default: - dprintf(("dhcp: failed to receive packets: %d\n", error)); - goto failed; - } - wait_ticks *= 2; - if (wait_ticks > (MAX_WAIT_SECS * hz)) - wait_ticks = MAX_WAIT_SECS * hz; - microtime(&current_time); - } - error = ETIMEDOUT; - goto failed; - - done: - error = 0; - - failed: - return (error); -} - -static int -dhcp_get_offer(struct dhcp_context * context, int wait_ticks) -{ - int error = 0; - int gather_count = 0; - const struct in_addr * ip; - int last_rating = 0; - int len; - int n; - int rating; - struct dhcp * reply; - struct in_addr server_id; - struct socket * timer_arg; - - timer_arg = context->so; - reply = dhcp_context_reply(context); -
timeout((timeout_fcn_t)dhcp_timeout, &timer_arg, wait_ticks); - while (1) { - error = receive_packet(context->so, context->reply, - sizeof(context->reply), &n); - if (error == 0) { - dhcpol_t options; - - dprintf(("\ndhcp: received packet length %d\n", n)); - if (n < (int)sizeof(struct dhcp)) { - dprintf(("dhcp: packet is too short %d < %d\n", - n, (int)sizeof(struct dhcp))); - continue; - } - if (ntohl(reply->dp_xid) != context->xid - || reply->dp_yiaddr.s_addr == 0 - || reply->dp_yiaddr.s_addr == INADDR_BROADCAST - || bcmp(reply->dp_chaddr, - link_address(context->dl_p), - link_address_length(context->dl_p)) != 0) { - /* not for us */ - continue; - } - (void)dhcpol_parse_packet(&options, reply, n); - if (get_dhcp_msgtype(&options) != dhcp_msgtype_offer_e) { - /* not an offer */ - goto next_packet; - } - ip = (const struct in_addr *) - dhcpol_find(&options, - dhcptag_server_identifier_e, &len, NULL); - if (ip == NULL || len < (int)sizeof(*ip)) { - /* missing/invalid server identifier */ - goto next_packet; - } - printf("dhcp: received OFFER: server " IP_FORMAT - " IP address " IP_FORMAT "\n", - IP_LIST(ip), IP_LIST(&reply->dp_yiaddr)); - server_id = *ip; - rating = rate_packet(&options); - if (rating > last_rating) { - context->iaddr = reply->dp_yiaddr; - ip = (const struct in_addr *) - dhcpol_find(&options, - dhcptag_subnet_mask_e, &len, NULL); - if (ip != NULL && len >= (int)sizeof(*ip)) { - context->netmask = *ip; - } - ip = (const struct in_addr *) - dhcpol_find(&options, dhcptag_router_e, &len, NULL); - if (ip != NULL && len >= (int)sizeof(*ip)) { - context->router = *ip; - } - context->server_id = server_id; - } - if (rating >= GOOD_RATING) { - dhcpol_free(&options); - /* packet is good enough */ - untimeout((timeout_fcn_t)dhcp_timeout, &timer_arg); - break; - } - if (gather_count == 0) { - untimeout((timeout_fcn_t)dhcp_timeout, &timer_arg); - timer_arg = context->so; - timeout((timeout_fcn_t)dhcp_timeout, &timer_arg, - hz * GATHER_TIME_SECS); - } - 
gather_count = 1; - next_packet: - dhcpol_free(&options); - } - else if ((error != EWOULDBLOCK)) { - untimeout((timeout_fcn_t)dhcp_timeout, &timer_arg); - break; - } - else if (timer_arg == NULL) { /* timed out */ - if (gather_count != 0) { - dprintf(("dhcp: gathering time has expired\n")); - error = 0; - } - break; - } - else { - socket_lock(context->so, 1); - error = sbwait(&context->so->so_rcv); - socket_unlock(context->so, 1); - } - } - return (error); -} - -/* - * Function: dhcp_init - * Purpose: - * Start in the DHCP INIT state sending DISCOVER's. When we get OFFER's, - * try to select one of them by sending a REQUEST and waiting for an ACK. - */ -static int -dhcp_init(struct dhcp_context * context) -{ - struct timeval current_time; - int error = 0; - uint32_t lease_option = htonl(SUGGESTED_LEASE_LENGTH); - dhcpoa_t * options_p; - struct dhcp_packet * request; - int request_size; - int retry; - int wait_ticks; - - /* remember the time we started */ - microtime(&context->start_time); - current_time = context->start_time; - - request = dhcp_context_request(context); - options_p = &context->request_options; - - retry: - /* format a DHCP DISCOVER packet */ - make_dhcp_request(&request->dhcp, DHCP_PAYLOAD_MIN, - dhcp_msgtype_discover_e, - link_address(context->dl_p), ARPHRD_ETHER, - link_address_length(context->dl_p), - options_p); - /* add the requested lease time */ - dhcpoa_add(options_p, dhcptag_lease_time_e, - sizeof(lease_option), &lease_option); - dhcpoa_add(options_p, dhcptag_end_e, 0, 0); - request_size = sizeof(*request) + RFC_MAGIC_SIZE - + dhcpoa_used(options_p); - if (request_size < (int)sizeof(struct bootp_packet)) { - /* pad out to BOOTP-sized packet */ - request_size = sizeof(struct bootp_packet); - } - init_dhcp_packet_header(request, request_size); - - wait_ticks = INITIAL_WAIT_SECS * hz; - for (retry = 0; retry < context->max_try; retry++) { - /* Send the request */ - printf("dhcp: sending DISCOVER\n"); - request->dhcp.dp_secs - = 
htons((u_short)(current_time.tv_sec - - context->start_time.tv_sec)); - request->dhcp.dp_xid = htonl(context->xid); - request->ip.ip_id = ip_randomid(); - error = send_packet(context->ifp, request, request_size); - if (error != 0) { - printf("dhcp: send_packet failed with %d\n", error); - goto failed; - } - wait_ticks += random_range(-RAND_TICKS, RAND_TICKS); - dprintf(("dhcp: waiting %d ticks\n", wait_ticks)); - error = dhcp_get_offer(context, wait_ticks); - if (error == 0) { - /* send a REQUEST */ - error = dhcp_select(context); - if (error == 0) { - /* we're done !*/ - goto done; - } - if (error != EPROTO && error != ETIMEDOUT) { - /* fatal error */ - dprintf(("dhcp: dhcp_select failed %d\n", error)); - goto failed; - } - /* wait 10 seconds, and try again */ - printf("dhcp: trying again in 10 seconds\n"); - tsleep(&error, PRIBIO, "dhcp_init", 10 * hz); - context->xid++; - goto retry; - } - else if (error != EWOULDBLOCK) { - dprintf(("dhcp: failed to receive packets: %d\n", error)); - goto failed; - } - wait_ticks *= 2; - if (wait_ticks > (MAX_WAIT_SECS * hz)) - wait_ticks = MAX_WAIT_SECS * hz; - microtime(&current_time); - } - error = ETIMEDOUT; - goto failed; - - done: - error = 0; - - failed: - return (error); -} - -static void -dhcp_context_free(struct dhcp_context * context, struct proc * procp) -{ - if (context == NULL) { - return; - } - if (context->so != NULL) { - int error; - - /* disable reception of DHCP packets before address assignment */ - context->ifr.ifr_intval = 0; - error = ifioctl(context->so, SIOCAUTOADDR, - (caddr_t)&context->ifr, procp); - if (error) { - printf("dhcp: SIOCAUTOADDR failed: %d\n", error); - } - soclose(context->so); - } - kfree(context, sizeof(*context)); - return; -} - -static struct dhcp_context * -dhcp_context_create(struct ifnet * ifp, int max_try, - struct proc * procp, int * error_p) -{ - struct dhcp_context * context = NULL; - struct sockaddr_dl * dl_p; - struct in_addr lo_addr; - struct in_addr lo_mask; - int error; - struct
sockaddr_in sin; - - /* get the hardware address from the interface */ - dl_p = link_from_ifnet(ifp); - if (dl_p == NULL) { - printf("dhcp: can't get link address\n"); - error = ENXIO; - goto failed; - } - - printf("dhcp: h/w addr "); - link_print(dl_p); - if (dl_p->sdl_type != IFT_ETHER) { - printf("dhcp: hardware type %d not supported\n", - dl_p->sdl_type); - error = ENXIO; - goto failed; - } - - context = (struct dhcp_context *)kalloc(sizeof(*context)); - if (context == NULL) { - printf("dhcp: failed to allocate context\n"); - error = ENOMEM; - goto failed; - } - bzero(context, sizeof(*context)); - - /* get a socket */ - error = socreate(AF_INET, &context->so, SOCK_DGRAM, 0); - if (error != 0) { - printf("dhcp: socreate failed %d\n", error); - goto failed; - } - - /* assign 127.0.0.1 to lo0 so that the bind will succeed */ - lo_addr.s_addr = htonl(INADDR_LOOPBACK); - lo_mask.s_addr = htonl(IN_CLASSA_NET); - error = inet_aifaddr(context->so, "lo0", &lo_addr, &lo_mask, NULL); - if (error != 0) { - printf("dhcp: assigning loopback address failed %d\n", error); - } - - /* enable reception of DHCP packets before an address is assigned */ - snprintf(context->ifr.ifr_name, - sizeof(context->ifr.ifr_name), "%s", if_name(ifp)); - context->ifr.ifr_intval = 1; - - error = ifioctl(context->so, SIOCAUTOADDR, (caddr_t)&context->ifr, procp); - if (error) { - printf("dhcp: SIOCAUTOADDR failed: %d\n", error); - goto failed; - } - dprintf(("dhcp: SIOCAUTOADDR done\n")); - - error = ifioctl(context->so, SIOCPROTOATTACH, (caddr_t)&context->ifr, - procp); - if (error) { - printf("dhcp: SIOCPROTOATTACH failed: %d\n", error); - goto failed; - } - dprintf(("dhcp: SIOCPROTOATTACH done\n")); - - /* bind the socket */ - sin.sin_len = sizeof(sin); - sin.sin_family = AF_INET; - sin.sin_port = htons(IPPORT_BOOTPC); - sin.sin_addr.s_addr = INADDR_ANY; - error = sobindlock(context->so, (struct sockaddr *)&sin, 1); - if (error) { - printf("dhcp: sobind failed, %d\n", error); - goto failed; - } 
- - /* make it non-blocking I/O */ - socket_lock(context->so, 1); - context->so->so_state |= SS_NBIO; - socket_unlock(context->so, 1); - - /* save passed-in information */ - context->max_try = max_try; - context->dl_p = dl_p; - context->ifp = ifp; - - /* get a random transaction id */ - context->xid = random(); - - return (context); - - failed: - dhcp_context_free(context, procp); - *error_p = error; - return (NULL); -} - -/* - * Routine: dhcp - * Function: - * Do DHCP over the specified interface to retrieve the IP address, - * subnet mask, and router. - */ -int -dhcp(struct ifnet * ifp, struct in_addr * iaddr_p, int max_try, - struct in_addr * netmask_p, struct in_addr * router_p, - struct proc * procp) -{ - int error = 0; - struct dhcp_context * context; - - context = dhcp_context_create(ifp, max_try, procp, &error); - if (context == NULL) { - return (error); - } - - /* start DHCP in the INIT state */ - error = dhcp_init(context); - if (error == 0) { - *iaddr_p = context->iaddr; - *netmask_p = context->netmask; - *router_p = context->router; - } - dhcp_context_free(context, procp); - return (error); -} diff --git a/bsd/netinet/in_gif.c b/bsd/netinet/in_gif.c index 66bf01c30..df1130702 100644 --- a/bsd/netinet/in_gif.c +++ b/bsd/netinet/in_gif.c @@ -177,12 +177,12 @@ in_gif_output( iphdr.ip_ttl = ip_gif_ttl; iphdr.ip_len = m->m_pkthdr.len + sizeof (struct ip); if (ifp->if_flags & IFF_LINK1) - ip_ecn_ingress(ECN_ALLOWED, &iphdr.ip_tos, &tos); + ip_ecn_ingress(ECN_NORMAL, &iphdr.ip_tos, &tos); else ip_ecn_ingress(ECN_NOCARE, &iphdr.ip_tos, &tos); /* prepend new IP header */ - M_PREPEND(m, sizeof (struct ip), M_DONTWAIT); + M_PREPEND(m, sizeof (struct ip), M_DONTWAIT, 0); if (m && mbuf_len(m) < sizeof (struct ip)) m = m_pullup(m, sizeof (struct ip)); if (m == NULL) { @@ -240,6 +240,7 @@ in_gif_input(m, off) struct ip *ip; int af, proto; u_int8_t otos; + int egress_success = 0; ip = mtod(m, struct ip *); proto = ip->ip_p; @@ -268,9 +269,9 @@ in_gif_input(m, off) } ip 
= mtod(m, struct ip *); if (gifp->if_flags & IFF_LINK1) - ip_ecn_egress(ECN_ALLOWED, &otos, &ip->ip_tos); + egress_success = ip_ecn_egress(ECN_NORMAL, &otos, &ip->ip_tos); else - ip_ecn_egress(ECN_NOCARE, &otos, &ip->ip_tos); + egress_success = ip_ecn_egress(ECN_NOCARE, &otos, &ip->ip_tos); break; } #endif @@ -288,9 +289,9 @@ in_gif_input(m, off) ip6 = mtod(m, struct ip6_hdr *); itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; if (gifp->if_flags & IFF_LINK1) - ip_ecn_egress(ECN_ALLOWED, &otos, &itos); + egress_success = ip_ecn_egress(ECN_NORMAL, &otos, &itos); else - ip_ecn_egress(ECN_NOCARE, &otos, &itos); + egress_success = ip_ecn_egress(ECN_NOCARE, &otos, &itos); ip6->ip6_flow &= ~htonl(0xff << 20); ip6->ip6_flow |= htonl((u_int32_t)itos << 20); break; @@ -301,6 +302,13 @@ in_gif_input(m, off) m_freem(m); return; } + + if (egress_success == 0) { + OSAddAtomic(1, &ipstat.ips_nogif); + m_freem(m); + return; + } + #ifdef __APPLE__ /* Replace the rcvif by gifp for dlil to route it correctly */ if (m->m_pkthdr.rcvif) diff --git a/bsd/netinet/in_mcast.c b/bsd/netinet/in_mcast.c index 893665ada..320c7394a 100644 --- a/bsd/netinet/in_mcast.c +++ b/bsd/netinet/in_mcast.c @@ -1737,7 +1737,7 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) if (error) return (error); /* we never use msfr.msfr_srcs; */ - memcpy(&msfr, &msfr64, sizeof(msfr)); + memcpy(&msfr, &msfr64, sizeof(msfr64)); } else { error = sooptcopyin(sopt, &msfr32, sizeof(struct __msfilterreq32), @@ -1745,7 +1745,7 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) if (error) return (error); /* we never use msfr.msfr_srcs; */ - memcpy(&msfr, &msfr32, sizeof(msfr)); + memcpy(&msfr, &msfr32, sizeof(msfr32)); } ifnet_head_lock_shared(); @@ -1809,7 +1809,6 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) IMO_UNLOCK(imo); return (ENOBUFS); } - bzero(tss, (size_t) msfr.msfr_nsrcs * sizeof(*tss)); } /* @@ -1858,7 +1857,7 @@ inp_get_source_filters(struct inpcb *inp, struct 
sockopt *sopt) msfr32.msfr_ifindex = msfr.msfr_ifindex; msfr32.msfr_fmode = msfr.msfr_fmode; msfr32.msfr_nsrcs = msfr.msfr_nsrcs; - memcpy(&msfr64.msfr_group, &msfr.msfr_group, + memcpy(&msfr32.msfr_group, &msfr.msfr_group, sizeof(struct sockaddr_storage)); error = sooptcopyout(sopt, &msfr32, sizeof(struct __msfilterreq32)); @@ -2723,7 +2722,7 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) if (error) return (error); /* we never use msfr.msfr_srcs; */ - memcpy(&msfr, &msfr64, sizeof(msfr)); + memcpy(&msfr, &msfr64, sizeof(msfr64)); } else { error = sooptcopyin(sopt, &msfr32, sizeof(struct __msfilterreq32), @@ -2731,7 +2730,7 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) if (error) return (error); /* we never use msfr.msfr_srcs; */ - memcpy(&msfr, &msfr32, sizeof(msfr)); + memcpy(&msfr, &msfr32, sizeof(msfr32)); } if ((size_t) msfr.msfr_nsrcs > diff --git a/bsd/netinet/in_pcb.c b/bsd/netinet/in_pcb.c index e74dccbc3..0cbd238cc 100644 --- a/bsd/netinet/in_pcb.c +++ b/bsd/netinet/in_pcb.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -534,7 +534,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) int mac_error; #endif /* CONFIG_MACF_NET */ - if (!so->cached_in_sock_layer) { + if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) { inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone); if (inp == NULL) return (ENOBUFS); @@ -552,7 +552,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) #if CONFIG_MACF_NET mac_error = mac_inpcb_label_init(inp, M_WAITOK); if (mac_error != 0) { - if (!so->cached_in_sock_layer) + if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) zfree(pcbinfo->ipi_zone, inp); return (mac_error); } @@ -1296,6 +1296,16 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, inp->inp_last_outifp = (outif != NULL) ? *outif : NULL; inp->inp_flags |= INP_INADDR_ANY; } else { + /* + * Use of IP_PKTINFO without a local port already + * specified will cause the kernel to panic, + * see rdar://problem/18508185. + * For now, return an error to avoid a kernel panic. + * This routine can be refactored to handle this better + * in the future. + */ + if (inp->inp_lport == 0) + return (EINVAL); if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { /* * Lock inversion issue, mostly with udp @@ -1369,6 +1379,13 @@ in_pcbdetach(struct inpcb *inp) if (nstat_collect && (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) nstat_pcb_detach(inp); + + /* Free memory buffer held for generating keep alives */ + if (inp->inp_keepalive_data != NULL) { + FREE(inp->inp_keepalive_data, M_TEMP); + inp->inp_keepalive_data = NULL; + } + /* mark socket state as dead */ if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { panic("%s: so=%p proto=%d couldn't set to STOPUSING\n", @@ -1465,7 +1482,7 @@ in_pcbdispose(struct inpcb *inp) * we deallocate the structure.
*/ ROUTE_RELEASE(&inp->inp_route); - if (!so->cached_in_sock_layer) { + if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) { zfree(ipi->ipi_zone, inp); } sodealloc(so); @@ -1618,18 +1635,11 @@ in_losing(struct inpcb *inp) { boolean_t release = FALSE; struct rtentry *rt; - struct rt_addrinfo info; if ((rt = inp->inp_route.ro_rt) != NULL) { struct in_ifaddr *ia = NULL; - bzero((caddr_t)&info, sizeof (info)); RT_LOCK(rt); - info.rti_info[RTAX_DST] = - (struct sockaddr *)&inp->inp_route.ro_dst; - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; - info.rti_info[RTAX_NETMASK] = rt_mask(rt); - rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); if (rt->rt_flags & RTF_DYNAMIC) { /* * Prevent another thread from modifying rt_key, @@ -2822,10 +2832,11 @@ inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo) * When not delegated, the effective pid is the same as the real pid */ if (so->so_flags & SOF_DELEGATED) { + soprocinfo->spi_delegated = 1; soprocinfo->spi_epid = so->e_pid; - if (so->e_pid != 0) - uuid_copy(soprocinfo->spi_euuid, so->e_uuid); + uuid_copy(soprocinfo->spi_euuid, so->e_uuid); } else { + soprocinfo->spi_delegated = 0; soprocinfo->spi_epid = so->last_pid; } } diff --git a/bsd/netinet/in_pcb.h b/bsd/netinet/in_pcb.h index c86c03c6c..7ce89307a 100644 --- a/bsd/netinet/in_pcb.h +++ b/bsd/netinet/in_pcb.h @@ -220,6 +220,11 @@ struct inpcb { } inp_necp_attributes; struct necp_inpcb_result inp_policyresult; #endif + u_char *inp_keepalive_data; /* for keepalive offload */ + u_int8_t inp_keepalive_datalen; /* keepalive data length */ + u_int8_t inp_keepalive_type; /* type of application */ + u_int16_t inp_keepalive_interval; /* keepalive interval */ + uint32_t inp_nstat_refcnt __attribute__((aligned(4))); struct inp_stat *inp_stat; struct inp_stat *inp_cstat; /* cellular data */ struct inp_stat *inp_wstat; /* Wi-Fi data */ @@ -228,7 +233,6 @@ struct inpcb { u_int8_t inp_cstat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)]; u_int8_t 
inp_wstat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)]; u_int8_t inp_Wstat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)]; - uint32_t inp_nstat_refcnt __attribute__((aligned(4))); }; #define INP_ADD_STAT(_inp, _cnt_cellular, _cnt_wifi, _cnt_wired, _a, _n)\ @@ -678,6 +682,7 @@ struct inpcbinfo { #define INP2_NO_IFF_EXPENSIVE 0x00000008 /* do not use expensive interface */ #define INP2_INHASHLIST 0x00000010 /* pcb is in inp_hash list */ #define INP2_AWDL_UNRESTRICTED 0x00000020 /* AWDL restricted mode allowed */ +#define INP2_KEEPALIVE_OFFLOAD 0x00000040 /* Enable UDP keepalive offload */ /* * Flags passed to in_pcblookup*() functions. @@ -754,9 +759,14 @@ extern int in_pcb_checkstate(struct inpcb *, int, int); extern void in_pcbremlists(struct inpcb *); extern void inpcb_to_compat(struct inpcb *, struct inpcb_compat *); extern void inpcb_to_xinpcb64(struct inpcb *, struct xinpcb64 *); + extern int get_pcblist_n(short, struct sysctl_req *, struct inpcbinfo *); -#define INPCB_GET_PORTS_USED_WILDCARDOK 0x1 -#define INPCB_GET_PORTS_USED_NOWAKEUPOK 0x2 +#define INPCB_GET_PORTS_USED_WILDCARDOK 0x01 +#define INPCB_GET_PORTS_USED_NOWAKEUPOK 0x02 +#define INPCB_GET_PORTS_USED_RECVANYIFONLY 0x04 +#define INPCB_GET_PORTS_USED_EXTBGIDLEONLY 0x08 +#define INPCB_GET_PORTS_USED_ACTIVEONLY 0x10 + extern void inpcb_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *, struct inpcbinfo *); #define INPCB_OPPORTUNISTIC_THROTTLEON 0x0001 diff --git a/bsd/netinet/in_pcblist.c b/bsd/netinet/in_pcblist.c index 4df416c6a..3e0facc1c 100644 --- a/bsd/netinet/in_pcblist.c +++ b/bsd/netinet/in_pcblist.c @@ -401,21 +401,30 @@ inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags, struct socket *so; inp_gen_t gencnt; bool iswildcard, wildcardok, nowakeok; + bool recvanyifonly, extbgidleok; + bool activeonly; wildcardok = ((flags & INPCB_GET_PORTS_USED_WILDCARDOK) != 0); nowakeok = ((flags & INPCB_GET_PORTS_USED_NOWAKEUPOK) != 0); + recvanyifonly = ((flags & 
INPCB_GET_PORTS_USED_RECVANYIFONLY) != 0); + extbgidleok = ((flags & INPCB_GET_PORTS_USED_EXTBGIDLEONLY) != 0); + activeonly = ((flags & INPCB_GET_PORTS_USED_ACTIVEONLY) != 0); + lck_rw_lock_shared(pcbinfo->ipi_lock); gencnt = pcbinfo->ipi_gencnt; + for (inp = LIST_FIRST(pcbinfo->ipi_listhead); inp; inp = LIST_NEXT(inp, inp_list)) { uint16_t port; if (inp->inp_gencnt > gencnt || - inp->inp_state == INPCB_STATE_DEAD) + inp->inp_state == INPCB_STATE_DEAD || + inp->inp_wantcnt == WNT_STOPUSING) continue; if ((so = inp->inp_socket) == NULL || - (so->so_state & SS_DEFUNCT)) + (so->so_state & SS_DEFUNCT) || + (so->so_state & SS_ISDISCONNECTED)) continue; if (!(protocol == PF_UNSPEC || @@ -435,12 +444,64 @@ inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags, !nowakeok) continue; + if (!(inp->inp_flags & INP_RECV_ANYIF) && + recvanyifonly) + continue; + + if (!(so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) && + extbgidleok) + continue; + if (!iswildcard && !(ifindex == 0 || inp->inp_last_outifp == NULL || ifindex == inp->inp_last_outifp->if_index)) continue; + if (SOCK_PROTO(inp->inp_socket) == IPPROTO_UDP && + so->so_state & SS_CANTRCVMORE) + continue; + + if (SOCK_PROTO(inp->inp_socket) == IPPROTO_TCP) { + struct tcpcb *tp = sototcpcb(inp->inp_socket); + + switch (tp->t_state) { + case TCPS_CLOSED: + continue; + /* NOT REACHED */ + case TCPS_LISTEN: + case TCPS_SYN_SENT: + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + /* + * Note: FIN_WAIT_1 is an active state + * because we need our FIN to be + * acknowledged + */ + break; + case TCPS_CLOSE_WAIT: + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_FIN_WAIT_2: + /* + * In the closing states, the connection + * is not idle when there is outgoing + * data having to be acknowledged + */ + if (activeonly && so->so_snd.sb_cc == 0) + continue; + break; + case TCPS_TIME_WAIT: + continue; + /* NOT REACHED */ + } + } + /* + * Final safeguard to exclude unspecified local port + */ 
port = ntohs(inp->inp_lport); + if (port == 0) + continue; bit_set(bitfield, port); } lck_rw_done(pcbinfo->ipi_lock); diff --git a/bsd/netinet/in_proto.c b/bsd/netinet/in_proto.c index 321e7819e..bb0fee864 100644 --- a/bsd/netinet/in_proto.c +++ b/bsd/netinet/in_proto.c @@ -129,7 +129,7 @@ static struct protosw inetsw[] = { .pr_type = SOCK_DGRAM, .pr_protocol = IPPROTO_UDP, .pr_flags = PR_ATOMIC|PR_ADDR|PR_PROTOLOCK|PR_PCBLOCK| - PR_EVCONNINFO, + PR_EVCONNINFO|PR_PRECONN_WRITE, .pr_input = udp_input, .pr_ctlinput = udp_ctlinput, .pr_ctloutput = udp_ctloutput, @@ -143,7 +143,8 @@ static struct protosw inetsw[] = { .pr_type = SOCK_STREAM, .pr_protocol = IPPROTO_TCP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_PCBLOCK| - PR_PROTOLOCK|PR_DISPOSE|PR_EVCONNINFO, + PR_PROTOLOCK|PR_DISPOSE|PR_EVCONNINFO| + PR_PRECONN_WRITE|PR_DATA_IDEMPOTENT, .pr_input = tcp_input, .pr_ctlinput = tcp_ctlinput, .pr_ctloutput = tcp_ctloutput, @@ -325,15 +326,15 @@ static void ip_proto_input(protocol_family_t protocol, mbuf_t packet_list) { #pragma unused(protocol) - mbuf_t packet; - int how_many = 0 ; - /* ip_input should handle a list of packets but does not yet */ - for (packet = packet_list; packet; packet = packet_list) { - how_many++; - packet_list = mbuf_nextpkt(packet); - mbuf_setnextpkt(packet, NULL); - ip_input(packet); + if (packet_list->m_nextpkt != NULL) { + ip_input_process_list(packet_list); + } else { + /* + * XXX remove this path if ip_input_process_list is proven + * to be stable and has minimum overhead on most platforms. + */ + ip_input(packet_list); } } diff --git a/bsd/netinet/in_systm.h b/bsd/netinet/in_systm.h index e0972f831..1eb03c11b 100644 --- a/bsd/netinet/in_systm.h +++ b/bsd/netinet/in_systm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/bsd/netinet/in_tclass.c b/bsd/netinet/in_tclass.c index e019e43c3..20a37fd9f 100644 --- a/bsd/netinet/in_tclass.c +++ b/bsd/netinet/in_tclass.c @@ -80,6 +80,7 @@ static int flush_pid_tclass(struct so_tcdbg *); static int purge_tclass_for_proc(void); static int flush_tclass_for_proc(void); int get_tclass_for_curr_proc(int *); +static inline int so_throttle_best_effort(struct socket* ,struct ifnet *); static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */ static lck_grp_t *tclass_lck_grp = NULL; /* mutex group definition */ @@ -92,7 +93,7 @@ static lck_mtx_t *tclass_lock = &tclass_lock_data; * seconds, the background connections can switch to foreground TCP * congestion control. */ -#define TCP_BG_SWITCH_TIME 2 +#define TCP_BG_SWITCH_TIME 2 /* seconds */ /* * Must be called with tclass_lock held @@ -787,6 +788,17 @@ so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes, uint32_t tc) so->so_tc_stats[tc].rxpackets += pkts; so->so_tc_stats[tc].rxbytes +=bytes; } + +static inline int +so_throttle_best_effort(struct socket *so, struct ifnet *ifp) +{ + u_int32_t uptime = net_uptime(); + return (soissrcbesteffort(so) && + net_io_policy_throttle_best_effort == 1 && + ifp->if_rt_sendts > 0 && + (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME); +} + __private_extern__ void set_tcp_stream_priority(struct socket *so) { @@ -795,7 +807,7 @@ set_tcp_stream_priority(struct socket *so) struct ifnet *outifp; u_char old_cc = tp->tcp_cc_index; int recvbg = IS_TCP_RECV_BG(so); - bool is_local, fg_active = false; + bool is_local = false, fg_active = false; u_int32_t uptime; VERIFY((SOCK_CHECK_DOM(so, PF_INET) @@ -817,20 +829,42 @@ set_tcp_stream_priority(struct socket *so) * background. The variable sotcdb which can be set with sysctl * is used to disable these settings for testing. 
*/ - if (soissrcbackground(so)) { - if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) - is_local = true; - else - is_local = false; - - /* Check if there has been recent foreground activity */ - if ((outifp != NULL && - outifp->if_fg_sendts > 0 && + if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) + is_local = true; + + /* Check if there has been recent foreground activity */ + if (outifp != NULL) { + /* + * If the traffic source is background, check if + * if it can be switched to foreground. This can + * happen when there is no indication of foreground + * activity. + */ + if (soissrcbackground(so) && + ((outifp->if_fg_sendts > 0 && (int)(uptime - outifp->if_fg_sendts) <= - TCP_BG_SWITCH_TIME) || - net_io_policy_throttled) + TCP_BG_SWITCH_TIME) || net_io_policy_throttled)) fg_active = true; + /* + * The traffic source is best-effort -- check if + * the policy to throttle best effort is enabled + * and there was realtime activity on this + * interface recently. If this is true, enable + * algorithms that respond to increased latency + * on best-effort traffic. + */ + if (so_throttle_best_effort(so, outifp)) + fg_active = true; + } + + /* + * System initiated background traffic like cloud uploads should + * always use background delay sensitive algorithms. This will + * make the stream more responsive to other streams on the user's + * network and it will minimize latency induced. + */ + if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) { /* * If the interface that the connection is using is * loopback, do not use background congestion @@ -842,18 +876,9 @@ set_tcp_stream_priority(struct socket *so) * switch the backgroung streams to use background * congestion control algorithm. Otherwise, even background * flows can move into foreground. - * - * System initiated background traffic like cloud uploads - * should always use background delay sensitive - * algorithms. 
This will make the stream more resposive to - * other streams on the user's network and it will - * minimize the latency induced. */ - if (IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) - fg_active = true; - - if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || - is_local || !fg_active) { + if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local || + !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) { if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) tcp_set_foreground_cc(so); } else { @@ -862,11 +887,12 @@ set_tcp_stream_priority(struct socket *so) } /* Set receive side background flags */ - if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || - is_local || !fg_active) + if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local || + !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) { tcp_clear_recv_bg(so); - else + } else { tcp_set_recv_bg(so); + } } else { tcp_clear_recv_bg(so); if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) @@ -920,13 +946,21 @@ set_packet_service_class(struct mbuf *m, struct socket *so, } /* - * If TRAFFIC_MGT_SO_BACKGROUND is set, depress the priority. + * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle + * best effort is set, depress the priority. */ - if (soisthrottled(so) && !IS_MBUF_SC_BACKGROUND(msc)) + if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so)) + msc = MBUF_SC_BK; + + if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL && + so_throttle_best_effort(so, inp->inp_last_outifp)) msc = MBUF_SC_BK; if (soissrcbackground(so)) m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND; + + if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc)) + m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME; /* * Set the traffic class in the mbuf packet header svc field */ diff --git a/bsd/netinet/in_var.h b/bsd/netinet/in_var.h index 8a3e7b94c..5b047a561 100644 --- a/bsd/netinet/in_var.h +++ b/bsd/netinet/in_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -163,6 +163,7 @@ struct kev_in_portinuse { router */ #ifdef BSD_KERNEL_PRIVATE +#include #include #include #include @@ -473,6 +474,15 @@ struct inpcb; #define MCAST_NOTSMEMBER 2 /* This host excluded source */ #define MCAST_MUTED 3 /* [deprecated] */ +/* + * Per-interface IPv4 structures. + */ +struct in_ifextra { + uint32_t netsig_len; + u_int8_t netsig[IFNET_SIGNATURELEN]; +}; +#define IN_IFEXTRA(_ifp) ((struct in_ifextra *)(_ifp->if_inetdata)) + extern u_int32_t ipv4_ll_arp_aware; extern void in_ifaddr_init(void); @@ -500,6 +510,7 @@ extern int in_inithead(void **, int); extern void in_rtqdrain(void); extern struct radix_node *in_validate(struct radix_node *); extern void ip_input(struct mbuf *); +extern void ip_input_process_list(struct mbuf *); extern int in_ifadown(struct ifaddr *ifa, int); extern void in_ifscrub(struct ifnet *, struct in_ifaddr *, int); extern u_int32_t inaddr_hashval(u_int32_t); diff --git a/bsd/netinet/ip_dummynet.c b/bsd/netinet/ip_dummynet.c index 38338ae58..c24935fd9 100644 --- a/bsd/netinet/ip_dummynet.c +++ b/bsd/netinet/ip_dummynet.c @@ -1304,10 +1304,10 @@ find_queue(struct dn_flow_set *fs, struct ip_flow_id *id) ((id->src_ip6.__u6_addr.__u6_addr32[2] << 1) & 0xfffff)^ ((id->src_ip6.__u6_addr.__u6_addr32[3] << 1) & 0xfffff)^ - ((id->src_ip6.__u6_addr.__u6_addr32[0] << 16) & 0xffff)^ - ((id->src_ip6.__u6_addr.__u6_addr32[1] << 16) & 0xffff)^ - ((id->src_ip6.__u6_addr.__u6_addr32[2] << 16) & 0xffff)^ - ((id->src_ip6.__u6_addr.__u6_addr32[3] << 16) & 0xffff)^ + ((id->src_ip6.__u6_addr.__u6_addr32[0] >> 16) & 0xffff)^ + ((id->src_ip6.__u6_addr.__u6_addr32[1] >> 16) & 0xffff)^ + ((id->src_ip6.__u6_addr.__u6_addr32[2] >> 16) & 0xffff)^ + ((id->src_ip6.__u6_addr.__u6_addr32[3] >> 16) & 0xffff)^ (id->dst_port << 1) ^ (id->src_port) ^ (id->proto ) ^ diff --git a/bsd/netinet/ip_ecn.c b/bsd/netinet/ip_ecn.c index 28a558961..38c790026 100644 --- a/bsd/netinet/ip_ecn.c +++ b/bsd/netinet/ip_ecn.c 
@@ -1,5 +1,5 @@ /* - * Copyright (c) 2000,2007 Apple Inc. All rights reserved. + * Copyright (c) 2000, 2007, 2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -91,11 +91,10 @@ ip_ecn_ingress(mode, outer, inner) *outer = *inner; switch (mode) { - case ECN_ALLOWED: /* ECN allowed */ - *outer &= ~IPTOS_CE; + case ECN_NORMAL: /* ECN normal mode, copy flags */ break; - case ECN_FORBIDDEN: /* ECN forbidden */ - *outer &= ~(IPTOS_ECT | IPTOS_CE); + case ECN_COMPATIBILITY: /* ECN compatibility mode */ + *outer &= ~IPTOS_ECN_MASK; break; case ECN_NOCARE: /* no consideration to ECN */ break; @@ -105,7 +104,7 @@ ip_ecn_ingress(mode, outer, inner) /* * modify inner ECN (TOS) field on egress operation (tunnel decapsulation). */ -void +int ip_ecn_egress(mode, outer, inner) int mode; const u_int8_t *outer; @@ -115,14 +114,25 @@ ip_ecn_egress(mode, outer, inner) panic("NULL pointer passed to ip_ecn_egress"); switch (mode) { - case ECN_ALLOWED: - if (*outer & IPTOS_CE) - *inner |= IPTOS_CE; + /* Process ECN for both normal and compatibility modes */ + case ECN_NORMAL: + case ECN_COMPATIBILITY: + if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE) { + if ((*inner & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) { + /* Drop */ + return (0); + } else { + *inner |= IPTOS_ECN_CE; + } + } else if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 && + (*inner & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0) { + *inner = *outer; + } break; - case ECN_FORBIDDEN: /* ECN forbidden */ case ECN_NOCARE: /* no consideration to ECN */ break; } + return (1); } #if INET6 @@ -143,7 +153,7 @@ ip6_ecn_ingress(mode, outer, inner) *outer |= htonl((u_int32_t)outer8 << 20); } -void +int ip6_ecn_egress(mode, outer, inner) int mode; const u_int32_t *outer; @@ -156,8 +166,95 @@ ip6_ecn_egress(mode, outer, inner) outer8 = (ntohl(*outer) >> 20) & 0xff; inner8 = (ntohl(*inner) >> 20) & 0xff; - ip_ecn_egress(mode, &outer8, &inner8); + if (ip_ecn_egress(mode, &outer8, &inner8) == 0) { + return (0); + } + 
*inner &= ~htonl(0xff << 20); + *inner |= htonl((u_int32_t)inner8 << 20); + return (1); +} + +/* + * Modify outer IPv6 ECN (Traffic Class) field according to inner IPv4 TOS field + * on ingress operation (tunnel encapsulation). + */ +void +ip46_ecn_ingress(mode, outer, tos) + int mode; + u_int32_t *outer; + const u_int8_t *tos; +{ + u_int8_t outer8; + + if (!outer || !tos) + panic("NULL pointer passed to ip46_ecn_ingress"); + + ip_ecn_ingress(mode, &outer8, tos); + *outer &= ~htonl(0xff << 20); + *outer |= htonl((u_int32_t)outer8 << 20); +} + +/* + * Modify inner IPv4 ECN (TOS) field according to output IPv6 ECN (Traffic Class) + * on egress operation (tunnel decapsulation). + */ +int +ip46_ecn_egress(mode, outer, tos) + int mode; + const u_int32_t *outer; + u_int8_t *tos; +{ + u_int8_t outer8; + + if (!outer || !tos) + panic("NULL pointer passed to ip46_ecn_egress"); + + outer8 = (ntohl(*outer) >> 20) & 0xff; + return ip_ecn_egress(mode, &outer8, tos); +} + +/* + * Modify outer IPv4 TOS field according to inner IPv6 ECN (Traffic Class) + * on ingress operation (tunnel encapsulation). + */ +void +ip64_ecn_ingress(mode, outer, inner) + int mode; + u_int8_t *outer; + const u_int32_t *inner; +{ + u_int8_t inner8; + + if (!outer || ! inner) + panic("NULL pointer passed to ip64_ecn_ingress"); + + inner8 = (ntohl(*inner) >> 20) & 0xff; + ip_ecn_ingress(mode, outer, &inner8); +} + +/* + * Modify inner IPv6 ECN (Traffic Class) according to outer IPv4 TOS field + * on egress operation (tunnel decapsulation). 
+ */ +int +ip64_ecn_egress(mode, outer, inner) + int mode; + const u_int8_t *outer; + u_int32_t *inner; +{ + u_int8_t inner8; + + if (!outer || !inner) + panic("NULL pointer passed to ip64_ecn_egress"); + + inner8 = (ntohl(*inner) >> 20) & 0xff; + if (ip_ecn_egress(mode, outer, &inner8) == 0) { + return (0); + } + *inner &= ~htonl(0xff << 20); *inner |= htonl((u_int32_t)inner8 << 20); + return (1); } + #endif diff --git a/bsd/netinet/ip_ecn.h b/bsd/netinet/ip_ecn.h index ae06c45c0..959a8e24d 100644 --- a/bsd/netinet/ip_ecn.h +++ b/bsd/netinet/ip_ecn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013, 2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -61,10 +61,10 @@ #include #ifdef BSD_KERNEL_PRIVATE -#define ECN_ALLOWED 1 /* ECN allowed */ -#define ECN_FORBIDDEN 0 /* ECN forbidden */ -#define ECN_NOCARE (-1) /* no consideration to ECN */ +#define ECN_NORMAL 1 /* ECN normal mode */ +#define ECN_COMPATIBILITY 0 /* ECN compatibility mode */ +#define ECN_NOCARE (-1) /* Ignore ECN. Use caution with this mode. 
*/ extern void ip_ecn_ingress(int, u_int8_t *, const u_int8_t *); -extern void ip_ecn_egress(int, const u_int8_t *, u_int8_t *); +extern int ip_ecn_egress(int, const u_int8_t *, u_int8_t *); #endif /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/netinet/ip_encap.c b/bsd/netinet/ip_encap.c index 3c0ee4a48..6c4d33072 100644 --- a/bsd/netinet/ip_encap.c +++ b/bsd/netinet/ip_encap.c @@ -390,12 +390,11 @@ encap_attach(af, proto, sp, sm, dp, dm, psw, arg) goto fail; } - ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK); /*XXX*/ + ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK | M_ZERO); /* XXX */ if (ep == NULL) { error = ENOBUFS; goto fail; } - bzero(ep, sizeof(*ep)); ep->af = af; ep->proto = proto; @@ -432,12 +431,11 @@ encap_attach_func(af, proto, func, psw, arg) goto fail; } - ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK); /*XXX*/ + ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK | M_ZERO); /* XXX */ if (ep == NULL) { error = ENOBUFS; goto fail; } - bzero(ep, sizeof(*ep)); ep->af = af; ep->proto = proto; diff --git a/bsd/netinet/ip_flowid.h b/bsd/netinet/ip_flowid.h index aa3ac23c9..8e84eec1f 100644 --- a/bsd/netinet/ip_flowid.h +++ b/bsd/netinet/ip_flowid.h @@ -127,6 +127,14 @@ struct ip_fw_args { #define fwa_dst fwa_dst_._fwa_dst #define fwa_dst6 fwa_dst_._fwa_dst6 +/* Allocate a separate structure for inputs args to save space and bzero time */ +struct ip_fw_in_args { + struct sockaddr_in *fwai_next_hop; /* forward address */ + struct ip_fw *fwai_ipfw_rule;/* matching IPFW rule */ + struct pf_rule *fwai_pf_rule; /* matching PF rule */ + u_int16_t fwai_divert_rule;/* divert cookie */ +}; + #endif /* BSD_KERNEL_PRIVATE */ #endif /* __IP_FLOWID_H__ */ diff --git a/bsd/netinet/ip_fw2.c b/bsd/netinet/ip_fw2.c index d9520158e..bfdc61964 100644 --- a/bsd/netinet/ip_fw2.c +++ b/bsd/netinet/ip_fw2.c @@ -2838,13 +2838,12 @@ add_rule(struct ip_fw **head, struct ip_fw *input_rule) if (*head == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE) return (EINVAL); - rule = _MALLOC(l, M_IPFW, 
M_WAIT); + rule = _MALLOC(l, M_IPFW, M_WAIT | M_ZERO); if (rule == NULL) { printf("ipfw2: add_rule MALLOC failed\n"); return (ENOSPC); } - bzero(rule, l); bcopy(input_rule, rule, l); rule->next = NULL; @@ -3539,14 +3538,12 @@ ipfw_ctl(struct sockopt *sopt) * how much room is needed, do not bother filling up the * buffer, just jump to the sooptcopyout. */ - buf = _MALLOC(size, M_TEMP, M_WAITOK); + buf = _MALLOC(size, M_TEMP, M_WAITOK | M_ZERO); if (buf == 0) { lck_mtx_unlock(ipfw_mutex); error = ENOBUFS; break; } - - bzero(buf, size); bp = buf; for (rule = layer3_chain; rule ; rule = rule->next) { @@ -3607,7 +3604,7 @@ ipfw_ctl(struct sockopt *sopt) ipfw_dyn_dst->ack_rev = p->ack_rev; ipfw_dyn_dst->dyn_type = p->dyn_type; ipfw_dyn_dst->count = p->count; - last = (char*)&ipfw_dyn_dst->next; + last = (char*)ipfw_dyn_dst; } else { ipfw_dyn_rule_32 *ipfw_dyn_dst; @@ -3633,11 +3630,16 @@ ipfw_ctl(struct sockopt *sopt) ipfw_dyn_dst->ack_rev = p->ack_rev; ipfw_dyn_dst->dyn_type = p->dyn_type; ipfw_dyn_dst->count = p->count; - last = (char*)&ipfw_dyn_dst->next; + last = (char*)ipfw_dyn_dst; } } - if (last != NULL) /* mark last dynamic rule */ - bzero(last, sizeof(last)); + /* mark last dynamic rule */ + if (last != NULL) { + if (is64user) + ((ipfw_dyn_rule_64 *)last)->next = 0; + else + ((ipfw_dyn_rule_32 *)last)->next = 0; + } } lck_mtx_unlock(ipfw_mutex); @@ -3758,13 +3760,11 @@ ipfw_ctl(struct sockopt *sopt) case IP_FW_ADD: { size_t savedsopt_valsize=0; - rule = _MALLOC(RULE_MAXSIZE, M_TEMP, M_WAITOK); + rule = _MALLOC(RULE_MAXSIZE, M_TEMP, M_WAITOK | M_ZERO); if (rule == 0) { error = ENOBUFS; break; } - - bzero(rule, RULE_MAXSIZE); if (api_version != IP_FW_CURRENT_API_VERSION) { error = ipfw_convert_to_latest(sopt, rule, api_version, is64user); diff --git a/bsd/netinet/ip_fw2_compat.c b/bsd/netinet/ip_fw2_compat.c index 1022e03f1..c4f1bf576 100644 --- a/bsd/netinet/ip_fw2_compat.c +++ b/bsd/netinet/ip_fw2_compat.c @@ -1981,7 +1981,7 @@ ipfw_convert_to_cmds_32(struct 
ip_fw *curr_rule, struct ip_fw_compat_32 *compat_ ipfw_insn *action, *cmd, *src, *dst; ipfw_insn *have_state = NULL; /* track check-state or keep-state */ - if (!compat_rule || !curr_rule || !(curr_rule->cmd)) { + if (!compat_rule || !curr_rule) { return; } @@ -2525,7 +2525,7 @@ ipfw_convert_to_cmds_64(struct ip_fw *curr_rule, struct ip_fw_compat_64 *compat_ ipfw_insn *action, *cmd, *src, *dst; ipfw_insn *have_state = NULL; /* track check-state or keep-state */ - if (!compat_rule || !curr_rule || !(curr_rule->cmd)) { + if (!compat_rule || !curr_rule) { return; } diff --git a/bsd/netinet/ip_icmp.c b/bsd/netinet/ip_icmp.c index 256d54b8f..cb0e43f7a 100644 --- a/bsd/netinet/ip_icmp.c +++ b/bsd/netinet/ip_icmp.c @@ -148,7 +148,7 @@ SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW | CTLFLAG_LOCKED, &log_redirect, 0, ""); -static int icmp_datalen = 8; +const static int icmp_datalen = 8; #if ICMP_BANDLIM @@ -287,7 +287,7 @@ stdreply: icmpelen = max(ICMP_MINLEN, min(icmp_datalen, */ if (MHLEN > (sizeof(struct ip) + ICMP_MINLEN + icmplen)) m = m_gethdr(M_DONTWAIT, MT_HEADER); /* MAC-OK */ - else + else m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); if (m == NULL) diff --git a/bsd/netinet/ip_input.c b/bsd/netinet/ip_input.c index bc1bb0f2f..994bbf8a3 100644 --- a/bsd/netinet/ip_input.c +++ b/bsd/netinet/ip_input.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -102,6 +102,7 @@ #include #include #include +#include #if PF #include #endif /* PF */ @@ -159,6 +160,8 @@ static void frag_sched_timeout(void); static struct ipq *ipq_alloc(int); static void ipq_free(struct ipq *); static void ipq_updateparams(void); +static void ip_input_second_pass(struct mbuf *, struct ifnet *, + u_int32_t, int, int, struct ip_fw_in_args *, int); decl_lck_mtx_data(static, ipqlock); static lck_attr_t *ipqlock_attr; @@ -183,6 +186,9 @@ static u_int32_t ipq_count; /* current # of allocated ipq's */ static int sysctl_ipforwarding SYSCTL_HANDLER_ARGS; static int sysctl_maxnipq SYSCTL_HANDLER_ARGS; static int sysctl_maxfragsperpacket SYSCTL_HANDLER_ARGS; +static int sysctl_reset_ip_input_stats SYSCTL_HANDLER_ARGS; +static int sysctl_ip_input_measure_bins SYSCTL_HANDLER_ARGS; +static int sysctl_ip_input_getperf SYSCTL_HANDLER_ARGS; int ipforwarding = 0; SYSCTL_PROC(_net_inet_ip, IPCTL_FORWARDING, forwarding, @@ -251,6 +257,31 @@ static int ip_checkinterface = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW | CTLFLAG_LOCKED, &ip_checkinterface, 0, "Verify packet arrives on correct interface"); +static int ip_chaining = 1; +SYSCTL_INT(_net_inet_ip, OID_AUTO, rx_chaining, CTLFLAG_RW | CTLFLAG_LOCKED, + &ip_chaining, 1, "Do receive side ip address based chaining"); + +static int ip_chainsz = 6; +SYSCTL_INT(_net_inet_ip, OID_AUTO, rx_chainsz, CTLFLAG_RW | CTLFLAG_LOCKED, + &ip_chainsz, 1, "IP receive side max chaining"); + +static int ip_input_measure = 0; +SYSCTL_PROC(_net_inet_ip, OID_AUTO, input_perf, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &ip_input_measure, 0, sysctl_reset_ip_input_stats, "I", "Do time measurement"); + +static uint64_t ip_input_measure_bins = 0; +SYSCTL_PROC(_net_inet_ip, OID_AUTO, input_perf_bins, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip_input_measure_bins, 0, + sysctl_ip_input_measure_bins, "I", + "bins for chaining performance data histogram"); + 
+static net_perf_t net_perf; +SYSCTL_PROC(_net_inet_ip, OID_AUTO, input_perf_data, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_ip_input_getperf, "S,net_perf", + "IP input performance data (struct net_perf, net/net_perf.h)"); + #if DIAGNOSTIC static int ipprintfs = 0; #endif @@ -444,198 +475,1270 @@ ip_init(struct protosw *pp, struct domain *dp) /* NOTREACHED */ } - /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ - for (i = 0; i < IPPROTO_MAX; i++) - ip_protox[i] = pr; + /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ + for (i = 0; i < IPPROTO_MAX; i++) + ip_protox[i] = pr; + /* + * Cycle through IP protocols and put them into the appropriate place + * in ip_protox[], skipping protocols IPPROTO_{IP,RAW}. + */ + VERIFY(dp == inetdomain && dp->dom_family == PF_INET); + TAILQ_FOREACH(pr, &dp->dom_protosw, pr_entry) { + VERIFY(pr->pr_domain == dp); + if (pr->pr_protocol != 0 && pr->pr_protocol != IPPROTO_RAW) { + /* Be careful to only index valid IP protocols. */ + if (pr->pr_protocol < IPPROTO_MAX) + ip_protox[pr->pr_protocol] = pr; + } + } + + /* IP fragment reassembly queue lock */ + ipqlock_grp_attr = lck_grp_attr_alloc_init(); + ipqlock_grp = lck_grp_alloc_init("ipqlock", ipqlock_grp_attr); + ipqlock_attr = lck_attr_alloc_init(); + lck_mtx_init(&ipqlock, ipqlock_grp, ipqlock_attr); + + lck_mtx_lock(&ipqlock); + /* Initialize IP reassembly queue. 
*/ + for (i = 0; i < IPREASS_NHASH; i++) + TAILQ_INIT(&ipq[i]); + + maxnipq = nmbclusters / 32; + maxfragsperpacket = 128; /* enough for 64k in 512 byte fragments */ + ipq_updateparams(); + lck_mtx_unlock(&ipqlock); + + getmicrotime(&tv); + ip_id = RandomULong() ^ tv.tv_usec; + ip_initid(); + + ipf_init(); + +#if IPSEC + sadb_stat_mutex_grp_attr = lck_grp_attr_alloc_init(); + sadb_stat_mutex_grp = lck_grp_alloc_init("sadb_stat", + sadb_stat_mutex_grp_attr); + sadb_stat_mutex_attr = lck_attr_alloc_init(); + lck_mtx_init(sadb_stat_mutex, sadb_stat_mutex_grp, + sadb_stat_mutex_attr); + +#endif + arp_init(); +} + +/* + * Initialize IPv4 source address hash table. + */ +static void +in_ifaddrhashtbl_init(void) +{ + int i, k, p; + + if (in_ifaddrhashtbl != NULL) + return; + + PE_parse_boot_argn("inaddr_nhash", &inaddr_nhash, + sizeof (inaddr_nhash)); + if (inaddr_nhash == 0) + inaddr_nhash = INADDR_NHASH; + + MALLOC(in_ifaddrhashtbl, struct in_ifaddrhashhead *, + inaddr_nhash * sizeof (*in_ifaddrhashtbl), + M_IFADDR, M_WAITOK | M_ZERO); + if (in_ifaddrhashtbl == NULL) + panic("in_ifaddrhashtbl_init allocation failed"); + + /* + * Generate the next largest prime greater than inaddr_nhash. + */ + k = (inaddr_nhash % 2 == 0) ? inaddr_nhash + 1 : inaddr_nhash + 2; + for (;;) { + p = 1; + for (i = 3; i * i <= k; i += 2) { + if (k % i == 0) + p = 0; + } + if (p == 1) + break; + k += 2; + } + inaddr_hashp = k; +} + +u_int32_t +inaddr_hashval(u_int32_t key) +{ + /* + * The hash index is the computed prime times the key modulo + * the hash size, as documented in "Introduction to Algorithms" + * (Cormen, Leiserson, Rivest). 
+ */ + if (inaddr_nhash > 1) + return ((key * inaddr_hashp) % inaddr_nhash); + else + return (0); +} + +void +ip_proto_dispatch_in_wrapper(struct mbuf *m, int hlen, u_int8_t proto) +{ + ip_proto_dispatch_in(m, hlen, proto, 0); +} + +__private_extern__ void +ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto, + ipfilter_t inject_ipfref) +{ + struct ipfilter *filter; + int seen = (inject_ipfref == NULL); + int changed_header = 0; + struct ip *ip; + void (*pr_input)(struct mbuf *, int len); + + if (!TAILQ_EMPTY(&ipv4_filters)) { + ipf_ref(); + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { + if (seen == 0) { + if ((struct ipfilter *)inject_ipfref == filter) + seen = 1; + } else if (filter->ipf_filter.ipf_input) { + errno_t result; + + if (changed_header == 0) { + /* + * Perform IP header alignment fixup, + * if needed, before passing packet + * into filter(s). + */ + IP_HDR_ALIGNMENT_FIXUP(m, + m->m_pkthdr.rcvif, ipf_unref()); + + /* ipf_unref() already called */ + if (m == NULL) + return; + + changed_header = 1; + ip = mtod(m, struct ip *); + ip->ip_len = htons(ip->ip_len + hlen); + ip->ip_off = htons(ip->ip_off); + ip->ip_sum = 0; + ip->ip_sum = ip_cksum_hdr_in(m, hlen); + } + result = filter->ipf_filter.ipf_input( + filter->ipf_filter.cookie, (mbuf_t *)&m, + hlen, proto); + if (result == EJUSTRETURN) { + ipf_unref(); + return; + } + if (result != 0) { + ipf_unref(); + m_freem(m); + return; + } + } + } + ipf_unref(); + } + + /* Perform IP header alignment fixup (post-filters), if needed */ + IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return); + + /* + * If there isn't a specific lock for the protocol + * we're about to call, use the generic lock for AF_INET. 
+ * otherwise let the protocol deal with its own locking + */ + ip = mtod(m, struct ip *); + + if (changed_header) { + ip->ip_len = ntohs(ip->ip_len) - hlen; + ip->ip_off = ntohs(ip->ip_off); + } + + if ((pr_input = ip_protox[ip->ip_p]->pr_input) == NULL) { + m_freem(m); + } else if (!(ip_protox[ip->ip_p]->pr_flags & PR_PROTOLOCK)) { + lck_mtx_lock(inet_domain_mutex); + pr_input(m, hlen); + lck_mtx_unlock(inet_domain_mutex); + } else { + pr_input(m, hlen); + } +} + +struct pktchain_elm { + struct mbuf *pkte_head; + struct mbuf *pkte_tail; + struct in_addr pkte_saddr; + struct in_addr pkte_daddr; + uint16_t pkte_npkts; + uint16_t pkte_proto; + uint32_t pkte_nbytes; +}; + +typedef struct pktchain_elm pktchain_elm_t; + +/* Store upto PKTTBL_SZ unique flows on the stack */ +#define PKTTBL_SZ 7 + +static struct mbuf * +ip_chain_insert(struct mbuf *packet, pktchain_elm_t *tbl) +{ + struct ip* ip; + int pkttbl_idx = 0; + + ip = mtod(packet, struct ip*); + + /* reusing the hash function from inaddr_hashval */ + pkttbl_idx = inaddr_hashval(ntohs(ip->ip_src.s_addr)) % PKTTBL_SZ; + if (tbl[pkttbl_idx].pkte_head == NULL) { + tbl[pkttbl_idx].pkte_head = packet; + tbl[pkttbl_idx].pkte_saddr.s_addr = ip->ip_src.s_addr; + tbl[pkttbl_idx].pkte_daddr.s_addr = ip->ip_dst.s_addr; + tbl[pkttbl_idx].pkte_proto = ip->ip_p; + } else { + if ((ip->ip_dst.s_addr == tbl[pkttbl_idx].pkte_daddr.s_addr) && + (ip->ip_src.s_addr == tbl[pkttbl_idx].pkte_saddr.s_addr) && + (ip->ip_p == tbl[pkttbl_idx].pkte_proto)) { + } else { + return (packet); + } + } + if (tbl[pkttbl_idx].pkte_tail != NULL) + mbuf_setnextpkt(tbl[pkttbl_idx].pkte_tail, packet); + + tbl[pkttbl_idx].pkte_tail = packet; + tbl[pkttbl_idx].pkte_npkts += 1; + tbl[pkttbl_idx].pkte_nbytes += packet->m_pkthdr.len; + return (NULL); +} + +/* args is a dummy variable here for backward compatibility */ +static void +ip_input_second_pass_loop_tbl(pktchain_elm_t *tbl, struct ip_fw_in_args *args) +{ + int i = 0; + + for (i = 0; i < PKTTBL_SZ; 
i++) { + if (tbl[i].pkte_head != NULL) { + struct mbuf *m = tbl[i].pkte_head; + ip_input_second_pass(m, m->m_pkthdr.rcvif, 0, + tbl[i].pkte_npkts, tbl[i].pkte_nbytes, args, 0); + + if (tbl[i].pkte_npkts > 2) + ipstat.ips_rxc_chainsz_gt2++; + if (tbl[i].pkte_npkts > 4) + ipstat.ips_rxc_chainsz_gt4++; + + if (ip_input_measure) + net_perf_histogram(&net_perf, tbl[i].pkte_npkts); + + tbl[i].pkte_head = tbl[i].pkte_tail = NULL; + tbl[i].pkte_npkts = 0; + tbl[i].pkte_nbytes = 0; + /* no need to initialize address and protocol in tbl */ + } + } +} + +static void +ip_input_cpout_args(struct ip_fw_in_args *args, struct ip_fw_args *args1, + boolean_t *done_init) +{ + if (*done_init == FALSE) { + bzero(args1, sizeof(struct ip_fw_args)); + *done_init = TRUE; + } + args1->fwa_next_hop = args->fwai_next_hop; + args1->fwa_ipfw_rule = args->fwai_ipfw_rule; + args1->fwa_pf_rule = args->fwai_pf_rule; + args1->fwa_divert_rule = args->fwai_divert_rule; +} + +static void +ip_input_cpin_args(struct ip_fw_args *args1, struct ip_fw_in_args *args) +{ + args->fwai_next_hop = args1->fwa_next_hop; + args->fwai_ipfw_rule = args1->fwa_ipfw_rule; + args->fwai_pf_rule = args1->fwa_pf_rule; + args->fwai_divert_rule = args1->fwa_divert_rule; +} + +typedef enum { + IPINPUT_DOCHAIN = 0, + IPINPUT_DONTCHAIN, + IPINPUT_FREED, + IPINPUT_DONE +} ipinput_chain_ret_t; + +static void +ip_input_update_nstat(struct ifnet *ifp, struct in_addr src_ip, + u_int32_t packets, u_int32_t bytes) +{ + if (nstat_collect) { + struct rtentry *rt = ifnet_cached_rtlookup_inet(ifp, + src_ip); + if (rt != NULL) { + nstat_route_rx(rt, packets, bytes, 0); + rtfree(rt); + } + } +} + +static void +ip_input_dispatch_chain(struct mbuf *m) +{ + struct mbuf *tmp_mbuf = m; + struct mbuf *nxt_mbuf = NULL; + struct ip *ip = NULL; + unsigned int hlen; + + ip = mtod(tmp_mbuf, struct ip *); + hlen = IP_VHL_HL(ip->ip_vhl) << 2; + while(tmp_mbuf) { + nxt_mbuf = mbuf_nextpkt(tmp_mbuf); + mbuf_setnextpkt(tmp_mbuf, NULL); + + if ((sw_lro) && 
(ip->ip_p == IPPROTO_TCP)) + tmp_mbuf = tcp_lro(tmp_mbuf, hlen); + if (tmp_mbuf) + ip_proto_dispatch_in(tmp_mbuf, hlen, ip->ip_p, 0); + tmp_mbuf = nxt_mbuf; + if (tmp_mbuf) { + ip = mtod(tmp_mbuf, struct ip *); + /* first mbuf of chain already has adjusted ip_len */ + hlen = IP_VHL_HL(ip->ip_vhl) << 2; + ip->ip_len -= hlen; + } + } +} + +static void +ip_input_setdst_chain(struct mbuf *m, uint32_t ifindex, struct in_ifaddr *ia) +{ + struct mbuf *tmp_mbuf = m; + + while (tmp_mbuf) { + ip_setdstifaddr_info(tmp_mbuf, ifindex, ia); + tmp_mbuf = mbuf_nextpkt(tmp_mbuf); + } +} + +/* + * First pass does all essential packet validation and places on a per flow + * queue for doing operations that have same outcome for all packets of a flow. + * div_info is packet divert/tee info + */ +static ipinput_chain_ret_t +ip_input_first_pass(struct mbuf *m, u_int32_t *div_info, + struct ip_fw_in_args *args, int *ours, struct mbuf **modm) +{ + struct ip *ip; + struct ifnet *inifp; + unsigned int hlen; + int retval = IPINPUT_DOCHAIN; + int len = 0; + struct in_addr src_ip; +#if IPFIREWALL + int i; +#endif +#if IPFIREWALL || DUMMYNET + struct m_tag *copy; + struct m_tag *p; + boolean_t delete = FALSE; + struct ip_fw_args args1; + boolean_t init = FALSE; +#endif + ipfilter_t inject_filter_ref = NULL; + +#if !IPFIREWALL +#pragma unused (args) +#endif + +#if !IPDIVERT +#pragma unused (div_info) +#pragma unused (ours) +#endif + +#if !IPFIREWALL_FORWARD +#pragma unused (ours) +#endif + + /* Check if the mbuf is still valid after interface filter processing */ + MBUF_INPUT_CHECK(m, m->m_pkthdr.rcvif); + inifp = mbuf_pkthdr_rcvif(m); + VERIFY(inifp != NULL); + + /* Perform IP header alignment fixup, if needed */ + IP_HDR_ALIGNMENT_FIXUP(m, inifp, goto bad); + + m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED; + +#if IPFIREWALL || DUMMYNET + + /* + * Don't bother searching for tag(s) if there's none. 
+ */ + if (SLIST_EMPTY(&m->m_pkthdr.tags)) + goto ipfw_tags_done; + + /* Grab info from mtags prepended to the chain */ + p = m_tag_first(m); + while (p) { + if (p->m_tag_id == KERNEL_MODULE_TAG_ID) { +#if DUMMYNET + if (p->m_tag_type == KERNEL_TAG_TYPE_DUMMYNET) { + struct dn_pkt_tag *dn_tag; + + dn_tag = (struct dn_pkt_tag *)(p+1); + args->fwai_ipfw_rule = dn_tag->dn_ipfw_rule; + args->fwai_pf_rule = dn_tag->dn_pf_rule; + delete = TRUE; + } +#endif + +#if IPDIVERT + if (p->m_tag_type == KERNEL_TAG_TYPE_DIVERT) { + struct divert_tag *div_tag; + + div_tag = (struct divert_tag *)(p+1); + args->fwai_divert_rule = div_tag->cookie; + delete = TRUE; + } +#endif + + if (p->m_tag_type == KERNEL_TAG_TYPE_IPFORWARD) { + struct ip_fwd_tag *ipfwd_tag; + + ipfwd_tag = (struct ip_fwd_tag *)(p+1); + args->fwai_next_hop = ipfwd_tag->next_hop; + delete = TRUE; + } + + if (delete) { + copy = p; + p = m_tag_next(m, p); + m_tag_delete(m, copy); + } else { + p = m_tag_next(m, p); + } + } else { + p = m_tag_next(m, p); + } + } + +#if DIAGNOSTIC + if (m == NULL || !(m->m_flags & M_PKTHDR)) + panic("ip_input no HDR"); +#endif + +#if DUMMYNET + if (args->fwai_ipfw_rule || args->fwai_pf_rule) { + /* dummynet already filtered us */ + ip = mtod(m, struct ip *); + hlen = IP_VHL_HL(ip->ip_vhl) << 2; + inject_filter_ref = ipf_get_inject_filter(m); +#if IPFIREWALL + if (args->fwai_ipfw_rule) + goto iphack; +#endif /* IPFIREWALL */ + if (args->fwai_pf_rule) + goto check_with_pf; + } +#endif /* DUMMYNET */ +ipfw_tags_done: +#endif /* IPFIREWALL || DUMMYNET */ + + /* + * No need to process packet twice if we've already seen it. 
+ */ + if (!SLIST_EMPTY(&m->m_pkthdr.tags)) + inject_filter_ref = ipf_get_inject_filter(m); + if (inject_filter_ref != NULL) { + ip = mtod(m, struct ip *); + hlen = IP_VHL_HL(ip->ip_vhl) << 2; + + DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL, + struct ip *, ip, struct ifnet *, inifp, + struct ip *, ip, struct ip6_hdr *, NULL); + + ip->ip_len = ntohs(ip->ip_len) - hlen; + ip->ip_off = ntohs(ip->ip_off); + ip_proto_dispatch_in(m, hlen, ip->ip_p, inject_filter_ref); + return (IPINPUT_DONE); + } + + if (m->m_pkthdr.len < sizeof (struct ip)) { + OSAddAtomic(1, &ipstat.ips_total); + OSAddAtomic(1, &ipstat.ips_tooshort); + m_freem(m); + return (IPINPUT_FREED); + } + + if (m->m_len < sizeof (struct ip) && + (m = m_pullup(m, sizeof (struct ip))) == NULL) { + OSAddAtomic(1, &ipstat.ips_total); + OSAddAtomic(1, &ipstat.ips_toosmall); + return (IPINPUT_FREED); + } + + ip = mtod(m, struct ip *); + *modm = m; + + KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr, ip->ip_src.s_addr, + ip->ip_p, ip->ip_off, ip->ip_len); + + if (IP_VHL_V(ip->ip_vhl) != IPVERSION) { + OSAddAtomic(1, &ipstat.ips_total); + OSAddAtomic(1, &ipstat.ips_badvers); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + m_freem(m); + return (IPINPUT_FREED); + } + + hlen = IP_VHL_HL(ip->ip_vhl) << 2; + if (hlen < sizeof (struct ip)) { + OSAddAtomic(1, &ipstat.ips_total); + OSAddAtomic(1, &ipstat.ips_badhlen); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + m_freem(m); + return (IPINPUT_FREED); + } + + if (hlen > m->m_len) { + if ((m = m_pullup(m, hlen)) == NULL) { + OSAddAtomic(1, &ipstat.ips_total); + OSAddAtomic(1, &ipstat.ips_badhlen); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + return (IPINPUT_FREED); + } + ip = mtod(m, struct ip *); + *modm = m; + } + + /* 127/8 must not appear on wire - RFC1122 */ + if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || + (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { + /* + * Allow for the following exceptions: + * + * 1. 
If the packet was sent to loopback (i.e. rcvif + * would have been set earlier at output time.) + * + * 2. If the packet was sent out on loopback from a local + * source address which belongs to a non-loopback + * interface (i.e. rcvif may not necessarily be a + * loopback interface, hence the test for PKTF_LOOP.) + * Unlike IPv6, there is no interface scope ID, and + * therefore we don't care so much about PKTF_IFINFO. + */ + if (!(inifp->if_flags & IFF_LOOPBACK) && + !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) { + OSAddAtomic(1, &ipstat.ips_total); + OSAddAtomic(1, &ipstat.ips_badaddr); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + m_freem(m); + return (IPINPUT_FREED); + } + } + + /* IPv4 Link-Local Addresses as defined in RFC3927 */ + if ((IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || + IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)))) { + ip_linklocal_stat.iplls_in_total++; + if (ip->ip_ttl != MAXTTL) { + OSAddAtomic(1, &ip_linklocal_stat.iplls_in_badttl); + /* Silently drop link local traffic with bad TTL */ + if (!ip_linklocal_in_allowbadttl) { + OSAddAtomic(1, &ipstat.ips_total); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + m_freem(m); + return (IPINPUT_FREED); + } + } + } + + if (ip_cksum(m, hlen)) { + OSAddAtomic(1, &ipstat.ips_total); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + m_freem(m); + return (IPINPUT_FREED); + } + + DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL, + struct ip *, ip, struct ifnet *, inifp, + struct ip *, ip, struct ip6_hdr *, NULL); + + /* + * Convert fields to host representation. + */ +#if BYTE_ORDER != BIG_ENDIAN + NTOHS(ip->ip_len); +#endif + + if (ip->ip_len < hlen) { + OSAddAtomic(1, &ipstat.ips_total); + OSAddAtomic(1, &ipstat.ips_badlen); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + m_freem(m); + return (IPINPUT_FREED); + } + +#if BYTE_ORDER != BIG_ENDIAN + NTOHS(ip->ip_off); +#endif + + /* + * Check that the amount of data in the buffers + * is as at least much as the IP header would have us expect. 
+ * Trim mbufs if longer than we expect. + * Drop packet if shorter than we expect. + */ + if (m->m_pkthdr.len < ip->ip_len) { + OSAddAtomic(1, &ipstat.ips_total); + OSAddAtomic(1, &ipstat.ips_tooshort); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + m_freem(m); + return (IPINPUT_FREED); + } + + if (m->m_pkthdr.len > ip->ip_len) { + /* + * Invalidate hardware checksum info if ip_adj_clear_hwcksum + * is set; useful to handle buggy drivers. Note that this + * should not be enabled by default, as we may get here due + * to link-layer padding. + */ + if (ip_adj_clear_hwcksum && + (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) && + !(inifp->if_flags & IFF_LOOPBACK) && + !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) { + m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID; + m->m_pkthdr.csum_data = 0; + ipstat.ips_adj_hwcsum_clr++; + } + + ipstat.ips_adj++; + if (m->m_len == m->m_pkthdr.len) { + m->m_len = ip->ip_len; + m->m_pkthdr.len = ip->ip_len; + } else + m_adj(m, ip->ip_len - m->m_pkthdr.len); + } + + /* for consistency */ + m->m_pkthdr.pkt_proto = ip->ip_p; + + /* for netstat route statistics */ + src_ip = ip->ip_src; + len = m->m_pkthdr.len; + +#if DUMMYNET +check_with_pf: +#endif +#if PF + /* Invoke inbound packet filter */ + if (PF_IS_ENABLED) { + int error; + ip_input_cpout_args(args, &args1, &init); + +#if DUMMYNET + error = pf_af_hook(inifp, NULL, &m, AF_INET, TRUE, &args1); +#else + error = pf_af_hook(inifp, NULL, &m, AF_INET, TRUE, NULL); +#endif /* DUMMYNET */ + if (error != 0 || m == NULL) { + if (m != NULL) { + panic("%s: unexpected packet %p\n", + __func__, m); + /* NOTREACHED */ + } + /* Already freed by callee */ + ip_input_update_nstat(inifp, src_ip, 1, len); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + OSAddAtomic(1, &ipstat.ips_total); + return (IPINPUT_FREED); + } + ip = mtod(m, struct ip *); + hlen = IP_VHL_HL(ip->ip_vhl) << 2; + *modm = m; + ip_input_cpin_args(&args1, args); + } +#endif /* PF */ + +#if IPSEC + if (ipsec_bypass == 0 && ipsec_gethist(m, NULL)) { 
+ retval = IPINPUT_DONTCHAIN; /* XXX scope for chaining here? */ + goto pass; + } +#endif + +#if IPFIREWALL +#if DUMMYNET +iphack: +#endif /* DUMMYNET */ + /* + * Check if we want to allow this packet to be processed. + * Consider it to be bad if not. + */ + if (fw_enable && IPFW_LOADED) { +#if IPFIREWALL_FORWARD + /* + * If we've been forwarded from the output side, then + * skip the firewall a second time + */ + if (args->fwai_next_hop) { + *ours = 1; + return (IPINPUT_DONTCHAIN); + } +#endif /* IPFIREWALL_FORWARD */ + ip_input_cpout_args(args, &args1, &init); + args1.fwa_m = m; + + i = ip_fw_chk_ptr(&args1); + m = args1.fwa_m; + + if ((i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */ + if (m) + m_freem(m); + ip_input_update_nstat(inifp, src_ip, 1, len); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + OSAddAtomic(1, &ipstat.ips_total); + return (IPINPUT_FREED); + } + ip = mtod(m, struct ip *); /* just in case m changed */ + *modm = m; + ip_input_cpin_args(&args1, args); + + if (i == 0 && args->fwai_next_hop == NULL) { /* common case */ + goto pass; + } +#if DUMMYNET + if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) { + /* Send packet to the appropriate pipe */ + ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args1, + DN_CLIENT_IPFW); + ip_input_update_nstat(inifp, src_ip, 1, len); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + OSAddAtomic(1, &ipstat.ips_total); + return (IPINPUT_FREED); + } +#endif /* DUMMYNET */ +#if IPDIVERT + if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) { + /* Divert or tee packet */ + *div_info = i; + *ours = 1; + return (IPINPUT_DONTCHAIN); + } +#endif +#if IPFIREWALL_FORWARD + if (i == 0 && args->fwai_next_hop != NULL) { + retval = IPINPUT_DONTCHAIN; + goto pass; + } +#endif + /* + * if we get here, the packet must be dropped + */ + ip_input_update_nstat(inifp, src_ip, 1, len); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + m_freem(m); + OSAddAtomic(1, &ipstat.ips_total); + return (IPINPUT_FREED); + } +#endif /* IPFIREWALL */ +#if 
IPSEC | IPFIREWALL +pass: +#endif + /* + * Process options and, if not destined for us, + * ship it on. ip_dooptions returns 1 when an + * error was detected (causing an icmp message + * to be sent and the original packet to be freed). + */ + ip_nhops = 0; /* for source routed packets */ +#if IPFIREWALL + if (hlen > sizeof (struct ip) && + ip_dooptions(m, 0, args->fwai_next_hop)) { +#else /* !IPFIREWALL */ + if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, NULL)) { +#endif /* !IPFIREWALL */ + ip_input_update_nstat(inifp, src_ip, 1, len); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + OSAddAtomic(1, &ipstat.ips_total); + return (IPINPUT_FREED); + } + + /* + * Don't chain fragmented packets as the process of determining + * if it is our fragment or someone else's plus the complexity of + * divert and fw args makes it harder to do chaining. + */ + if (ip->ip_off & ~(IP_DF | IP_RF)) + return (IPINPUT_DONTCHAIN); + + /* Allow DHCP/BootP responses through */ + if ((inifp->if_eflags & IFEF_AUTOCONFIGURING) && + hlen == sizeof (struct ip) && ip->ip_p == IPPROTO_UDP) { + struct udpiphdr *ui; + + if (m->m_len < sizeof (struct udpiphdr) && + (m = m_pullup(m, sizeof (struct udpiphdr))) == NULL) { + OSAddAtomic(1, &udpstat.udps_hdrops); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + OSAddAtomic(1, &ipstat.ips_total); + return (IPINPUT_FREED); + } + *modm = m; + ui = mtod(m, struct udpiphdr *); + if (ntohs(ui->ui_dport) == IPPORT_BOOTPC) { + ip_setdstifaddr_info(m, inifp->if_index, NULL); + return (IPINPUT_DONTCHAIN); + } + } + + /* Avoid chaining raw sockets as ipsec checks occur later for them */ + if (ip_protox[ip->ip_p]->pr_flags & PR_LASTHDR) + return (IPINPUT_DONTCHAIN); + + return (retval); +#if !defined(__i386__) && !defined(__x86_64__) +bad: + m_freem(m); + return (IPINPUT_FREED); +#endif +} + +static void +ip_input_second_pass(struct mbuf *m, struct ifnet *inifp, u_int32_t div_info, + int npkts_in_chain, int bytes_in_chain, struct ip_fw_in_args *args, int ours) +{ 
+ unsigned int checkif; + struct mbuf *tmp_mbuf = NULL; + struct in_ifaddr *ia = NULL; + struct in_addr pkt_dst; + unsigned int hlen; + +#if !IPFIREWALL +#pragma unused (args) +#endif + +#if !IPDIVERT +#pragma unused (div_info) +#endif + + struct ip *ip = mtod(m, struct ip *); + hlen = IP_VHL_HL(ip->ip_vhl) << 2; + + OSAddAtomic(npkts_in_chain, &ipstat.ips_total); + + /* + * Naively assume we can attribute inbound data to the route we would + * use to send to this destination. Asymmetric routing breaks this + * assumption, but it still allows us to account for traffic from + * a remote node in the routing table. + * this has a very significant performance impact so we bypass + * if nstat_collect is disabled. We may also bypass if the + * protocol is tcp in the future because tcp will have a route that + * we can use to attribute the data to. That does mean we would not + * account for forwarded tcp traffic. + */ + ip_input_update_nstat(inifp, ip->ip_src, npkts_in_chain, + bytes_in_chain); + + if (ours) + goto ours; + + /* + * Check our list of addresses, to see if the packet is for us. + * If we don't have any addresses, assume any unicast packet + * we receive might be for us (and let the upper layers deal + * with it). + */ + tmp_mbuf = m; + if (TAILQ_EMPTY(&in_ifaddrhead)) { + while (tmp_mbuf) { + if (!(tmp_mbuf->m_flags & (M_MCAST|M_BCAST))) { + ip_setdstifaddr_info(tmp_mbuf, inifp->if_index, + NULL); + } + tmp_mbuf = mbuf_nextpkt(tmp_mbuf); + } + goto ours; + } + /* + * Cache the destination address of the packet; this may be + * changed by use of 'ipfw fwd'. + */ +#if IPFIREWALL + pkt_dst = args->fwai_next_hop == NULL ? 
+ ip->ip_dst : args->fwai_next_hop->sin_addr; +#else /* !IPFIREWALL */ + pkt_dst = ip->ip_dst; +#endif /* !IPFIREWALL */ + + /* + * Enable a consistency check between the destination address + * and the arrival interface for a unicast packet (the RFC 1122 + * strong ES model) if IP forwarding is disabled and the packet + * is not locally generated and the packet is not subject to + * 'ipfw fwd'. + * + * XXX - Checking also should be disabled if the destination + * address is ipnat'ed to a different interface. + * + * XXX - Checking is incompatible with IP aliases added + * to the loopback interface instead of the interface where + * the packets are received. + */ + checkif = ip_checkinterface && (ipforwarding == 0) && + !(inifp->if_flags & IFF_LOOPBACK) && + !(m->m_pkthdr.pkt_flags & PKTF_LOOP) +#if IPFIREWALL + && (args->fwai_next_hop == NULL); +#else /* !IPFIREWALL */ + ; +#endif /* !IPFIREWALL */ + + /* + * Check for exact addresses in the hash bucket. + */ + lck_rw_lock_shared(in_ifaddr_rwlock); + TAILQ_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) { + /* + * If the address matches, verify that the packet + * arrived via the correct interface if checking is + * enabled. + */ + if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr && + (!checkif || ia->ia_ifp == inifp)) { + ip_input_setdst_chain(m, 0, ia); + lck_rw_done(in_ifaddr_rwlock); + goto ours; + } + } + lck_rw_done(in_ifaddr_rwlock); + + /* + * Check for broadcast addresses. + * + * Only accept broadcast packets that arrive via the matching + * interface. Reception of forwarded directed broadcasts would be + * handled via ip_forward() and ether_frameout() with the loopback + * into the stack for SIMPLEX interfaces handled by ether_frameout(). 
+ */ + if (inifp->if_flags & IFF_BROADCAST) { + struct ifaddr *ifa; + + ifnet_lock_shared(inifp); + TAILQ_FOREACH(ifa, &inifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET) { + continue; + } + ia = ifatoia(ifa); + if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == + pkt_dst.s_addr || ia->ia_netbroadcast.s_addr == + pkt_dst.s_addr) { + ip_input_setdst_chain(m, 0, ia); + ifnet_lock_done(inifp); + goto ours; + } + } + ifnet_lock_done(inifp); + } + + if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { + struct in_multi *inm; + /* + * See if we belong to the destination multicast group on the + * arrival interface. + */ + in_multihead_lock_shared(); + IN_LOOKUP_MULTI(&ip->ip_dst, inifp, inm); + in_multihead_lock_done(); + if (inm == NULL) { + OSAddAtomic(npkts_in_chain, &ipstat.ips_notmember); + m_freem_list(m); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + return; + } + ip_input_setdst_chain(m, inifp->if_index, NULL); + INM_REMREF(inm); + goto ours; + } + + if (ip->ip_dst.s_addr == (u_int32_t)INADDR_BROADCAST || + ip->ip_dst.s_addr == INADDR_ANY) { + ip_input_setdst_chain(m, inifp->if_index, NULL); + goto ours; + } + + if (ip->ip_p == IPPROTO_UDP) { + struct udpiphdr *ui; + ui = mtod(m, struct udpiphdr *); + if (ntohs(ui->ui_dport) == IPPORT_BOOTPC) { + goto ours; + } + } + + tmp_mbuf = m; + struct mbuf *nxt_mbuf = NULL; + while (tmp_mbuf) { + nxt_mbuf = mbuf_nextpkt(tmp_mbuf); + /* + * Not for us; forward if possible and desirable. + */ + mbuf_setnextpkt(tmp_mbuf, NULL); + if (ipforwarding == 0) { + OSAddAtomic(1, &ipstat.ips_cantforward); + m_freem(tmp_mbuf); + } else { +#if IPFIREWALL + ip_forward(tmp_mbuf, 0, args->fwai_next_hop); +#else + ip_forward(tmp_mbuf, 0, NULL); +#endif + } + tmp_mbuf = nxt_mbuf; + } + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + return; +ours: + /* + * If offset or IP_MF are set, must reassemble. 
+ */ + if (ip->ip_off & ~(IP_DF | IP_RF)) { + VERIFY(npkts_in_chain == 1); + /* + * ip_reass() will return a different mbuf, and update + * the divert info in div_info and args->fwai_divert_rule. + */ +#if IPDIVERT + m = ip_reass(m, (u_int16_t *)&div_info, &args->fwai_divert_rule); +#else + m = ip_reass(m); +#endif + if (m == NULL) + return; + ip = mtod(m, struct ip *); + /* Get the header length of the reassembled packet */ + hlen = IP_VHL_HL(ip->ip_vhl) << 2; +#if IPDIVERT + /* Restore original checksum before diverting packet */ + if (div_info != 0) { + VERIFY(npkts_in_chain == 1); +#if BYTE_ORDER != BIG_ENDIAN + HTONS(ip->ip_len); + HTONS(ip->ip_off); +#endif + ip->ip_sum = 0; + ip->ip_sum = ip_cksum_hdr_in(m, hlen); +#if BYTE_ORDER != BIG_ENDIAN + NTOHS(ip->ip_off); + NTOHS(ip->ip_len); +#endif + } +#endif + } + + /* + * Further protocols expect the packet length to be w/o the + * IP header. + */ + ip->ip_len -= hlen; + +#if IPDIVERT + /* + * Divert or tee packet to the divert protocol if required. + * + * If div_info is zero then cookie should be too, so we shouldn't + * need to clear them here. Assume divert_packet() does so also. + */ + if (div_info != 0) { + struct mbuf *clone = NULL; + VERIFY(npkts_in_chain == 1); + + /* Clone packet if we're doing a 'tee' */ + if (div_info & IP_FW_PORT_TEE_FLAG) + clone = m_dup(m, M_DONTWAIT); + + /* Restore packet header fields to original values */ + ip->ip_len += hlen; + +#if BYTE_ORDER != BIG_ENDIAN + HTONS(ip->ip_len); + HTONS(ip->ip_off); +#endif + /* Deliver packet to divert input routine */ + OSAddAtomic(1, &ipstat.ips_delivered); + divert_packet(m, 1, div_info & 0xffff, args->fwai_divert_rule); + + /* If 'tee', continue with original packet */ + if (clone == NULL) { + return; + } + m = clone; + ip = mtod(m, struct ip *); + } +#endif + +#if IPSEC /* - * Cycle through IP protocols and put them into the appropriate place - * in ip_protox[], skipping protocols IPPROTO_{IP,RAW}. 
+ * enforce IPsec policy checking if we are seeing last header. + * note that we do not visit this with protocols with pcb layer + * code - like udp/tcp/raw ip. */ - VERIFY(dp == inetdomain && dp->dom_family == PF_INET); - TAILQ_FOREACH(pr, &dp->dom_protosw, pr_entry) { - VERIFY(pr->pr_domain == dp); - if (pr->pr_protocol != 0 && pr->pr_protocol != IPPROTO_RAW) { - /* Be careful to only index valid IP protocols. */ - if (pr->pr_protocol < IPPROTO_MAX) - ip_protox[pr->pr_protocol] = pr; + if (ipsec_bypass == 0 && (ip_protox[ip->ip_p]->pr_flags & PR_LASTHDR)) { + VERIFY(npkts_in_chain == 1); + if (ipsec4_in_reject(m, NULL)) { + IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); + goto bad; } } +#endif /* IPSEC */ - /* IP fragment reassembly queue lock */ - ipqlock_grp_attr = lck_grp_attr_alloc_init(); - ipqlock_grp = lck_grp_alloc_init("ipqlock", ipqlock_grp_attr); - ipqlock_attr = lck_attr_alloc_init(); - lck_mtx_init(&ipqlock, ipqlock_grp, ipqlock_attr); - - lck_mtx_lock(&ipqlock); - /* Initialize IP reassembly queue. */ - for (i = 0; i < IPREASS_NHASH; i++) - TAILQ_INIT(&ipq[i]); - - maxnipq = nmbclusters / 32; - maxfragsperpacket = 128; /* enough for 64k in 512 byte fragments */ - ipq_updateparams(); - lck_mtx_unlock(&ipqlock); - - getmicrotime(&tv); - ip_id = RandomULong() ^ tv.tv_usec; - ip_initid(); + /* + * Switch out to protocol's input routine. 
+ */ + OSAddAtomic(npkts_in_chain, &ipstat.ips_delivered); - ipf_init(); +#if IPFIREWALL + if (args->fwai_next_hop && ip->ip_p == IPPROTO_TCP) { + /* TCP needs IPFORWARD info if available */ + struct m_tag *fwd_tag; + struct ip_fwd_tag *ipfwd_tag; -#if IPSEC - sadb_stat_mutex_grp_attr = lck_grp_attr_alloc_init(); - sadb_stat_mutex_grp = lck_grp_alloc_init("sadb_stat", - sadb_stat_mutex_grp_attr); - sadb_stat_mutex_attr = lck_attr_alloc_init(); - lck_mtx_init(sadb_stat_mutex, sadb_stat_mutex_grp, - sadb_stat_mutex_attr); + VERIFY(npkts_in_chain == 1); + fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_IPFORWARD, sizeof (*ipfwd_tag), + M_NOWAIT, m); + if (fwd_tag == NULL) + goto bad; -#endif - arp_init(); -} + ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1); + ipfwd_tag->next_hop = args->fwai_next_hop; -/* - * Initialize IPv4 source address hash table. - */ -static void -in_ifaddrhashtbl_init(void) -{ - int i, k, p; + m_tag_prepend(m, fwd_tag); - if (in_ifaddrhashtbl != NULL) - return; + KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, + ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); - PE_parse_boot_argn("inaddr_nhash", &inaddr_nhash, - sizeof (inaddr_nhash)); - if (inaddr_nhash == 0) - inaddr_nhash = INADDR_NHASH; + /* TCP deals with its own locking */ + ip_proto_dispatch_in(m, hlen, ip->ip_p, 0); + } else { + KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, + ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); - MALLOC(in_ifaddrhashtbl, struct in_ifaddrhashhead *, - inaddr_nhash * sizeof (*in_ifaddrhashtbl), - M_IFADDR, M_WAITOK | M_ZERO); - if (in_ifaddrhashtbl == NULL) - panic("in_ifaddrhashtbl_init allocation failed"); + ip_input_dispatch_chain(m); - /* - * Generate the next largest prime greater than inaddr_nhash. - */ - k = (inaddr_nhash % 2 == 0) ? 
inaddr_nhash + 1 : inaddr_nhash + 2; - for (;;) { - p = 1; - for (i = 3; i * i <= k; i += 2) { - if (k % i == 0) - p = 0; - } - if (p == 1) - break; - k += 2; } - inaddr_hashp = k; -} +#else /* !IPFIREWALL */ + ip_input_dispatch_chain(m); -u_int32_t -inaddr_hashval(u_int32_t key) -{ - /* - * The hash index is the computed prime times the key modulo - * the hash size, as documented in "Introduction to Algorithms" - * (Cormen, Leiserson, Rivest). - */ - if (inaddr_nhash > 1) - return ((key * inaddr_hashp) % inaddr_nhash); - else - return (0); +#endif /* !IPFIREWALL */ + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + return; +bad: + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); + m_freem(m); } void -ip_proto_dispatch_in_wrapper(struct mbuf *m, int hlen, u_int8_t proto) -{ - ip_proto_dispatch_in(m, hlen, proto, 0); -} - -__private_extern__ void -ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto, - ipfilter_t inject_ipfref) +ip_input_process_list(struct mbuf *packet_list) { - struct ipfilter *filter; - int seen = (inject_ipfref == NULL); - int changed_header = 0; - struct ip *ip; - void (*pr_input)(struct mbuf *, int len); - - if (!TAILQ_EMPTY(&ipv4_filters)) { - ipf_ref(); - TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { - if (seen == 0) { - if ((struct ipfilter *)inject_ipfref == filter) - seen = 1; - } else if (filter->ipf_filter.ipf_input) { - errno_t result; - - if (changed_header == 0) { - /* - * Perform IP header alignment fixup, - * if needed, before passing packet - * into filter(s). 
- */ - IP_HDR_ALIGNMENT_FIXUP(m, - m->m_pkthdr.rcvif, ipf_unref()); - - /* ipf_unref() already called */ - if (m == NULL) - return; - - changed_header = 1; - ip = mtod(m, struct ip *); - ip->ip_len = htons(ip->ip_len + hlen); - ip->ip_off = htons(ip->ip_off); - ip->ip_sum = 0; - ip->ip_sum = ip_cksum_hdr_in(m, hlen); - } - result = filter->ipf_filter.ipf_input( - filter->ipf_filter.cookie, (mbuf_t *)&m, - hlen, proto); - if (result == EJUSTRETURN) { - ipf_unref(); - return; - } - if (result != 0) { - ipf_unref(); - m_freem(m); - return; - } + pktchain_elm_t pktchain_tbl[PKTTBL_SZ]; + + struct mbuf *packet = NULL; + struct mbuf *modm = NULL; /* modified mbuf */ + int retval = 0; + u_int32_t div_info = 0; + int ours = 0; + struct timeval start_tv; + int num_pkts = 0; + int chain = 0; + struct ip_fw_in_args args; + + if (ip_chaining == 0) { + struct mbuf *m = packet_list; + if (ip_input_measure) + net_perf_start_time(&net_perf, &start_tv); + while (m) { + packet_list = mbuf_nextpkt(m); + mbuf_setnextpkt(m, NULL); + ip_input(m); + m = packet_list; + num_pkts++; + } + if (ip_input_measure) + net_perf_measure_time(&net_perf, &start_tv, num_pkts); + return; + } + if (ip_input_measure) + net_perf_start_time(&net_perf, &start_tv); + + bzero(&pktchain_tbl, sizeof(pktchain_tbl)); +restart_list_process: + chain = 0; + for (packet = packet_list; packet; packet = packet_list) { + packet_list = mbuf_nextpkt(packet); + mbuf_setnextpkt(packet, NULL); + + num_pkts++; + modm = NULL; + div_info = 0; + bzero(&args, sizeof (args)); + + retval = ip_input_first_pass(packet, &div_info, &args, + &ours, &modm); + + if (retval == IPINPUT_DOCHAIN) { + if (modm) + packet = modm; + packet = ip_chain_insert(packet, &pktchain_tbl[0]); + if (packet == NULL) { + ipstat.ips_rxc_chained++; + chain++; + if (chain > ip_chainsz) + break; + } else { + ipstat.ips_rxc_collisions++; + break; } + } else if (retval == IPINPUT_DONTCHAIN) { + /* in order to preserve order, exit from chaining */ + if (modm) + 
packet = modm; + ipstat.ips_rxc_notchain++; + break; + } else { + /* packet was freed or delivered, do nothing. */ } - ipf_unref(); } - /* Perform IP header alignment fixup (post-filters), if needed */ - IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return); + /* do second pass here for pktchain_tbl */ + if (chain) + ip_input_second_pass_loop_tbl(&pktchain_tbl[0], &args); - /* - * If there isn't a specific lock for the protocol - * we're about to call, use the generic lock for AF_INET. - * otherwise let the protocol deal with its own locking - */ - ip = mtod(m, struct ip *); + if (packet) { + /* + * equivalent update in chaining case if performed in + * ip_input_second_pass_loop_tbl(). + */ + if (ip_input_measure) + net_perf_histogram(&net_perf, 1); - if (changed_header) { - ip->ip_len = ntohs(ip->ip_len) - hlen; - ip->ip_off = ntohs(ip->ip_off); + ip_input_second_pass(packet, packet->m_pkthdr.rcvif, div_info, + 1, packet->m_pkthdr.len, &args, ours); } - if ((pr_input = ip_protox[ip->ip_p]->pr_input) == NULL) { - m_freem(m); - } else if (!(ip_protox[ip->ip_p]->pr_flags & PR_PROTOLOCK)) { - lck_mtx_lock(inet_domain_mutex); - pr_input(m, hlen); - lck_mtx_unlock(inet_domain_mutex); - } else { - pr_input(m, hlen); - } -} + if (packet_list) + goto restart_list_process; + if (ip_input_measure) + net_perf_measure_time(&net_perf, &start_tv, num_pkts); +} /* * Ip input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. @@ -664,6 +1767,8 @@ ip_input(struct mbuf *m) inifp = m->m_pkthdr.rcvif; VERIFY(inifp != NULL); + ipstat.ips_rxc_notlist++; + /* Perform IP header alignment fixup, if needed */ IP_HDR_ALIGNMENT_FIXUP(m, inifp, goto bad); @@ -833,7 +1938,7 @@ ip_input(struct mbuf *m) /* * Naively assume we can attribute inbound data to the route we would - * use to send to this destination. Asymetric routing breaks this + * use to send to this destination. 
Asymmetric routing breaks this * assumption, but it still allows us to account for traffic from * a remote node in the routing table. * this has a very significant performance impact so we bypass @@ -3249,3 +4354,58 @@ ip_gre_register_input(gre_input_func_t fn) return (0); } + +static int +sysctl_reset_ip_input_stats SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error, i; + + i = ip_input_measure; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* impose bounds */ + if (i < 0 || i > 1) { + error = EINVAL; + goto done; + } + if (ip_input_measure != i && i == 1) { + net_perf_initialize(&net_perf, ip_input_measure_bins); + } + ip_input_measure = i; +done: + return (error); +} + +static int +sysctl_ip_input_measure_bins SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error; + uint64_t i; + + i = ip_input_measure_bins; + error = sysctl_handle_quad(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* validate data */ + if (!net_perf_validate_bins(i)) { + error = EINVAL; + goto done; + } + ip_input_measure_bins = i; +done: + return (error); +} + +static int +sysctl_ip_input_getperf SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + if (req->oldptr == USER_ADDR_NULL) + req->oldlen = (size_t)sizeof (struct ipstat); + + return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen))); +} + diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index f59d299a9..383751d4d 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -95,6 +95,7 @@ #include #include #include +#include #include #include @@ -152,6 +153,9 @@ u_short ip_id; +static int sysctl_reset_ip_output_stats SYSCTL_HANDLER_ARGS; +static int sysctl_ip_output_measure_bins SYSCTL_HANDLER_ARGS; +static int sysctl_ip_output_getperf SYSCTL_HANDLER_ARGS; static void ip_out_cksum_stats(int, u_int32_t); static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 
static int ip_optcopy(struct ip *, struct ip *); @@ -184,6 +188,24 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &ip_select_srcif_debug, 0, "log source interface selection debug info"); +static int ip_output_measure = 0; +SYSCTL_PROC(_net_inet_ip, OID_AUTO, output_perf, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &ip_output_measure, 0, sysctl_reset_ip_output_stats, "I", + "Do time measurement"); + +static uint64_t ip_output_measure_bins = 0; +SYSCTL_PROC(_net_inet_ip, OID_AUTO, output_perf_bins, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip_output_measure_bins, 0, + sysctl_ip_output_measure_bins, "I", + "bins for chaining performance data histogram"); + +static net_perf_t net_perf; +SYSCTL_PROC(_net_inet_ip, OID_AUTO, output_perf_data, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_ip_output_getperf, "S,net_perf", + "IP output performance data (struct net_perf, net/net_perf.h)"); + #define IMO_TRACE_HIST_SIZE 32 /* size of trace history */ /* For gdb */ @@ -259,8 +281,10 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, ipfilter_t inject_filter_ref = NULL; struct mbuf *packetlist; uint32_t sw_csum, pktcnt = 0, scnt = 0, bytecnt = 0; + uint32_t packets_processed = 0; unsigned int ifscope = IFSCOPE_NONE; struct flowadv *adv = NULL; + struct timeval start_tv; #if IPSEC struct socket *so = NULL; struct secpolicy *sp = NULL; @@ -326,6 +350,8 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, ((_ipobf).noexpensive && IFNET_IS_EXPENSIVE(_ifp)) || \ (!(_ipobf).awdl_unrestricted && IFNET_IS_AWDL_RESTRICTED(_ifp))) + if (ip_output_measure) + net_perf_start_time(&net_perf, &start_tv); KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0); VERIFY(m0->m_flags & M_PKTHDR); @@ -495,6 +521,7 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, #endif /* DUMMYNET */ loopit: + packets_processed++; ipobf.isbroadcast = FALSE; ipobf.didfilter = FALSE; #if 
IPFIREWALL_FORWARD @@ -1172,6 +1199,11 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, necp_mark_packet_from_ip(m, necp_matched_policy_id); switch (necp_result) { case NECP_KERNEL_POLICY_RESULT_PASS: + /* Check if the interface is allowed */ + if (!necp_packet_is_allowed_over_interface(m, ifp)) { + error = EHOSTUNREACH; + goto bad; + } goto skip_ipsec; case NECP_KERNEL_POLICY_RESULT_DROP: case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT: @@ -1182,9 +1214,20 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, /* Verify that the packet is being routed to the tunnel */ struct ifnet *policy_ifp = necp_get_ifnet_from_result_parameter(&necp_result_parameter); if (policy_ifp == ifp) { + /* Check if the interface is allowed */ + if (!necp_packet_is_allowed_over_interface(m, ifp)) { + error = EHOSTUNREACH; + goto bad; + } goto skip_ipsec; } else { if (necp_packet_can_rebind_to_ifnet(m, policy_ifp, &necp_route, AF_INET)) { + /* Check if the interface is allowed */ + if (!necp_packet_is_allowed_over_interface(m, policy_ifp)) { + error = EHOSTUNREACH; + goto bad; + } + /* Set ifp to the tunnel interface, since it is compatible with the packet */ ifp = policy_ifp; ro = &necp_route; @@ -1200,8 +1243,13 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, break; } } + /* Catch-all to check if the interface is allowed */ + if (!necp_packet_is_allowed_over_interface(m, ifp)) { + error = EHOSTUNREACH; + goto bad; + } #endif /* NECP */ - + #if IPSEC if (ipsec_bypass != 0 || (flags & IP_NOIPSEC)) goto skip_ipsec; @@ -1896,6 +1944,10 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, #endif /* IPFIREWALL_FORWARD */ KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error, 0, 0, 0, 0); + if (ip_output_measure) { + net_perf_measure_time(&net_perf, &start_tv, packets_processed); + net_perf_histogram(&net_perf, packets_processed); + } return (error); bad: if (pktcnt > 0) @@ -3466,3 +3518,58 @@ ip_gre_output(struct mbuf *m) 
return (error); } + +static int +sysctl_reset_ip_output_stats SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error, i; + + i = ip_output_measure; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* impose bounds */ + if (i < 0 || i > 1) { + error = EINVAL; + goto done; + } + if (ip_output_measure != i && i == 1) { + net_perf_initialize(&net_perf, ip_output_measure_bins); + } + ip_output_measure = i; +done: + return (error); +} + +static int +sysctl_ip_output_measure_bins SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error; + uint64_t i; + + i = ip_output_measure_bins; + error = sysctl_handle_quad(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* validate data */ + if (!net_perf_validate_bins(i)) { + error = EINVAL; + goto done; + } + ip_output_measure_bins = i; +done: + return (error); +} + +static int +sysctl_ip_output_getperf SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + if (req->oldptr == USER_ADDR_NULL) + req->oldlen = (size_t)sizeof (struct ipstat); + + return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen))); +} + diff --git a/bsd/netinet/ip_var.h b/bsd/netinet/ip_var.h index 9440d9ad3..99982111d 100644 --- a/bsd/netinet/ip_var.h +++ b/bsd/netinet/ip_var.h @@ -221,6 +221,13 @@ struct ipstat { u_int32_t ips_snd_swcsum_bytes; /* ip hdr swcksum (outbound), bytes */ u_int32_t ips_adj; /* total packets trimmed/adjusted */ u_int32_t ips_adj_hwcsum_clr; /* hwcksum discarded during adj */ + u_int32_t ips_rxc_collisions; /* rx chaining collisions */ + u_int32_t ips_rxc_chained; /* rx chains */ + u_int32_t ips_rxc_notchain; /* rx bypassed chaining */ + u_int32_t ips_rxc_chainsz_gt2; /* rx chain size greater than 2 */ + u_int32_t ips_rxc_chainsz_gt4; /* rx chain size greater than 4 */ + u_int32_t ips_rxc_notlist; /* count of pkts through ip_input */ + }; struct ip_linklocal_stat { diff --git a/bsd/netinet/mp_pcb.c 
b/bsd/netinet/mp_pcb.c index 9dfc68ed3..304c4c05c 100644 --- a/bsd/netinet/mp_pcb.c +++ b/bsd/netinet/mp_pcb.c @@ -198,21 +198,23 @@ mp_pcbinfo_detach(struct mppcbinfo *mppi) int mp_pcballoc(struct socket *so, struct mppcbinfo *mppi) { - struct mppcb *mpp; + struct mppcb *mpp = NULL; VERIFY(sotomppcb(so) == NULL); lck_mtx_lock(&mppi->mppi_lock); if (mppi->mppi_count >= mptcp_socket_limit) { lck_mtx_unlock(&mppi->mppi_lock); - mptcplog((LOG_ERR, "Reached MPTCP socket limit.")); + mptcplog((LOG_ERR, "MPTCP Socket: Reached MPTCP socket limit."), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); return (ENOBUFS); } lck_mtx_unlock(&mppi->mppi_lock); mpp = zalloc(mppi->mppi_zone); - if (mpp == NULL) + if (mpp == NULL) { return (ENOBUFS); + } bzero(mpp, mppi->mppi_size); lck_mtx_init(&mpp->mpp_lock, mppi->mppi_lock_grp, mppi->mppi_lock_attr); @@ -221,6 +223,12 @@ mp_pcballoc(struct socket *so, struct mppcbinfo *mppi) mpp->mpp_socket = so; so->so_pcb = mpp; + if (NULL == mppi->mppi_pcbe_create(so, mpp)) { + lck_mtx_destroy(&mpp->mpp_lock, mppi->mppi_lock_grp); + zfree(mppi->mppi_zone, mpp); + return (ENOBUFS); + } + lck_mtx_lock(&mppi->mppi_lock); mpp->mpp_flags |= MPP_ATTACHED; TAILQ_INSERT_TAIL(&mppi->mppi_pcbs, mpp, mpp_entry); diff --git a/bsd/netinet/mp_pcb.h b/bsd/netinet/mp_pcb.h index 3c317a692..eba202b85 100644 --- a/bsd/netinet/mp_pcb.h +++ b/bsd/netinet/mp_pcb.h @@ -74,6 +74,8 @@ struct mppcbinfo { decl_lck_mtx_data(, mppi_lock); /* global PCB lock */ uint32_t (*mppi_gc)(struct mppcbinfo *); /* garbage collector func */ uint32_t (*mppi_timer)(struct mppcbinfo *); /* timer func */ + /* Extended pcb create func */ + void *(*mppi_pcbe_create) (struct socket *mp_so, struct mppcb *mpp); }; __BEGIN_DECLS diff --git a/bsd/netinet/mptcp.c b/bsd/netinet/mptcp.c index 1945ecfcf..d218931be 100644 --- a/bsd/netinet/mptcp.c +++ b/bsd/netinet/mptcp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014 Apple Inc. All rights reserved. + * Copyright (c) 2012-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -61,10 +61,6 @@ int mptcp_enable = 1; SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED, &mptcp_enable, 0, "Enable Multipath TCP Support"); -int mptcp_dbg = 0; -SYSCTL_INT(_net_inet_mptcp, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED, - &mptcp_dbg, 0, "Enable Multipath TCP Debugging"); - /* Number of times to try negotiating MPTCP on SYN retransmissions */ int mptcp_mpcap_retries = MPTCP_CAPABLE_RETRIES; SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr, @@ -130,6 +126,61 @@ int mptcp_rwnotify = 0; SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rwnotify, CTLFLAG_RW | CTLFLAG_LOCKED, &mptcp_rwnotify, 0, "Enable RW notify on resume"); +/* + * Using RTT history for sending new data + */ +int mptcp_use_rtthist = 1; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_use_rtthist, 0, "Disable RTT History"); + +#define MPTCP_RTTHIST_MINTHRESH 500 +int mptcp_rtthist_rtthresh = 600; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist_thresh, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_rtthist_rtthresh, 0, "Rtt threshold"); + +/* + * Use RTO history for sending new data + */ +int mptcp_use_rto = 1; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, userto, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_use_rto, 0, "Disable RTO for subflow selection"); + +#define MPTCP_RTO_MINTHRESH 1000 +int mptcp_rtothresh = 1500; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rto_thresh, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_rtothresh, 0, "RTO threshold"); + +/* + * Use server's chosen path for sending new data + */ +int mptcp_peerswitch = 1; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, use_peer, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_peerswitch, 0, "Use peer"); + +#define MPTCP_PEERSWITCH_CNTMIN 3 +uint32_t mptcp_peerswitch_cnt = 3; +SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, peerswitchno, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_peerswitch_cnt, 0, "Set threshold based on peer's data arrival"); + +/* + * Probe the preferred path, when it is 
not in use + */ +#define MPTCP_PROBETO_MIN 500 +uint32_t mptcp_probeto = 1000; +SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probeto, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_probeto, 0, "Disable probing by setting to 0"); + +#define MPTCP_PROBE_MX 15 +uint32_t mptcp_probecnt = 5; +SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probecnt, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_probecnt, 0, "Number of probe writes"); + +/* + * Static declarations + */ +static int mptcp_validate_csum(struct tcpcb *, struct mbuf *, int); +static uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, int); + /* * MPTCP input, called when data has been read from a subflow socket. */ @@ -184,7 +235,8 @@ mptcp_input(struct mptses *mpte, struct mbuf *m) struct sockbuf *, &mp_so->so_snd, struct mptses *, mpte); count = mp_so->so_rcv.sb_cc - count; - mptcplog3((LOG_DEBUG, "%s: fread %d bytes\n", __func__, count)); + mptcplog((LOG_DEBUG, "MPTCP Receiver: Fallback read %d bytes\n", + count), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE); return; } @@ -255,8 +307,9 @@ mptcp_input(struct mptses *mpte, struct mbuf *m) } else { m_adj(m, (mp_tp->mpt_rcvatmark - mb_dsn)); } - mptcplog((LOG_INFO, "%s: %llu %d 2 \n", __func__, - mp_tp->mpt_rcvatmark, m->m_pkthdr.len)); + mptcplog((LOG_INFO, "MPTCP Receiver: Left Edge %llu\n", + mp_tp->mpt_rcvatmark), + MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE); } MPT_UNLOCK(mp_tp); @@ -272,7 +325,9 @@ mptcp_input(struct mptses *mpte, struct mbuf *m) count = mp_so->so_rcv.sb_cc - count; tcpstat.tcps_mp_rcvtotal++; tcpstat.tcps_mp_rcvbytes += count; - mptcplog3((LOG_DEBUG, "%s: read %d bytes\n", __func__, count)); + mptcplog((LOG_DEBUG, "MPTCP Receiver: Read %d bytes\n", count), + MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE); + /* * The data received at the MPTCP layer will never exceed the * receive window because anything to the right of the @@ -299,25 +354,30 @@ mptcp_output(struct mptses *mpte) struct mptsub *mpts; struct mptsub *mpts_tried = NULL; struct socket *mp_so; + struct mptsub 
*preferred_mpts = NULL; int error = 0; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ mp_so = mpte->mpte_mppcb->mpp_socket; if (mp_so->so_state & SS_CANTSENDMORE) { + mptcplog((LOG_DEBUG, "MPTCP Sender: cantsendmore\n"), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); return (EPIPE); } try_again: /* get the "best" subflow to be used for transmission */ - mpts = mptcp_get_subflow(mpte, NULL); + mpts = mptcp_get_subflow(mpte, NULL, &preferred_mpts); if (mpts == NULL) { - mptcplog((LOG_ERR, "%s: mp_so 0x%llx has no usable subflow\n", - __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); + mptcplog((LOG_ERR, "MPTCP Sender: mp_so 0x%llx no subflow\n", + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); goto out; } - mptcplog3((LOG_INFO, "%s: mp_so 0x%llx cid %d \n", __func__, - (uint64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid)); + mptcplog((LOG_DEBUG, "MPTCP Sender: mp_so 0x%llx using cid %d \n", + (uint64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); /* In case there's just one flow, we reattempt later */ MPTS_LOCK(mpts); @@ -328,11 +388,10 @@ mptcp_output(struct mptses *mpte) mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER; mpts_tried->mpts_flags |= MPTSF_ACTIVE; MPTS_UNLOCK(mpts_tried); - MPT_LOCK(mpte->mpte_mptcb); - mptcp_start_timer(mpte->mpte_mptcb, MPTT_REXMT); - MPT_UNLOCK(mpte->mpte_mptcb); - mptcplog((LOG_INFO, "%s: mp_so 0x%llx retry later\n", - __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); + mptcp_start_timer(mpte, MPTT_REXMT); + mptcplog((LOG_DEBUG, "MPTCP Sender: mp_so 0x%llx retry later\n", + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); goto out; } @@ -345,19 +404,53 @@ mptcp_output(struct mptses *mpte) mpts->mpts_flags &= ~MPTSF_ACTIVE; mpts_tried = mpts; MPTS_UNLOCK(mpts); - mptcplog((LOG_INFO, "%s: error = %d \n", __func__, error)); + mptcplog((LOG_INFO, "MPTCP Sender: Error = %d \n", error), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); 
goto try_again; } /* The model is to have only one active flow at a time */ mpts->mpts_flags |= MPTSF_ACTIVE; + mpts->mpts_probesoon = mpts->mpts_probecnt = 0; MPTS_UNLOCK(mpts); + + /* Allows us to update the smoothed rtt */ + if ((mptcp_probeto) && (mptcp_probeto >= MPTCP_PROBETO_MIN) && + (mpts != preferred_mpts) && (preferred_mpts != NULL)) { + MPTS_LOCK(preferred_mpts); + if (preferred_mpts->mpts_probesoon) { + if ((tcp_now - preferred_mpts->mpts_probesoon) > + mptcp_probeto) { + (void) mptcp_subflow_output(mpte, preferred_mpts); + if (preferred_mpts->mpts_probecnt >= + MIN(mptcp_probecnt, MPTCP_PROBE_MX)) { + preferred_mpts->mpts_probesoon = 0; + preferred_mpts->mpts_probecnt = 0; + } + } + } else { + preferred_mpts->mpts_probesoon = tcp_now; + preferred_mpts->mpts_probecnt = 0; + } + MPTS_UNLOCK(preferred_mpts); + } + if (mpte->mpte_active_sub == NULL) { mpte->mpte_active_sub = mpts; } else if (mpte->mpte_active_sub != mpts) { + mptcplog((LOG_DEBUG, "MPTCP Sender: switch [cid %d, srtt %d]" + "to [cid %d, srtt %d]\n", + mpte->mpte_active_sub->mpts_connid, + mpte->mpte_active_sub->mpts_srtt >> 5, + mpts->mpts_connid, + mpts->mpts_srtt >> 5), + MPTCP_SENDER_DBG | MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); + MPTS_LOCK(mpte->mpte_active_sub); mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE; + mpts->mpts_peerswitch = 0; MPTS_UNLOCK(mpte->mpte_active_sub); mpte->mpte_active_sub = mpts; + tcpstat.tcps_mp_switches++; } out: /* subflow errors should not be percolated back up */ @@ -367,13 +460,17 @@ mptcp_output(struct mptses *mpte) /* * Return the most eligible subflow to be used for sending data. * This function also serves to check if any alternate subflow is available - * or not. + * or not. best and second_best flows are chosen by their priority. third_best + * could be best or second_best but is under loss at the time of evaluation. 
*/ struct mptsub * -mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore) +mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **preferred) { struct mptsub *mpts; - struct mptsub *fallback = NULL; + struct mptsub *best = NULL; + struct mptsub *second_best = NULL; + struct mptsub *third_best = NULL; + struct mptsub *symptoms_best = NULL; struct socket *so = NULL; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ @@ -389,6 +486,7 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore) /* There can only be one subflow in degraded state */ if (mpts->mpts_flags & MPTSF_MP_DEGRADED) { MPTS_UNLOCK(mpts); + best = mpts; break; } @@ -421,10 +519,14 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore) (mptcp_no_rto_spike(so))) { mpts->mpts_flags &= ~MPTSF_FAILINGOVER; so->so_flags &= ~SOF_MP_TRYFAILOVER; - fallback = mpts; socket_unlock(so, 1); } else { - fallback = mpts; + third_best = mpts; + mptcplog((LOG_DEBUG, "MPTCP Sender: " + "%s cid %d in failover\n", + __func__, third_best->mpts_connid), + MPTCP_SENDER_DBG, + MPTCP_LOGLVL_VERBOSE); socket_unlock(so, 1); MPTS_UNLOCK(mpts); continue; @@ -435,25 +537,82 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore) } } + /* When there are no preferred flows, use first one in list */ + if ((!second_best) && !(mpts->mpts_flags & MPTSF_PREFERRED)) + second_best = mpts; + if (mpts->mpts_flags & MPTSF_PREFERRED) { - MPTS_UNLOCK(mpts); - break; + best = mpts; } - /* When there are no preferred flows, use first one in list */ - fallback = mpts; - MPTS_UNLOCK(mpts); } + /* * If there is no preferred or backup subflow, and there is no active * subflow use the last usable subflow. */ - if (mpts == NULL) { - return (fallback); + if (best == NULL) { + return (second_best ? second_best : third_best); } - return (mpts); + if (second_best == NULL) { + return (best ? 
best : third_best); + } + + if (preferred != NULL) + *preferred = best; + + /* Use a hint from symptomsd if it exists */ + symptoms_best = mptcp_use_symptoms_hints(best, second_best); + if (symptoms_best != NULL) + return (symptoms_best); + + /* Compare RTTs, select second_best if best's rtt exceeds rttthresh */ + if ((mptcp_use_rtthist) && + (best->mpts_srtt) && (second_best->mpts_srtt) && + (best->mpts_srtt > second_best->mpts_srtt) && + (best->mpts_srtt >= MAX((MPTCP_RTTHIST_MINTHRESH << 5), + (mptcp_rtthist_rtthresh << 5)))) { + tcpstat.tcps_mp_sel_rtt++; + mptcplog((LOG_DEBUG, "MPTCP Sender: %s best cid %d" + " at rtt %d, second cid %d at rtt %d\n", __func__, + best->mpts_connid, best->mpts_srtt >> 5, + second_best->mpts_connid, + second_best->mpts_srtt >> 5), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); + return (second_best); + } + + /* Compare RTOs, select second_best if best's rto exceeds rtothresh */ + if ((mptcp_use_rto) && + (best->mpts_rxtcur) && (second_best->mpts_rxtcur) && + (best->mpts_rxtcur > second_best->mpts_rxtcur) && + (best->mpts_rxtcur >= + MAX(MPTCP_RTO_MINTHRESH, mptcp_rtothresh))) { + tcpstat.tcps_mp_sel_rto++; + mptcplog((LOG_DEBUG, "MPTCP Sender: %s best cid %d" + " at rto %d, second cid %d at rto %d\n", __func__, + best->mpts_connid, best->mpts_rxtcur, + second_best->mpts_connid, second_best->mpts_rxtcur), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); + + return (second_best); + } + + /* If second_best received data, use second_best */ + if (mptcp_peerswitch && + (second_best->mpts_peerswitch > + MAX(MPTCP_PEERSWITCH_CNTMIN, mptcp_peerswitch_cnt))) { + tcpstat.tcps_mp_sel_peer++; + mptcplog((LOG_DEBUG, "MPTCP Sender: %s: best cid %d" + " but using cid %d after receiving %d segments\n", + __func__, best->mpts_connid, second_best->mpts_connid, + second_best->mpts_peerswitch), MPTCP_SENDER_DBG, + MPTCP_LOGLVL_LOG); + return (second_best); + } + return (best); } struct mptsub * @@ -481,10 +640,71 @@ mptcp_get_pending_subflow(struct mptses *mpte, 
struct mptsub *ignore) return (mpts); } +static const char * +mptcp_event_to_str(uint32_t event) +{ + const char *c = "UNDEFINED"; + switch (event) { + case MPCE_CLOSE: + c = "MPCE_CLOSE"; + break; + case MPCE_RECV_DATA_ACK: + c = "MPCE_RECV_DATA_ACK"; + break; + case MPCE_RECV_DATA_FIN: + c = "MPCE_RECV_DATA_FIN"; + break; + } + return (c); +} + +static const char * +mptcp_state_to_str(mptcp_state_t state) +{ + const char *c = "UNDEFINED"; + switch (state) { + case MPTCPS_CLOSED: + c = "MPTCPS_CLOSED"; + break; + case MPTCPS_LISTEN: + c = "MPTCPS_LISTEN"; + break; + case MPTCPS_ESTABLISHED: + c = "MPTCPS_ESTABLISHED"; + break; + case MPTCPS_CLOSE_WAIT: + c = "MPTCPS_CLOSE_WAIT"; + break; + case MPTCPS_FIN_WAIT_1: + c = "MPTCPS_FIN_WAIT_1"; + break; + case MPTCPS_CLOSING: + c = "MPTCPS_CLOSING"; + break; + case MPTCPS_LAST_ACK: + c = "MPTCPS_LAST_ACK"; + break; + case MPTCPS_FIN_WAIT_2: + c = "MPTCPS_FIN_WAIT_2"; + break; + case MPTCPS_TIME_WAIT: + c = "MPTCPS_TIME_WAIT"; + break; + case MPTCPS_FASTCLOSE_WAIT: + c = "MPTCPS_FASTCLOSE_WAIT"; + break; + case MPTCPS_TERMINATE: + c = "MPTCPS_TERMINATE"; + break; + } + return (c); +} + void mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event) { MPT_LOCK_ASSERT_HELD(mp_tp); + mptcp_state_t old_state = mp_tp->mpt_state; DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, uint32_t, event); @@ -556,8 +776,11 @@ mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event) } DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, uint32_t, event); - mptcplog((LOG_INFO, "%s: state = %d\n", - __func__, mp_tp->mpt_state)); + mptcplog((LOG_INFO, "MPTCP State: %s to %s on event %s\n", + mptcp_state_to_str(old_state), + mptcp_state_to_str(mp_tp->mpt_state), + mptcp_event_to_str(event)), + MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG); } /* @@ -619,25 +842,28 @@ mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp, uint16_t csum) { if (mdss_data_len == 0) { - mptcplog((LOG_INFO, "%s: Received infinite mapping.", - __func__)); + 
mptcplog((LOG_INFO, "MPTCP Receiver: Infinite Mapping.\n"), + MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG); + if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) { - mptcplog((LOG_ERR, "%s: Bad checksum value %x \n", - __func__, csum)); + mptcplog((LOG_ERR, "MPTCP Receiver: Bad checksum %x \n", + csum), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR); } mptcp_notify_mpfail(tp->t_inpcb->inp_socket); return; } MPT_LOCK(mp_tp); - if (mptcp_dbg >= MP_VERBOSE_DEBUG_1) - printf("%s: seqn = %x len = %x full = %llx rcvnxt = %llu \n", - __func__, seqn, mdss_data_len, full_dsn, - mp_tp->mpt_rcvnxt); + mptcplog((LOG_DEBUG, + "MPTCP Receiver: seqn = %x len = %x full = %llx " + "rcvnxt = %llu \n", + seqn, mdss_data_len, full_dsn, mp_tp->mpt_rcvnxt), + MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE); /* Process a Data FIN packet , handled in mptcp_do_fin_opt */ if ((seqn == 0) && (mdss_data_len == 1)) { - mptcplog((LOG_INFO, "%s: Data FIN DSS opt state = %d \n", - __func__, mp_tp->mpt_state)); + mptcplog((LOG_INFO, "MPTCP Receiver: Data FIN in %s state \n", + mptcp_state_to_str(mp_tp->mpt_state)), + MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG); MPT_UNLOCK(mp_tp); return; } @@ -687,6 +913,52 @@ mptcp_update_rcv_state_g(struct mptcp_dss64_ack32_opt *dss_info, csum); } +static int +mptcp_validate_dss_map(struct socket *so, struct tcpcb *tp, struct mbuf *m, + int hdrlen) +{ + u_int32_t sseq, datalen; + + if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) + return 0; + + sseq = m->m_pkthdr.mp_rseq + tp->irs; + datalen = m->m_pkthdr.mp_rlen; + +#if 0 + /* enable this to test TCP fallback post connection establishment */ + if (SEQ_GT(sseq, (tp->irs+1))) + datalen = m->m_pkthdr.len - hdrlen - 1; +#endif + + /* unacceptable DSS option, fallback to TCP */ + if (m->m_pkthdr.len > ((int) datalen + hdrlen)) { + mptcplog((LOG_ERR, "MPTCP Receiver: " + "%s: mbuf len %d, MPTCP expected %d", + __func__, m->m_pkthdr.len, datalen), + MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG); + } else { + return 0; + } + tp->t_mpflags |= 
TMPF_SND_MPFAIL; + mptcp_notify_mpfail(so); + m_freem(m); + return -1; +} + +int +mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, int drop_hdrlen) +{ + if (mptcp_validate_csum(tp, m, drop_hdrlen) != 0) + return -1; + + mptcp_insert_rmap(tp, m); + if (mptcp_validate_dss_map(tp->t_inpcb->inp_socket, tp, m, + drop_hdrlen) != 0) + return -1; + return 0; +} + /* * MPTCP Checksum support * The checksum is calculated whenever the MPTCP DSS option is included @@ -695,7 +967,23 @@ mptcp_update_rcv_state_g(struct mptcp_dss64_ack32_opt *dss_info, * DSS option. */ -uint16_t +static int +mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, int drop_hdrlen) +{ + uint16_t mptcp_csum = 0; + mptcp_csum = mptcp_input_csum(tp, m, drop_hdrlen); + if (mptcp_csum) { + tp->t_mpflags |= TMPF_SND_MPFAIL; + tp->t_mpflags &= ~TMPF_EMBED_DSN; + mptcp_notify_mpfail(tp->t_inpcb->inp_socket); + m_freem(m); + tcpstat.tcps_mp_badcsum++; + return -1; + } + return 0; +} + +static uint16_t mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, int off) { struct mptcb *mp_tp = tptomptp(tp); @@ -735,7 +1023,8 @@ mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, int off) ADDCARRY(sum); DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m, uint32_t, sum); - mptcplog((LOG_INFO, "%s: sum = %x \n", __func__, sum)); + mptcplog((LOG_DEBUG, "MPTCP Receiver: sum = %x \n", sum), + MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE); return (~sum & 0xffff); } @@ -774,5 +1063,35 @@ mptcp_output_csum(struct tcpcb *tp, struct mbuf *m, int32_t len, DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m, uint32_t, sum); *csump = sum; - mptcplog3((LOG_INFO, "%s: sum = %x \n", __func__, sum)); + mptcplog((LOG_DEBUG, "MPTCP Sender: sum = %x \n", sum), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); +} + +/* + * When WiFi signal starts fading, there's more loss and RTT spikes. + * Check if there has been a large spike by comparing against + * a tolerable RTT spike threshold. 
+ */ +boolean_t +mptcp_no_rto_spike(struct socket *so) +{ + struct tcpcb *tp = intotcpcb(sotoinpcb(so)); + int32_t spike = 0; + + if (tp->t_rxtcur > MAX(mptcp_rtothresh, MPTCP_RTO_MINTHRESH)) { + spike = tp->t_rxtcur - mptcp_rtothresh; + + mptcplog((LOG_DEBUG, "MPTCP Socket: %s: spike = %d rto = %d" + "best = %d cur = %d\n", __func__, spike, + tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT, + tp->t_rttcur), + (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG); + + } + + if (spike > 0 ) { + return (FALSE); + } else { + return (TRUE); + } } diff --git a/bsd/netinet/mptcp.h b/bsd/netinet/mptcp.h index 0dc8c9c61..3ea265ebc 100644 --- a/bsd/netinet/mptcp.h +++ b/bsd/netinet/mptcp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * Copyright (c) 2012-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -54,7 +54,7 @@ #define MPO_FASTCLOSE 0x7 /* MPTCP Protocol version */ -#define MP_DRAFT_VERSION_12 0x0 +#define MPTCP_STD_VERSION_0 0x0 /* * MPTCP MP_CAPABLE TCP Option definitions diff --git a/bsd/netinet/mptcp_opt.c b/bsd/netinet/mptcp_opt.c index 173e56075..414e76c5f 100644 --- a/bsd/netinet/mptcp_opt.c +++ b/bsd/netinet/mptcp_opt.c @@ -30,7 +30,7 @@ #include #include #include - +#include #include #include #include @@ -112,10 +112,6 @@ mptcp_setup_first_subflow_syn_opts(struct socket *so, int flags, u_char *opt, memcpy(opt + optlen, &mptcp_opt, mptcp_opt.mmc_common.mmco_len); optlen += mptcp_opt.mmc_common.mmco_len; - if (mptcp_dbg >= MP_VERBOSE_DEBUG_2) { - printf("%s: SYN_ACK localkey = %llx \n", - __func__, mp_localkey); - } } else { /* Only the SYN flag is set */ struct mptcp_mpcapable_opt_common mptcp_opt; @@ -205,8 +201,9 @@ mptcp_setup_join_subflow_syn_opts(struct socket *so, int flags, u_char *opt, mpjoin_req.mmjo_addr_id = tp->t_local_aid; mpjoin_req.mmjo_peer_token = mptcp_get_remotetoken(tp->t_mptcb); if (mpjoin_req.mmjo_peer_token == 0) { - if (mptcp_dbg >= MP_ERR_DEBUG) - printf("%s: zero 
peer token \n", __func__); + mptcplog((LOG_DEBUG, "MPTCP Socket: %s: peer token 0", + __func__), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); } mptcp_get_rands(tp->t_local_aid, tptomptp(tp), &mpjoin_req.mmjo_rand, NULL); @@ -217,8 +214,6 @@ mptcp_setup_join_subflow_syn_opts(struct socket *so, int flags, u_char *opt, (so->so_flags & SOF_MPTCP_FASTJOIN)) { soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFASTJ)); - if (mptcp_dbg >= MP_ERR_DEBUG) - printf("%s: fast join request\n", __func__); } } return (optlen); @@ -306,8 +301,9 @@ mptcp_send_mpfail(struct tcpcb *tp, u_char *opt, unsigned int optlen) memcpy(opt + optlen, &fail_opt, len); optlen += len; tp->t_mpflags &= ~TMPF_SND_MPFAIL; - if (mptcp_dbg >= MP_ERR_DEBUG) - printf("%s: %d \n", __func__, tp->t_local_aid); + mptcplog((LOG_DEBUG, "MPTCP Socket: %s: %d \n", __func__, + tp->t_local_aid), (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), + MPTCP_LOGLVL_LOG); return (optlen); } @@ -370,12 +366,13 @@ mptcp_send_infinite_mapping(struct tcpcb *tp, u_char *opt, unsigned int optlen) optlen += csum_len; } - if (mptcp_dbg == MP_VERBOSE_DEBUG_1) { - printf("%s: dsn = %x, seq = %x len = %x\n", __func__, - ntohl(infin_opt.mdss_dsn), - ntohl(infin_opt.mdss_subflow_seqn), - ntohs(infin_opt.mdss_data_len)); - } + mptcplog((LOG_DEBUG, "MPTCP Socket: %s: dsn = %x, seq = %x len = %x\n", + __func__, + ntohl(infin_opt.mdss_dsn), + ntohl(infin_opt.mdss_subflow_seqn), + ntohs(infin_opt.mdss_data_len)), + (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), + MPTCP_LOGLVL_LOG); /* so->so_flags &= ~SOF_MPTCP_CLIENT; */ tp->t_mpflags |= TMPF_INFIN_SENT; @@ -444,7 +441,8 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, boolean_t send_64bit_dsn = FALSE; boolean_t send_64bit_ack = FALSE; u_int32_t old_mpt_flags = tp->t_mpflags & - (TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL); + (TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL | + TMPF_MPCAP_RETRANSMIT); if ((mptcp_enable == 0) || (mp_tp == NULL) || @@ -465,12 +463,11 @@ 
mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, if ((MAX_TCPOPTLEN - optlen) < sizeof (struct mptcp_mpcapable_opt_common)) { - if (mptcp_dbg >= MP_ERR_DEBUG) { - printf("MPTCP ERROR %s: no space left %d flags %x " - "tp->t_mpflags %x" - "len %d\n", __func__, optlen, flags, tp->t_mpflags, - datalen); - } + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: no space left %d flags %x " + "tp->t_mpflags %x " + "len %d\n", __func__, optlen, flags, tp->t_mpflags, + datalen), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); goto ret_optlen; } @@ -501,9 +498,10 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, goto fastjoin_send; } - if ((tp->t_mpflags & TMPF_PREESTABLISHED) && + if (((tp->t_mpflags & TMPF_PREESTABLISHED) && (!(tp->t_mpflags & TMPF_SENT_KEYS)) && - (!(tp->t_mpflags & TMPF_JOINED_FLOW))) { + (!(tp->t_mpflags & TMPF_JOINED_FLOW))) || + (tp->t_mpflags & TMPF_MPCAP_RETRANSMIT)) { struct mptcp_mpcapable_opt_rsp1 mptcp_opt; if ((MAX_TCPOPTLEN - optlen) < sizeof (struct mptcp_mpcapable_opt_rsp1)) @@ -513,7 +511,7 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, mptcp_opt.mmc_common.mmco_len = sizeof (struct mptcp_mpcapable_opt_rsp1); mptcp_opt.mmc_common.mmco_subtype = MPO_CAPABLE; - mptcp_opt.mmc_common.mmco_version = MP_DRAFT_VERSION_12; + mptcp_opt.mmc_common.mmco_version = mp_tp->mpt_version; /* HMAC-SHA1 is the proposal */ mptcp_opt.mmc_common.mmco_flags |= MPCAP_PROPOSAL_SBIT; MPT_LOCK(mp_tp); @@ -524,19 +522,16 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, MPT_UNLOCK(mp_tp); memcpy(opt + optlen, &mptcp_opt, mptcp_opt.mmc_common.mmco_len); optlen += mptcp_opt.mmc_common.mmco_len; - tp->t_mpflags |= TMPF_SENT_KEYS; + tp->t_mpflags |= TMPF_SENT_KEYS | TMPF_MPTCP_TRUE; so->so_flags |= SOF_MPTCP_TRUE; tp->t_mpflags &= ~TMPF_PREESTABLISHED; - tp->t_mpflags |= TMPF_MPTCP_TRUE; + tp->t_mpflags &= ~TMPF_MPCAP_RETRANSMIT; if (!tp->t_mpuna) { tp->t_mpuna = tp->snd_una; } else { /* its a retransmission of the MP_CAPABLE ACK */ } 
- if (mptcp_dbg >= MP_ERR_DEBUG) { - printf("MPTCP SUCCESS %s: established.\n", __func__); - } goto ret_optlen; } else if (tp->t_mpflags & TMPF_MPTCP_TRUE) { if (tp->t_mpflags & TMPF_SND_REM_ADDR) { @@ -593,11 +588,9 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, #define CHECK_OPTLEN { \ if ((MAX_TCPOPTLEN - optlen) < len) { \ - if (mptcp_dbg >= MP_ERR_DEBUG) { \ - printf("MPTCP ERROR %s: len %d optlen %d \n", \ - __func__, \ - len, optlen); \ - } \ + mptcplog((LOG_ERR, "MPTCP Socket: " \ + "%s: len %d optlen %d \n", __func__, len, optlen), \ + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); \ goto ret_optlen; \ } \ } @@ -616,10 +609,11 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, #define CHECK_DATALEN { \ /* MPTCP socket does not support IP options */ \ if ((datalen + optlen + len) > tp->t_maxopd) { \ - if (mptcp_dbg >= MP_VERBOSE_DEBUG_2) \ - printf("%s: nosp %d len %d opt %d %d %d\n", \ - __func__, datalen, len, optlen, \ - tp->t_maxseg, tp->t_maxopd); \ + mptcplog((LOG_ERR, "MPTCP Socket: " \ + "%s: nosp %d len %d opt %d %d %d\n", \ + __func__, datalen, len, optlen, \ + tp->t_maxseg, tp->t_maxopd), \ + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); \ /* remove option length from payload len */ \ datalen = tp->t_maxopd - optlen - len; \ } \ @@ -691,12 +685,13 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, mdss_subflow_seqn)); } optlen += len; - if (mptcp_dbg == MP_VERBOSE_DEBUG_2) { - printf("%s: long DSS = %llx ACK = %llx \n", - __func__, - mptcp_ntoh64(dsn_ack_opt.mdss_dsn), - mptcp_ntoh64(dsn_ack_opt.mdss_ack)); - } + mptcplog((LOG_DEBUG,"MPTCP Socket: " + "%s: long DSS = %llx ACK = %llx \n", + __func__, + mptcp_ntoh64(dsn_ack_opt.mdss_dsn), + mptcp_ntoh64(dsn_ack_opt.mdss_ack)), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); + tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; goto ret_optlen; } @@ -746,13 +741,6 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, offsetof(struct mptcp_dsn_opt, mdss_subflow_seqn)); } optlen += len; - if 
(mptcp_dbg == MP_VERBOSE_DEBUG_2) { - printf("%s: DSS option. dsn = %x, seq = %x len = %x\n", - __func__, - ntohl(dsn_opt.mdss_dsn), - ntohl(dsn_opt.mdss_subflow_seqn), - ntohs(dsn_opt.mdss_data_len)); - } tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; goto ret_optlen; } @@ -957,10 +945,6 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, if (((mp_tp->mpt_sndnxt + 1) != mp_tp->mpt_sndmax) || (mp_tp->mpt_snduna == mp_tp->mpt_sndmax)) { MPT_UNLOCK(mp_tp); - if (mptcp_dbg == MP_VERBOSE_DEBUG_2) - printf("%s: Fin state %d %llu %llu\n", __func__, - mp_tp->mpt_state, mp_tp->mpt_sndnxt, - mp_tp->mpt_sndmax); goto ret_optlen; } @@ -990,7 +974,8 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, if (TRUE == *p_mptcp_acknow ) { VERIFY(old_mpt_flags != 0); u_int32_t new_mpt_flags = tp->t_mpflags & - (TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL); + (TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL | + TMPF_MPCAP_RETRANSMIT); /* * If none of the above mpflags were acted on by @@ -1006,10 +991,18 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, * we haven't modified the logic in tcp_output to avoid * that. 
*/ - if (old_mpt_flags == new_mpt_flags) { + if ((old_mpt_flags == new_mpt_flags) || (new_mpt_flags == 0)) { tp->t_mpflags &= ~(TMPF_SND_MPPRIO - | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL); + | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL | + TMPF_MPCAP_RETRANSMIT); *p_mptcp_acknow = FALSE; + mptcplog((LOG_DEBUG, "MPTCP Sender: %s: no action \n", + __func__), MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); + } else { + mptcplog((LOG_DEBUG, "MPTCP Sender: acknow set, " + "old flags %x new flags %x \n", + old_mpt_flags, new_mpt_flags), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); } } @@ -1020,19 +1013,50 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, * MPTCP Options Input Processing */ +static int +mptcp_sanitize_option(struct tcpcb *tp, int mptcp_subtype) +{ + struct mptcb *mp_tp = tptomptp(tp); + int ret = 1; + + if (mp_tp == NULL) { + mptcplog((LOG_ERR, "MPTCP Socket: %s: NULL mpsocket \n", + __func__), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); + return (0); + } + + switch (mptcp_subtype) { + case MPO_CAPABLE: + break; + case MPO_JOIN: /* fall through */ + case MPO_DSS: /* fall through */ + case MPO_FASTCLOSE: /* fall through */ + case MPO_FAIL: /* fall through */ + case MPO_REMOVE_ADDR: /* fall through */ + case MPO_ADD_ADDR: /* fall through */ + case MPO_PRIO: /* fall through */ + if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) + ret = 0; + break; + default: + ret = 0; + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: type = %d \n", __func__, + mptcp_subtype), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); + break; + } + return (ret); +} static int -mptcp_valid_mpcapable_common_opt(u_char *cp, u_int32_t mptcp_version) +mptcp_valid_mpcapable_common_opt(u_char *cp) { struct mptcp_mpcapable_opt_common *rsp = (struct mptcp_mpcapable_opt_common *)cp; /* mmco_kind, mmco_len and mmco_subtype are validated before */ - /* In future, there can be more than one version supported */ - if (rsp->mmco_version != mptcp_version) - return (0); - if (!(rsp->mmco_flags & MPCAP_PROPOSAL_SBIT)) return (0); @@ -1061,15 
+1085,8 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, } \ } - if (mp_tp == NULL) { - if (mptcp_dbg == MP_ERR_DEBUG) - printf("MPTCP ERROR %s: NULL mpsocket \n", __func__); - tcpstat.tcps_invalid_mpcap++; - return; - } - /* Validate the kind, len, flags */ - if (mptcp_valid_mpcapable_common_opt(cp, mp_tp->mpt_version) != 1) { + if (mptcp_valid_mpcapable_common_opt(cp) != 1) { tcpstat.tcps_invalid_mpcap++; return; } @@ -1080,14 +1097,24 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, return; } else if ((th->th_flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { + /* Handle old duplicate SYN/ACK retransmission */ + if (SEQ_GT(tp->rcv_nxt, (tp->irs + 1))) + return; + + /* handle SYN/ACK retransmission by acknowledging with ACK */ + if (mp_tp->mpt_state >= MPTCPS_ESTABLISHED) { + tp->t_mpflags |= TMPF_MPCAP_RETRANSMIT; + return; + } + /* A SYN/ACK contains peer's key and flags */ if (optlen != sizeof (struct mptcp_mpcapable_opt_rsp)) { /* complain */ - if (mptcp_dbg == MP_ERR_DEBUG) { - printf("%s: SYN_ACK optlen = %d, sizeof mp opt \ - = %lu \n", __func__, optlen, - sizeof (struct mptcp_mpcapable_opt_rsp)); - } + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: SYN_ACK optlen = %d, sizeof mp opt = %lu \n", + __func__, optlen, + sizeof (struct mptcp_mpcapable_opt_rsp)), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); tcpstat.tcps_invalid_mpcap++; return; } @@ -1103,16 +1130,15 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, rsp = (struct mptcp_mpcapable_opt_rsp *)cp; MPT_LOCK_SPIN(mp_tp); mp_tp->mpt_remotekey = rsp->mmc_localkey; + /* For now just downgrade to the peer's version */ + mp_tp->mpt_peer_version = rsp->mmc_common.mmco_version; + if (rsp->mmc_common.mmco_version < mp_tp->mpt_version) { + mp_tp->mpt_version = rsp->mmc_common.mmco_version; + tcpstat.tcps_mp_verdowngrade++; + } MPT_UNLOCK(mp_tp); tp->t_mpflags |= TMPF_PREESTABLISHED; - if (mptcp_dbg > MP_VERBOSE_DEBUG_1) { - printf("SYN_ACK pre 
established, optlen = %d, tp \ - state = %d sport = %x dport = %x key = %llx \n", - optlen, tp->t_state, th->th_sport, th->th_dport, - mp_tp->mpt_remotekey); - } - } else if ((th->th_flags & TH_ACK) && (tp->t_mpflags & TMPF_PREESTABLISHED)) { @@ -1123,10 +1149,9 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && !(((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags & MPCAP_CHECKSUM_CBIT)) { - if (mptcp_dbg == MP_ERR_DEBUG) { - printf("%s: checksum negotiation failure \n", - __func__); - } + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: checksum negotiation failure \n", __func__), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); MPTCP_OPT_ERROR_PATH(tp); return; } @@ -1134,10 +1159,9 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags & MPCAP_CHECKSUM_CBIT)) { - if (mptcp_dbg == MP_ERR_DEBUG) { - printf("%s: checksum negotiation failure 2.\n", - __func__); - } + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: checksum negotiation failure 2.\n", __func__), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); MPTCP_OPT_ERROR_PATH(tp); return; } @@ -1148,38 +1172,33 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, */ if (optlen != sizeof (struct mptcp_mpcapable_opt_rsp1)) { /* complain */ - if (mptcp_dbg == MP_ERR_DEBUG) { - printf("%s: ACK optlen = %d , sizeof mp option \ - = %lu, state = %d \n", - __func__, - optlen, + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: ACK optlen = %d , sizeof mp option = %lu, " + " state = %d \n", __func__, optlen, sizeof (struct mptcp_mpcapable_opt_rsp1), - tp->t_state); - } + tp->t_state), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); MPTCP_OPT_ERROR_PATH(tp); return; } rsp1 = (struct mptcp_mpcapable_opt_rsp1 *)cp; + /* Skipping MPT_LOCK for invariant key */ if (rsp1->mmc_remotekey != *mp_tp->mpt_localkey) { - if (mptcp_dbg == MP_ERR_DEBUG) { - printf("MPTCP ERROR 
%s: key mismatch locally " - "stored key. rsp = %llx local = %llx \n", - __func__, rsp1->mmc_remotekey, - *mp_tp->mpt_localkey); - } - tp->t_mpflags &= ~TMPF_PREESTABLISHED; + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: key mismatch locally stored key. " + "rsp = %llx local = %llx \n", __func__, + rsp1->mmc_remotekey, *mp_tp->mpt_localkey), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); MPTCP_OPT_ERROR_PATH(tp); return; } else { /* We received both keys. Almost an MPTCP connection */ /* Skipping MPT_LOCK for invariant key */ if (mp_tp->mpt_remotekey != rsp1->mmc_localkey) { - if (mptcp_dbg == MP_ERR_DEBUG) { - printf("MPTCP ERROR %s: keys don't" - " match\n", __func__); - } + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: keys don't match\n", __func__), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); tp->t_mpflags &= ~TMPF_PREESTABLISHED; MPTCP_OPT_ERROR_PATH(tp); return; @@ -1191,14 +1210,12 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, MPT_LOCK(mp_tp); DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, uint32_t, 0 /* event */); + mptcplog((LOG_DEBUG, "MPTCP State: " + "MPTCPS_ESTABLISHED \n"), + MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG); + mp_tp->mpt_state = MPTCPS_ESTABLISHED; MPT_UNLOCK(mp_tp); - if (mptcp_dbg >= MP_VERBOSE_DEBUG_2) { - printf("MPTCP SUCCESS %s: rem key = %llx local \ - key = %llx \n", - __func__, mp_tp->mpt_remotekey, - *mp_tp->mpt_localkey); - } } if (tp->t_mpuna) { tp->t_mpuna = 0; @@ -1219,7 +1236,6 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) } \ } int error = 0; - struct mptcb *mp_tp = tptomptp(tp); if ((th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN) { /* We won't accept join requests as an active opener */ @@ -1229,12 +1245,11 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) } if (optlen != sizeof (struct mptcp_mpjoin_opt_req)) { - if (mptcp_dbg == MP_ERR_DEBUG) { - printf("SYN: unexpected optlen = %d, mp option" - "= %lu\n", - optlen, - sizeof (struct 
mptcp_mpjoin_opt_req)); - } + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: SYN: unexpected optlen = %d, mp option" + "= %lu\n", __func__, optlen, + sizeof (struct mptcp_mpjoin_opt_req)), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); /* send RST and close */ MPTCP_JOPT_ERROR_PATH(tp); return; @@ -1246,9 +1261,10 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) (struct mptcp_mpjoin_opt_req *)cp; mp_so = mptcp_find_mpso(join_req->mmjo_peer_token); if (!mp_so) { - if (mptcp_dbg >= MP_ERR_DEBUG) - printf("%s: cannot find mp_so token = %x\n", - __func__, join_req->mmjo_peer_token); + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: cannot find mp_so token = %x\n", + __func__, join_req->mmjo_peer_token), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); /* send RST */ MPTCP_JOPT_ERROR_PATH(tp); return; @@ -1270,21 +1286,11 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) (struct mptcp_mpjoin_opt_rsp *)cp; if (optlen != sizeof (struct mptcp_mpjoin_opt_rsp)) { - if (mptcp_dbg >= MP_ERR_DEBUG) { - printf("SYN_ACK: unexpected optlen = %d mp " - "option = %lu\n", optlen, - sizeof (struct mptcp_mpjoin_opt_rsp)); - } - tp->t_mpflags &= ~TMPF_PREESTABLISHED; - /* send RST and close */ - MPTCP_JOPT_ERROR_PATH(tp); - return; - } - - if (mp_tp == NULL) { - if (mptcp_dbg >= MP_ERR_DEBUG) - printf("%s: cannot find mp_tp in SYN_ACK\n", - __func__); + mptcplog((LOG_ERR, "MPTCP Socket: " + "SYN_ACK: unexpected optlen = %d mp " + "option = %lu\n", optlen, + sizeof (struct mptcp_mpjoin_opt_rsp)), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); tp->t_mpflags &= ~TMPF_PREESTABLISHED; /* send RST and close */ MPTCP_JOPT_ERROR_PATH(tp); @@ -1297,10 +1303,9 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) error = mptcp_validate_join_hmac(tp, (u_char*)&join_rsp->mmjo_mac, SHA1_TRUNCATED); if (error) { - if (mptcp_dbg >= MP_ERR_DEBUG) { - printf("%s: SYN_ACK error = %d \n", __func__, - error); - } + mptcplog((LOG_ERR, "MPTCP 
Socket: %s: " + "SYN_ACK error = %d \n", __func__, error), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); tp->t_mpflags &= ~TMPF_PREESTABLISHED; /* send RST and close */ MPTCP_JOPT_ERROR_PATH(tp); @@ -1313,19 +1318,14 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) (struct mptcp_mpjoin_opt_rsp2 *)cp; if (optlen != sizeof (struct mptcp_mpjoin_opt_rsp2)) { - if (mptcp_dbg >= MP_ERR_DEBUG) { - printf("ACK: unexpected optlen = %d mp option " - "= %lu \n", optlen, - sizeof (struct mptcp_mpjoin_opt_rsp2)); - } - tp->t_mpflags &= ~TMPF_PREESTABLISHED; - /* send RST and close */ - MPTCP_JOPT_ERROR_PATH(tp); - return; - } + mptcplog((LOG_ERR, "MPTCP Socket: " + "ACK: unexpected optlen = %d mp option " + "= %lu \n", optlen, + sizeof (struct mptcp_mpjoin_opt_rsp2)), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); - if (mp_tp == NULL) { tp->t_mpflags &= ~TMPF_PREESTABLISHED; + /* send RST and close */ MPTCP_JOPT_ERROR_PATH(tp); return; } @@ -1333,10 +1333,9 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) error = mptcp_validate_join_hmac(tp, join_rsp2->mmjo_mac, SHA1_RESULTLEN); if (error) { - if (mptcp_dbg >= MP_ERR_DEBUG) { - printf("%s: ACK error = %d\n", __func__, - error); - } + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: ACK error = %d\n", __func__, error), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); tp->t_mpflags &= ~TMPF_PREESTABLISHED; MPTCP_JOPT_ERROR_PATH(tp); return; @@ -1346,9 +1345,6 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) tp->t_flags |= TF_ACKNOW; tp->t_mpflags |= TMPF_MPTCP_ACKNOW; tp->t_inpcb->inp_socket->so_flags |= SOF_MPTCP_TRUE; - if (mptcp_dbg >= MP_ERR_DEBUG) { - printf("MPTCP SUCCESS %s: join \n", __func__); - } } } @@ -1361,8 +1357,6 @@ mptcp_validate_join_hmac(struct tcpcb *tp, u_char* hmac, int mac_len) u_int32_t rem_rand, loc_rand; mp_tp = tp->t_mptcb; - if (mp_tp == NULL) - return (-1); rem_rand = loc_rand = 0; @@ -1394,9 +1388,6 @@ 
mptcp_do_dss_opt_ack_meat(u_int64_t full_dack, struct tcpcb *tp) struct mptcb *mp_tp = tptomptp(tp); int close_notify = 0; - if (mp_tp == NULL) - return; - MPT_LOCK(mp_tp); if (MPTCP_SEQ_LEQ(full_dack, mp_tp->mpt_sndmax) && MPTCP_SEQ_GEQ(full_dack, mp_tp->mpt_snduna)) { @@ -1413,15 +1404,12 @@ mptcp_do_dss_opt_ack_meat(u_int64_t full_dack, struct tcpcb *tp) } } else { MPT_UNLOCK(mp_tp); - if (mptcp_dbg == MP_VERBOSE_DEBUG_2) { - printf("%s: unexpected dack %llx snduna %llx " - "sndmax %llx\n", __func__, full_dack, - mp_tp->mpt_snduna, mp_tp->mpt_sndmax); - } - } - - if (mptcp_dbg == MP_VERBOSE_DEBUG_2) { - printf("%s: full_dack = %llu \n", __func__, full_dack); + mptcplog((LOG_ERR,"MPTCP Socket: " + "%s: unexpected dack %llx snduna %llx " + "sndmax %llx\n", __func__, full_dack, + mp_tp->mpt_snduna, mp_tp->mpt_sndmax), + (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), + MPTCP_LOGLVL_LOG); } } @@ -1435,17 +1423,14 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp) #define MPTCP_DSS_OPT_SZ_CHK(len, expected_len) { \ if (len != expected_len) { \ - if (mptcp_dbg >= MP_ERR_DEBUG) { \ - printf("MPTCP ERROR %s: bad len = %d" \ - "dss: %x \n", __func__, \ - len, \ - dss_rsp->mdss_flags); \ - } \ + mptcplog((LOG_ERR, "MPTCP Socket: " \ + "%s: bad len = %d dss: %x \n", __func__, \ + len, dss_rsp->mdss_flags), \ + (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), \ + MPTCP_LOGLVL_LOG); \ return; \ } \ } - if (mp_tp == NULL) - return; if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) csum_len = 2; @@ -1519,9 +1504,10 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp) MPTCP_DSS_OPT_SZ_CHK(dsn64->mdss_copt.mdss_len, sizeof (struct mptcp_dsn64_opt) + csum_len); - if (mptcp_dbg == MP_VERBOSE_DEBUG_4) { - printf("%s: 64-bit M present.\n", __func__); - } + mptcplog((LOG_DEBUG,"MPTCP Socket: " + "%s: 64-bit M present.\n", __func__), + (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), + MPTCP_LOGLVL_LOG); MPT_LOCK_SPIN(mp_tp); mp_tp->mpt_flags |= MPTCPF_SND_64BITACK; @@ -1552,10 +1538,10 @@ 
mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp) MPTCP_DSS_OPT_SZ_CHK(dack64->mdss_copt.mdss_len, sizeof (struct mptcp_data_ack64_opt)); - - if (mptcp_dbg == MP_VERBOSE_DEBUG_4) { - printf("%s: 64-bit A present. \n", __func__); - } + mptcplog((LOG_DEBUG,"MPTCP Socket: " + "%s: 64-bit A present. \n", __func__), + (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), + MPTCP_LOGLVL_LOG); MPT_LOCK_SPIN(mp_tp); mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK; @@ -1574,10 +1560,10 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp) MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp->mdss_copt.mdss_len, sizeof (struct mptcp_dss64_ack32_opt) + csum_len); - if (mptcp_dbg == MP_VERBOSE_DEBUG_4) { - printf("%s: 64-bit M and 32-bit A present.\n", - __func__); - } + mptcplog((LOG_DEBUG,"MPTCP Socket: " + "%s: 64-bit M and 32-bit A present.\n", __func__), + (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), + MPTCP_LOGLVL_LOG); u_int32_t dack = dss_ack_rsp->mdss_ack; NTOHL(dack); @@ -1606,10 +1592,11 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp) dss32_ack64_opt->mdss_copt.mdss_len, sizeof (struct mptcp_dss32_ack64_opt) + csum_len); - if (mptcp_dbg == MP_VERBOSE_DEBUG_4) { - printf("%s: 32-bit M and 64-bit A present.\n", - __func__); - } + mptcplog((LOG_DEBUG,"MPTCP Socket: " + "%s: 32-bit M and 64-bit A present.\n", __func__), + (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), + MPTCP_LOGLVL_LOG); + full_dack = mptcp_ntoh64(dss32_ack64_opt->mdss_ack); mptcp_do_dss_opt_ack_meat(full_dack, tp); NTOHL(dss32_ack64_opt->mdss_dsn); @@ -1643,10 +1630,11 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp) MPTCP_DSS_OPT_SZ_CHK(dss64_ack64->mdss_copt.mdss_len, sizeof (struct mptcp_dss64_ack64_opt) + csum_len); - if (mptcp_dbg == MP_VERBOSE_DEBUG_4) { - printf("%s: 64-bit M and 64-bit A present.\n", - __func__); - } + mptcplog((LOG_DEBUG,"MPTCP Socket: " + "%s: 64-bit M and 64-bit A present.\n", __func__), + (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), + MPTCP_LOGLVL_LOG); + MPT_LOCK_SPIN(mp_tp); mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK; 
mp_tp->mpt_flags |= MPTCPF_SND_64BITACK; @@ -1670,10 +1658,11 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp) break; } default: - if (mptcp_dbg >= MP_ERR_DEBUG) { - printf("MPTCP ERROR %s: File bug, DSS flags = %x\n", - __func__, dss_rsp->mdss_flags); - } + mptcplog((LOG_DEBUG,"MPTCP Socket: " + "%s: File bug, DSS flags = %x\n", __func__, + dss_rsp->mdss_flags), + (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), + MPTCP_LOGLVL_LOG); break; } } @@ -1684,6 +1673,10 @@ mptcp_do_fin_opt(struct tcpcb *tp) { struct mptcb *mp_tp = (struct mptcb *)tp->t_mptcb; + mptcplog((LOG_DEBUG,"MPTCP Socket: %s \n", __func__), + (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), + MPTCP_LOGLVL_LOG); + if (!(tp->t_mpflags & TMPF_RECV_DFIN)) { if (mp_tp != NULL) { MPT_LOCK(mp_tp); @@ -1723,13 +1716,7 @@ mptcp_do_dss_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) struct mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp; if (dss_rsp->mdss_subtype == MPO_DSS) { - if (mptcp_dbg > MP_VERBOSE_DEBUG_4) { - printf("%s: DSS option received: %d ", - __func__, dss_rsp->mdss_flags); - } if (dss_rsp->mdss_flags & MDSS_F) { - if (mptcp_dbg >= MP_VERBOSE_DEBUG_1) - printf("%s: received FIN\n", __func__); mptcp_do_fin_opt(tp); } @@ -1747,8 +1734,9 @@ mptcp_do_fastclose_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th) if (th->th_flags != TH_ACK) return; - if (mptcp_dbg > MP_VERBOSE_DEBUG_2) - printf("%s: received \n", __func__); + mptcplog((LOG_DEBUG,"MPTCP Socket: %s: \n", __func__), + (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), + MPTCP_LOGLVL_LOG); if (fc_opt->mfast_len != sizeof (struct mptcp_fastclose_opt)) { tcpstat.tcps_invalid_opt++; @@ -1773,15 +1761,6 @@ mptcp_do_fastclose_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th) return; } - MPT_LOCK(mp_tp); - if (mp_tp->mpt_state != MPTCPS_FASTCLOSE_WAIT) { - mp_tp->mpt_state = MPTCPS_FASTCLOSE_WAIT; - DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, - uint32_t, 0 /* event */); - mptcp_start_timer(mp_tp, MPTT_FASTCLOSE); - } - 
MPT_UNLOCK(mp_tp); - /* Reset this flow */ tp->t_mpflags |= TMPF_RESET; @@ -1813,12 +1792,13 @@ mptcp_do_mpfail_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th) if ((th->th_flags != TH_ACK) && (th->th_flags != TH_RST)) return; + mptcplog((LOG_DEBUG, "MPTCP Socket: %s: \n", __func__), + (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), MPTCP_LOGLVL_LOG); + if (fail_opt->mfail_len != sizeof (struct mptcp_mpfail_opt)) return; mp_tp = (struct mptcb *)tp->t_mptcb; - if (mp_tp == NULL) - return; MPT_LOCK(mp_tp); mp_tp->mpt_flags |= MPTCPF_RECVD_MPFAIL; mp_tp->mpt_dsn_at_csum_fail = mptcp_hton64(fail_opt->mfail_dsn); @@ -1832,7 +1812,7 @@ mptcp_do_mpfail_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th) mptcp_notify_mpfail(tp->t_inpcb->inp_socket); } -int +void tcp_do_mptcp_options(struct tcpcb *tp, u_char *cp, struct tcphdr *th, struct tcpopt *to, int optlen) { @@ -1840,10 +1820,13 @@ tcp_do_mptcp_options(struct tcpcb *tp, u_char *cp, struct tcphdr *th, /* All MPTCP options have atleast 4 bytes */ if (optlen < 4) - return (0); + return; mptcp_subtype = (cp[2] >> 4); + if (mptcp_sanitize_option(tp, mptcp_subtype) == 0) + return; + switch (mptcp_subtype) { case MPO_CAPABLE: mptcp_do_mpcapable_opt(tp, cp, th, optlen); @@ -1866,10 +1849,9 @@ tcp_do_mptcp_options(struct tcpcb *tp, u_char *cp, struct tcphdr *th, to->to_flags |= TOF_MPTCP; break; default: - printf("%s: type = %d\n", __func__, mptcp_subtype); break; } - return (0); + return; } /* @@ -1914,9 +1896,9 @@ mptcp_send_addaddr_opt(struct tcpcb *tp, struct mptcp_addaddr_opt *opt) void mptcp_send_remaddr_opt(struct tcpcb *tp, struct mptcp_remaddr_opt *opt) { - if (mptcp_dbg >= MP_ERR_DEBUG) - printf("%s: local id %d remove id %d \n", __func__, - tp->t_local_aid, tp->t_rem_aid); + mptcplog((LOG_DEBUG,"MPTCP Socket: %s: local id %d remove id %d \n", + __func__, tp->t_local_aid, tp->t_rem_aid), + (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG); bzero(opt, sizeof (*opt)); opt->mr_kind = TCPOPT_MULTIPATH; @@ -1983,7 
+1965,8 @@ mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen) memcpy(cp + optlen, &mpprio, sizeof (mpprio)); optlen += sizeof (mpprio); tp->t_mpflags &= ~TMPF_SND_MPPRIO; - if (mptcp_dbg >= MP_ERR_DEBUG) - printf("%s: aid = %d \n", __func__, tp->t_local_aid); + mptcplog((LOG_DEBUG, "MPTCP Socket: %s: aid = %d \n", __func__, + tp->t_local_aid), + (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG); return (optlen); } diff --git a/bsd/netinet/mptcp_opt.h b/bsd/netinet/mptcp_opt.h index 8c925b9b9..a9450dee9 100644 --- a/bsd/netinet/mptcp_opt.h +++ b/bsd/netinet/mptcp_opt.h @@ -41,7 +41,7 @@ #define MPTCP_CAPABLE_RETRIES (2) __BEGIN_DECLS -extern int tcp_do_mptcp_options(struct tcpcb *, u_char *, struct tcphdr *, +extern void tcp_do_mptcp_options(struct tcpcb *, u_char *, struct tcphdr *, struct tcpopt *, int); extern unsigned mptcp_setup_syn_opts(struct socket *, int, u_char*, unsigned); extern unsigned mptcp_setup_join_ack_opts(struct tcpcb *, u_char*, unsigned); diff --git a/bsd/netinet/mptcp_subr.c b/bsd/netinet/mptcp_subr.c index 6537a1c5f..b4ecb3ff0 100644 --- a/bsd/netinet/mptcp_subr.c +++ b/bsd/netinet/mptcp_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014 Apple Inc. All rights reserved. + * Copyright (c) 2012-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -47,6 +47,7 @@ #include #include +#include #include #include #include @@ -146,6 +147,7 @@ static void mptcp_subflow_wupcall(struct socket *, void *, int); static void mptcp_subflow_eupcall(struct socket *, void *, uint32_t); static void mptcp_update_last_owner(struct mptsub *, struct socket *); static void mptcp_output_needed(struct mptses *mpte, struct mptsub *to_mpts); +static void mptcp_get_rtt_measurement(struct mptsub *, struct mptses *); /* * Possible return values for subflow event handlers. 
Note that success @@ -159,26 +161,25 @@ typedef enum { MPTS_EVRET_OK = 2, /* OK */ MPTS_EVRET_CONNECT_PENDING = 3, /* resume pended connects */ MPTS_EVRET_DISCONNECT_FALLBACK = 4, /* abort all but preferred */ - MPTS_EVRET_OK_UPDATE = 5, /* OK with conninfo update */ } ev_ret_t; -static ev_ret_t mptcp_subflow_events(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_connreset_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_cantrcvmore_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_cantsendmore_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_timeout_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_suspend_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_resume_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_fastjoin_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_deleteok_ev(struct mptses *, struct mptsub *); -static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *); +static ev_ret_t mptcp_subflow_events(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_connreset_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_cantrcvmore_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_cantsendmore_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t 
mptcp_subflow_timeout_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_suspend_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_resume_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_fastjoin_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_deleteok_ev(struct mptses *, struct mptsub *, uint64_t *); +static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *, uint64_t *); static const char *mptcp_evret2str(ev_ret_t); @@ -206,9 +207,14 @@ SYSCTL_DECL(_net_inet); SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "MPTCP"); -uint32_t mptcp_verbose = 0; /* more noise if greater than 1 */ -SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, verbose, CTLFLAG_RW|CTLFLAG_LOCKED, - &mptcp_verbose, 0, "MPTCP verbosity level"); +uint32_t mptcp_dbg_area = 0; /* more noise if greater than 1 */ +SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, dbg_area, CTLFLAG_RW|CTLFLAG_LOCKED, + &mptcp_dbg_area, 0, "MPTCP debug area"); + +uint32_t mptcp_dbg_level = 0; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dbg_level, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_dbg_level, 0, "MPTCP debug level"); + SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD|CTLFLAG_LOCKED, &mtcbinfo.mppi_count, 0, "Number of active PCBs"); @@ -229,12 +235,11 @@ 
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, delayed, CTLFLAG_RW|CTLFLAG_LOCKED, &mptcp_delayed_subf_start, 0, "MPTCP Delayed Subflow start"); /* - * SYSCTL for RTT spike measurement threshold in msecs. + * sysctl to use network status hints from symptomsd */ -int32_t mptcp_rto_spike_thresh = 3000; -SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, rto_spikethresh, - CTLFLAG_RW|CTLFLAG_LOCKED, &mptcp_rto_spike_thresh, 0, - "MPTCP RTT spike thresh"); +uint32_t mptcp_use_symptomsd = 1; +SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, usesymptoms, CTLFLAG_RW|CTLFLAG_LOCKED, + &mptcp_use_symptomsd, 0, "MPTCP Use SymptomsD"); static struct protosw mptcp_subflow_protosw; static struct pr_usrreqs mptcp_subflow_usrreqs; @@ -243,6 +248,80 @@ static struct ip6protosw mptcp_subflow_protosw6; static struct pr_usrreqs mptcp_subflow_usrreqs6; #endif /* INET6 */ +typedef struct mptcp_subflow_event_entry { + uint64_t sofilt_hint_mask; + ev_ret_t (*sofilt_hint_ev_hdlr)( + struct mptses *mpte, + struct mptsub *mpts, + uint64_t *p_mpsofilt_hint); +} mptsub_ev_entry_t; + +static mptsub_ev_entry_t mpsub_ev_entry_tbl [] = { + { + .sofilt_hint_mask = SO_FILT_HINT_MPCANTRCVMORE, + .sofilt_hint_ev_hdlr = mptcp_subflow_mpcantrcvmore_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_MPFAILOVER, + .sofilt_hint_ev_hdlr = mptcp_subflow_failover_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_CONNRESET, + .sofilt_hint_ev_hdlr = mptcp_subflow_connreset_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_MUSTRST, + .sofilt_hint_ev_hdlr = mptcp_subflow_mustrst_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_CANTRCVMORE, + .sofilt_hint_ev_hdlr = mptcp_subflow_cantrcvmore_ev, + }, + { .sofilt_hint_mask = SO_FILT_HINT_CANTSENDMORE, + .sofilt_hint_ev_hdlr = mptcp_subflow_cantsendmore_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_TIMEOUT, + .sofilt_hint_ev_hdlr = mptcp_subflow_timeout_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_NOSRCADDR, + .sofilt_hint_ev_hdlr = mptcp_subflow_nosrcaddr_ev, + }, + { + .sofilt_hint_mask = 
SO_FILT_HINT_IFDENIED, + .sofilt_hint_ev_hdlr = mptcp_subflow_ifdenied_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_SUSPEND, + .sofilt_hint_ev_hdlr = mptcp_subflow_suspend_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_RESUME, + .sofilt_hint_ev_hdlr = mptcp_subflow_resume_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_CONNECTED, + .sofilt_hint_ev_hdlr = mptcp_subflow_connected_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_MPSTATUS, + .sofilt_hint_ev_hdlr = mptcp_subflow_mpstatus_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_DELETEOK, + .sofilt_hint_ev_hdlr = mptcp_deleteok_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_DISCONNECTED, + .sofilt_hint_ev_hdlr = mptcp_subflow_disconnected_ev, + }, + { + .sofilt_hint_mask = SO_FILT_HINT_MPFASTJ, + .sofilt_hint_ev_hdlr = mptcp_fastjoin_ev, + } +}; + /* * Protocol pr_init callback. */ @@ -327,9 +406,10 @@ mptcp_init(struct protosw *pp, struct domain *dp) mtcbinfo.mppi_lock_attr = lck_attr_alloc_init(); lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp, mtcbinfo.mppi_lock_attr); - mtcbinfo.mppi_gc = mptcp_gc; + mtcbinfo.mppi_gc = mptcp_gc; mtcbinfo.mppi_timer = mptcp_timer; + mtcbinfo.mppi_pcbe_create = mptcp_sescreate; /* attach to MP domain for garbage collection to take place */ mp_pcbinfo_attach(&mtcbinfo); @@ -364,13 +444,12 @@ mptcp_init(struct protosw *pp, struct domain *dp) /* Set up a list of unique keys */ mptcp_key_pool_init(); - } /* * Create an MPTCP session, called as a result of opening a MPTCP socket. 
*/ -struct mptses * +void * mptcp_sescreate(struct socket *mp_so, struct mppcb *mpp) { struct mppcbinfo *mppi; @@ -382,8 +461,8 @@ mptcp_sescreate(struct socket *mp_so, struct mppcb *mpp) mppi = mpp->mpp_pcbinfo; VERIFY(mppi != NULL); - mpte = &((struct mpp_mtp *)mpp)->mpp_ses; - mp_tp = &((struct mpp_mtp *)mpp)->mtcb; + __IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses); + __IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb); /* MPTCP Multipath PCB Extension */ bzero(mpte, sizeof (*mpte)); @@ -394,8 +473,8 @@ mptcp_sescreate(struct socket *mp_so, struct mppcb *mpp) TAILQ_INIT(&mpte->mpte_sopts); TAILQ_INIT(&mpte->mpte_subflows); - mpte->mpte_associd = ASSOCID_ANY; - mpte->mpte_connid_last = CONNID_ANY; + mpte->mpte_associd = SAE_ASSOCID_ANY; + mpte->mpte_connid_last = SAE_CONNID_ANY; lck_mtx_init(&mpte->mpte_thread_lock, mppi->mppi_lock_grp, mppi->mppi_lock_attr); @@ -420,7 +499,7 @@ mptcp_sescreate(struct socket *mp_so, struct mppcb *mpp) lck_mtx_init(&mp_tp->mpt_lock, mppi->mppi_lock_grp, mppi->mppi_lock_attr); mp_tp->mpt_mpte = mpte; - + mp_tp->mpt_state = MPTCPS_CLOSED; out: if (error != 0) lck_mtx_destroy(&mpte->mpte_thread_lock, mppi->mppi_lock_grp); @@ -625,9 +704,10 @@ mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom, */ if ((error = socreate_internal(dom, so, SOCK_STREAM, IPPROTO_TCP, p, SOCF_ASYNC | SOCF_MP_SUBFLOW, PROC_NULL)) != 0) { - mptcplog((LOG_ERR, "MPTCP ERROR %s: mp_so 0x%llx unable to " - "create subflow socket error %d\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), error)); + mptcplog((LOG_ERR, "MPTCP Socket: subflow socreate mp_so 0x%llx" + " unable to create subflow socket error %d\n", + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), error), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); return (error); } @@ -705,11 +785,13 @@ mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom, interim = (mpo->mpo_flags & MPOF_INTERIM); if (mptcp_subflow_sosetopt(mpte, *so, mpo) != 0 && interim) { char 
buf[32]; - mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s val %d " - "interim record removed\n", __func__, + mptcplog((LOG_ERR, "MPTCP Socket: subflow socreate" + " mp_so 0x%llx" + " sopt %s val %d interim record removed\n", (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name, - buf, sizeof (buf)), mpo->mpo_intval)); + buf, sizeof (buf)), mpo->mpo_intval), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); mptcp_sopt_remove(mpte, mpo); mptcp_sopt_free(mpo); continue; @@ -807,8 +889,8 @@ mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts) dst_se = TAILQ_FIRST(&mpts->mpts_dst_sl->sl_head); VERIFY(dst_se != NULL); - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx dst %s[%d] cid %d " - "[pended %s]\n", __func__, + mptcplog((LOG_DEBUG, "MPTCP Socket: connectx mp_so 0x%llx " + "dst %s[%d] cid %d [pended %s]\n", (u_int64_t)VM_KERNEL_ADDRPERM(mpte->mpte_mppcb->mpp_socket), inet_ntop(af, ((af == AF_INET) ? (void *)&SIN(dst_se->se_addr)->sin_addr.s_addr : @@ -818,7 +900,8 @@ mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts) ntohs(SIN6(dst_se->se_addr)->sin6_port)), mpts->mpts_connid, ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ? 
- "YES" : "NO"))); + "YES" : "NO")), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); } mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING; @@ -829,8 +912,8 @@ mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts) /* connect the subflow socket */ error = soconnectxlocked(so, &mpts->mpts_src_sl, &mpts->mpts_dst_sl, mpts->mpts_mpcr.mpcr_proc, mpts->mpts_mpcr.mpcr_ifscope, - mpte->mpte_associd, NULL, TCP_CONNREQF_MPTCP, - &mpts->mpts_mpcr, sizeof (mpts->mpts_mpcr)); + mpte->mpte_associd, NULL, CONNREQF_MPTCP, + &mpts->mpts_mpcr, sizeof (mpts->mpts_mpcr), NULL, NULL); socket_unlock(so, 0); /* Allocate a unique address id per subflow */ @@ -1147,7 +1230,7 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts, VERIFY(mpts->mpts_mpte == NULL); VERIFY(mpts->mpts_socket == NULL); VERIFY(mpts->mpts_dst_sl != NULL); - VERIFY(mpts->mpts_connid == CONNID_ANY); + VERIFY(mpts->mpts_connid == SAE_CONNID_ANY); /* select source (if specified) and destination addresses */ if ((error = in_selectaddrs(AF_UNSPEC, &mpts->mpts_src_sl, &src_se, @@ -1197,17 +1280,17 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts, } /* - * Increment the counter, while avoiding 0 (CONNID_ANY) and - * -1 (CONNID_ALL). + * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and + * -1 (SAE_CONNID_ALL). 
*/ mpte->mpte_connid_last++; - if (mpte->mpte_connid_last == CONNID_ALL || - mpte->mpte_connid_last == CONNID_ANY) + if (mpte->mpte_connid_last == SAE_CONNID_ALL || + mpte->mpte_connid_last == SAE_CONNID_ANY) mpte->mpte_connid_last++; mpts->mpts_connid = mpte->mpte_connid_last; - VERIFY(mpts->mpts_connid != CONNID_ANY && - mpts->mpts_connid != CONNID_ALL); + VERIFY(mpts->mpts_connid != SAE_CONNID_ANY && + mpts->mpts_connid != SAE_CONNID_ALL); /* Allocate a unique address id per subflow */ mpte->mpte_addrid_last++; @@ -1226,11 +1309,12 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts, VERIFY(mpts->mpts_outif != NULL); mpts->mpts_flags |= MPTSF_BOUND_IF; - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx bindif %s[%d] " - "cid %d\n", __func__, + mptcplog((LOG_DEBUG, "MPTCP Socket: subflow_add mp_so 0x%llx " + "bindif %s[%d] cid d\n", (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_outif->if_xname, - ifscope, mpts->mpts_connid)); + ifscope, mpts->mpts_connid), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); socket_unlock(so, 0); } @@ -1268,13 +1352,14 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts, if (af == AF_INET || af == AF_INET6) { char sbuf[MAX_IPv6_STR_LEN]; - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx bindip %s[%d] " - "cid %d\n", __func__, + mptcplog((LOG_DEBUG, "MPTCP Socket: subflow_add " + "mp_so 0x%llx bindip %s[%d] cid %d\n", (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), inet_ntop(af, ((af == AF_INET) ? 
(void *)&SIN(sa)->sin_addr.s_addr : (void *)&SIN6(sa)->sin6_addr), sbuf, sizeof (sbuf)), - ntohs(lport), mpts->mpts_connid)); + ntohs(lport), mpts->mpts_connid), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); } } @@ -1353,7 +1438,8 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts, if (af == AF_INET || af == AF_INET6) { char dbuf[MAX_IPv6_STR_LEN]; - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx dst %s[%d] cid %d " + mptcplog((LOG_DEBUG, "MPTCP Socket: %s " + "mp_so 0x%llx dst %s[%d] cid %d " "[pending %s]\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), inet_ntop(af, ((af == AF_INET) ? @@ -1364,7 +1450,8 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts, ntohs(SIN6(dst_se->se_addr)->sin6_port)), mpts->mpts_connid, ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ? - "YES" : "NO"))); + "YES" : "NO")), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); } /* connect right away if first attempt, or if join can be done now */ @@ -1401,22 +1488,26 @@ mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts, boolean_t close) if (close && !((mpts->mpts_flags & MPTSF_DELETEOK) && (mpts->mpts_flags & MPTSF_USER_DISCONNECT))) { MPTS_UNLOCK(mpts); - mptcplog((LOG_DEBUG, "%s: %d %x\n", __func__, - mpts->mpts_soerror, mpts->mpts_flags)); + mptcplog((LOG_DEBUG, "MPTCP Socket: subflow_del returning" + " mp_so 0x%llx flags %x\n", + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_flags), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); return; } - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx [u=%d,r=%d] cid %d " - "[close %s] %d %x\n", __func__, + mptcplog((LOG_DEBUG, "MPTCP Socket: subflow_del mp_so 0x%llx " + "[u=%d,r=%d] cid %d [close %s] %d %x error %d\n", (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_usecount, mp_so->so_retaincnt, mpts->mpts_connid, (close ? 
"YES" : "NO"), mpts->mpts_soerror, - mpts->mpts_flags)); + mpts->mpts_flags, + mp_so->so_error), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); VERIFY(mpts->mpts_mpte == mpte); - VERIFY(mpts->mpts_connid != CONNID_ANY && - mpts->mpts_connid != CONNID_ALL); + VERIFY(mpts->mpts_connid != SAE_CONNID_ANY && + mpts->mpts_connid != SAE_CONNID_ALL); VERIFY(mpts->mpts_flags & MPTSF_ATTACHED); atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED); @@ -1466,8 +1557,8 @@ mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts, VERIFY(mpts->mpts_mpte == mpte); VERIFY(mpts->mpts_socket != NULL); - VERIFY(mpts->mpts_connid != CONNID_ANY && - mpts->mpts_connid != CONNID_ALL); + VERIFY(mpts->mpts_connid != SAE_CONNID_ANY && + mpts->mpts_connid != SAE_CONNID_ALL); if (mpts->mpts_flags & (MPTSF_DISCONNECTING|MPTSF_DISCONNECTED)) return; @@ -1493,9 +1584,10 @@ mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts, socket_lock(so, 0); if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) && (so->so_state & SS_ISCONNECTED)) { - mptcplog((LOG_DEBUG, "%s: cid %d fin %d [linger %s]\n", - __func__, mpts->mpts_connid, send_dfin, - (deleteok ? "NO" : "YES"))); + mptcplog((LOG_DEBUG, "MPTCP Socket %s: cid %d fin %d " + "[linger %s]\n", __func__, mpts->mpts_connid, send_dfin, + (deleteok ? 
"NO" : "YES")), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); if (send_dfin) mptcp_send_dfin(so); @@ -1570,34 +1662,38 @@ mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts) error = sock_receive_internal(so, NULL, &m, 0, NULL); if (error != 0 && error != EWOULDBLOCK) { - mptcplog((LOG_ERR, "%s: cid %d error %d\n", - __func__, mpts->mpts_connid, error)); + mptcplog((LOG_ERR, "MPTCP Receiver: %s cid %d error %d\n", + __func__, mpts->mpts_connid, error), + MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR); MPTS_UNLOCK(mpts); - mpts_alt = mptcp_get_subflow(mpte, mpts); + mpts_alt = mptcp_get_subflow(mpte, mpts, NULL); if (mpts_alt == NULL) { if (mptcp_delayed_subf_start) { mpts_alt = mptcp_get_pending_subflow(mpte, mpts); if (mpts_alt) { - mptcplog((LOG_INFO,"%s: pending %d\n", - __func__, mpts_alt->mpts_connid)); + mptcplog((LOG_DEBUG,"MPTCP Receiver:" + " %s: pending %d\n", + __func__, mpts_alt->mpts_connid), + MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR); } else { - mptcplog((LOG_ERR, "%s: no pending", - "%d\n", __func__, - mpts->mpts_connid)); - mpte->mpte_mppcb->mpp_socket->so_error = - error; + mptcplog((LOG_ERR, "MPTCP Receiver:" + " %s: no pending flow for cid %d", + __func__, mpts->mpts_connid), + MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR); } } else { - mptcplog((LOG_ERR, "%s: no alt path cid %d\n", - __func__, mpts->mpts_connid)); - mpte->mpte_mppcb->mpp_socket->so_error = error; + mptcplog((LOG_ERR, "MPTCP Receiver: %s: no alt" + " path for cid %d\n", __func__, + mpts->mpts_connid), + MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR); } } MPTS_LOCK(mpts); } else if (error == 0) { - mptcplog3((LOG_DEBUG, "%s: cid %d \n", - __func__, mpts->mpts_connid)); + mptcplog((LOG_DEBUG, "MPTCP Receiver: %s: cid %d \n", + __func__, mpts->mpts_connid), + MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE); } /* In fallback, make sure to accept data on all but one subflow */ @@ -1608,6 +1704,13 @@ mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts) } if (m != NULL) { + + /* Did we receive data on the 
backup subflow? */ + if (!(mpts->mpts_flags & MPTSF_ACTIVE)) + mpts->mpts_peerswitch++; + else + mpts->mpts_peerswitch = 0; + /* * Release subflow lock since this may trigger MPTCP to send, * possibly on a different subflow. An extra reference has @@ -1680,9 +1783,10 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) /* subflow socket is suspended? */ if (mpts->mpts_flags & MPTSF_SUSPENDED) { - mptcplog((LOG_ERR, "%s: mp_so 0x%llx cid %d is flow " - "controlled\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid)); + mptcplog((LOG_ERR, "MPTCP Sender: %s mp_so 0x%llx cid %d is " + "flow controlled\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR); goto out; } @@ -1690,9 +1794,10 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE) && !(mpts->mpts_flags & MPTSF_MP_DEGRADED) && !(mpts->mpts_flags & MPTSF_FASTJ_SEND)) { - mptcplog((LOG_ERR, "%s: mp_so 0x%llx cid %d not " + mptcplog((LOG_ERR, "MPTCP Sender: %s mp_so 0x%llx cid %d not " "MPTCP capable\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid)); + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR); goto out; } @@ -1741,8 +1846,9 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) { u_int64_t len = 0; len = mp_tp->mpt_snduna - mpt_dsn; + MPT_UNLOCK(mp_tp); sbdrop(&mp_so->so_snd, (int)len); - + MPT_LOCK(mp_tp); } /* @@ -1820,12 +1926,13 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) mpt_mbuf = mpt_mbuf->m_next; mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn; } - if ((mpts->mpts_connid == 2) || (mpts->mpts_flags & MPTSF_MP_DEGRADED)) - mptcplog2((LOG_INFO, "%s: snduna = %llu off = %lld id = %d" - " %llu \n", - __func__, - mp_tp->mpt_snduna, off, mpts->mpts_connid, - mpts->mpts_sndnxt)); + if (mpts->mpts_flags & MPTSF_MP_DEGRADED) + 
mptcplog((LOG_DEBUG, "MPTCP Sender: %s cid = %d " + "snduna = %llu sndnxt = %llu probe %d\n", + __func__, mpts->mpts_connid, + mp_tp->mpt_snduna, mpts->mpts_sndnxt, + mpts->mpts_probecnt), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); VERIFY(mpt_mbuf && (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP)); @@ -1870,19 +1977,6 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) tail = m; } - /* last contiguous mapping is stored for error cases */ - if (mpts->mpts_lastmap.mptsl_dsn + - mpts->mpts_lastmap.mptsl_len == mpt_dsn) { - mpts->mpts_lastmap.mptsl_len += tot_sent; - } else if (MPTCP_SEQ_LT((mpts->mpts_lastmap.mptsl_dsn + - mpts->mpts_lastmap.mptsl_len), mpt_dsn)) { - if (m->m_pkthdr.mp_dsn == 0) - panic("%s %llu", __func__, mpt_dsn); - mpts->mpts_lastmap.mptsl_dsn = m->m_pkthdr.mp_dsn; - mpts->mpts_lastmap.mptsl_sseq = m->m_pkthdr.mp_rseq; - mpts->mpts_lastmap.mptsl_len = m->m_pkthdr.mp_rlen; - } - tot_sent += mlen; off = 0; mpt_mbuf = mpt_mbuf->m_next; @@ -1906,7 +2000,18 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) if (error == 0) { mpts->mpts_sndnxt += tot_sent; + + if (mpts->mpts_probesoon && mpts->mpts_maxseg && tot_sent) { + tcpstat.tcps_mp_num_probes++; + if (tot_sent < mpts->mpts_maxseg) + mpts->mpts_probecnt += 1; + else + mpts->mpts_probecnt += + tot_sent/mpts->mpts_maxseg; + } + MPT_LOCK(mp_tp); + if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mpts->mpts_sndnxt)) { if (MPTCP_DATASEQ_HIGH32(mpts->mpts_sndnxt) > MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt)) @@ -1922,14 +2027,18 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) mpts->mpts_flags &= ~MPTSF_FASTJ_SEND; } - if ((mpts->mpts_connid >= 2) || - (mpts->mpts_flags & MPTSF_MP_DEGRADED)) - mptcplog2((LOG_DEBUG, "%s: cid %d wrote %d %d\n", + if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) || + (mpts->mpts_probesoon != 0)) + mptcplog((LOG_DEBUG, "MPTCP Sender: %s cid %d " + "wrote %d %d probe %d probedelta %d\n", __func__, mpts->mpts_connid, (int)tot_sent, - (int) sb_cc)); + (int) 
sb_cc, mpts->mpts_probecnt, + (tcp_now - mpts->mpts_probesoon)), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); } else { - mptcplog((LOG_ERR, "MPTCP ERROR %s: cid %d error %d len %zd\n", - __func__, mpts->mpts_connid, error, tot_sent)); + mptcplog((LOG_ERR, "MPTCP Sender: %s cid %d error %d len %zd\n", + __func__, mpts->mpts_connid, error, tot_sent), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR); } out: return (error); @@ -1966,11 +2075,14 @@ mptcp_subflow_eupcall(struct socket *so, void *arg, uint32_t events) * Called for handling events related to the underlying subflow socket. */ static ev_ret_t -mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { uint32_t events, save_events; ev_ret_t ret = MPTS_EVRET_OK; - + int i = 0; + int mpsub_ev_entry_count = sizeof(mpsub_ev_entry_tbl)/ + sizeof(mpsub_ev_entry_tbl[0]); MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ MPTS_LOCK_ASSERT_HELD(mpts); @@ -1991,88 +2103,22 @@ mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts) DTRACE_MPTCP3(subflow__events, struct mptses *, mpte, struct mptsub *, mpts, uint32_t, events); - mptcplog2((LOG_DEBUG, "%s: cid %d events=%b\n", __func__, - mpts->mpts_connid, events, SO_FILT_HINT_BITS)); - - if ((events & SO_FILT_HINT_MPCANTRCVMORE) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_mpcantrcvmore_ev(mpte, mpts); - events &= ~SO_FILT_HINT_MPCANTRCVMORE; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_MPFAILOVER) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_failover_ev(mpte, mpts); - events &= ~SO_FILT_HINT_MPFAILOVER; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_CONNRESET) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_connreset_ev(mpte, mpts); - events &= ~SO_FILT_HINT_CONNRESET; - ret = ((error >= MPTS_EVRET_OK) ? 
MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_MUSTRST) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_mustrst_ev(mpte, mpts); - events &= ~SO_FILT_HINT_MUSTRST; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_CANTRCVMORE) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_cantrcvmore_ev(mpte, mpts); - events &= ~SO_FILT_HINT_CANTRCVMORE; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_CANTSENDMORE) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_cantsendmore_ev(mpte, mpts); - events &= ~SO_FILT_HINT_CANTSENDMORE; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_TIMEOUT) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_timeout_ev(mpte, mpts); - events &= ~SO_FILT_HINT_TIMEOUT; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_NOSRCADDR) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_nosrcaddr_ev(mpte, mpts); - events &= ~SO_FILT_HINT_NOSRCADDR; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_IFDENIED) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_ifdenied_ev(mpte, mpts); - events &= ~SO_FILT_HINT_IFDENIED; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_SUSPEND) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_suspend_ev(mpte, mpts); - events &= ~SO_FILT_HINT_SUSPEND; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_RESUME) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_resume_ev(mpte, mpts); - events &= ~SO_FILT_HINT_RESUME; - ret = ((error >= MPTS_EVRET_OK) ? 
MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_CONNECTED) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_connected_ev(mpte, mpts); - events &= ~SO_FILT_HINT_CONNECTED; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_MPSTATUS) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_mpstatus_ev(mpte, mpts); - events &= ~SO_FILT_HINT_MPSTATUS; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_DELETEOK) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_deleteok_ev(mpte, mpts); - events &= ~SO_FILT_HINT_DELETEOK; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_DISCONNECTED) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_subflow_disconnected_ev(mpte, mpts); - events &= ~SO_FILT_HINT_DISCONNECTED; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); - } - if ((events & SO_FILT_HINT_MPFASTJ) && (ret >= MPTS_EVRET_OK)) { - ev_ret_t error = mptcp_fastjoin_ev(mpte, mpts); - events &= ~SO_FILT_HINT_MPFASTJ; - ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + mptcplog((LOG_DEBUG, "MPTCP Events: %s cid %d events=%b\n", __func__, + mpts->mpts_connid, events, SO_FILT_HINT_BITS), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE); + + /* + * Process all the socket filter hints and reset the hint + * once it is handled + */ + for (i = 0; (i < mpsub_ev_entry_count) && events; i++) { + if ((events & mpsub_ev_entry_tbl[i].sofilt_hint_mask) && + (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = + mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts, p_mpsofilt_hint); + events &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } } /* @@ -2080,16 +2126,16 @@ mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts) * so loudly complain if we have any unprocessed one(s). 
*/ if (events != 0 || ret < MPTS_EVRET_OK) { - mptcplog((LOG_ERR, "%s%s: cid %d evret %s (%d)" + mptcplog((LOG_ERR, "MPTCP Events %s%s: cid %d evret %s (%d)" " unhandled events=%b\n", - (events != 0) ? "MPTCP_ERROR " : "", + (events != 0) && (ret == MPTS_EVRET_OK) ? "MPTCP_ERROR " : "", __func__, mpts->mpts_connid, - mptcp_evret2str(ret), ret, events, SO_FILT_HINT_BITS)); + mptcp_evret2str(ret), ret, events, SO_FILT_HINT_BITS), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_ERR); } /* clear the ones we've processed */ atomic_bitclear_32(&mpts->mpts_evctl, save_events); - return (ret); } @@ -2097,7 +2143,8 @@ mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_CONNRESET subflow socket event. */ static ev_ret_t -mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { struct socket *mp_so, *so; struct mptcb *mp_tp; @@ -2113,8 +2160,10 @@ mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts) linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) && !(mp_so->so_flags & SOF_PCBCLEARING)); - mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__, - mpts->mpts_connid, (linger ? "YES" : "NO"))); + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s: cid %d [linger %s]\n", __func__, + mpts->mpts_connid, (linger ? "YES" : "NO")), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); /* * We got a TCP RST for this subflow connection. 
@@ -2131,11 +2180,7 @@ mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts) mpts->mpts_soerror = mp_so->so_error = ECONNREFUSED; } else if (mpte->mpte_nummpcapflows < 1) { mpts->mpts_soerror = mp_so->so_error = ECONNRESET; - MPT_UNLOCK(mp_tp); - MPTS_UNLOCK(mpts); - soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNRESET); - MPTS_LOCK(mpts); - MPT_LOCK(mp_tp); + *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNRESET; } MPT_UNLOCK(mp_tp); @@ -2151,8 +2196,10 @@ mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_CANTRCVMORE subflow socket event. */ static ev_ret_t -mptcp_subflow_cantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_cantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { +#pragma unused(p_mpsofilt_hint) struct socket *so; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ @@ -2160,7 +2207,9 @@ mptcp_subflow_cantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts) so = mpts->mpts_socket; - mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid)); + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s: cid %d\n", __func__, mpts->mpts_connid), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); /* * We got a FIN for this subflow connection. This subflow socket @@ -2176,8 +2225,10 @@ mptcp_subflow_cantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_CANTSENDMORE subflow socket event. 
*/ static ev_ret_t -mptcp_subflow_cantsendmore_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_cantsendmore_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { +#pragma unused(p_mpsofilt_hint) struct socket *so; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ @@ -2185,7 +2236,10 @@ mptcp_subflow_cantsendmore_ev(struct mptses *mpte, struct mptsub *mpts) so = mpts->mpts_socket; - mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid)); + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s: cid %d\n", __func__, mpts->mpts_connid), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); + return (MPTS_EVRET_OK); /* keep the subflow socket around */ } @@ -2193,8 +2247,10 @@ mptcp_subflow_cantsendmore_ev(struct mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_TIMEOUT subflow socket event. */ static ev_ret_t -mptcp_subflow_timeout_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_timeout_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { +#pragma unused(p_mpsofilt_hint) struct socket *mp_so, *so; struct mptcb *mp_tp; boolean_t linger; @@ -2209,8 +2265,10 @@ mptcp_subflow_timeout_ev(struct mptses *mpte, struct mptsub *mpts) linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) && !(mp_so->so_flags & SOF_PCBCLEARING)); - mptcplog((LOG_NOTICE, "%s: cid %d [linger %s]\n", __func__, - mpts->mpts_connid, (linger ? "YES" : "NO"))); + mptcplog((LOG_NOTICE, "MPTCP Events: " + "%s: cid %d [linger %s]\n", __func__, + mpts->mpts_connid, (linger ? "YES" : "NO")), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); if (mpts->mpts_soerror == 0) mpts->mpts_soerror = ETIMEDOUT; @@ -2242,8 +2300,10 @@ mptcp_subflow_timeout_ev(struct mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_NOSRCADDR subflow socket event. 
*/ static ev_ret_t -mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { +#pragma unused(p_mpsofilt_hint) struct socket *mp_so, *so; struct mptcb *mp_tp; boolean_t linger; @@ -2269,8 +2329,10 @@ mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts) linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) && !(mp_so->so_flags & SOF_PCBCLEARING)); - mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__, - mpts->mpts_connid, (linger ? "YES" : "NO"))); + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s cid %d [linger %s]\n", __func__, + mpts->mpts_connid, (linger ? "YES" : "NO")), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); if (mpts->mpts_soerror == 0) mpts->mpts_soerror = EADDRNOTAVAIL; @@ -2307,7 +2369,8 @@ mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts) * indicates that the remote side sent a Data FIN */ static ev_ret_t -mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { struct socket *so, *mp_so; struct mptcb *mp_tp; @@ -2318,7 +2381,9 @@ mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts) so = mpts->mpts_socket; mp_tp = mpte->mpte_mptcb; - mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid)); + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s: cid %d\n", __func__, mpts->mpts_connid), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); /* * We got a Data FIN for the MPTCP connection. 
@@ -2328,11 +2393,7 @@ mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts) */ MPT_LOCK(mp_tp); if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT) { - MPT_UNLOCK(mp_tp); - MPTS_UNLOCK(mpts); - soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE); - MPTS_LOCK(mpts); - MPT_LOCK(mp_tp); + *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE; } MPT_UNLOCK(mp_tp); return (MPTS_EVRET_OK); /* keep the subflow socket around */ @@ -2342,7 +2403,8 @@ mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_MPFAILOVER subflow socket event */ static ev_ret_t -mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { struct mptsub *mpts_alt = NULL; struct socket *so = NULL; @@ -2352,18 +2414,23 @@ mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts) MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ MPTS_LOCK_ASSERT_HELD(mpts); mp_so = mpte->mpte_mppcb->mpp_socket; - mptcplog2((LOG_NOTICE, "%s: mp_so 0x%llx\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); + mptcplog((LOG_NOTICE, "MPTCP Events: " + "%s: mp_so 0x%llx\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); MPTS_UNLOCK(mpts); - mpts_alt = mptcp_get_subflow(mpte, mpts); + mpts_alt = mptcp_get_subflow(mpte, mpts, NULL); /* * If there is no alternate eligible subflow, ignore the * failover hint. 
*/ if (mpts_alt == NULL) { - mptcplog2((LOG_WARNING, "%s: no alternate path\n", __func__)); + mptcplog((LOG_WARNING, "MPTCP Events: " + "%s: no alternate path\n", __func__), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_ERR); + if (mptcp_delayed_subf_start) { mpts_alt = mptcp_get_pending_subflow(mpte, mpts); if (mpts_alt != NULL) { @@ -2393,9 +2460,12 @@ mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts) socket_unlock(so, 1); } if (altpath_exists) { - mptcplog2((LOG_INFO, "%s: cid = %d\n", - __func__, mpts_alt->mpts_connid)); + mptcplog((LOG_INFO, "MPTCP Events: " + "%s: cid = %d\n", + __func__, mpts_alt->mpts_connid), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); mpts_alt->mpts_flags |= MPTSF_ACTIVE; + mpts_alt->mpts_peerswitch = 0; struct mptcb *mp_tp = mpte->mpte_mptcb; /* Bring the subflow's notion of snd_nxt into the send window */ MPT_LOCK(mp_tp); @@ -2409,12 +2479,13 @@ mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts) MPTS_UNLOCK(mpts_alt); if (altpath_exists) { - soevent(mp_so, - SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED); - mptcplog((LOG_NOTICE, "%s: mp_so 0x%llx switched from " + *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED; + mptcplog((LOG_NOTICE, "MPTCP Events: " + "%s: mp_so 0x%llx switched from " "%d to %d\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), - mpts->mpts_connid, mpts_alt->mpts_connid)); + mpts->mpts_connid, mpts_alt->mpts_connid), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); tcpstat.tcps_mp_switches++; } @@ -2423,8 +2494,9 @@ mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts) mpts->mpts_flags |= MPTSF_FAILINGOVER; mpts->mpts_flags &= ~MPTSF_ACTIVE; } else { - mptcplog2((LOG_INFO, "%s: no alt cid = %d\n", - __func__, mpts->mpts_connid)); + mptcplog((LOG_DEBUG, "MPTCP Events %s: no alt cid = %d\n", + __func__, mpts->mpts_connid), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); done: so = mpts->mpts_socket; socket_lock(so, 1); @@ -2439,7 +2511,8 @@ mptcp_subflow_failover_ev(struct 
mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_IFDENIED subflow socket event. */ static ev_ret_t -mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { struct socket *mp_so, *so; struct mptcb *mp_tp; @@ -2455,8 +2528,10 @@ mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts) linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) && !(mp_so->so_flags & SOF_PCBCLEARING)); - mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__, - mpts->mpts_connid, (linger ? "YES" : "NO"))); + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s: cid %d [linger %s]\n", __func__, + mpts->mpts_connid, (linger ? "YES" : "NO")), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); if (mpts->mpts_soerror == 0) mpts->mpts_soerror = EHOSTUNREACH; @@ -2469,9 +2544,7 @@ mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts) * has been established, let the upper layer call disconnectx. */ mptcp_subflow_disconnect(mpte, mpts, !linger); - MPTS_UNLOCK(mpts); - - soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED); + *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED; MPT_LOCK(mp_tp); if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { @@ -2479,7 +2552,6 @@ mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts) } MPT_UNLOCK(mp_tp); - MPTS_LOCK(mpts); /* * Keep the subflow socket around, unless the MPTCP socket has * been detached or the subflow has been disconnected explicitly, @@ -2492,8 +2564,10 @@ mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_SUSPEND subflow socket event. 
*/ static ev_ret_t -mptcp_subflow_suspend_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_suspend_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { +#pragma unused(p_mpsofilt_hint) struct socket *so; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ @@ -2504,8 +2578,9 @@ mptcp_subflow_suspend_ev(struct mptses *mpte, struct mptsub *mpts) /* the subflow connection is being flow controlled */ mpts->mpts_flags |= MPTSF_SUSPENDED; - mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, - mpts->mpts_connid)); + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s: cid %d\n", __func__, + mpts->mpts_connid), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); return (MPTS_EVRET_OK); /* keep the subflow socket around */ } @@ -2514,8 +2589,10 @@ mptcp_subflow_suspend_ev(struct mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_RESUME subflow socket event. */ static ev_ret_t -mptcp_subflow_resume_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_resume_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { +#pragma unused(p_mpsofilt_hint) struct socket *so; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ @@ -2526,7 +2603,9 @@ mptcp_subflow_resume_ev(struct mptses *mpte, struct mptsub *mpts) /* the subflow connection is no longer flow controlled */ mpts->mpts_flags &= ~MPTSF_SUSPENDED; - mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid)); + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s: cid %d\n", __func__, mpts->mpts_connid), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); return (MPTS_EVRET_OK); /* keep the subflow socket around */ } @@ -2535,7 +2614,8 @@ mptcp_subflow_resume_ev(struct mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_CONNECTED subflow socket event. 
*/ static ev_ret_t -mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { char buf0[MAX_IPv6_STR_LEN], buf1[MAX_IPv6_STR_LEN]; struct sockaddr_entry *src_se, *dst_se; @@ -2545,6 +2625,9 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) struct ifnet *outifp; int af, error = 0; boolean_t mpok = FALSE; + boolean_t cell = FALSE; + boolean_t wifi = FALSE; + boolean_t wired = FALSE; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ VERIFY(mpte->mpte_mppcb != NULL); @@ -2563,8 +2646,10 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) socket_lock(so, 0); if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) && (so->so_state & SS_ISCONNECTED)) { - mptcplog((LOG_DEBUG, "%s: cid %d disconnect before tcp connect\n", - __func__, mpts->mpts_connid)); + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s: cid %d disconnect before tcp connect\n", + __func__, mpts->mpts_connid), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); (void) soshutdownlock(so, SHUT_RD); (void) soshutdownlock(so, SHUT_WR); (void) sodisconnectlocked(so); @@ -2635,13 +2720,15 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) if ((mpts->mpts_flags & MPTSF_BOUND_IP) && bcmp(&ms->sin_addr, &s->sin_addr, sizeof (ms->sin_addr)) != 0) { - mptcplog((LOG_ERR, "%s: cid %d local " + mptcplog((LOG_ERR, "MPTCP Events: " + "%s: cid %d local " "address %s (expected %s)\n", __func__, mpts->mpts_connid, inet_ntop(AF_INET, (void *)&s->sin_addr.s_addr, buf0, sizeof (buf0)), inet_ntop(AF_INET, (void *)&ms->sin_addr.s_addr, buf1, - sizeof (buf1)))); + sizeof (buf1))), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_ERR); } bcopy(s, ms, sizeof (*s)); } @@ -2660,13 +2747,15 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) if ((mpts->mpts_flags & MPTSF_BOUND_IP) && bcmp(&ms->sin6_addr, &s->sin6_addr, sizeof (ms->sin6_addr)) != 0) { - 
mptcplog((LOG_ERR, "%s: cid %d local " + mptcplog((LOG_ERR, "MPTCP Events: " + "%s: cid %d local " "address %s (expected %s)\n", __func__, mpts->mpts_connid, inet_ntop(AF_INET6, (void *)&s->sin6_addr, buf0, sizeof (buf0)), inet_ntop(AF_INET6, (void *)&ms->sin6_addr, buf1, - sizeof (buf1)))); + sizeof (buf1))), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_ERR); } bcopy(s, ms, sizeof (*s)); } @@ -2679,8 +2768,10 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) } if (error != 0) { - mptcplog((LOG_ERR, "%s: cid %d getsockaddr failed (%d)\n", - __func__, mpts->mpts_connid, error)); + mptcplog((LOG_ERR, "MPTCP Events " + "%s: cid %d getsockaddr failed (%d)\n", + __func__, mpts->mpts_connid, error), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_ERR); } /* get/verify the outbound interface */ @@ -2688,10 +2779,12 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) if (mpts->mpts_flags & MPTSF_BOUND_IF) { VERIFY(mpts->mpts_outif != NULL); if (mpts->mpts_outif != outifp) { - mptcplog((LOG_ERR, "%s: cid %d outif %s " + mptcplog((LOG_ERR, "MPTCP Events: %s: cid %d outif %s " "(expected %s)\n", __func__, mpts->mpts_connid, ((outifp != NULL) ? 
outifp->if_xname : "NULL"), - mpts->mpts_outif->if_xname)); + mpts->mpts_outif->if_xname), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_ERR); + if (outifp == NULL) outifp = mpts->mpts_outif; } @@ -2699,9 +2792,31 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) mpts->mpts_outif = outifp; } + mpts->mpts_srtt = (intotcpcb(sotoinpcb(so)))->t_srtt; + mpts->mpts_rxtcur = (intotcpcb(sotoinpcb(so)))->t_rxtcur; + mpts->mpts_maxseg = (intotcpcb(sotoinpcb(so)))->t_maxseg; + + cell = IFNET_IS_CELLULAR(mpts->mpts_outif); + wifi = (!cell && IFNET_IS_WIFI(mpts->mpts_outif)); + wired = (!wifi && IFNET_IS_WIRED(mpts->mpts_outif)); + + if (cell) + mpts->mpts_linktype |= MPTSL_CELL; + else if (wifi) + mpts->mpts_linktype |= MPTSL_WIFI; + else if (wired) + mpts->mpts_linktype |= MPTSL_WIRED; + socket_unlock(so, 0); - mptcplog((LOG_DEBUG, "%s: cid %d outif %s %s[%d] -> %s[%d] " + mptcplog((LOG_DEBUG, "MPTCP Sender: %s: cid %d " + "establishment srtt %d \n", __func__, + mpts->mpts_connid, (mpts->mpts_srtt >> 5)), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); + + + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: cid %d outif %s %s[%d] -> %s[%d] " "is %s\n", __func__, mpts->mpts_connid, ((outifp != NULL) ? outifp->if_xname : "NULL"), inet_ntop(af, (af == AF_INET) ? (void *)&SIN(src_se->se_addr)->sin_addr.s_addr : @@ -2714,12 +2829,13 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) ((af == AF_INET) ? ntohs(SIN(dst_se->se_addr)->sin_port) : ntohs(SIN6(dst_se->se_addr)->sin6_port)), ((mpts->mpts_flags & MPTSF_MP_CAPABLE) ? 
- "MPTCP capable" : "a regular TCP"))); + "MPTCP capable" : "a regular TCP")), + (MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG), MPTCP_LOGLVL_LOG); mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE); MPTS_UNLOCK(mpts); - soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED); + *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED; MPT_LOCK(mp_tp); if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { @@ -2735,6 +2851,10 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) MPT_UNLOCK(mp_tp); mpok = FALSE; } else { + mptcplog((LOG_DEBUG, "MPTCP State: " + "MPTCPS_ESTABLISHED for mp_so 0x%llx \n", + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), + MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG); mp_tp->mpt_state = MPTCPS_ESTABLISHED; mpte->mpte_associd = mpts->mpts_connid; DTRACE_MPTCP2(state__change, @@ -2800,7 +2920,8 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_DISCONNECTED subflow socket event. */ static ev_ret_t -mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { struct socket *mp_so, *so; struct mptcb *mp_tp; @@ -2816,8 +2937,10 @@ mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts) linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) && !(mp_so->so_flags & SOF_PCBCLEARING)); - mptcplog2((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__, - mpts->mpts_connid, (linger ? "YES" : "NO"))); + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s: cid %d [linger %s]\n", __func__, + mpts->mpts_connid, (linger ? "YES" : "NO")), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); if (mpts->mpts_flags & MPTSF_DISCONNECTED) return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE); @@ -2838,16 +2961,15 @@ mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts) * Right now, we simply unblock any waiters at the MPTCP socket layer * if the MPTCP connection has not been established. 
*/ - MPTS_UNLOCK(mpts); - - soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED); + *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED; if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) { mpte->mpte_nummpcapflows--; if (mpte->mpte_active_sub == mpts) { mpte->mpte_active_sub = NULL; - mptcplog((LOG_DEBUG, "%s: resetting active subflow \n", - __func__)); + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s: resetting active subflow \n", + __func__), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); } mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET; } @@ -2855,12 +2977,13 @@ mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts) MPT_LOCK(mp_tp); if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { MPT_UNLOCK(mp_tp); + MPTS_UNLOCK(mpts); soisdisconnected(mp_so); + MPTS_LOCK(mpts); } else { MPT_UNLOCK(mp_tp); } - MPTS_LOCK(mpts); /* * The underlying subflow socket has been disconnected; * it is no longer useful to us. Keep the subflow socket @@ -2875,11 +2998,12 @@ mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_MPSTATUS subflow socket event */ static ev_ret_t -mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { struct socket *mp_so, *so; struct mptcb *mp_tp; - ev_ret_t ret = MPTS_EVRET_OK_UPDATE; + ev_ret_t ret = MPTS_EVRET_OK; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ VERIFY(mpte->mpte_mppcb != NULL); @@ -2918,16 +3042,24 @@ mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts) if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) { VERIFY(!(mp_tp->mpt_flags & MPTCPF_JOIN_READY)); ret = MPTS_EVRET_DISCONNECT_FALLBACK; + *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | + SO_FILT_HINT_CONNINFO_UPDATED; } else if (mpts->mpts_flags & MPTSF_MP_READY) { mp_tp->mpt_flags |= MPTCPF_JOIN_READY; ret = MPTS_EVRET_CONNECT_PENDING; + } else { + *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | + 
SO_FILT_HINT_CONNINFO_UPDATED; } - mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx mpt_flags=%b cid %d " + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s: mp_so 0x%llx mpt_flags=%b cid %d " "mptsf=%b\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mpte->mpte_mppcb->mpp_socket), mp_tp->mpt_flags, MPTCPF_BITS, mpts->mpts_connid, - mpts->mpts_flags, MPTSF_BITS)); + mpts->mpts_flags, MPTSF_BITS), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); + done: MPT_UNLOCK(mp_tp); socket_unlock(so, 0); @@ -2938,7 +3070,8 @@ mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts) * Handle SO_FILT_HINT_MUSTRST subflow socket event */ static ev_ret_t -mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { struct socket *mp_so, *so; struct mptcb *mp_tp; @@ -2982,25 +3115,35 @@ mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts) &t_template->tt_t, (struct mbuf *)NULL, tp->rcv_nxt, tp->snd_una, TH_RST, &tra); (void) m_free(dtom(t_template)); - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx cid %d \n", + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s: mp_so 0x%llx cid %d \n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), - so, mpts->mpts_connid)); + so, mpts->mpts_connid), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); } socket_unlock(so, 0); mptcp_subflow_disconnect(mpte, mpts, !linger); - MPTS_UNLOCK(mpts); - soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED | - SO_FILT_HINT_CONNRESET); + *p_mpsofilt_hint |= (SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED); + + if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) + *p_mpsofilt_hint |= SO_FILT_HINT_CONNRESET; MPT_LOCK(mp_tp); if ((mp_tp->mpt_state < MPTCPS_ESTABLISHED) || (mp_tp->mpt_state == MPTCPS_FASTCLOSE_WAIT)) { mp_so->so_error = ECONNABORTED; } + /* + * Ideally there should be a state transition for when a FASTCLOSE + * is received. 
Right now we keep the connection in MPTCPS_ESTABLISHED + * state and only go to terminal state when the user level code calls + * close after processing the SO_FILT_HINT_CONNRESET event. + */ + if (mp_tp->mpt_gc_ticks == MPT_GC_TICKS) + mp_tp->mpt_gc_ticks = MPT_GC_TICKS_FAST; MPT_UNLOCK(mp_tp); - MPTS_LOCK(mpts); /* * Keep the subflow socket around unless the subflow has been * disconnected explicitly. @@ -3009,16 +3152,20 @@ mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts) } static ev_ret_t -mptcp_fastjoin_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_fastjoin_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { +#pragma unused(p_mpsofilt_hint) MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ MPTS_LOCK_ASSERT_HELD(mpts); VERIFY(mpte->mpte_mppcb != NULL); if (mpte->mpte_nummpcapflows == 0) { struct mptcb *mp_tp = mpte->mpte_mptcb; - mptcplog((LOG_DEBUG,"%s %llx %llx \n", - __func__, mp_tp->mpt_snduna, mpts->mpts_sndnxt)); + mptcplog((LOG_DEBUG,"MPTCP Events: %s: %llx %llx \n", + __func__, mp_tp->mpt_snduna, mpts->mpts_sndnxt), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); + mpte->mpte_active_sub = mpts; mpts->mpts_flags |= (MPTSF_FASTJ_SEND | MPTSF_ACTIVE); MPT_LOCK(mp_tp); @@ -3038,12 +3185,17 @@ mptcp_fastjoin_ev(struct mptses *mpte, struct mptsub *mpts) } static ev_ret_t -mptcp_deleteok_ev(struct mptses *mpte, struct mptsub *mpts) +mptcp_deleteok_ev(struct mptses *mpte, struct mptsub *mpts, + uint64_t *p_mpsofilt_hint) { +#pragma unused(p_mpsofilt_hint) MPTE_LOCK_ASSERT_HELD(mpte); MPTS_LOCK_ASSERT_HELD(mpts); VERIFY(mpte->mpte_mppcb != NULL); - mptcplog((LOG_DEBUG, "%s cid %d\n", __func__, mpts->mpts_connid)); + + mptcplog((LOG_DEBUG, "MPTCP Events: " + "%s cid %d\n", __func__, mpts->mpts_connid), + MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); mpts->mpts_flags |= MPTSF_DELETEOK; if (mpts->mpts_flags & MPTSF_DISCONNECTED) @@ -3070,8 +3222,7 @@ mptcp_evret2str(ev_ret_t ret) case MPTS_EVRET_OK: c = "MPTS_EVRET_OK"; break; 
- case MPTS_EVRET_OK_UPDATE: - c = "MPTS_EVRET_OK_UPDATE"; + default: break; } return (c); @@ -3145,17 +3296,21 @@ mptcp_subflow_sosetopt(struct mptses *mpte, struct socket *so, error = sosetoptlock(so, &sopt, 0); /* already locked */ if (error == 0) { - mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s " + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx sopt %s " "val %d set successful\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name, - buf, sizeof (buf)), mpo->mpo_intval)); + buf, sizeof (buf)), mpo->mpo_intval), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); } else { - mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s " + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: mp_so 0x%llx sopt %s " "val %d set error %d\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name, - buf, sizeof (buf)), mpo->mpo_intval, error)); + buf, sizeof (buf)), mpo->mpo_intval, error), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); } return (error); } @@ -3188,16 +3343,20 @@ mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so, error = sogetoptlock(so, &sopt, 0); /* already locked */ if (error == 0) { - mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s " + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx sopt %s " "val %d get successful\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name, - buf, sizeof (buf)), mpo->mpo_intval)); + buf, sizeof (buf)), mpo->mpo_intval), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); } else { - mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s get error %d\n", + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: mp_so 0x%llx sopt %s get error %d\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(mpo->mpo_level, - mpo->mpo_name, buf, sizeof (buf)), error)); + mpo->mpo_name, buf, sizeof (buf)), error), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); } return (error); } @@ -3218,8 +3377,6 @@ mptcp_gc(struct mppcbinfo *mppi) 
lck_mtx_assert(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED); - mptcplog3((LOG_DEBUG, "%s: running\n", __func__)); - TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) { struct socket *mp_so; struct mptses *mpte; @@ -3233,16 +3390,20 @@ mptcp_gc(struct mppcbinfo *mppi) mp_tp = mpte->mpte_mptcb; VERIFY(mp_tp != NULL); - mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx found " + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx found " "(u=%d,r=%d,s=%d)\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_usecount, - mp_so->so_retaincnt, mpp->mpp_state)); + mp_so->so_retaincnt, mpp->mpp_state), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); if (!lck_mtx_try_lock(&mpp->mpp_lock)) { - mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx skipped " + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx skipped " "(u=%d,r=%d)\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), - mp_so->so_usecount, mp_so->so_retaincnt)); + mp_so->so_usecount, mp_so->so_retaincnt), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); active++; continue; } @@ -3252,12 +3413,15 @@ mptcp_gc(struct mppcbinfo *mppi) boolean_t wakeup = FALSE; struct mptsub *mpts, *tmpts; - mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx skipped " + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx skipped " "[u=%d,r=%d] %d %d\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_usecount, mp_so->so_retaincnt, mp_tp->mpt_gc_ticks, - mp_tp->mpt_state)); + mp_tp->mpt_state), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); + MPT_LOCK(mp_tp); if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) { if (mp_tp->mpt_gc_ticks > 0) @@ -3290,11 +3454,13 @@ mptcp_gc(struct mppcbinfo *mppi) } if (mpp->mpp_state != MPPCB_STATE_DEAD) { - mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx skipped " + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx skipped " "[u=%d,r=%d,s=%d]\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_usecount, mp_so->so_retaincnt, - mpp->mpp_state)); + mpp->mpp_state), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); 
lck_mtx_unlock(&mpp->mpp_lock); active++; continue; @@ -3307,10 +3473,13 @@ mptcp_gc(struct mppcbinfo *mppi) * allow it to be destroyed below during the next round. */ if (mp_so->so_usecount == 1) { - mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx scheduled for " + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx scheduled for " "termination [u=%d,r=%d]\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), - mp_so->so_usecount, mp_so->so_retaincnt)); + mp_so->so_usecount, mp_so->so_retaincnt), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); + /* signal MPTCP thread to terminate */ mptcp_thread_terminate_signal(mpte); lck_mtx_unlock(&mpp->mpp_lock); @@ -3318,9 +3487,12 @@ mptcp_gc(struct mppcbinfo *mppi) continue; } - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n", + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), - mp_so->so_usecount, mp_so->so_retaincnt)); + mp_so->so_usecount, mp_so->so_retaincnt), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); + DTRACE_MPTCP4(dispose, struct socket *, mp_so, struct sockbuf *, &mp_so->so_rcv, struct sockbuf *, &mp_so->so_snd, @@ -3362,8 +3534,8 @@ mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno) struct mptses * mptcp_close(struct mptses *mpte, struct mptcb *mp_tp) { - struct socket *mp_so; - struct mptsub *mpts, *tmpts; + struct socket *mp_so = NULL; + struct mptsub *mpts = NULL, *tmpts = NULL; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ MPT_LOCK_ASSERT_HELD(mp_tp); @@ -3451,7 +3623,7 @@ mptcp_thread_dowork(struct mptses *mpte) struct socket *mp_so; struct mptsub *mpts, *tmpts; boolean_t connect_pending = FALSE, disconnect_fallback = FALSE; - boolean_t conninfo_update = FALSE; + uint64_t mpsofilt_hint_mask = 0; MPTE_LOCK(mpte); /* same as MP socket lock */ VERIFY(mpte->mpte_mppcb != NULL); @@ -3468,11 +3640,16 @@ mptcp_thread_dowork(struct mptses *mpte) mptcp_update_last_owner(mpts, mp_so); 
mptcp_subflow_input(mpte, mpts); - ret = mptcp_subflow_events(mpte, mpts); + + mptcp_get_rtt_measurement(mpts, mpte); + + ret = mptcp_subflow_events(mpte, mpts, &mpsofilt_hint_mask); if (mpts->mpts_flags & MPTSF_ACTIVE) { - mptcplog3((LOG_INFO, "%s: cid %d \n", __func__, - mpts->mpts_connid)); + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: cid %d \n", __func__, + mpts->mpts_connid), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); (void) mptcp_subflow_output(mpte, mpts); } @@ -3487,9 +3664,6 @@ mptcp_thread_dowork(struct mptses *mpte) MPTS_UNLOCK(mpts); switch (ret) { - case MPTS_EVRET_OK_UPDATE: - conninfo_update = TRUE; - break; case MPTS_EVRET_OK: /* nothing to do */ break; @@ -3502,13 +3676,19 @@ mptcp_thread_dowork(struct mptses *mpte) case MPTS_EVRET_DISCONNECT_FALLBACK: disconnect_fallback = TRUE; break; + default: + mptcplog((LOG_DEBUG, + "MPTCP Socket: %s: mptcp_subflow_events " + "returned invalid value: %d\n", __func__, + ret), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); + break; } MPTS_REMREF(mpts); /* ours */ } - if (conninfo_update) { - soevent(mp_so, SO_FILT_HINT_LOCKED | - SO_FILT_HINT_CONNINFO_UPDATED); + if (mpsofilt_hint_mask) { + soevent(mp_so, mpsofilt_hint_mask); } if (!connect_pending && !disconnect_fallback) { @@ -3531,7 +3711,7 @@ mptcp_thread_dowork(struct mptses *mpte) mpts->mpts_flags |= MPTSF_MP_DEGRADED; if (mpts->mpts_flags & (MPTSF_DISCONNECTING| - MPTSF_DISCONNECTED)) { + MPTSF_DISCONNECTED|MPTSF_CONNECT_PENDING)) { MPTS_UNLOCK(mpts); continue; } @@ -3843,8 +4023,6 @@ mptcp_free_key(mptcp_key_t *key) struct mptcp_key_entry *key_elm; int pt = RandomULong(); - mptcplog((LOG_INFO, "%s\n", __func__)); - lck_mtx_lock(&mptcp_keys_pool.mkph_lock); key_holder = (struct mptcp_key_entry *)(void*)((caddr_t)key - offsetof(struct mptcp_key_entry, mkey_value)); @@ -3974,10 +4152,12 @@ mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so) } if (found) { LIST_REMOVE(sauth_entry, msae_next); - zfree(mpt_subauth_zone, sauth_entry); } 
MPT_UNLOCK(mp_tp); + if (found) + zfree(mpt_subauth_zone, sauth_entry); + tp->t_mptcb = NULL; socket_unlock(so, 0); } @@ -4014,18 +4194,21 @@ mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp, if (sauth_entry->msae_laddr_id == laddr_id) { if ((sauth_entry->msae_raddr_id != 0) && (sauth_entry->msae_raddr_id != raddr_id)) { - mptcplog((LOG_ERR, "MPTCP ERROR %s: mismatched" + mptcplog((LOG_ERR, "MPTCP Socket: %s mismatched" " address ids %d %d \n", __func__, raddr_id, - sauth_entry->msae_raddr_id)); + sauth_entry->msae_raddr_id), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); MPT_UNLOCK(mp_tp); return; } sauth_entry->msae_raddr_id = raddr_id; if ((sauth_entry->msae_raddr_rand != 0) && (sauth_entry->msae_raddr_rand != raddr_rand)) { - mptcplog((LOG_ERR, "%s: dup SYN_ACK %d %d \n", + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: dup SYN_ACK %d %d \n", __func__, raddr_rand, - sauth_entry->msae_raddr_rand)); + sauth_entry->msae_raddr_rand), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); MPT_UNLOCK(mp_tp); return; } @@ -4185,7 +4368,7 @@ mptcp_init_authparms(struct mptcb *mp_tp) MPT_LOCK_ASSERT_HELD(mp_tp); /* Only Version 0 is supported for auth purposes */ - if (mp_tp->mpt_version != MP_DRAFT_VERSION_12) + if (mp_tp->mpt_version != MPTCP_STD_VERSION_0) return (-1); /* Setup local and remote tokens and Initial DSNs */ @@ -4197,8 +4380,8 @@ mptcp_init_authparms(struct mptcb *mp_tp) if (!mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest, SHA1_RESULTLEN)) { - mptcplog((LOG_ERR, "MPTCP ERROR %s: unexpected failure", - __func__)); + mptcplog((LOG_ERR, "MPTCP Socket: %s: unexpected failure", + __func__), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); return (-1); } mptcp_generate_token(remote_digest, SHA1_RESULTLEN, @@ -4224,7 +4407,7 @@ static void mptcp_conn_properties(struct mptcb *mp_tp) { /* There is only Version 0 at this time */ - mp_tp->mpt_version = MP_DRAFT_VERSION_12; + mp_tp->mpt_version = MPTCP_STD_VERSION_0; /* Set DSS checksum flag */ if (mptcp_dss_csum) @@ -4300,7 +4483,7 @@ 
mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m) if (m == NULL) return; - mp_tp = &((struct mpp_mtp *)mpp)->mtcb; + __IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb); MPT_LOCK(mp_tp); if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { MPT_UNLOCK(mp_tp); @@ -4344,10 +4527,12 @@ mptcp_preproc_sbdrop(struct mbuf *m, unsigned int len) if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) { m->m_pkthdr.mp_rseq += len; } - mptcplog3((LOG_INFO, - "%s: %llu %u %d %d\n", __func__, + mptcplog((LOG_DEBUG, "MPTCP Sender: " + "%s: dsn 0x%llu ssn %u len %d %d\n", + __func__, m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rseq, - m->m_pkthdr.mp_rlen, len)); + m->m_pkthdr.mp_rlen, len), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); m->m_pkthdr.mp_rlen -= len; return; } @@ -4445,21 +4630,24 @@ mptcp_output_getm_dsnmap64(struct socket *so, int off, uint32_t datalen, /* case A */ runlen += mnext->m_pkthdr.mp_rlen; contig_len += mnext->m_pkthdr.mp_rlen; - mptcplog3((LOG_INFO, "%s: contig \n", - __func__)); + mptcplog((LOG_DEBUG, "MPTCP Sender: %s: contig \n", + __func__), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); } else { /* case B */ - mptcplog((LOG_INFO, + mptcplog((LOG_DEBUG, "MPTCP Sender: " "%s: discontig datalen %d contig_len %d cc %d \n", - __func__, datalen, contig_len, so->so_snd.sb_cc)); + __func__, datalen, contig_len, so->so_snd.sb_cc), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); break; } mnext = mnext->m_next; } datalen = min(datalen, UINT16_MAX); *data_len = min(datalen, contig_len); - mptcplog3((LOG_INFO, "%s: %llu %u %d %d \n", __func__, - *dsn, *relseq, *data_len, off)); + mptcplog((LOG_DEBUG, "MPTCP Sender: " + "%s: %llu %u %d %d \n", __func__, + *dsn, *relseq, *data_len, off), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); } /* @@ -4485,13 +4673,13 @@ mptcp_adj_rcvnxt(struct tcpcb *tp, struct mbuf *m) } /* - * Note that this is called only from tcp_input() which may trim data - * after the dsn mapping is inserted into the mbuf. 
When it trims data - * tcp_input calls m_adj() which does not remove the m_pkthdr even if the - * m_len becomes 0 as a result of trimming the mbuf. The dsn map insertion - * cannot be delayed after trim, because data can be in the reassembly - * queue for a while and the DSN option info in tp will be overwritten for - * every new packet received. + * Note that this is called only from tcp_input() via mptcp_input_preproc() + * tcp_input() may trim data after the dsn mapping is inserted into the mbuf. + * When it trims data tcp_input calls m_adj() which does not remove the + * m_pkthdr even if the m_len becomes 0 as a result of trimming the mbuf. + * The dsn map insertion cannot be delayed after trim, because data can be in + * the reassembly queue for a while and the DSN option info in tp will be + * overwritten for every new packet received. * The dsn map will be adjusted just prior to appending to subflow sockbuf * with mptcp_adj_rmap() */ @@ -4542,11 +4730,6 @@ mptcp_adj_rmap(struct socket *so, struct mbuf *m) return 0; } - if (m->m_pkthdr.len > (int)datalen) { - panic("%s: mbuf len = %d expected = %d", __func__, - m->m_pkthdr.len, datalen); - } - old_rcvnxt = tp->rcv_nxt - m->m_pkthdr.len; if (SEQ_GT(old_rcvnxt, sseq)) { /* data trimmed from the left */ @@ -4556,15 +4739,12 @@ mptcp_adj_rmap(struct socket *so, struct mbuf *m) m->m_pkthdr.mp_rlen = m->m_pkthdr.len; } else if (old_rcvnxt == sseq) { /* - * Data was trimmed from the right + * data was trimmed from the right */ m->m_pkthdr.mp_rlen = m->m_pkthdr.len; } else { - /* handle gracefully with reass or fallback */ mptcp_notify_mpfail(so); - m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP; - m_freem(m); - return -1; + return (-1); } mptcp_adj_rcvnxt(tp, m); return 0; @@ -4587,13 +4767,6 @@ mptcp_act_on_txfail(struct socket *so) if (tp == NULL) return; - if (tp->t_state != TCPS_ESTABLISHED) - mptcplog((LOG_INFO, "%s: state = %d \n", __func__, - tp->t_state)); - - mptcplog((LOG_INFO, "%s: Failover = %d \n", __func__, - 
(so->so_flags & SOF_MP_TRYFAILOVER) ? 1 : 0)); - if (so->so_flags & SOF_MP_TRYFAILOVER) { return; } @@ -4625,8 +4798,9 @@ mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq) (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) { off = dsn_fail - dsn; *tcp_seq = m->m_pkthdr.mp_rseq + off; - mptcplog((LOG_INFO, "%s: %llu %llu \n", - __func__, dsn, dsn_fail)); + mptcplog((LOG_DEBUG, "MPTCP Sender: %s: %llu %llu \n", + __func__, dsn, dsn_fail), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); return (0); } @@ -4638,7 +4812,9 @@ mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq) * not much else to do. */ - mptcplog((LOG_ERR, "%s: %llu not found \n", __func__, dsn_fail)); + mptcplog((LOG_ERR, "MPTCP Sender: " + "%s: %llu not found \n", __func__, dsn_fail), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); return (-1); } @@ -4858,7 +5034,7 @@ fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts) SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr; } else #endif - { + if ((inp->inp_vflag & INP_IPV4) != 0) { flow->flow_src.ss_family = AF_INET; flow->flow_dst.ss_family = AF_INET; flow->flow_src.ss_len = sizeof(struct sockaddr_in); @@ -4868,8 +5044,15 @@ fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts) SIN(&flow->flow_src)->sin_addr = inp->inp_laddr; SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr; } + flow->flow_len = sizeof(*flow); + flow->flow_tcpci_offset = offsetof(mptcp_flow_t, flow_ci); flow->flow_flags = mpts->mpts_flags; flow->flow_cid = mpts->mpts_connid; + flow->flow_sndnxt = mpts->mpts_sndnxt; + flow->flow_relseq = mpts->mpts_rel_seq; + flow->flow_soerror = mpts->mpts_soerror; + flow->flow_probecnt = mpts->mpts_probecnt; + flow->flow_peerswitch = mpts->mpts_peerswitch; } static int @@ -4899,16 +5082,41 @@ mptcp_pcblist SYSCTL_HANDLER_ARGS } TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) { flows = NULL; - bzero(&mptcpci, sizeof(mptcpci)); lck_mtx_lock(&mpp->mpp_lock); 
VERIFY(mpp->mpp_flags & MPP_ATTACHED); + if (mpp->mpp_flags & MPP_DEFUNCT) { + lck_mtx_unlock(&mpp->mpp_lock); + continue; + } mpte = mptompte(mpp); VERIFY(mpte != NULL); mp_tp = mpte->mpte_mptcb; VERIFY(mp_tp != NULL); - /* N.B. we don't take the mpt_lock just for the state. */ + + bzero(&mptcpci, sizeof(mptcpci)); + MPT_LOCK(mp_tp); mptcpci.mptcpci_state = mp_tp->mpt_state; + mptcpci.mptcpci_flags = mp_tp->mpt_flags; + mptcpci.mptcpci_ltoken = mp_tp->mpt_localtoken; + mptcpci.mptcpci_rtoken = mp_tp->mpt_remotetoken; + mptcpci.mptcpci_notsent_lowat = mp_tp->mpt_notsent_lowat; + mptcpci.mptcpci_snduna = mp_tp->mpt_snduna; + mptcpci.mptcpci_sndnxt = mp_tp->mpt_sndnxt; + mptcpci.mptcpci_sndmax = mp_tp->mpt_sndmax; + mptcpci.mptcpci_lidsn = mp_tp->mpt_local_idsn; + mptcpci.mptcpci_sndwnd = mp_tp->mpt_sndwnd; + mptcpci.mptcpci_rcvnxt = mp_tp->mpt_rcvnxt; + mptcpci.mptcpci_rcvatmark = mp_tp->mpt_rcvatmark; + mptcpci.mptcpci_ridsn = mp_tp->mpt_remote_idsn; + mptcpci.mptcpci_rcvwnd = mp_tp->mpt_rcvwnd; + MPT_UNLOCK(mp_tp); + mptcpci.mptcpci_nflows = mpte->mpte_numflows; + mptcpci.mptcpci_mpte_flags = mpte->mpte_flags; + mptcpci.mptcpci_mpte_addrid = mpte->mpte_addrid_last; + mptcpci.mptcpci_flow_offset = + offsetof(conninfo_mptcp_t, mptcpci_flows); + len = sizeof(*flows) * mpte->mpte_numflows; if (mpte->mpte_numflows != 0) { flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO); @@ -4922,8 +5130,7 @@ mptcp_pcblist SYSCTL_HANDLER_ARGS sizeof(mptcpci) - sizeof(mptcp_flow_t)); } else { mptcpci.mptcpci_len = sizeof(mptcpci); - error = SYSCTL_OUT(req, &mptcpci, - sizeof(mptcpci)); + error = SYSCTL_OUT(req, &mptcpci, sizeof(mptcpci)); } if (error) { lck_mtx_unlock(&mpp->mpp_lock); @@ -4993,35 +5200,6 @@ mptcp_output_needed(struct mptses *mpte, struct mptsub *to_mpts) MPTS_LOCK(to_mpts); } - -/* - * When WiFi signal starts fading, there's more loss and RTT spikes. - * Check if there has been a large spike by comparing against - * a tolerable RTT spike threshold. 
- */ -boolean_t -mptcp_no_rto_spike(struct socket *so) -{ - struct tcpcb *tp = intotcpcb(sotoinpcb(so)); - int32_t spike = 0; - - if (tp->t_rxtcur > mptcp_rto_spike_thresh) { - spike = tp->t_rxtcur - mptcp_rto_spike_thresh; - - mptcplog2((LOG_INFO, "%s: spike = %d rto = %d", - "best = %d cur = %d\n", __func__, spike, - tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT, - tp->t_rttcur)); - - } - - if (spike > 0 ) { - return (FALSE); - } else { - return (TRUE); - } -} - /* * Set notsent lowat mark on the MPTCB */ @@ -5079,9 +5257,11 @@ mptcp_notsent_lowat_check(struct socket *so) { if ((notsent == 0) || ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <= mp_tp->mpt_notsent_lowat)) { - mptcplog3((LOG_INFO, "%s: lowat %d notsent %d actual %d \n", - __func__, mp_tp->mpt_notsent_lowat, notsent, - notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna))); + mptcplog((LOG_DEBUG, "MPTCP Sender: " + "lowat %d notsent %d actual %d \n", + mp_tp->mpt_notsent_lowat, notsent, + notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)), + MPTCP_SENDER_DBG , MPTCP_LOGLVL_VERBOSE); MPT_UNLOCK(mp_tp); return (1); } @@ -5106,9 +5286,10 @@ mptcp_notsent_lowat_check(struct socket *so) { notsent > 0 && (notsent <= (int)tp->t_maxseg)) { retval = 1; } - mptcplog3((LOG_INFO, "%s: lowat %d notsent %d" + mptcplog((LOG_DEBUG, "MPTCP Sender: lowat %d notsent %d" " nodelay false \n", - __func__, mp_tp->mpt_notsent_lowat, notsent)); + mp_tp->mpt_notsent_lowat, notsent), + MPTCP_SENDER_DBG , MPTCP_LOGLVL_VERBOSE); socket_unlock(subf_so, 0); MPTS_UNLOCK(mpts); return (retval); @@ -5118,3 +5299,220 @@ mptcp_notsent_lowat_check(struct socket *so) { return (0); } +static void +mptcp_get_rtt_measurement(struct mptsub *mpts, struct mptses *mpte) +{ + MPTE_LOCK_ASSERT_HELD(mpte); + MPTS_LOCK_ASSERT_HELD(mpts); + + struct socket *subflow_so = mpts->mpts_socket; + socket_lock(subflow_so, 0); + mpts->mpts_srtt = (intotcpcb(sotoinpcb(subflow_so)))->t_srtt; + mpts->mpts_rxtcur = 
(intotcpcb(sotoinpcb(subflow_so)))->t_rxtcur; + socket_unlock(subflow_so, 0); +} + +/* Using Symptoms Advisory to detect poor WiFi or poor Cell */ +static kern_ctl_ref mptcp_kern_ctrl_ref = NULL; +static uint32_t mptcp_kern_skt_inuse = 0; +symptoms_advisory_t mptcp_advisory; + +static errno_t +mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, + void **unitinfo) +{ +#pragma unused(kctlref, sac, unitinfo) + /* + * We don't need to do anything here. But we can atleast ensure + * only one user opens the MPTCP_KERN_CTL_NAME control socket. + */ + if (OSCompareAndSwap(0, 1, &mptcp_kern_skt_inuse)) + return (0); + else + return (EALREADY); +} + +static errno_t +mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, + void *unitinfo) +{ +#pragma unused(kctlref, kcunit, unitinfo) + if (OSCompareAndSwap(1, 0, &mptcp_kern_skt_inuse)) { + /* TBD needs to be locked if the size grows more than an int */ + bzero(&mptcp_advisory, sizeof(mptcp_advisory)); + return (0); + } + else { + return (EINVAL); + } +} + +static errno_t +mptcp_symptoms_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, + mbuf_t m, int flags) +{ +#pragma unused(kctlref, kcunit, unitinfo, flags) + symptoms_advisory_t *sa = NULL; + + if (mbuf_pkthdr_len(m) < sizeof(*sa)) { + mbuf_freem(m); + return (EINVAL); + } + + if (mbuf_len(m) >= sizeof(*sa)) + sa = mbuf_data(m); + else + return (EINVAL); + + if (mptcp_advisory.sa_nwk_status_int != sa->sa_nwk_status_int) { + /* + * we could use this notification to notify all mptcp pcbs + * of the change in network status. But its difficult to + * define if sending REMOVE_ADDR or MP_PRIO is appropriate + * given that these are only soft indicators of the network + * state. Leaving this as TBD for now. 
+ */ + } + + if (sa->sa_nwk_status != SYMPTOMS_ADVISORY_NOCOMMENT) { + mptcplog((LOG_DEBUG, "MPTCP Events: %s wifi %d,%d cell %d,%d\n", + __func__, sa->sa_wifi_status, mptcp_advisory.sa_wifi_status, + sa->sa_cell_status, mptcp_advisory.sa_cell_status), + MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG, + MPTCP_LOGLVL_LOG); + + if ((sa->sa_wifi_status & + (SYMPTOMS_ADVISORY_WIFI_BAD | SYMPTOMS_ADVISORY_WIFI_OK)) != + (SYMPTOMS_ADVISORY_WIFI_BAD | SYMPTOMS_ADVISORY_WIFI_OK)) { + mptcp_advisory.sa_wifi_status = sa->sa_wifi_status; + } + + if ((sa->sa_cell_status & + (SYMPTOMS_ADVISORY_CELL_BAD | SYMPTOMS_ADVISORY_CELL_OK)) != + (SYMPTOMS_ADVISORY_CELL_BAD | SYMPTOMS_ADVISORY_CELL_OK)) { + mptcp_advisory.sa_cell_status = sa->sa_cell_status; + } + } else { + mptcplog((LOG_DEBUG, "MPTCP Events: %s NOCOMMENT " + "wifi %d cell %d\n", __func__, + mptcp_advisory.sa_wifi_status, + mptcp_advisory.sa_cell_status), + MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG); + } + return (0); +} + +void +mptcp_control_register(void) +{ + /* Set up the advisory control socket */ + struct kern_ctl_reg mptcp_kern_ctl; + + bzero(&mptcp_kern_ctl, sizeof(mptcp_kern_ctl)); + strlcpy(mptcp_kern_ctl.ctl_name, MPTCP_KERN_CTL_NAME, + sizeof(mptcp_kern_ctl.ctl_name)); + mptcp_kern_ctl.ctl_connect = mptcp_symptoms_ctl_connect; + mptcp_kern_ctl.ctl_disconnect = mptcp_symptoms_ctl_disconnect; + mptcp_kern_ctl.ctl_send = mptcp_symptoms_ctl_send; + mptcp_kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; + + (void)ctl_register(&mptcp_kern_ctl, &mptcp_kern_ctrl_ref); +} + +int +mptcp_is_wifi_unusable(void) +{ + /* a false return val indicates there is no info or wifi is ok */ + return (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_BAD); +} + +int +mptcp_is_cell_unusable(void) +{ + /* a false return val indicates there is no info or cell is ok */ + return (mptcp_advisory.sa_cell_status & SYMPTOMS_ADVISORY_CELL_BAD); +} + +struct mptsub* +mptcp_use_symptoms_hints(struct mptsub* best, struct mptsub *second_best) 
+{ + struct mptsub *cellsub = NULL; + struct mptsub *wifisub = NULL; + struct mptsub *wiredsub = NULL; + + VERIFY ((best != NULL) && (second_best != NULL)); + + if (!mptcp_use_symptomsd) + return (NULL); + + if (!mptcp_kern_skt_inuse) + return (NULL); + + /* + * There could be devices with more than one wifi interface or + * more than one wired or cell interfaces. + * TBD: SymptomsD is unavailable on such platforms as of now. + * Try to prefer best when possible in general. + * Also, SymptomsD sends notifications about wifi only when it + * is primary. + */ + if (best->mpts_linktype & MPTSL_WIFI) + wifisub = best; + else if (best->mpts_linktype & MPTSL_CELL) + cellsub = best; + else if (best->mpts_linktype & MPTSL_WIRED) + wiredsub = best; + + /* + * On platforms with wired paths, don't use hints about wifi or cell. + * Currently, SymptomsD is not available on platforms with wired paths. + */ + if (wiredsub) + return (NULL); + + if ((wifisub == NULL) && (second_best->mpts_linktype & MPTSL_WIFI)) + wifisub = second_best; + + if ((cellsub == NULL) && (second_best->mpts_linktype & MPTSL_CELL)) + cellsub = second_best; + + if ((wiredsub == NULL) && (second_best->mpts_linktype & MPTSL_WIRED)) + wiredsub = second_best; + + if ((wifisub == best) && mptcp_is_wifi_unusable()) { + tcpstat.tcps_mp_sel_symtomsd++; + if (mptcp_is_cell_unusable()) { + mptcplog((LOG_DEBUG, "MPTCP Sender: SymptomsD hint" + " suggests both Wifi and Cell are bad. Wired %s.", + (wiredsub == NULL) ? "none" : "present"), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); + return (wiredsub); + } else { + mptcplog((LOG_DEBUG, "MPTCP Sender: SymptomsD hint" + " suggests Wifi bad, Cell good. Wired %s.", + (wiredsub == NULL) ? "none" : "present"), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); + return ((wiredsub != NULL) ? 
wiredsub : cellsub); + } + } + + if ((cellsub == best) && (mptcp_is_cell_unusable())) { + tcpstat.tcps_mp_sel_symtomsd++; + if (mptcp_is_wifi_unusable()) { + mptcplog((LOG_DEBUG, "MPTCP Sender: SymptomsD hint" + " suggests both Cell and Wifi are bad. Wired %s.", + (wiredsub == NULL) ? "none" : "present"), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); + return (wiredsub); + } else { + mptcplog((LOG_DEBUG, "MPTCP Sender: SymptomsD hint" + " suggests Cell bad, Wifi good. Wired %s.", + (wiredsub == NULL) ? "none" : "present"), + MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); + return ((wiredsub != NULL) ? wiredsub : wifisub); + } + } + + /* little is known about the state of the network or wifi is good */ + return (NULL); +} diff --git a/bsd/netinet/mptcp_timer.c b/bsd/netinet/mptcp_timer.c index 322aba514..5469b8323 100644 --- a/bsd/netinet/mptcp_timer.c +++ b/bsd/netinet/mptcp_timer.c @@ -74,8 +74,6 @@ mptcp_timer_demux(struct mptses *mpte, uint32_t now_msecs) int resched_timer = 0; DTRACE_MPTCP2(timer, struct mptses *, mpte, struct mptcb *, mp_tp); - mptcplog2((LOG_DEBUG, "%s: running %d\n", __func__, - mp_tp->mpt_timer_vals)); MPTE_LOCK_ASSERT_HELD(mpte); MPT_LOCK(mp_tp); @@ -98,8 +96,10 @@ mptcp_timer_demux(struct mptses *mpte, uint32_t now_msecs) } else { mp_tp->mpt_sndnxt = mp_tp->mpt_rtseq; MPT_UNLOCK(mp_tp); - mptcplog((LOG_DEBUG, "%s: REXMT %d times.\n", - __func__, mp_tp->mpt_rxtshift)); + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: REXMT %d times.\n", + __func__, mp_tp->mpt_rxtshift), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); mptcp_output(mpte); MPT_LOCK(mp_tp); } @@ -167,27 +167,32 @@ mptcp_timer(struct mppcbinfo *mppi) } void -mptcp_start_timer(struct mptcb *mp_tp, int timer_type) +mptcp_start_timer(struct mptses *mpte, int timer_type) { struct timeval now; + struct mptcb *mp_tp = mpte->mpte_mptcb; microuptime(&now); - MPT_LOCK_ASSERT_HELD(mp_tp); - DTRACE_MPTCP2(start__timer, struct mptcb *, mp_tp, int, timer_type); - mptcplog((LOG_DEBUG, "%s %d\n", __func__, timer_type)); 
+ mptcplog((LOG_DEBUG, "MPTCP Socket: %s: %d\n", __func__, timer_type), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); switch (timer_type) { case MPTT_REXMT: + MPT_LOCK(mp_tp); mp_tp->mpt_timer_vals |= MPTT_REXMT; mp_tp->mpt_rxtstart = TIMEVAL_TO_HZ(now); mp_tp->mpt_rxtshift = 0; mp_tp->mpt_rtseq = mp_tp->mpt_sndnxt; + MPT_UNLOCK(mp_tp); break; case MPTT_TW: + /* XXX: Not implemented yet */ + MPT_LOCK(mp_tp); mp_tp->mpt_timer_vals |= MPTT_TW; mp_tp->mpt_timewait = TIMEVAL_TO_HZ(now); + MPT_UNLOCK(mp_tp); break; case MPTT_FASTCLOSE: /* NO-OP */ @@ -203,9 +208,7 @@ void mptcp_cancel_timer(struct mptcb *mp_tp, int timer_type) { MPT_LOCK_ASSERT_HELD(mp_tp); - DTRACE_MPTCP2(cancel__timer, struct mptcb *, mp_tp, int, timer_type); - mptcplog3((LOG_DEBUG, "%s %d\n", __func__, timer_type)); switch (timer_type) { case MPTT_REXMT: diff --git a/bsd/netinet/mptcp_timer.h b/bsd/netinet/mptcp_timer.h index b5dac77a6..94da71dc5 100644 --- a/bsd/netinet/mptcp_timer.h +++ b/bsd/netinet/mptcp_timer.h @@ -36,7 +36,7 @@ __BEGIN_DECLS extern uint32_t mptcp_timer(struct mppcbinfo *); -extern void mptcp_start_timer(struct mptcb *, int); +extern void mptcp_start_timer(struct mptses *, int); extern void mptcp_cancel_timer(struct mptcb *, int); extern void mptcp_cancel_all_timers(struct mptcb *); __END_DECLS diff --git a/bsd/netinet/mptcp_usrreq.c b/bsd/netinet/mptcp_usrreq.c index 268c7284f..d61ad1fc3 100644 --- a/bsd/netinet/mptcp_usrreq.c +++ b/bsd/netinet/mptcp_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014 Apple Inc. All rights reserved. + * Copyright (c) 2012-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -56,25 +56,25 @@ static int mptcp_usr_detach(struct socket *); static int mptcp_attach(struct socket *, struct proc *); static int mptcp_detach(struct socket *, struct mppcb *); static int mptcp_connectx(struct mptses *, struct sockaddr_list **, - struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, - uint32_t, void *, uint32_t); + struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t, + sae_connid_t *, uint32_t, void *, uint32_t); static int mptcp_usr_connectx(struct socket *, struct sockaddr_list **, - struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, - uint32_t, void *, uint32_t); + struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t, + sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *); static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t); -static int mptcp_getconnids(struct mptses *, associd_t, uint32_t *, +static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *, user_addr_t); -static int mptcp_getconninfo(struct mptses *, connid_t *, uint32_t *, +static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *, uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *, uint32_t *, user_addr_t, uint32_t *); static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *, struct proc *); -static int mptcp_disconnectx(struct mptses *, associd_t, connid_t); +static int mptcp_disconnectx(struct mptses *, sae_associd_t, sae_connid_t); static int mptcp_usr_disconnect(struct socket *); -static int mptcp_usr_disconnectx(struct socket *, associd_t, connid_t); +static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t); static struct mptses *mptcp_usrclosed(struct mptses *); -static int mptcp_usr_peeloff(struct socket *, associd_t, struct socket **); -static int mptcp_peeloff(struct mptses *, associd_t, struct socket **); +static int 
mptcp_usr_peeloff(struct socket *, sae_associd_t, struct socket **); +static int mptcp_peeloff(struct mptses *, sae_associd_t, struct socket **); static int mptcp_usr_rcvd(struct socket *, int); static int mptcp_usr_send(struct socket *, int, struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *); @@ -154,9 +154,9 @@ static int mptcp_attach(struct socket *mp_so, struct proc *p) { #pragma unused(p) - struct mptses *mpte; - struct mptcb *mp_tp; - struct mppcb *mpp; + struct mptses *mpte = NULL; + struct mptcb *mp_tp = NULL; + struct mppcb *mpp = NULL; int error = 0; if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) { @@ -177,25 +177,16 @@ mptcp_attach(struct socket *mp_so, struct proc *p) mp_so->so_rcv.sb_flags &= ~SB_AUTOSIZE; mp_so->so_snd.sb_flags &= ~SB_AUTOSIZE; - if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) + if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) { goto out; + } mpp = sotomppcb(mp_so); VERIFY(mpp != NULL); - - mpte = mptcp_sescreate(mp_so, mpp); - if (mpte == NULL) { - mp_pcbdetach(mpp); - error = ENOBUFS; - goto out; - } + mpte = (struct mptses *)mpp->mpp_pcbe; + VERIFY(mpte != NULL); mp_tp = mpte->mpte_mptcb; VERIFY(mp_tp != NULL); - - MPT_LOCK(mp_tp); - mp_tp->mpt_state = MPTCPS_CLOSED; - MPT_UNLOCK(mp_tp); - out: return (error); } @@ -217,7 +208,7 @@ mptcp_detach(struct socket *mp_so, struct mppcb *mpp) mppi = mpp->mpp_pcbinfo; VERIFY(mppi != NULL); - mpte = &((struct mpp_mtp *)mpp)->mpp_ses; + __IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses); VERIFY(mpte->mpte_mppcb == mpp); MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ @@ -230,7 +221,7 @@ mptcp_detach(struct socket *mp_so, struct mppcb *mpp) */ mp_pcbdetach(mpp); - (void) mptcp_disconnectx(mpte, ASSOCID_ALL, CONNID_ALL); + (void) mptcp_disconnectx(mpte, SAE_ASSOCID_ALL, SAE_CONNID_ALL); /* * XXX: adi@apple.com @@ -250,7 +241,7 @@ mptcp_detach(struct socket *mp_so, struct mppcb *mpp) static int mptcp_connectx(struct mptses *mpte, struct 
sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid, uint32_t flags, void *arg, + sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, uint32_t arglen) { #pragma unused(p, aid, flags, arg, arglen) @@ -264,10 +255,12 @@ mptcp_connectx(struct mptses *mpte, struct sockaddr_list **src_sl, VERIFY(dst_sl != NULL && *dst_sl != NULL); VERIFY(pcid != NULL); - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); - DTRACE_MPTCP3(connectx, struct mptses *, mpte, associd_t, aid, + DTRACE_MPTCP3(connectx, struct mptses *, mpte, sae_associd_t, aid, struct socket *, mp_so); mpts = mptcp_subflow_alloc(M_WAITOK); @@ -312,12 +305,14 @@ mptcp_connectx(struct mptses *mpte, struct sockaddr_list **src_sl, static int mptcp_usr_connectx(struct socket *mp_so, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid, uint32_t flags, void *arg, - uint32_t arglen) + sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written) { -#pragma unused(arg, arglen) +#pragma unused(arg, arglen, uio, bytes_written) struct mppcb *mpp = sotomppcb(mp_so); - struct mptses *mpte; + struct mptses *mpte = NULL; + struct mptcb *mp_tp = NULL; + int error = 0; if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) { @@ -327,6 +322,14 @@ mptcp_usr_connectx(struct socket *mp_so, struct sockaddr_list **src_sl, mpte = mptompte(mpp); VERIFY(mpte != NULL); + mp_tp = mpte->mpte_mptcb; + VERIFY(mp_tp != NULL); + + if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) { + error = EINVAL; + goto out; + } + error = mptcp_connectx(mpte, src_sl, dst_sl, p, ifscope, aid, pcid, flags, arg, arglen); out: @@ -342,7 +345,7 @@ 
mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp) MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ /* MPTCP has at most 1 association */ - *cnt = (mpte->mpte_associd != ASSOCID_ANY) ? 1 : 0; + *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0; /* just asking how many there are? */ if (aidp == USER_ADDR_NULL) @@ -356,7 +359,7 @@ mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp) * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain. */ static int -mptcp_getconnids(struct mptses *mpte, associd_t aid, uint32_t *cnt, +mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt, user_addr_t cidp) { struct mptsub *mpts; @@ -364,7 +367,7 @@ mptcp_getconnids(struct mptses *mpte, associd_t aid, uint32_t *cnt, MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - if (aid != ASSOCID_ANY && aid != ASSOCID_ALL && + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL && aid != mpte->mpte_associd) return (EINVAL); @@ -389,7 +392,7 @@ mptcp_getconnids(struct mptses *mpte, associd_t aid, uint32_t *cnt, * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain. */ static int -mptcp_getconninfo(struct mptses *mpte, connid_t *cid, uint32_t *flags, +mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags, uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len, user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type, user_addr_t aux_data, uint32_t *aux_len) @@ -402,15 +405,15 @@ mptcp_getconninfo(struct mptses *mpte, connid_t *cid, uint32_t *flags, MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - if (*cid == CONNID_ALL) + if (*cid == SAE_CONNID_ALL) return (EINVAL); TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { - if (mpts->mpts_connid == *cid || *cid == CONNID_ANY) + if (mpts->mpts_connid == *cid || *cid == SAE_CONNID_ANY) break; } if (mpts == NULL) - return ((*cid == CONNID_ANY) ? ENXIO : EINVAL); + return ((*cid == SAE_CONNID_ANY) ? 
ENXIO : EINVAL); MPTS_LOCK(mpts); ifp = mpts->mpts_outif; @@ -484,8 +487,11 @@ mptcp_getconninfo(struct mptses *mpte, connid_t *cid, uint32_t *flags, goto out; } } - mptcplog2((LOG_INFO, "%s: cid %d flags %x \n", - __func__, mpts->mpts_connid, mpts->mpts_flags)); + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: cid %d flags %x \n", + __func__, mpts->mpts_connid, mpts->mpts_flags), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); + out: MPTS_UNLOCK(mpts); return (error); @@ -495,15 +501,17 @@ mptcp_getconninfo(struct mptses *mpte, connid_t *cid, uint32_t *flags, * Handle SIOCSCONNORDER */ int -mptcp_setconnorder(struct mptses *mpte, connid_t cid, uint32_t rank) +mptcp_setconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t rank) { struct mptsub *mpts, *mpts1; int error = 0; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - mptcplog((LOG_DEBUG, "%s: cid %d rank %d \n", __func__, cid, rank)); + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: cid %d rank %d \n", __func__, cid, rank), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); - if (cid == CONNID_ANY || cid == CONNID_ALL) { + if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) { error = EINVAL; goto out; } @@ -588,8 +596,7 @@ mptcp_connorder_helper(struct mptsub *mpts) tp->t_mpflags &= ~TMPF_BACKUP_PATH; else tp->t_mpflags |= TMPF_BACKUP_PATH; - mptcplog((LOG_DEBUG, "%s cid %d flags %x", __func__, - mpts->mpts_connid, mpts->mpts_flags)); + socket_unlock(so, 0); } @@ -598,7 +605,7 @@ mptcp_connorder_helper(struct mptsub *mpts) * Handle SIOCSGONNORDER */ int -mptcp_getconnorder(struct mptses *mpte, connid_t cid, uint32_t *rank) +mptcp_getconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t *rank) { struct mptsub *mpts; int error = 0; @@ -607,7 +614,7 @@ mptcp_getconnorder(struct mptses *mpte, connid_t cid, uint32_t *rank) VERIFY(rank != NULL); *rank = 0; - if (cid == CONNID_ANY || cid == CONNID_ALL) { + if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) { error = EINVAL; goto out; } @@ -748,7 +755,7 @@ 
mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data, * connection while keeping the MPTCP-level connection (association). */ static int -mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid) +mptcp_disconnectx(struct mptses *mpte, sae_associd_t aid, sae_connid_t cid) { struct mptsub *mpts; struct socket *mp_so; @@ -760,16 +767,19 @@ mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid) mp_so = mpte->mpte_mppcb->mpp_socket; mp_tp = mpte->mpte_mptcb; - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid, mp_so->so_error)); - DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, associd_t, aid, - connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp); + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid, mp_so->so_error), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); - VERIFY(aid == ASSOCID_ANY || aid == ASSOCID_ALL || + DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, sae_associd_t, aid, + sae_connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp); + + VERIFY(aid == SAE_ASSOCID_ANY || aid == SAE_ASSOCID_ALL || aid == mpte->mpte_associd); /* terminate the association? */ - if (cid == CONNID_ANY || cid == CONNID_ALL) { + if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) { /* if we're not detached, go thru socket state checks */ if (!(mp_so->so_flags & SOF_PCBCLEARING)) { if (!(mp_so->so_state & (SS_ISCONNECTED| @@ -799,10 +809,26 @@ mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid) (void) mptcp_output(mpte); } } else { + bool disconnect_embryonic_subflows = false; + struct socket *so = NULL; + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { if (mpts->mpts_connid != cid) continue; + MPTS_LOCK(mpts); + /* + * Check if disconnected subflow is the one used + * to initiate MPTCP connection. 
+ * If it is and the connection is not yet join ready + * disconnect all other subflows. + */ + so = mpts->mpts_socket; + if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) && + so && !(so->so_flags & SOF_MP_SEC_SUBFLOW)) { + disconnect_embryonic_subflows = true; + } + mpts->mpts_flags |= MPTSF_USER_DISCONNECT; mptcp_subflow_disconnect(mpte, mpts, FALSE); MPTS_UNLOCK(mpts); @@ -813,6 +839,16 @@ mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid) error = EINVAL; goto out; } + + if (disconnect_embryonic_subflows) { + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + if (mpts->mpts_connid == cid) + continue; + MPTS_LOCK(mpts); + mptcp_subflow_disconnect(mpte, mpts, TRUE); + MPTS_UNLOCK(mpts); + } + } } if (error == 0) @@ -836,7 +872,7 @@ mptcp_usr_disconnect(struct socket *mp_so) { int error = 0; - error = mptcp_usr_disconnectx(mp_so, ASSOCID_ALL, CONNID_ALL); + error = mptcp_usr_disconnectx(mp_so, SAE_ASSOCID_ALL, SAE_CONNID_ALL); return (error); } @@ -844,7 +880,7 @@ mptcp_usr_disconnect(struct socket *mp_so) * User-protocol pru_disconnectx callback. */ static int -mptcp_usr_disconnectx(struct socket *mp_so, associd_t aid, connid_t cid) +mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid) { struct mppcb *mpp = sotomppcb(mp_so); struct mptses *mpte; @@ -858,7 +894,7 @@ mptcp_usr_disconnectx(struct socket *mp_so, associd_t aid, connid_t cid) VERIFY(mpte != NULL); MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - if (aid != ASSOCID_ANY && aid != ASSOCID_ALL && + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL && aid != mpte->mpte_associd) { error = EINVAL; goto out; @@ -915,7 +951,7 @@ mptcp_usrclosed(struct mptses *mpte) * User-protocol pru_peeloff callback. 
*/ static int -mptcp_usr_peeloff(struct socket *mp_so, associd_t aid, struct socket **psop) +mptcp_usr_peeloff(struct socket *mp_so, sae_associd_t aid, struct socket **psop) { struct mppcb *mpp = sotomppcb(mp_so); struct mptses *mpte; @@ -942,7 +978,7 @@ mptcp_usr_peeloff(struct socket *mp_so, associd_t aid, struct socket **psop) * yet associated (MPTCP-level connection has not been established.) */ static int -mptcp_peeloff(struct mptses *mpte, associd_t aid, struct socket **psop) +mptcp_peeloff(struct mptses *mpte, sae_associd_t aid, struct socket **psop) { struct socket *so = NULL, *mp_so; struct mptsub *mpts; @@ -954,16 +990,16 @@ mptcp_peeloff(struct mptses *mpte, associd_t aid, struct socket **psop) VERIFY(psop != NULL); *psop = NULL; - DTRACE_MPTCP3(peeloff, struct mptses *, mpte, associd_t, aid, + DTRACE_MPTCP3(peeloff, struct mptses *, mpte, sae_associd_t, aid, struct socket *, mp_so); /* peeloff cannot happen after an association is established */ - if (mpte->mpte_associd != ASSOCID_ANY) { + if (mpte->mpte_associd != SAE_ASSOCID_ANY) { error = EINVAL; goto out; } - if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) { + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) { error = EINVAL; goto out; } @@ -1006,8 +1042,11 @@ mptcp_peeloff(struct mptses *mpte, associd_t aid, struct socket **psop) } *psop = so; - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); + out: return (error); } @@ -1653,13 +1692,15 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) if (mpo == NULL) { error = ENOBUFS; } else { - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s " + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx sopt %s " "val %d %s\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level, optname, buf, sizeof (buf)), optval, (mpo->mpo_flags & 
MPOF_ATTACHED) ? - "updated" : "recorded")); + "updated" : "recorded"), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); /* initialize or update, as needed */ mpo->mpo_intval = optval; @@ -1695,16 +1736,20 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) } out: if (error == 0 && mpo != NULL) { - mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s val %d set %s\n", + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: mp_so 0x%llx sopt %s val %d set %s\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level, optname, buf, sizeof (buf)), optval, (mpo->mpo_flags & MPOF_INTERIM) ? - "pending" : "successful")); + "pending" : "successful"), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); } else if (error != 0) { - mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s can't be issued " + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: mp_so 0x%llx sopt %s can't be issued " "error %d\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level, - optname, buf, sizeof (buf)), error)); + optname, buf, sizeof (buf)), error), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); } return (error); } @@ -1841,10 +1886,12 @@ mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt) /* we only handle socket and TCP-level socket options for MPTCP */ if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) { char buf[32]; - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s level not " + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx sopt %s level not " "handled\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(sopt->sopt_level, - sopt->sopt_name, buf, sizeof (buf)))); + sopt->sopt_name, buf, sizeof (buf))), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); error = EINVAL; goto out; } diff --git a/bsd/netinet/mptcp_var.h b/bsd/netinet/mptcp_var.h index 9faaba48f..905ab934a 100644 --- a/bsd/netinet/mptcp_var.h +++ b/bsd/netinet/mptcp_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014 Apple Inc. All rights reserved. + * Copyright (c) 2012-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -55,8 +55,8 @@ struct mptses { TAILQ_HEAD(, mptsub) mpte_subflows; /* list of subflows */ uint16_t mpte_numflows; /* # of subflows in list */ uint16_t mpte_nummpcapflows; /* # of MP_CAP subflows */ - associd_t mpte_associd; /* MPTCP association ID */ - connid_t mpte_connid_last; /* last used connection ID */ + sae_associd_t mpte_associd; /* MPTCP association ID */ + sae_connid_t mpte_connid_last; /* last used connection ID */ /* * Threading (protected by mpte_thread_lock) */ @@ -139,7 +139,7 @@ struct mptsub { uint32_t mpts_flags; /* see flags below */ uint32_t mpts_evctl; /* subflow control events */ uint32_t mpts_family; /* address family */ - connid_t mpts_connid; /* subflow connection ID */ + sae_connid_t mpts_connid; /* subflow connection ID */ int mpts_oldintval; /* sopt_val before sosetopt */ uint32_t mpts_rank; /* subflow priority/rank */ int32_t mpts_soerror; /* most recent subflow error */ @@ -150,13 +150,18 @@ struct mptsub { struct ifnet *mpts_outif; /* outbound interface */ u_int64_t mpts_sndnxt; /* next byte to send in mp so */ u_int32_t mpts_rel_seq; /* running count of subflow # */ - struct { - u_int64_t mptsl_dsn; /* Data Sequence Number */ - u_int32_t mptsl_sseq; /* Corresponding Data Seq */ - u_int32_t mptsl_len; /* length of mapping */ - } mpts_lastmap; struct protosw *mpts_oprotosw; /* original protosw */ struct mptsub_connreq mpts_mpcr; /* connection request */ + int32_t mpts_srtt; /* tcp's rtt estimate */ + int32_t mpts_rxtcur; /* tcp's rto estimate */ + uint32_t mpts_probesoon; /* send probe after probeto */ + uint32_t mpts_probecnt; /* number of probes sent */ + uint32_t mpts_maxseg; /* cached value of t_maxseg */ + uint32_t mpts_peerswitch;/* no of uses of backup so */ +#define MPTSL_WIRED 0x01 +#define MPTSL_WIFI 0x02 +#define MPTSL_CELL 0x04 + uint32_t mpts_linktype; /* wired, wifi, cell */ }; /* @@ -327,10 +332,12 @@ struct mptcb { /* * Zombie handling */ -#define MPT_GC_TICKS (60) 
+#define MPT_GC_TICKS (30) +#define MPT_GC_TICKS_FAST (10) int32_t mpt_gc_ticks; /* Used for zombie deletion */ u_int32_t mpt_notsent_lowat; /* TCP_NOTSENT_LOWAT support */ + u_int32_t mpt_peer_version; /* Version from peer */ }; /* valid values for mpt_flags (see also notes on mpts_flags above) */ @@ -349,9 +356,10 @@ struct mptcb { "\6SND_64BITDSN\7SND_64BITACK\10RCVD_64BITACK\11POST_FALLBACK_SYNC" /* valid values for mpt_timer_vals */ -#define MPTT_REXMT 0x01 /* Starting Retransmit Timer */ -#define MPTT_TW 0x02 /* Starting Timewait Timer */ -#define MPTT_FASTCLOSE 0x04 /* Starting Fastclose wait timer */ +#define MPTT_REXMT 0x01 /* Starting Retransmit Timer */ +#define MPTT_TW 0x02 /* Starting Timewait Timer */ +#define MPTT_FASTCLOSE 0x04 /* Starting Fastclose wait timer */ +//#define MPTT_PROBE_TIMER 0x08 /* Timer for probing preferred path */ #define MPT_LOCK_ASSERT_HELD(_mpt) \ lck_mtx_assert(&(_mpt)->mpt_lock, LCK_MTX_ASSERT_OWNED) @@ -430,12 +438,20 @@ struct mptcp_keys_pool_head { #define MPTCP_RWIN_MAX (1<<16) /* MPTCP Debugging Levels */ -#define MP_NODEBUG 0x0 -#define MP_ERR_DEBUG 0x1 -#define MP_VERBOSE_DEBUG_1 0x2 -#define MP_VERBOSE_DEBUG_2 0x3 -#define MP_VERBOSE_DEBUG_3 0x4 -#define MP_VERBOSE_DEBUG_4 0x5 /* output path debugging */ +#define MPTCP_LOGLVL_NONE 0x0 /* No debug logging */ +#define MPTCP_LOGLVL_ERR 0x1 /* Errors in execution are logged */ +#define MPTCP_LOGLVL_LOG 0x2 /* Important logs */ +#define MPTCP_LOGLVL_VERBOSE 0x3 /* Verbose logs */ + +/* MPTCP sub-components for debug logging */ +#define MPTCP_NO_DBG 0x00 /* No areas are logged */ +#define MPTCP_STATE_DBG 0x01 /* State machine logging */ +#define MPTCP_SOCKET_DBG 0x02 /* Socket call logging */ +#define MPTCP_SENDER_DBG 0x04 /* Sender side logging */ +#define MPTCP_RECEIVER_DBG 0x08 /* Receiver logging */ +#define MPTCP_EVENTS_DBG 0x10 /* Subflow events logging */ +#define MPTCP_ALL_DBG (MPTCP_STATE_DBG | MPTCP_SOCKET_DBG | \ + MPTCP_SENDER_DBG | MPTCP_RECEIVER_DBG | 
MPTCP_EVENTS_DBG) /* Mask to obtain 32-bit portion of data sequence number */ #define MPTCP_DATASEQ_LOW32_MASK (0xffffffff) @@ -490,12 +506,13 @@ struct mptcp_keys_pool_head { } \ } -#define mptcplog(x) do { if (mptcp_verbose >= 1) log x; } while (0) -#define mptcplog2(x) do { if (mptcp_verbose >= 2) log x; } while (0) -#define mptcplog3(x) do { if (mptcp_verbose >= 3) log x; } while (0) +#define mptcplog(x, y, z) do { \ + if ((mptcp_dbg_area & y) && \ + (mptcp_dbg_level >= z)) \ + log x; \ +} while (0) extern int mptcp_enable; /* Multipath TCP */ -extern int mptcp_dbg; /* Multipath TCP DBG */ extern int mptcp_mpcap_retries; /* Multipath TCP retries */ extern int mptcp_join_retries; /* Multipath TCP Join retries */ extern int mptcp_dss_csum; /* Multipath DSS Option checksum */ @@ -506,7 +523,9 @@ extern int mptcp_remaddr_enable;/* REMOVE_ADDR option enable/disable */ extern int mptcp_fastjoin; /* Enable FastJoin */ extern int mptcp_zerortt_fastjoin; /* Enable Data after SYN Fast Join */ extern int mptcp_rwnotify; /* Enable RW notification on resume */ -extern uint32_t mptcp_verbose; /* verbose and mptcp_dbg must be unified */ +extern uint32_t mptcp_dbg_level; /* Multipath TCP debugging level */ +extern uint32_t mptcp_dbg_area; /* Multipath TCP debugging area */ + #define MPPCB_LIMIT 16 extern uint32_t mptcp_socket_limit; /* max number of mptcp sockets allowed */ extern uint32_t mptcp_delayed_subf_start; /* delayed cellular subflow start */ @@ -515,7 +534,7 @@ extern int tcp_jack_rxmt; /* Join ACK retransmission value in msecs */ __BEGIN_DECLS extern void mptcp_init(struct protosw *, struct domain *); extern int mptcp_ctloutput(struct socket *, struct sockopt *); -extern struct mptses *mptcp_sescreate(struct socket *, struct mppcb *); +extern void *mptcp_sescreate(struct socket *, struct mppcb *); extern void mptcp_drain(void); extern struct mptses *mptcp_drop(struct mptses *, struct mptcb *, int); extern struct mptses *mptcp_close(struct mptses *, struct mptcb *); 
@@ -524,8 +543,8 @@ extern int mptcp_unlock(struct socket *, int, void *); extern lck_mtx_t *mptcp_getlock(struct socket *, int); extern void mptcp_thread_signal(struct mptses *); extern void mptcp_flush_sopts(struct mptses *); -extern int mptcp_setconnorder(struct mptses *, connid_t, uint32_t); -extern int mptcp_getconnorder(struct mptses *, connid_t, uint32_t *); +extern int mptcp_setconnorder(struct mptses *, sae_connid_t, uint32_t); +extern int mptcp_getconnorder(struct mptses *, sae_connid_t, uint32_t *); extern struct mptopt *mptcp_sopt_alloc(int); extern const char *mptcp_sopt2str(int, int, char *, int); @@ -580,9 +599,12 @@ extern void mptcp_output_getm_dsnmap64(struct socket *, int, uint32_t, u_int64_t *, u_int32_t *, u_int16_t *); extern void mptcp_send_dfin(struct socket *); extern void mptcp_act_on_txfail(struct socket *); -extern struct mptsub *mptcp_get_subflow(struct mptses *, struct mptsub *); +extern struct mptsub *mptcp_get_subflow(struct mptses *, struct mptsub *, + struct mptsub **); extern struct mptsub *mptcp_get_pending_subflow(struct mptses *, struct mptsub *); +extern struct mptsub* mptcp_use_symptoms_hints(struct mptsub*, + struct mptsub *); extern int mptcp_get_map_for_dsn(struct socket *, u_int64_t, u_int32_t *); extern int32_t mptcp_adj_sendlen(struct socket *so, int32_t off, int32_t len); extern int32_t mptcp_sbspace(struct mptcb *); @@ -593,25 +615,81 @@ extern boolean_t mptcp_no_rto_spike(struct socket*); extern int mptcp_set_notsent_lowat(struct mptses *mpte, int optval); extern u_int32_t mptcp_get_notsent_lowat(struct mptses *mpte); extern int mptcp_notsent_lowat_check(struct socket *so); - +extern void mptcp_control_register(void); +extern int mptcp_is_wifi_unusable(void); +extern int mptcp_is_cell_unusable(void); __END_DECLS #endif /* BSD_KERNEL_PRIVATE */ #ifdef PRIVATE + typedef struct mptcp_flow { + size_t flow_len; + size_t flow_tcpci_offset; uint32_t flow_flags; - connid_t flow_cid; + sae_connid_t flow_cid; struct 
sockaddr_storage flow_src; struct sockaddr_storage flow_dst; - conninfo_tcp_t flow_ci; + uint64_t flow_sndnxt; /* subflow's sndnxt snapshot */ + uint32_t flow_relseq; /* last subflow rel seq# */ + int32_t flow_soerror; /* subflow level error */ + uint32_t flow_probecnt; /* number of probes sent */ + uint32_t flow_peerswitch;/* did peer switch */ + conninfo_tcp_t flow_ci; /* must be the last field */ } mptcp_flow_t; typedef struct conninfo_mptcp { size_t mptcpci_len; - size_t mptcpci_nflows; - uint32_t mptcpci_state; + size_t mptcpci_flow_offset; /* offsetof first flow */ + size_t mptcpci_nflows; /* number of subflows */ + uint32_t mptcpci_state; /* MPTCP level state */ + uint32_t mptcpci_mpte_flags; /* Session flags */ + uint32_t mptcpci_flags; /* MPTCB flags */ + uint32_t mptcpci_ltoken; /* local token */ + uint32_t mptcpci_rtoken; /* remote token */ + uint32_t mptcpci_notsent_lowat; /* NOTSENT_LOWAT */ + + /* Send side */ + uint64_t mptcpci_snduna; /* DSN of last unacked byte */ + uint64_t mptcpci_sndnxt; /* DSN of next byte to send */ + uint64_t mptcpci_sndmax; /* DSN of max byte sent */ + uint64_t mptcpci_lidsn; /* Local IDSN */ + uint32_t mptcpci_sndwnd; /* Send window snapshot */ + + /* Receive side */ + uint64_t mptcpci_rcvnxt; /* Next expected DSN */ + uint64_t mptcpci_rcvatmark; /* Session level rcvnxt */ + uint64_t mptcpci_ridsn; /* Peer's IDSN */ + uint32_t mptcpci_rcvwnd; /* Receive window */ + + uint8_t mptcpci_mpte_addrid; /* last addr id */ + mptcp_flow_t mptcpci_flows[1]; } conninfo_mptcp_t; +/* Use SymptomsD notifications of wifi and cell status in subflow selection */ +#define MPTCP_KERN_CTL_NAME "com.apple.network.advisory" +typedef struct symptoms_advisory { + union { + uint32_t sa_nwk_status_int; + struct { + union { +#define SYMPTOMS_ADVISORY_NOCOMMENT 0x00 + uint16_t sa_nwk_status; + struct { +#define SYMPTOMS_ADVISORY_WIFI_BAD 0x01 +#define SYMPTOMS_ADVISORY_WIFI_OK 0x02 + uint8_t sa_wifi_status; +#define SYMPTOMS_ADVISORY_CELL_BAD 0x01 
+#define SYMPTOMS_ADVISORY_CELL_OK 0x02 + uint8_t sa_cell_status; + }; + }; + uint16_t sa_unused; + }; + }; +} symptoms_advisory_t; + + #endif /* PRIVATE */ #endif /* _NETINET_MPTCP_VAR_H_ */ diff --git a/bsd/netinet/raw_ip.c b/bsd/netinet/raw_ip.c index 173b506a6..c2b41a365 100644 --- a/bsd/netinet/raw_ip.c +++ b/bsd/netinet/raw_ip.c @@ -230,11 +230,12 @@ rip_input(m, iphlen) continue; if (last) { struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); - + skipit = 0; - + #if NECP - if (n && !necp_socket_is_allowed_to_send_recv_v4(last, 0, 0, &ip->ip_dst, &ip->ip_src, ifp, NULL)) { + if (n && !necp_socket_is_allowed_to_send_recv_v4(last, 0, 0, + &ip->ip_dst, &ip->ip_src, ifp, NULL, NULL)) { m_freem(n); /* do not inject data to pcb */ skipit = 1; @@ -286,7 +287,8 @@ rip_input(m, iphlen) skipit = 0; #if NECP - if (last && !necp_socket_is_allowed_to_send_recv_v4(last, 0, 0, &ip->ip_dst, &ip->ip_src, ifp, NULL)) { + if (last && !necp_socket_is_allowed_to_send_recv_v4(last, 0, 0, + &ip->ip_dst, &ip->ip_src, ifp, NULL, NULL)) { m_freem(m); OSAddAtomic(1, &ipstat.ips_delivered); /* do not inject data to pcb */ @@ -402,7 +404,7 @@ rip_output( m_freem(m); return(EMSGSIZE); } - M_PREPEND(m, sizeof(struct ip), M_WAIT); + M_PREPEND(m, sizeof(struct ip), M_WAIT, 1); if (m == NULL) return ENOBUFS; ip = mtod(m, struct ip *); @@ -437,19 +439,21 @@ rip_output( if (inp->inp_laddr.s_addr != INADDR_ANY) ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR; - + #if NECP { necp_kernel_policy_id policy_id; - if (!necp_socket_is_allowed_to_send_recv_v4(inp, 0, 0, &ip->ip_src, &ip->ip_dst, NULL, &policy_id)) { + u_int32_t route_rule_id; + if (!necp_socket_is_allowed_to_send_recv_v4(inp, 0, 0, + &ip->ip_src, &ip->ip_dst, NULL, &policy_id, &route_rule_id)) { m_freem(m); return(EHOSTUNREACH); } - necp_mark_packet_from_socket(m, inp, policy_id); + necp_mark_packet_from_socket(m, inp, policy_id, route_rule_id); } #endif /* NECP */ - + #if IPSEC if (inp->inp_sp != NULL && ipsec_setsocket(m, so) != 0) { m_freem(m); @@ 
-479,6 +483,7 @@ rip_output( * to pass the PCB cached route pointer directly to IP and * the modules beneath it. */ + // TODO: PASS DOWN ROUTE RULE ID error = ip_output(m, inp->inp_options, &inp->inp_route, flags, imo, &ipoa); diff --git a/bsd/netinet/tcp.h b/bsd/netinet/tcp.h index c4ecae9d8..134a61535 100644 --- a/bsd/netinet/tcp.h +++ b/bsd/netinet/tcp.h @@ -63,8 +63,8 @@ #ifndef _NETINET_TCP_H_ #define _NETINET_TCP_H_ +#include #include -#include #include #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) @@ -138,6 +138,9 @@ struct tcphdr { #define TCPOPT_MULTIPATH 30 #endif +#define TCPOPT_FASTOPEN 34 +#define TCPOLEN_FASTOPEN_REQ 2 + /* Option definitions */ #define TCPOPT_SACK_PERMIT_HDR \ (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_SACK_PERMITTED<<8|TCPOLEN_SACK_PERMITTED) @@ -217,6 +220,8 @@ struct tcphdr { #define TCP_KEEPCNT 0x102 /* number of keepalives before close */ #define TCP_SENDMOREACKS 0x103 /* always ack every other packet */ #define TCP_ENABLE_ECN 0x104 /* Enable ECN on a connection */ +#define TCP_FASTOPEN 0x105 /* Enable/Disable TCP Fastopen on this socket */ +#define TCP_CONNECTION_INFO 0x106 /* State of TCP connection */ #ifdef PRIVATE #define TCP_INFO 0x200 /* retrieve tcp_info structure */ @@ -251,6 +256,13 @@ struct tcphdr { #define TCPI_FLAG_LOSSRECOVERY 0x01 /* Currently in loss recovery */ +struct tcp_conn_status { + unsigned int probe_activated : 1; + unsigned int write_probe_failed : 1; + unsigned int read_probe_failed : 1; + unsigned int conn_probe_failed : 1; +}; + /* * Add new fields to this structure at the end only. This will preserve * binary compatibility. 
@@ -314,6 +326,19 @@ struct tcp_info { u_int64_t tcpi_wired_rxbytes __attribute((aligned(8))); /* bytes received over Wired */ u_int64_t tcpi_wired_txpackets __attribute((aligned(8))); /* packets transmitted over Wired */ u_int64_t tcpi_wired_txbytes __attribute((aligned(8))); /* bytes transmitted over Wired */ + struct tcp_conn_status tcpi_connstatus; /* status of connection probes */ + + u_int16_t /* Client-side information */ + tcpi_tfo_cookie_req:1, /* Cookie requested? */ + tcpi_tfo_cookie_rcv:1, /* Cookie received? */ + tcpi_tfo_syn_loss:1, /* Fallback to reg. TCP after SYN-loss */ + tcpi_tfo_syn_data_sent:1, /* SYN+data has been sent out */ + tcpi_tfo_syn_data_acked:1, /* SYN+data has been fully acknowledged */ + /* And the following are for server-side information (must be set on the listener socket) */ + tcpi_tfo_syn_data_rcv:1, /* Server received SYN+data with a valid cookie */ + tcpi_tfo_cookie_req_rcv:1, /* Server received cookie-request */ + tcpi_tfo_cookie_sent:1, /* Server announced cookie */ + tcpi_tfo_cookie_invalid:1; /* Server received an invalid cookie */ }; struct tcp_measure_bw_burst { @@ -360,6 +385,50 @@ typedef struct conninfo_tcp { #pragma pack() #endif /* PRIVATE */ + +struct tcp_connection_info { + u_int8_t tcpi_state; /* connection state */ + u_int8_t tcpi_snd_wscale; /* Window scale for send window */ + u_int8_t tcpi_rcv_wscale; /* Window scale for receive window */ + u_int8_t __pad1; + u_int32_t tcpi_options; /* TCP options supported */ +#define TCPCI_OPT_TIMESTAMPS 0x00000001 /* Timestamps enabled */ +#define TCPCI_OPT_SACK 0x00000002 /* SACK enabled */ +#define TCPCI_OPT_WSCALE 0x00000004 /* Window scaling enabled */ +#define TCPCI_OPT_ECN 0x00000008 /* ECN enabled */ + u_int32_t tcpi_flags; /* flags */ +#define TCPCI_FLAG_LOSSRECOVERY 0x00000001 +#define TCPCI_FLAG_REORDERING_DETECTED 0x00000002 + u_int32_t tcpi_rto; /* retransmit timeout in ms */ + u_int32_t tcpi_maxseg; /* maximum segment size supported */ + u_int32_t 
tcpi_snd_ssthresh; /* slow start threshold in bytes */ + u_int32_t tcpi_snd_cwnd; /* send congestion window in bytes */ + u_int32_t tcpi_snd_wnd; /* send window in bytes */ + u_int32_t tcpi_snd_sbbytes; /* bytes in send socket buffer, including in-flight data */ + u_int32_t tcpi_rcv_wnd; /* receive window in bytes*/ + u_int32_t tcpi_rttcur; /* most recent RTT in ms */ + u_int32_t tcpi_srtt; /* average RTT in ms */ + u_int32_t tcpi_rttvar; /* RTT variance */ + u_int32_t + /* Client-side information */ + tcpi_tfo_cookie_req:1, /* Cookie requested? */ + tcpi_tfo_cookie_rcv:1, /* Cookie received? */ + tcpi_tfo_syn_loss:1, /* Fallback to reg. TCP after SYN-loss */ + tcpi_tfo_syn_data_sent:1, /* SYN+data has been sent out */ + tcpi_tfo_syn_data_acked:1, /* SYN+data has been fully acknowledged */ + /* And the following are for server-side information (must be set on the listener socket) */ + tcpi_tfo_syn_data_rcv:1, /* Server received SYN+data with a valid cookie */ + tcpi_tfo_cookie_req_rcv:1, /* Server received cookie-request */ + tcpi_tfo_cookie_sent:1, /* Server announced cookie */ + tcpi_tfo_cookie_invalid:1, /* Server received an invalid cookie */ + __pad2:23; + u_int64_t tcpi_txpackets __attribute__((aligned(8))); + u_int64_t tcpi_txbytes __attribute__((aligned(8))); + u_int64_t tcpi_txretransmitbytes __attribute__((aligned(8))); + u_int64_t tcpi_rxpackets __attribute__((aligned(8))); + u_int64_t tcpi_rxbytes __attribute__((aligned(8))); + u_int64_t tcpi_rxoutoforderbytes __attribute__((aligned(8))); +}; #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */ #endif diff --git a/bsd/netinet/tcp_cache.c b/bsd/netinet/tcp_cache.c new file mode 100644 index 000000000..cb3b86d04 --- /dev/null +++ b/bsd/netinet/tcp_cache.c @@ -0,0 +1,743 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved.
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* TCP-cache to store and retrieve TCP-related information */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct tcp_heuristic_key { + union { + uint8_t thk_net_signature[IFNET_SIGNATURELEN]; + union { + struct in_addr addr; + struct in6_addr addr6; + } thk_ip; + }; + sa_family_t thk_family; +}; + +struct tcp_heuristic { + SLIST_ENTRY(tcp_heuristic) list; + + u_int32_t th_last_access; + + struct tcp_heuristic_key th_key; + + /* + * If tfo_cookie_loss is changed to a smaller type, it might be worth + * checking for integer-overflow in tcp_cache_tfo_inc_loss + */ + u_int32_t th_tfo_cookie_loss; /* The number of times a SYN+cookie has been lost */ + u_int32_t th_tfo_fallback_trials; /* Number of times we did not try out TFO due to SYN-loss */ + u_int32_t th_tfo_cookie_backoff; /* Time until when we should not try out TFO */ + + u_int8_t th_tfo_in_backoff:1, /* Are we doing TFO due to the backoff timer? 
*/ + th_tfo_aggressive_fallback:1, /* Aggressive fallback due to nasty middlebox */ + th_tfo_snd_middlebox_supp:1, /* We are sure that the network supports TFO in upstream direction */ + th_tfo_rcv_middlebox_supp:1; /* We are sure that the network supports TFO in downstream direction*/ +}; + +struct tcp_heuristics_head { + SLIST_HEAD(tcp_heur_bucket, tcp_heuristic) tcp_heuristics; + + /* Per-hashbucket lock to avoid lock-contention */ + lck_mtx_t thh_mtx; +}; + +struct tcp_cache_key { + sa_family_t tck_family; + + struct tcp_heuristic_key tck_src; + union { + struct in_addr addr; + struct in6_addr addr6; + } tck_dst; +}; + +struct tcp_cache { + SLIST_ENTRY(tcp_cache) list; + + u_int32_t tc_last_access; + + struct tcp_cache_key tc_key; + + u_int8_t tc_tfo_cookie[TFO_COOKIE_LEN_MAX]; + u_int8_t tc_tfo_cookie_len; +}; + +struct tcp_cache_head { + SLIST_HEAD(tcp_cache_bucket, tcp_cache) tcp_caches; + + /* Per-hashbucket lock to avoid lock-contention */ + lck_mtx_t tch_mtx; +}; + +static u_int32_t tcp_cache_hash_seed; + +size_t tcp_cache_size; + +/* + * The maximum depth of the hash-bucket. This way we limit the tcp_cache to + * TCP_CACHE_BUCKET_SIZE * tcp_cache_size and have "natural" garbage collection + */ +#define TCP_CACHE_BUCKET_SIZE 5 + +static struct tcp_cache_head *tcp_cache; + +decl_lck_mtx_data(, tcp_cache_mtx); + +static lck_attr_t *tcp_cache_mtx_attr; +static lck_grp_t *tcp_cache_mtx_grp; +static lck_grp_attr_t *tcp_cache_mtx_grp_attr; + +static struct tcp_heuristics_head *tcp_heuristics; + +decl_lck_mtx_data(, tcp_heuristics_mtx); + +static lck_attr_t *tcp_heuristic_mtx_attr; +static lck_grp_t *tcp_heuristic_mtx_grp; +static lck_grp_attr_t *tcp_heuristic_mtx_grp_attr; + +/* Number of SYN-losses we accept */ +#define TFO_MAX_COOKIE_LOSS 2 + +/* + * Round up to next higher power-of 2. See "Bit Twiddling Hacks". + * + * Might be worth moving this to a library so that others + * (e.g., scale_to_powerof2()) can use this as well instead of a while-loop.
+ */ +static u_int32_t tcp_cache_roundup2(u_int32_t a) +{ + a--; + a |= a >> 1; + a |= a >> 2; + a |= a >> 4; + a |= a >> 8; + a |= a >> 16; + a++; + + return a; +} + +static void tcp_cache_hash_src(struct inpcb *inp, struct tcp_heuristic_key *key) +{ + struct ifnet *ifn = inp->inp_last_outifp; + uint8_t len = sizeof(key->thk_net_signature); + uint16_t flags; + + if (inp->inp_vflag & INP_IPV6) { + int ret; + + key->thk_family = AF_INET6; + ret = ifnet_get_netsignature(ifn, AF_INET6, &len, &flags, + key->thk_net_signature); + + /* + * ifnet_get_netsignature only returns EINVAL if ifn is NULL + * (we made sure that in the other cases it does not). So, + * in this case we should take the connection's address. + */ + if (ret == ENOENT || ret == EINVAL) + memcpy(&key->thk_ip.addr6, &inp->in6p_laddr, sizeof(struct in6_addr)); + } else { + int ret; + + key->thk_family = AF_INET; + ret = ifnet_get_netsignature(ifn, AF_INET, &len, &flags, + key->thk_net_signature); + + /* + * ifnet_get_netsignature only returns EINVAL if ifn is NULL + * (we made sure that in the other cases it does not). So, + * in this case we should take the connection's address. 
+ */ + if (ret == ENOENT || ret == EINVAL) + memcpy(&key->thk_ip.addr, &inp->inp_laddr, sizeof(struct in_addr)); + } +} + +static u_int16_t tcp_cache_hash(struct inpcb *inp, struct tcp_cache_key *key) +{ + u_int32_t hash; + + bzero(key, sizeof(struct tcp_cache_key)); + + tcp_cache_hash_src(inp, &key->tck_src); + + if (inp->inp_vflag & INP_IPV6) { + key->tck_family = AF_INET6; + memcpy(&key->tck_dst.addr6, &inp->in6p_faddr, + sizeof(struct in6_addr)); + } else { + key->tck_family = AF_INET; + memcpy(&key->tck_dst.addr, &inp->inp_faddr, + sizeof(struct in_addr)); + } + + hash = net_flowhash(key, sizeof(struct tcp_cache_key), + tcp_cache_hash_seed); + + return (hash & (tcp_cache_size - 1)); +} + +static void tcp_cache_unlock(struct tcp_cache_head *head) +{ + lck_mtx_unlock(&head->tch_mtx); +} + +/* + * Make sure that everything that happens after tcp_getcache_with_lock() + * is short enough to justify that you hold the per-bucket lock!!! + * + * Otherwise, better build another lookup-function that does not hold the + * lock and you copy out the bits and bytes. + * + * That's why we provide the head as a "return"-pointer so that the caller + * can give it back to use for tcp_cache_unlock(). 
+ */ +static struct tcp_cache *tcp_getcache_with_lock(struct tcpcb *tp, int create, + struct tcp_cache_head **headarg) +{ + struct inpcb *inp = tp->t_inpcb; + struct tcp_cache *tpcache = NULL; + struct tcp_cache_head *head; + struct tcp_cache_key key; + u_int16_t hash; + int i = 0; + + hash = tcp_cache_hash(inp, &key); + head = &tcp_cache[hash]; + + lck_mtx_lock(&head->tch_mtx); + + /*** First step: Look for the tcp_cache in our bucket ***/ + SLIST_FOREACH(tpcache, &head->tcp_caches, list) { + if (memcmp(&tpcache->tc_key, &key, sizeof(key)) == 0) + break; + + i++; + } + + /*** Second step: If it's not there, create/recycle it ***/ + if ((tpcache == NULL) && create) { + if (i >= TCP_CACHE_BUCKET_SIZE) { + struct tcp_cache *oldest_cache = NULL; + u_int32_t max_age = 0; + + /* Look for the oldest tcp_cache in the bucket */ + SLIST_FOREACH(tpcache, &head->tcp_caches, list) { + u_int32_t age = tcp_now - tpcache->tc_last_access; + if (age > max_age) { + max_age = age; + oldest_cache = tpcache; + } + } + VERIFY(oldest_cache != NULL); + + tpcache = oldest_cache; + + /* We recycle, thus let's indicate that there is no cookie */ + tpcache->tc_tfo_cookie_len = 0; + } else { + /* Create a new cache and add it to the list */ + tpcache = _MALLOC(sizeof(struct tcp_cache), M_TEMP, + M_NOWAIT | M_ZERO); + if (tpcache == NULL) + goto out_null; + + SLIST_INSERT_HEAD(&head->tcp_caches, tpcache, list); + } + + memcpy(&tpcache->tc_key, &key, sizeof(key)); + } + + if (tpcache == NULL) + goto out_null; + + /* Update timestamp for garbage collection purposes */ + tpcache->tc_last_access = tcp_now; + *headarg = head; + + return (tpcache); + +out_null: + tcp_cache_unlock(head); + return (NULL); +} + +void tcp_cache_set_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t len) +{ + struct tcp_cache_head *head; + struct tcp_cache *tpcache; + + /* Call lookup/create function */ + tpcache = tcp_getcache_with_lock(tp, 1, &head); + if (tpcache == NULL) + return; + + tpcache->tc_tfo_cookie_len = len; 
+ memcpy(tpcache->tc_tfo_cookie, cookie, len); + + tcp_cache_unlock(head); +} + +/* + * Get the cookie related to 'tp', and copy it into 'cookie', provided that len + * is big enough (len designates the available memory). + * Upon return, 'len' is set to the cookie's length. + * + * Returns 0 if we should request a cookie. + * Returns 1 if the cookie has been found and written. + */ +int tcp_cache_get_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t *len) +{ + struct tcp_cache_head *head; + struct tcp_cache *tpcache; + + /* Call lookup/create function */ + tpcache = tcp_getcache_with_lock(tp, 1, &head); + if (tpcache == NULL) + return (0); + + if (tpcache->tc_tfo_cookie_len == 0) { + tcp_cache_unlock(head); + return (0); + } + + /* + * Not enough space - this should never happen as it has been checked + * in tcp_tfo_check. So, fail here! + */ + VERIFY(tpcache->tc_tfo_cookie_len <= *len); + + memcpy(cookie, tpcache->tc_tfo_cookie, tpcache->tc_tfo_cookie_len); + *len = tpcache->tc_tfo_cookie_len; + + tcp_cache_unlock(head); + + return (1); +} + +unsigned int tcp_cache_get_cookie_len(struct tcpcb *tp) +{ + struct tcp_cache_head *head; + struct tcp_cache *tpcache; + unsigned int cookie_len; + + /* Call lookup/create function */ + tpcache = tcp_getcache_with_lock(tp, 1, &head); + if (tpcache == NULL) + return (0); + + cookie_len = tpcache->tc_tfo_cookie_len; + + tcp_cache_unlock(head); + + return cookie_len; +} + +static u_int16_t tcp_heuristics_hash(struct inpcb *inp, + struct tcp_heuristic_key *key) +{ + u_int32_t hash; + + bzero(key, sizeof(struct tcp_heuristic_key)); + + tcp_cache_hash_src(inp, key); + + hash = net_flowhash(key, sizeof(struct tcp_heuristic_key), + tcp_cache_hash_seed); + + return (hash & (tcp_cache_size - 1)); +} + +static void tcp_heuristic_unlock(struct tcp_heuristics_head *head) +{ + lck_mtx_unlock(&head->thh_mtx); +} + +/* + * Make sure that everything that happens after tcp_getheuristic_with_lock() + * is short enough to justify that you hold
the per-bucket lock!!! + * + * Otherwise, better build another lookup-function that does not hold the + * lock and you copy out the bits and bytes. + * + * That's why we provide the head as a "return"-pointer so that the caller + * can give it back to use for tcp_heuristic_unlock(). + * + * + * ToDo - way too much code-duplication. We should create an interface to handle + * bucketized hashtables with recycling of the oldest element. + */ +static struct tcp_heuristic *tcp_getheuristic_with_lock(struct tcpcb *tp, + int create, struct tcp_heuristics_head **headarg) +{ + struct inpcb *inp = tp->t_inpcb; + struct tcp_heuristic *tpheur = NULL; + struct tcp_heuristics_head *head; + struct tcp_heuristic_key key; + u_int16_t hash; + int i = 0; + + hash = tcp_heuristics_hash(inp, &key); + head = &tcp_heuristics[hash]; + + lck_mtx_lock(&head->thh_mtx); + + /*** First step: Look for the tcp_heur in our bucket ***/ + SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) { + if (memcmp(&tpheur->th_key, &key, sizeof(key)) == 0) + break; + + i++; + } + + /*** Second step: If it's not there, create/recycle it ***/ + if ((tpheur == NULL) && create) { + if (i >= TCP_CACHE_BUCKET_SIZE) { + struct tcp_heuristic *oldest_heur = NULL; + u_int32_t max_age = 0; + + /* Look for the oldest tcp_heur in the bucket */ + SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) { + u_int32_t age = tcp_now - tpheur->th_last_access; + if (age > max_age) { + max_age = age; + oldest_heur = tpheur; + } + } + VERIFY(oldest_heur != NULL); + + tpheur = oldest_heur; + + /* We recycle - set everything to 0 */ + tpheur->th_tfo_cookie_loss = 0; + tpheur->th_tfo_fallback_trials = 0; + tpheur->th_tfo_cookie_backoff = 0; + tpheur->th_tfo_in_backoff = 0; + tpheur->th_tfo_aggressive_fallback = 0; + tpheur->th_tfo_snd_middlebox_supp = 0; + tpheur->th_tfo_rcv_middlebox_supp = 0; + } else { + /* Create a new heuristic and add it to the list */ + tpheur = _MALLOC(sizeof(struct tcp_heuristic), M_TEMP, + M_NOWAIT | M_ZERO); + if
(tpheur == NULL) + goto out_null; + + SLIST_INSERT_HEAD(&head->tcp_heuristics, tpheur, list); + } + + memcpy(&tpheur->th_key, &key, sizeof(key)); + } + + if (tpheur == NULL) + goto out_null; + + /* Update timestamp for garbage collection purposes */ + tpheur->th_last_access = tcp_now; + *headarg = head; + + return (tpheur); + +out_null: + tcp_heuristic_unlock(head); + return (NULL); +} + +void tcp_heuristic_tfo_success(struct tcpcb *tp) +{ + struct tcp_heuristics_head *head; + + struct tcp_heuristic *tpheur = tcp_getheuristic_with_lock(tp, 1, &head); + if (tpheur == NULL) + return; + + tpheur->th_tfo_cookie_loss = 0; + + tcp_heuristic_unlock(head); +} + +void tcp_heuristic_tfo_rcv_good(struct tcpcb *tp) +{ + struct tcp_heuristics_head *head; + + struct tcp_heuristic *tpheur = tcp_getheuristic_with_lock(tp, 1, &head); + if (tpheur == NULL) + return; + + tpheur->th_tfo_rcv_middlebox_supp = 1; + + tcp_heuristic_unlock(head); + + tp->t_tfo_flags |= TFO_F_NO_RCVPROBING; +} + +void tcp_heuristic_tfo_snd_good(struct tcpcb *tp) +{ + struct tcp_heuristics_head *head; + + struct tcp_heuristic *tpheur = tcp_getheuristic_with_lock(tp, 1, &head); + if (tpheur == NULL) + return; + + tpheur->th_tfo_snd_middlebox_supp = 1; + + tcp_heuristic_unlock(head); + + tp->t_tfo_flags |= TFO_F_NO_SNDPROBING; +} + +void tcp_heuristic_tfo_inc_loss(struct tcpcb *tp) +{ + struct tcp_heuristics_head *head; + struct tcp_heuristic *tpheur; + + tpheur = tcp_getheuristic_with_lock(tp, 1, &head); + if (tpheur == NULL) + return; + + /* Potential integer overflow, but tfo_cookie_loss is 32-bits */ + tpheur->th_tfo_cookie_loss++; + + tcp_heuristic_unlock(head); +} + +void tcp_heuristic_tfo_middlebox(struct tcpcb *tp) +{ + struct tcp_heuristics_head *head; + struct tcp_heuristic *tpheur; + + tpheur = tcp_getheuristic_with_lock(tp, 1, &head); + if (tpheur == NULL) + return; + + tpheur->th_tfo_aggressive_fallback = 1; + + tcp_heuristic_unlock(head); +} + +void tcp_heuristic_tfo_reset_loss(struct tcpcb *tp) 
+{ + struct tcp_heuristics_head *head; + struct tcp_heuristic *tpheur; + + /* + * Don't attempt to create it! Keep the heuristics clean if the + * server does not support TFO. This reduces the lookup-cost on + * our side. + */ + tpheur = tcp_getheuristic_with_lock(tp, 0, &head); + if (tpheur == NULL) + return; + + tpheur->th_tfo_cookie_loss = 0; + tpheur->th_tfo_aggressive_fallback = 0; + + tcp_heuristic_unlock(head); +} + +boolean_t tcp_heuristic_do_tfo(struct tcpcb *tp) +{ + struct tcp_heuristics_head *head; + struct tcp_heuristic *tpheur; + + /* Get the tcp-heuristic. */ + tpheur = tcp_getheuristic_with_lock(tp, 0, &head); + if (tpheur == NULL) + return (true); + + if (tpheur->th_tfo_aggressive_fallback) { + /* Aggressive fallback - don't do TFO anymore... :'( */ + tcp_heuristic_unlock(head); + return (false); + } + + if (tpheur->th_tfo_cookie_loss >= TFO_MAX_COOKIE_LOSS && + (tpheur->th_tfo_fallback_trials < tcp_tfo_fallback_min || + TSTMP_GT(tpheur->th_tfo_cookie_backoff, tcp_now))) { + /* + * So, when we are in SYN-loss mode we try to stop using TFO + * for the next 'tcp_tfo_fallback_min' connections. That way, + * we are sure that never more than 1 out of tcp_tfo_fallback_min + * connections will suffer from our nice little middlebox. + * + * After that we first wait for 2 minutes. If we fail again, + * we wait for yet another 60 minutes. + */ + tpheur->th_tfo_fallback_trials++; + if (tpheur->th_tfo_fallback_trials >= tcp_tfo_fallback_min && + !tpheur->th_tfo_in_backoff) { + if (tpheur->th_tfo_cookie_loss == TFO_MAX_COOKIE_LOSS) + /* Backoff for 2 minutes */ + tpheur->th_tfo_cookie_backoff = tcp_now + (60 * 2 * TCP_RETRANSHZ); + else + /* Backoff for 60 minutes */ + tpheur->th_tfo_cookie_backoff = tcp_now + (60 * 60 * TCP_RETRANSHZ); + + tpheur->th_tfo_in_backoff = 1; + } + + tcp_heuristic_unlock(head); + return (false); + } + + /* + * We give it a new shot, set trials back to 0.
This allows to + * start counting again from zero in case we get yet another SYN-loss + */ + tpheur->th_tfo_fallback_trials = 0; + tpheur->th_tfo_in_backoff = 0; + + if (tpheur->th_tfo_rcv_middlebox_supp) + tp->t_tfo_flags |= TFO_F_NO_RCVPROBING; + if (tpheur->th_tfo_snd_middlebox_supp) + tp->t_tfo_flags |= TFO_F_NO_SNDPROBING; + + tcp_heuristic_unlock(head); + + return (true); +} + +static void sysctl_cleartfocache(void) +{ + int i; + + for (i = 0; i < tcp_cache_size; i++) { + struct tcp_cache_head *head = &tcp_cache[i]; + struct tcp_cache *tpcache, *tmp; + struct tcp_heuristics_head *hhead = &tcp_heuristics[i]; + struct tcp_heuristic *tpheur, *htmp; + + lck_mtx_lock(&head->tch_mtx); + SLIST_FOREACH_SAFE(tpcache, &head->tcp_caches, list, tmp) { + SLIST_REMOVE(&head->tcp_caches, tpcache, tcp_cache, list); + _FREE(tpcache, M_TEMP); + } + lck_mtx_unlock(&head->tch_mtx); + + lck_mtx_lock(&hhead->thh_mtx); + SLIST_FOREACH_SAFE(tpheur, &hhead->tcp_heuristics, list, htmp) { + SLIST_REMOVE(&hhead->tcp_heuristics, tpheur, tcp_heuristic, list); + _FREE(tpheur, M_TEMP); + } + lck_mtx_unlock(&hhead->thh_mtx); + } +} + +/* This sysctl is useful for testing purposes only */ +static int tcpcleartfo = 0; + +static int sysctl_cleartfo SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error = 0, val, oldval = tcpcleartfo; + + val = oldval; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) + return (error); + + /* + * The actual value does not matter. If the value is set, it triggers + * the clearing of the TFO cache. If a future implementation does not + * use the route entry to hold the TFO cache, replace the route sysctl. 
+ */ + + if (val != oldval) + sysctl_cleartfocache(); + + tcpcleartfo = val; + + return (error); +} + +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, clear_tfocache, CTLTYPE_INT | CTLFLAG_RW | + CTLFLAG_LOCKED, &tcpcleartfo, 0, &sysctl_cleartfo, "I", + "Toggle to clear the TFO destination based heuristic cache"); + +void tcp_cache_init(void) +{ + uint64_t sane_size_meg = sane_size / 1024 / 1024; + int i; + + /* + * On machines with <100MB of memory this will result in a (full) cache-size + * of 32 entries, thus 32 * 5 * 64bytes = 10KB. (about 0.01 %) + * On machines with > 4GB of memory, we have a cache-size of 1024 entries, + * thus about 327KB. + * + * Side-note: we convert to u_int32_t. If sane_size is more than + * 16000 TB, we lose precision. But, who cares? :) + */ + tcp_cache_size = tcp_cache_roundup2((u_int32_t)(sane_size_meg >> 2)); + if (tcp_cache_size < 32) + tcp_cache_size = 32; + else if (tcp_cache_size > 1024) + tcp_cache_size = 1024; + + tcp_cache = _MALLOC(sizeof(struct tcp_cache_head) * tcp_cache_size, + M_TEMP, M_ZERO); + if (tcp_cache == NULL) + panic("Allocating tcp_cache failed at boot-time!"); + + tcp_cache_mtx_grp_attr = lck_grp_attr_alloc_init(); + tcp_cache_mtx_grp = lck_grp_alloc_init("tcpcache", tcp_cache_mtx_grp_attr); + tcp_cache_mtx_attr = lck_attr_alloc_init(); + + tcp_heuristics = _MALLOC(sizeof(struct tcp_heuristics_head) * tcp_cache_size, + M_TEMP, M_ZERO); + if (tcp_heuristics == NULL) + panic("Allocating tcp_heuristic failed at boot-time!"); + + tcp_heuristic_mtx_grp_attr = lck_grp_attr_alloc_init(); + tcp_heuristic_mtx_grp = lck_grp_alloc_init("tcpheuristic", tcp_heuristic_mtx_grp_attr); + tcp_heuristic_mtx_attr = lck_attr_alloc_init(); + + for (i = 0; i < tcp_cache_size; i++) { + lck_mtx_init(&tcp_cache[i].tch_mtx, tcp_cache_mtx_grp, + tcp_cache_mtx_attr); + SLIST_INIT(&tcp_cache[i].tcp_caches); + + lck_mtx_init(&tcp_heuristics[i].thh_mtx, tcp_heuristic_mtx_grp, + tcp_heuristic_mtx_attr); + SLIST_INIT(&tcp_heuristics[i].tcp_heuristics);
+ } + + tcp_cache_hash_seed = RandomULong(); +} diff --git a/bsd/netinet/tcp_cache.h b/bsd/netinet/tcp_cache.h new file mode 100644 index 000000000..601aec807 --- /dev/null +++ b/bsd/netinet/tcp_cache.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* TCP-cache to store and retrieve TCP-related information */ + +#ifndef _NETINET_TCP_CACHE_H +#define _NETINET_TCP_CACHE_H + +#include +#include + +extern void tcp_cache_set_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t len); +extern int tcp_cache_get_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t *len); +extern unsigned int tcp_cache_get_cookie_len(struct tcpcb *tp); + +extern void tcp_heuristic_tfo_inc_loss(struct tcpcb *tp); +extern void tcp_heuristic_tfo_snd_good(struct tcpcb *tp); +extern void tcp_heuristic_tfo_rcv_good(struct tcpcb *tp); +extern void tcp_heuristic_tfo_middlebox(struct tcpcb *tp); +extern void tcp_heuristic_tfo_reset_loss(struct tcpcb *tp); +extern void tcp_heuristic_tfo_success(struct tcpcb *tp); +extern boolean_t tcp_heuristic_do_tfo(struct tcpcb *tp); + +extern void tcp_cache_init(void); + +#endif /* _NETINET_TCP_CACHE_H */ + diff --git a/bsd/netinet/tcp_cc.c b/bsd/netinet/tcp_cc.c index fe20ea9a9..fdb4f8fbf 100644 --- a/bsd/netinet/tcp_cc.c +++ b/bsd/netinet/tcp_cc.c @@ -51,6 +51,7 @@ struct tcp_cc_debug_state { uint32_t ccd_snd_cwnd; uint32_t ccd_snd_wnd; uint32_t ccd_snd_ssthresh; + uint32_t ccd_pipeack; uint32_t ccd_rttcur; uint32_t ccd_rxtcur; uint32_t ccd_srtt; @@ -93,6 +94,13 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, use_newreno, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_use_newreno, 0, "Use TCP NewReno by default"); +static int tcp_check_cwnd_nonvalidated = 1; +#if (DEBUG || DEVELOPMENT) +SYSCTL_INT(_net_inet_tcp, OID_AUTO, cwnd_nonvalidated, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_check_cwnd_nonvalidated, 0, + "Check if congestion window is non-validated"); +#endif /* (DEBUG || DEVELOPMENT) */ + #define SET_SNDSB_IDEAL_SIZE(sndsb, size) \ sndsb->sb_idealsize = min(max(tcp_sendspace, tp->snd_ssthresh), \ tcp_autosndbuf_max); @@ -224,6 +232,7 @@ tcp_ccdbg_trace(struct tcpcb *tp, struct tcphdr *th, int32_t event) dbg_state.ccd_snd_cwnd = tp->snd_cwnd; dbg_state.ccd_snd_wnd = tp->snd_wnd; 
dbg_state.ccd_snd_ssthresh = tp->snd_ssthresh; + dbg_state.ccd_pipeack = tp->t_pipeack; dbg_state.ccd_rttcur = tp->t_rttcur; dbg_state.ccd_rxtcur = tp->t_rxtcur; dbg_state.ccd_srtt = tp->t_srtt >> TCP_RTT_SHIFT; @@ -403,3 +412,62 @@ tcp_cc_after_idle_stretchack(struct tcpcb *tp) tcp_reset_stretch_ack(tp); } } + +/* + * Detect if the congestion window is non-vlidated according to + * draft-ietf-tcpm-newcwv-07 + */ + +inline uint32_t +tcp_cc_is_cwnd_nonvalidated(struct tcpcb *tp) +{ + if (tp->t_pipeack == 0 || tcp_check_cwnd_nonvalidated == 0) { + tp->t_flagsext &= ~TF_CWND_NONVALIDATED; + return (0); + } + if (tp->t_pipeack >= (tp->snd_cwnd) >> 1) + tp->t_flagsext &= ~TF_CWND_NONVALIDATED; + else + tp->t_flagsext |= TF_CWND_NONVALIDATED; + return (tp->t_flagsext & TF_CWND_NONVALIDATED); +} + +/* + * Adjust congestion window in response to congestion in non-validated + * phase. + */ +inline void +tcp_cc_adjust_nonvalidated_cwnd(struct tcpcb *tp) +{ + tp->t_pipeack = tcp_get_max_pipeack(tp); + tcp_clear_pipeack_state(tp); + tp->snd_cwnd = (max(tp->t_pipeack, tp->t_lossflightsize) >> 1); + tp->snd_cwnd = max(tp->snd_cwnd, TCP_CC_CWND_INIT_BYTES); + tp->snd_cwnd += tp->t_maxseg * tcprexmtthresh; + tp->t_flagsext &= ~TF_CWND_NONVALIDATED; +} + +/* + * Return maximum of all the pipeack samples. Since the number of samples + * TCP_PIPEACK_SAMPLE_COUNT is 3 at this time, it will be simpler to do + * a comparision. We should change ths if the number of samples increases. + */ +inline u_int32_t +tcp_get_max_pipeack(struct tcpcb *tp) +{ + u_int32_t max_pipeack = 0; + max_pipeack = (tp->t_pipeack_sample[0] > tp->t_pipeack_sample[1]) ? + tp->t_pipeack_sample[0] : tp->t_pipeack_sample[1]; + max_pipeack = (tp->t_pipeack_sample[2] > max_pipeack) ? 
+ tp->t_pipeack_sample[2] : max_pipeack; + + return (max_pipeack); +} + +inline void +tcp_clear_pipeack_state(struct tcpcb *tp) +{ + bzero(tp->t_pipeack_sample, sizeof(tp->t_pipeack_sample)); + tp->t_pipeack_ind = 0; + tp->t_lossflightsize = 0; +} diff --git a/bsd/netinet/tcp_cc.h b/bsd/netinet/tcp_cc.h index 6ee5567a6..e9df6b451 100644 --- a/bsd/netinet/tcp_cc.h +++ b/bsd/netinet/tcp_cc.h @@ -132,6 +132,11 @@ extern struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT]; #define CC_ALGO(tp) (tcp_cc_algo_list[tp->tcp_cc_index]) #define TCP_CC_CWND_INIT_BYTES 4380 +/* + * The congestion window will have to be reset after a + * non-validated period -- currently set to 3 minutes + */ +#define TCP_CC_CWND_NONVALIDATED_PERIOD (3 * 60 * TCP_RETRANSHZ) extern void tcp_cc_init(void); extern void tcp_cc_resize_sndbuf(struct tcpcb *tp); @@ -142,6 +147,10 @@ extern void tcp_ccdbg_trace(struct tcpcb *tp, struct tcphdr *th, int32_t event); extern void tcp_cc_allocate_state(struct tcpcb *tp); extern void tcp_cc_after_idle_stretchack(struct tcpcb *tp); +extern uint32_t tcp_cc_is_cwnd_nonvalidated(struct tcpcb *tp); +extern void tcp_cc_adjust_nonvalidated_cwnd(struct tcpcb *tp); +extern u_int32_t tcp_get_max_pipeack(struct tcpcb *tp); +extern void tcp_clear_pipeack_state(struct tcpcb *tp); #endif /* KERNEL */ #endif /* _NETINET_CC_H_ */ diff --git a/bsd/netinet/tcp_cubic.c b/bsd/netinet/tcp_cubic.c index 2eb86f1a9..29a3aed78 100644 --- a/bsd/netinet/tcp_cubic.c +++ b/bsd/netinet/tcp_cubic.c @@ -130,6 +130,11 @@ static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp) tcp_cubic_clear_state(tp); tcp_cc_cwnd_init_or_reset(tp); + tp->t_pipeack = 0; + tcp_clear_pipeack_state(tp); + + /* Start counting bytes for RFC 3465 again */ + tp->t_bytes_acked = 0; /* * slow start threshold could get initialized to a lower value @@ -144,9 +149,6 @@ static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp) /* Initialize cubic last max to be same as ssthresh */ tp->t_ccstate->cub_last_max = 
tp->snd_ssthresh; - - /* If stretch ack was auto-disabled, re-evaluate it */ - tcp_cc_after_idle_stretchack(tp); } /* @@ -273,6 +275,10 @@ tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th) { u_int32_t cubic_target_win, tcp_win, rtt; + /* Do not increase congestion window in non-validated phase */ + if (tcp_cc_is_cwnd_nonvalidated(tp) != 0) + return; + tp->t_bytes_acked += BYTES_ACKED(th, tp); rtt = get_base_rtt(tp); @@ -320,6 +326,10 @@ tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th) static void tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) { + /* Do not increase the congestion window in non-validated phase */ + if (tcp_cc_is_cwnd_nonvalidated(tp) != 0) + return; + if (tp->snd_cwnd >= tp->snd_ssthresh) { /* Congestion avoidance phase */ tcp_cubic_congestion_avd(tp, th); @@ -329,6 +339,7 @@ tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) * by RFC 3465 section 2.3 */ uint32_t acked, abc_lim, incr; + acked = BYTES_ACKED(th, tp); abc_lim = (tcp_do_rfc3465_lim2 && tp->snd_nxt == tp->snd_max) ? @@ -352,6 +363,12 @@ tcp_cubic_pre_fr(struct tcpcb *tp) tp->t_ccstate->cub_tcp_bytes_acked = 0; win = min(tp->snd_cwnd, tp->snd_wnd); + if (tp->t_flagsext & TF_CWND_NONVALIDATED) { + tp->t_lossflightsize = tp->snd_max - tp->snd_una; + win = (max(tp->t_pipeack, tp->t_lossflightsize)) >> 1; + } else { + tp->t_lossflightsize = 0; + } /* * Note the congestion window at which packet loss occurred as * cub_last_max. @@ -427,6 +444,27 @@ tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th) if (SEQ_LEQ(th->th_ack, tp->snd_max)) flight_size = tp->snd_max - th->th_ack; + + if (SACK_ENABLED(tp) && tp->t_lossflightsize > 0) { + u_int32_t total_rxt_size = 0, ncwnd; + /* + * When SACK is enabled, the number of retransmitted bytes + * can be counted more accurately. 
+ */ + total_rxt_size = tcp_rxtseg_total_size(tp); + ncwnd = max(tp->t_pipeack, tp->t_lossflightsize); + if (total_rxt_size <= ncwnd) { + ncwnd = ncwnd - total_rxt_size; + } + + /* + * To avoid sending a large burst at the end of recovery + * set a max limit on ncwnd + */ + ncwnd = min(ncwnd, (tp->t_maxseg << 6)); + ncwnd = ncwnd >> 1; + flight_size = max(ncwnd, flight_size); + } /* * Complete ack. The current window was inflated for fast recovery. * It has to be deflated post recovery. @@ -450,6 +488,16 @@ static void tcp_cubic_after_timeout(struct tcpcb *tp) { VERIFY(tp->t_ccstate != NULL); + + /* + * Avoid adjusting congestion window due to SYN retransmissions. + * If more than one byte (SYN) is outstanding then it is still + * needed to adjust the window. + */ + if (tp->t_state < TCPS_ESTABLISHED && + ((int)(tp->snd_max - tp->snd_una) <= 1)) + return; + if (!IN_FASTRECOVERY(tp)) { tcp_cubic_clear_state(tp); tcp_cubic_pre_fr(tp); @@ -479,8 +527,6 @@ tcp_cubic_switch_cc(struct tcpcb *tp, uint16_t old_cc_index) { #pragma unused(old_cc_index) tcp_cubic_cwnd_init_or_reset(tp); - /* Start counting bytes for RFC 3465 again */ - tp->t_bytes_acked = 0; OSIncrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets); } diff --git a/bsd/netinet/tcp_debug.h b/bsd/netinet/tcp_debug.h index d7a7130a5..89dc86651 100644 --- a/bsd/netinet/tcp_debug.h +++ b/bsd/netinet/tcp_debug.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/bsd/netinet/tcp_input.c b/bsd/netinet/tcp_input.c index ab5242853..8f2a92cc8 100644 --- a/bsd/netinet/tcp_input.c +++ b/bsd/netinet/tcp_input.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -107,6 +107,7 @@ #include #endif #include +#include #include #include #include @@ -144,6 +145,8 @@ struct tcphdr tcp_savetcp; #include #endif /* MPTCP */ +#include + #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETTCP, 0) #define DBG_LAYER_END NETDBG_CODE(DBG_NETTCP, 2) #define DBG_FNC_TCP_INPUT NETDBG_CODE(DBG_NETTCP, (3 << 8)) @@ -154,119 +157,141 @@ tcp_cc tcp_ccgen; struct tcpstat tcpstat; static int log_in_vain = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW | CTLFLAG_LOCKED, - &log_in_vain, 0, "Log all incoming TCP connections"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, + CTLFLAG_RW | CTLFLAG_LOCKED, &log_in_vain, 0, + "Log all incoming TCP connections"); static int blackhole = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW | CTLFLAG_LOCKED, - &blackhole, 0, "Do not send RST when dropping refused connections"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, + CTLFLAG_RW | CTLFLAG_LOCKED, &blackhole, 0, + "Do not send RST when dropping refused connections"); int tcp_delack_enabled = 3; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_delack_enabled, 0, +SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_delack_enabled, 0, "Delay ACK to try and piggyback it onto a data packet"); int tcp_lq_overflow = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_lq_overflow, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_lq_overflow, 0, +SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_lq_overflow, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_lq_overflow, 0, "Listen Queue Overflow"); int tcp_recv_bg = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbg, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_recv_bg, 0, - "Receive background"); + &tcp_recv_bg, 0, "Receive background"); #if TCP_DROP_SYNFIN static int drop_synfin = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW | CTLFLAG_LOCKED, - &drop_synfin, 0, "Drop TCP packets with SYN+FIN set"); 
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, + CTLFLAG_RW | CTLFLAG_LOCKED, &drop_synfin, 0, + "Drop TCP packets with SYN+FIN set"); #endif SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "TCP Segment Reassembly Queue"); static int tcp_reass_overflows = 0; -SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD | CTLFLAG_LOCKED, - &tcp_reass_overflows, 0, +SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, + CTLFLAG_RD | CTLFLAG_LOCKED, &tcp_reass_overflows, 0, "Global number of TCP Segment Reassembly Queue Overflows"); __private_extern__ int slowlink_wsize = 8192; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowlink_wsize, CTLFLAG_RW | CTLFLAG_LOCKED, - &slowlink_wsize, 0, "Maximum advertised window size for slowlink"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowlink_wsize, + CTLFLAG_RW | CTLFLAG_LOCKED, + &slowlink_wsize, 0, "Maximum advertised window size for slowlink"); int maxseg_unacked = 8; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, maxseg_unacked, CTLFLAG_RW | CTLFLAG_LOCKED, - &maxseg_unacked, 0, "Maximum number of outstanding segments left unacked"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, maxseg_unacked, + CTLFLAG_RW | CTLFLAG_LOCKED, &maxseg_unacked, 0, + "Maximum number of outstanding segments left unacked"); -int tcp_do_rfc3465 = 1; +int tcp_do_rfc3465 = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_do_rfc3465, 0, ""); - -int tcp_do_rfc3465_lim2 = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465_lim2, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_do_rfc3465_lim2, 0, "Appropriate bytes counting w/ L=2*SMSS"); + &tcp_do_rfc3465, 0, ""); -int rtt_samples_per_slot = 20; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, rtt_samples_per_slot, CTLFLAG_RW | CTLFLAG_LOCKED, - &rtt_samples_per_slot, 0, "Number of RTT samples stored for rtt history"); +int tcp_do_rfc3465_lim2 = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465_lim2, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_rfc3465_lim2, 0, + "Appropriate bytes counting w/ 
L=2*SMSS"); -int tcp_allowed_iaj = ALLOWED_IAJ; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, recv_allowed_iaj, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_allowed_iaj, 0, "Allowed inter-packet arrival jiter"); +int rtt_samples_per_slot = 20; -int tcp_acc_iaj_high_thresh = ACC_IAJ_HIGH_THRESH; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, acc_iaj_high_thresh, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_acc_iaj_high_thresh, 0, "Used in calculating maximum accumulated IAJ"); +int tcp_allowed_iaj = ALLOWED_IAJ; +int tcp_acc_iaj_high_thresh = ACC_IAJ_HIGH_THRESH; +u_int32_t tcp_autorcvbuf_inc_shift = 3; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, recv_allowed_iaj, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_allowed_iaj, 0, + "Allowed inter-packet arrival jiter"); +#if (DEVELOPMENT || DEBUG) +SYSCTL_INT(_net_inet_tcp, OID_AUTO, acc_iaj_high_thresh, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_acc_iaj_high_thresh, 0, + "Used in calculating maximum accumulated IAJ"); + +SYSCTL_INT(_net_inet_tcp, OID_AUTO, autorcvbufincshift, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_autorcvbuf_inc_shift, 0, + "Shift for increment in receive socket buffer size"); +#endif /* (DEVELOPMENT || DEBUG) */ u_int32_t tcp_do_autorcvbuf = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, doautorcvbuf, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_do_autorcvbuf, 0, "Enable automatic socket buffer tuning"); - -u_int32_t tcp_autorcvbuf_inc_shift = 3; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, autorcvbufincshift, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_autorcvbuf_inc_shift, 0, "Shift for increment in receive socket buffer size"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, doautorcvbuf, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_autorcvbuf, 0, + "Enable automatic socket buffer tuning"); u_int32_t tcp_autorcvbuf_max = 512 * 1024; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, autorcvbufmax, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_autorcvbuf_max, 0, "Maximum receive socket buffer size"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, autorcvbufmax, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_autorcvbuf_max, 0, + "Maximum receive socket 
buffer size"); int sw_lro = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED, &sw_lro, 0, "Used to coalesce TCP packets"); int lrodebug = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, lrodbg, CTLFLAG_RW | CTLFLAG_LOCKED, - &lrodebug, 0, "Used to debug SW LRO"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, lrodbg, + CTLFLAG_RW | CTLFLAG_LOCKED, &lrodebug, 0, + "Used to debug SW LRO"); int lro_start = 4; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro_startcnt, CTLFLAG_RW | CTLFLAG_LOCKED, - &lro_start, 0, "Segments for starting LRO computed as power of 2"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro_startcnt, + CTLFLAG_RW | CTLFLAG_LOCKED, &lro_start, 0, + "Segments for starting LRO computed as power of 2"); extern int tcp_do_autosendbuf; int limited_txmt = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, limited_transmit, CTLFLAG_RW | CTLFLAG_LOCKED, - &limited_txmt, 0, "Enable limited transmit"); - int early_rexmt = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, early_rexmt, CTLFLAG_RW | CTLFLAG_LOCKED, - &early_rexmt, 0, "Enable Early Retransmit"); - int sack_ackadv = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_ackadv, CTLFLAG_RW | CTLFLAG_LOCKED, - &sack_ackadv, 0, "Use SACK with cumulative ack advancement as a dupack"); +int tcp_dsack_enable = 1; + +#if (DEVELOPMENT || DEBUG) +SYSCTL_INT(_net_inet_tcp, OID_AUTO, limited_transmit, + CTLFLAG_RW | CTLFLAG_LOCKED, &limited_txmt, 0, + "Enable limited transmit"); + +SYSCTL_INT(_net_inet_tcp, OID_AUTO, early_rexmt, + CTLFLAG_RW | CTLFLAG_LOCKED, &early_rexmt, 0, + "Enable Early Retransmit"); + +SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_ackadv, + CTLFLAG_RW | CTLFLAG_LOCKED, &sack_ackadv, 0, + "Use SACK with cumulative ack advancement as a dupack"); + +SYSCTL_INT(_net_inet_tcp, OID_AUTO, dsack_enable, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_dsack_enable, 0, + "use DSACK TCP option to report duplicate segments"); +#endif /* (DEVELOPMENT || DEBUG) */ #if CONFIG_IFEF_NOWINDOWSCALE int tcp_obey_ifef_nowindowscale = 0; 
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, obey_ifef_nowindowscale, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_obey_ifef_nowindowscale, 0, ""); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, obey_ifef_nowindowscale, + CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_obey_ifef_nowindowscale, 0, ""); #endif extern int tcp_TCPTV_MIN; extern int tcp_acc_iaj_high; extern int tcp_acc_iaj_react_limit; -extern struct zone *tcp_reass_zone; int tcprexmtthresh = 3; @@ -279,19 +304,20 @@ struct inpcbhead tcb; struct inpcbinfo tcbinfo; static void tcp_dooptions(struct tcpcb *, u_char *, int, struct tcphdr *, - struct tcpopt *, unsigned int); -static void tcp_pulloutofband(struct socket *, - struct tcphdr *, struct mbuf *, int); + struct tcpopt *); +static void tcp_finalize_options(struct tcpcb *, struct tcpopt *, unsigned int); +static void tcp_pulloutofband(struct socket *, + struct tcphdr *, struct mbuf *, int); static int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *, struct ifnet *); -static void tcp_xmit_timer(struct tcpcb *, int, u_int32_t, tcp_seq); +static void tcp_xmit_timer(struct tcpcb *, int, u_int32_t, tcp_seq); static inline unsigned int tcp_maxmtu(struct rtentry *); static inline int tcp_stretch_ack_enable(struct tcpcb *tp); static inline void tcp_adaptive_rwtimo_check(struct tcpcb *, int); #if TRAFFIC_MGT static inline void update_iaj_state(struct tcpcb *tp, uint32_t tlen, - int reset_size); + int reset_size); void compute_iaj(struct tcpcb *tp, int nlropkts, int lro_delay_factor); static void compute_iaj_meat(struct tcpcb *tp, uint32_t cur_iaj); #endif /* TRAFFIC_MGT */ @@ -301,19 +327,19 @@ static inline unsigned int tcp_maxmtu6(struct rtentry *); #endif static void tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sb, - struct tcpopt *to, u_int32_t tlen); + struct tcpopt *to, u_int32_t tlen); void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb); static void tcp_sbsnd_trim(struct sockbuf *sbsnd); static inline void tcp_sbrcv_tstmp_check(struct tcpcb *tp); static inline void 
tcp_sbrcv_reserve(struct tcpcb *tp, struct sockbuf *sb, - u_int32_t newsize, u_int32_t idealsize); + u_int32_t newsize, u_int32_t idealsize); static void tcp_bad_rexmt_restore_state(struct tcpcb *tp, struct tcphdr *th); static void tcp_compute_rtt(struct tcpcb *tp, struct tcpopt *to, - struct tcphdr *th); + struct tcphdr *th); static void tcp_early_rexmt_check(struct tcpcb *tp, struct tcphdr *th); static void tcp_bad_rexmt_check(struct tcpcb *tp, struct tcphdr *th, - struct tcpopt *to); + struct tcpopt *to); /* * Constants used for resizing receive socket buffer * when timestamps are not supported @@ -328,7 +354,7 @@ static void tcp_bad_rexmt_check(struct tcpcb *tp, struct tcphdr *th, #define TCP_EARLY_REXMT_WIN (60 * TCP_RETRANSHZ) /* 60 seconds */ #define TCP_EARLY_REXMT_LIMIT 10 -extern void ipfwsyslog( int level, const char *format,...); +extern void ipfwsyslog( int level, const char *format,...); extern int fw_verbose; #if IPFIREWALL @@ -357,7 +383,6 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rcvsspktcnt, CTLFLAG_RW | CTLFLAG_LOCKED, static int tcp_dropdropablreq(struct socket *head); static void tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th); - static void update_base_rtt(struct tcpcb *tp, uint32_t rtt); void tcp_set_background_cc(struct socket *so); void tcp_set_foreground_cc(struct socket *so); @@ -570,6 +595,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m, boolean_t cell = IFNET_IS_CELLULAR(ifp); boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp)); boolean_t wired = (!wifi && IFNET_IS_WIRED(ifp)); + boolean_t dsack_set = FALSE; /* * Call with th==0 after become established to @@ -632,10 +658,25 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m, * segment. If it provides all of our data, drop us. 
*/ if (p != NULL) { - register int i; + int i; /* conversion to int (in i) handles seq wraparound */ i = p->tqe_th->th_seq + p->tqe_len - th->th_seq; if (i > 0) { + if (TCP_DSACK_ENABLED(tp) && i > 1) { + /* + * Note duplicate data sequnce numbers + * to report in DSACK option + */ + tp->t_dsack_lseq = th->th_seq; + tp->t_dsack_rseq = th->th_seq + + min(i, *tlenp); + + /* + * Report only the first part of partial/ + * non-contiguous duplicate sequence space + */ + dsack_set = TRUE; + } if (i >= *tlenp) { tcpstat.tcps_rcvduppack++; tcpstat.tcps_rcvdupbyte += *tlenp; @@ -681,9 +722,31 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m, * if they are completely covered, dequeue them. */ while (q) { - register int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq; + int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq; if (i <= 0) break; + + /* + * Report only the first part of partial/non-contiguous + * duplicate segment in dsack option. The variable + * dsack_set will be true if a previous entry has some of + * the duplicate sequence space. + */ + if (TCP_DSACK_ENABLED(tp) && i > 1 && !dsack_set) { + if (tp->t_dsack_lseq == 0) { + tp->t_dsack_lseq = q->tqe_th->th_seq; + tp->t_dsack_rseq = + tp->t_dsack_lseq + min(i, q->tqe_len); + } else { + /* + * this segment overlaps data in multple + * entries in the reassembly queue, move + * the right sequence number further. + */ + tp->t_dsack_rseq = + tp->t_dsack_rseq + min(i, q->tqe_len); + } + } if (i < q->tqe_len) { q->tqe_th->th_seq += i; q->tqe_len -= i; @@ -828,7 +891,8 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m, } /* - * Reduce congestion window. + * Reduce congestion window -- used when ECN is seen or when a tail loss + * probe recovers the last packet. 
*/ static void tcp_reduce_congestion_window( @@ -842,25 +906,43 @@ tcp_reduce_congestion_window( if (CC_ALGO(tp)->pre_fr != NULL) CC_ALGO(tp)->pre_fr(tp); ENTER_FASTRECOVERY(tp); - tp->snd_recover = tp->snd_max; + if (tp->t_flags & TF_SENTFIN) + tp->snd_recover = tp->snd_max - 1; + else + tp->snd_recover = tp->snd_max; tp->t_timer[TCPT_REXMT] = 0; tp->t_timer[TCPT_PTO] = 0; tp->t_rtttime = 0; - tp->snd_cwnd = tp->snd_ssthresh + - tp->t_maxseg * tcprexmtthresh; + if (tp->t_flagsext & TF_CWND_NONVALIDATED) { + tcp_cc_adjust_nonvalidated_cwnd(tp); + } else { + tp->snd_cwnd = tp->snd_ssthresh + + tp->t_maxseg * tcprexmtthresh; + } } /* - * The application wants to get an event if there - * is a stall during read. Set the initial keepalive - * timeout to be equal to twice RTO. + * This function is called upon reception of data on a socket. It's purpose is + * to handle the adaptive keepalive timers that monitor whether the connection + * is making progress. First the adaptive read-timer, second the TFO probe-timer. + * + * The application wants to get an event if there is a stall during read. + * Set the initial keepalive timeout to be equal to twice RTO. + * + * If the outgoing interface is in marginal conditions, we need to + * enable read probes for that too. 
*/ static inline void -tcp_adaptive_rwtimo_check(struct tcpcb *tp, int tlen) +tcp_adaptive_rwtimo_check(struct tcpcb *tp, int tlen) { - if (tp->t_adaptive_rtimo > 0 && tlen > 0 && - tp->t_state == TCPS_ESTABLISHED) { - tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, + struct ifnet *outifp = tp->t_inpcb->inp_last_outifp; + + if ((tp->t_adaptive_rtimo > 0 || + (outifp != NULL && + (outifp->if_eflags & IFEF_PROBE_CONNECTIVITY))) + && tlen > 0 && + tp->t_state == TCPS_ESTABLISHED) { + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, (TCP_REXMTVAL(tp) << 1)); tp->t_flagsext |= TF_DETECT_READSTALL; tp->t_rtimo_probes = 0; @@ -982,7 +1064,9 @@ tcp_sbrcv_reserve(struct tcpcb *tp, struct sockbuf *sbrcv, */ static void tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv, - struct tcpopt *to, u_int32_t pktlen) { + struct tcpopt *to, u_int32_t pktlen) +{ + struct socket *so = sbrcv->sb_so; /* * Do not grow the receive socket buffer if @@ -1000,6 +1084,7 @@ tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv, tcp_cansbgrow(sbrcv) == 0 || sbrcv->sb_hiwat >= tcp_autorcvbuf_max || (tp->t_flagsext & TF_RECV_THROTTLE) || + (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) || !LIST_EMPTY(&tp->t_segq)) { /* Can not resize the socket buffer, just return */ goto out; @@ -1247,8 +1332,7 @@ tcp_detect_bad_rexmt(struct tcpcb *tp, struct tcphdr *th, bad_rexmt_win = (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); /* If the ack has ECN CE bit, then cwnd has to be adjusted */ - if ((tp->ecn_flags & (TE_ECN_ON)) == (TE_ECN_ON) - && (th->th_flags & TH_ECE)) + if (TCP_ECN_ENABLED(tp) && (th->th_flags & TH_ECE)) return (0); if (TSTMP_SUPPORTED(tp)) { if (rxtime > 0 && (to->to_flags & TOF_TS) @@ -1297,6 +1381,9 @@ tcp_bad_rexmt_restore_state(struct tcpcb *tp, struct tcphdr *th) tp->snd_ssthresh = tp->snd_ssthresh_prev; if (tp->t_flags & TF_WASFRECOVERY) ENTER_FASTRECOVERY(tp); + + /* Do not use the loss flight size in this case */ + tp->t_lossflightsize = 0; } tp->snd_cwnd = max(tp->snd_cwnd, 
TCP_CC_CWND_INIT_BYTES); tp->snd_recover = tp->snd_recover_prev; @@ -1340,6 +1427,19 @@ tcp_bad_rexmt_check(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to) && tp->t_tlphighrxt > 0 && SEQ_GEQ(th->th_ack, tp->t_tlphighrxt) && !tcp_detect_bad_rexmt(tp, th, to, tp->t_tlpstart)) { + /* + * check DSACK information also to make sure that + * the TLP was indeed needed + */ + if (tcp_rxtseg_dsack_for_tlp(tp)) { + /* + * received a DSACK to indicate that TLP was + * not needed + */ + tcp_rxtseg_clean(tp); + goto out; + } + /* * The tail loss probe recovered the last packet and * we need to adjust the congestion window to take @@ -1351,8 +1451,17 @@ tcp_bad_rexmt_check(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to) EXIT_FASTRECOVERY(tp); } tcp_ccdbg_trace(tp, th, TCP_CC_TLP_RECOVER_LASTPACKET); + } else if (tcp_rxtseg_detect_bad_rexmt(tp, th->th_ack)) { + /* + * All of the retransmitted segments were duplicated, this + * can be an indication of bad fast retransmit. + */ + tcpstat.tcps_dsack_badrexmt++; + tcp_bad_rexmt_restore_state(tp, th); + tcp_ccdbg_trace(tp, th, TCP_CC_DSACK_BAD_REXMT); + tcp_rxtseg_clean(tp); } - +out: tp->t_flagsext &= ~(TF_SENT_TLPROBE); tp->t_tlphighrxt = 0; tp->t_tlpstart = 0; @@ -1482,6 +1591,135 @@ tcp_early_rexmt_check (struct tcpcb *tp, struct tcphdr *th) tp->t_tlpstart = 0; } +static boolean_t +tcp_tfo_syn(tp, to) + struct tcpcb *tp; + struct tcpopt *to; +{ + u_char out[CCAES_BLOCK_SIZE]; + unsigned char len; + + if (!(to->to_flags & (TOF_TFO | TOF_TFOREQ)) || + !(tcp_fastopen & TCP_FASTOPEN_SERVER)) + return (FALSE); + + if ((to->to_flags & TOF_TFOREQ)) { + tp->t_tfo_flags |= TFO_F_OFFER_COOKIE; + + tp->t_tfo_stats |= TFO_S_COOKIEREQ_RECV; + tcpstat.tcps_tfo_cookie_req_rcv++; + return (FALSE); + } + + /* Ok, then it must be an offered cookie. We need to check that ... 
*/ + tcp_tfo_gen_cookie(tp->t_inpcb, out, sizeof(out)); + + len = *to->to_tfo - TCPOLEN_FASTOPEN_REQ; + to->to_tfo++; + if (memcmp(out, to->to_tfo, len)) { + /* Cookies are different! Let's return and offer a new cookie */ + tp->t_tfo_flags |= TFO_F_OFFER_COOKIE; + + tp->t_tfo_stats |= TFO_S_COOKIE_INVALID; + tcpstat.tcps_tfo_cookie_invalid++; + return (FALSE); + } + + if (OSIncrementAtomic(&tcp_tfo_halfcnt) >= tcp_tfo_backlog) { + /* Need to decrement again as we just increased it... */ + OSDecrementAtomic(&tcp_tfo_halfcnt); + return (FALSE); + } + + tp->t_tfo_flags |= TFO_F_COOKIE_VALID; + + tp->t_tfo_stats |= TFO_S_SYNDATA_RCV; + tcpstat.tcps_tfo_syn_data_rcv++; + + return (TRUE); +} + +static void +tcp_tfo_synack(tp, to) + struct tcpcb *tp; + struct tcpopt *to; +{ + if (to->to_flags & TOF_TFO) { + unsigned char len = *to->to_tfo - TCPOLEN_FASTOPEN_REQ; + + /* + * If this happens, things have gone terribly wrong. len should + * have been check in tcp_dooptions. + */ + VERIFY(len <= TFO_COOKIE_LEN_MAX); + + to->to_tfo++; + + tcp_cache_set_cookie(tp, to->to_tfo, len); + tcp_heuristic_tfo_success(tp); + + tp->t_tfo_stats |= TFO_S_COOKIE_RCV; + tcpstat.tcps_tfo_cookie_rcv++; + } else { + /* + * Thus, no cookie in the response, but we either asked for one + * or sent SYN+DATA. Now, we need to check whether we had to + * rexmit the SYN. If that's the case, it's better to start + * backing of TFO-cookie requests. + */ + if (tp->t_tfo_flags & TFO_F_SYN_LOSS) + tcp_heuristic_tfo_inc_loss(tp); + else + tcp_heuristic_tfo_reset_loss(tp); + } +} + +static void +tcp_tfo_rcv_probe(struct tcpcb *tp, int tlen) +{ + if (tlen == 0) { + tp->t_tfo_probe_state = TFO_PROBE_PROBING; + + /* + * We send the probe out rather quickly (after one RTO). It does not + * really hurt that much, it's only one additional segment on the wire. + */ + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, (TCP_REXMTVAL(tp))); + } else { + /* If SYN/ACK+data, don't probe. We got the data! 
*/ + tcp_heuristic_tfo_rcv_good(tp); + } +} + +static void +tcp_tfo_rcv_data(struct tcpcb *tp) +{ + /* Transition from PROBING to NONE as data has been received */ + if (tp->t_tfo_probe_state >= TFO_PROBE_PROBING) { + tp->t_tfo_probe_state = TFO_PROBE_NONE; + + /* Data has been received - we are good to go! */ + tcp_heuristic_tfo_rcv_good(tp); + } +} + +static void +tcp_tfo_rcv_ack(struct tcpcb *tp, struct tcphdr *th) +{ + if (tp->t_tfo_probe_state == TFO_PROBE_PROBING && + tp->t_tfo_probes > 0) { + if (th->th_seq == tp->rcv_nxt) { + /* No hole, so stop probing */ + tp->t_tfo_probe_state = TFO_PROBE_NONE; + } else if (SEQ_GT(th->th_seq, tp->rcv_nxt)) { + /* There is a hole! Wait a bit for data... */ + tp->t_tfo_probe_state = TFO_PROBE_WAIT_DATA; + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, + TCP_REXMTVAL(tp)); + } + } +} + void tcp_input(m, off0) struct mbuf *m; @@ -1506,11 +1744,13 @@ tcp_input(m, off0) int iss = 0, nosock = 0; u_int32_t tiwin, sack_bytes_acked = 0; struct tcpopt to; /* options in this segment */ - struct sockaddr_in *next_hop = NULL; #if TCPDEBUG short ostate = 0; #endif +#if IPFIREWALL + struct sockaddr_in *next_hop = NULL; struct m_tag *fwd_tag; +#endif /* IPFIREWALL */ u_char ip_ecn = IPTOS_ECN_NOTECT; unsigned int ifscope; uint8_t isconnected, isdisconnected; @@ -1520,11 +1760,11 @@ tcp_input(m, off0) int turnoff_lro = 0, win; #if MPTCP struct mptcb *mp_tp = NULL; - uint16_t mptcp_csum = 0; #endif /* MPTCP */ boolean_t cell = IFNET_IS_CELLULAR(ifp); boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp)); boolean_t wired = (!wifi && IFNET_IS_WIRED(ifp)); + boolean_t recvd_dsack = FALSE; struct tcp_respond_args tra; #define TCP_INC_VAR(stat, npkts) do { \ @@ -1532,7 +1772,7 @@ tcp_input(m, off0) } while (0) TCP_INC_VAR(tcpstat.tcps_rcvtotal, nlropkts); - +#if IPFIREWALL /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. 
*/ if (!SLIST_EMPTY(&m->m_pkthdr.tags)) { fwd_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, @@ -1547,6 +1787,7 @@ tcp_input(m, off0) next_hop = ipfwd_tag->next_hop; m_tag_delete(m, fwd_tag); } +#endif /* IPFIREWALL */ #if INET6 struct ip6_hdr *ip6 = NULL; @@ -1803,24 +2044,6 @@ tcp_input(m, off0) */ if (inp != NULL && (inp->inp_flags & INP_BOUND_IF)) ifscope = inp->inp_boundifp->if_index; -#if NECP - if (inp != NULL && ( -#if INET6 - isipv6 ? !necp_socket_is_allowed_to_send_recv_v6(inp, - th->th_dport, th->th_sport, &ip6->ip6_dst, - &ip6->ip6_src, ifp, NULL) : -#endif - !necp_socket_is_allowed_to_send_recv_v4(inp, th->th_dport, - th->th_sport, &ip->ip_dst, &ip->ip_src, - ifp, NULL))) { - if (in_pcb_checkstate(inp, WNT_RELEASE, 0) - == WNT_STOPUSING) { - inp = NULL; /* pretend we didn't find it */ - } - IF_TCP_STATINC(ifp, badformatipsec); - goto dropnosock; - } -#endif /* NECP */ /* * If the state is CLOSED (i.e., TCB does not exist) then @@ -1917,10 +2140,35 @@ tcp_input(m, off0) tcp_lock(so, 1, 0); if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { tcp_unlock(so, 1, (void *)2); - inp = NULL; // pretend we didn't find it + inp = NULL; // pretend we didn't find it goto dropnosock; } +#if NECP +#if INET6 + if (isipv6) { + if (!necp_socket_is_allowed_to_send_recv_v6(inp, th->th_dport, + th->th_sport, + &ip6->ip6_dst, + &ip6->ip6_src, + ifp, NULL, NULL)) { + IF_TCP_STATINC(ifp, badformatipsec); + goto drop; + } + } else +#endif + { + if (!necp_socket_is_allowed_to_send_recv_v4(inp, th->th_dport, + th->th_sport, + &ip->ip_dst, + &ip->ip_src, + ifp, NULL, NULL)) { + IF_TCP_STATINC(ifp, badformatipsec); + goto drop; + } + } +#endif /* NECP */ + tp = intotcpcb(inp); if (tp == 0) { rstreason = BANDLIM_RST_CLOSEDPORT; @@ -2210,7 +2458,10 @@ tcp_input(m, off0) M_NOWAIT); } else #endif /* INET6 */ + { inp->inp_options = ip_srcroute(); + inp->inp_ip_tos = oinp->inp_ip_tos; + } tcp_lock(oso, 0, 0); #if IPSEC /* copy old policy into new socket's */ @@ -2229,7 +2480,7 
@@ tcp_input(m, off0) struct tcpcb *, tp, int32_t, TCPS_LISTEN); tp->t_state = TCPS_LISTEN; tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT|TF_NODELAY); - tp->t_flagsext |= (tp0->t_flagsext & (TF_RXTFINDROP|TF_NOTIMEWAIT)); + tp->t_flagsext |= (tp0->t_flagsext & (TF_RXTFINDROP|TF_NOTIMEWAIT|TF_FASTOPEN)); tp->t_keepinit = tp0->t_keepinit; tp->t_keepcnt = tp0->t_keepcnt; tp->t_keepintvl = tp0->t_keepintvl; @@ -2292,9 +2543,12 @@ tcp_input(m, off0) * TE_SENDECE will be cleared when we receive a packet with TH_CWR set. */ if (ip_ecn == IPTOS_ECN_CE && tp->t_state == TCPS_ESTABLISHED && - ((tp->ecn_flags & (TE_ECN_ON)) == (TE_ECN_ON)) && tlen > 0 && - SEQ_GEQ(th->th_seq, tp->last_ack_sent) && - SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) { + TCP_ECN_ENABLED(tp) && tlen > 0 && + SEQ_GEQ(th->th_seq, tp->last_ack_sent) && + SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) { + tcpstat.tcps_ecn_recv_ce++; + /* Mark this connection as it received CE from network */ + tp->ecn_flags |= TE_RECV_ECN_CE; tp->ecn_flags |= TE_SENDECE; } @@ -2302,7 +2556,7 @@ tcp_input(m, off0) * Clear TE_SENDECE if TH_CWR is set. This is harmless, so we don't * bother doing extensive checks for state and whatnot. */ - if ((thflags & TH_CWR) == TH_CWR) { + if (thflags & TH_CWR) { tp->ecn_flags &= ~TE_SENDECE; } @@ -2314,8 +2568,10 @@ tcp_input(m, off0) */ if (tp->t_state == TCPS_ESTABLISHED && (tp->ecn_flags & TE_SETUPSENT) - && (ip_ecn == IPTOS_ECN_CE || (thflags & TH_CWR))) + && (ip_ecn == IPTOS_ECN_CE || (thflags & TH_CWR))) { tcp_reset_stretch_ack(tp); + CLEAR_IAJ_STATE(tp); + } /* * Try to determine if we are receiving a packet after a long time. @@ -2344,48 +2600,36 @@ tcp_input(m, off0) * else do it below (after getting remote address). 
*/ if (tp->t_state != TCPS_LISTEN && optp) { - tcp_dooptions(tp, optp, optlen, th, &to, ifscope); + tcp_dooptions(tp, optp, optlen, th, &to); #if MPTCP - mptcp_csum = mptcp_input_csum(tp, m, drop_hdrlen); - if (mptcp_csum) { - tp->t_mpflags |= TMPF_SND_MPFAIL; - tp->t_mpflags &= ~TMPF_EMBED_DSN; - mptcp_notify_mpfail(so); - m_freem(m); - tcpstat.tcps_mp_badcsum++; + if (mptcp_input_preproc(tp, m, drop_hdrlen) != 0) { + tp->t_flags |= TF_ACKNOW; + (void) tcp_output(tp); tcp_check_timer_state(tp); tcp_unlock(so, 1, 0); KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0); - return; + return; } - mptcp_insert_rmap(tp, m); #endif /* MPTCP */ } if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { - if (to.to_flags & TOF_TS) { - tp->t_flags |= TF_RCVD_TSTMP; - tp->ts_recent = to.to_tsval; - tp->ts_recent_age = tcp_now; - } - if (to.to_flags & TOF_MSS) - tcp_mss(tp, to.to_mss, ifscope); - if (SACK_ENABLED(tp)) { - if (!(to.to_flags & TOF_SACK)) - tp->t_flagsext &= ~(TF_SACK_ENABLE); - else - tp->t_flags |= TF_SACK_PERMIT; - } + if (!(thflags & TH_ACK) || + (SEQ_GT(th->th_ack, tp->iss) && + SEQ_LEQ(th->th_ack, tp->snd_max))) + tcp_finalize_options(tp, &to, ifscope); } #if TRAFFIC_MGT - /* Compute inter-packet arrival jitter. According to RFC 3550, inter-packet - * arrival jitter is defined as the difference in packet spacing at the - * receiver compared to the sender for a pair of packets. When two packets - * of maximum segment size come one after the other with consecutive - * sequence numbers, we consider them as packets sent together at the - * sender and use them as a pair to compute inter-packet arrival jitter. - * This metric indicates the delay induced by the network components due + /* + * Compute inter-packet arrival jitter. According to RFC 3550, + * inter-packet arrival jitter is defined as the difference in + * packet spacing at the receiver compared to the sender for a + * pair of packets. 
When two packets of maximum segment size come + * one after the other with consecutive sequence numbers, we + * consider them as packets sent together at the sender and use + * them as a pair to compute inter-packet arrival jitter. This + * metric indicates the delay induced by the network components due * to queuing in edge/access routers. */ if (tp->t_state == TCPS_ESTABLISHED && @@ -2405,15 +2649,17 @@ tcp_input(m, off0) } if ( tp->iaj_size == 0 || seg_size > tp->iaj_size || (seg_size == tp->iaj_size && tp->iaj_rcv_ts == 0)) { - /* State related to inter-arrival jitter is uninitialized - * or we are trying to find a good first packet to start - * computing the metric + /* + * State related to inter-arrival jitter is + * uninitialized or we are trying to find a good + * first packet to start computing the metric */ update_iaj_state(tp, seg_size, 0); } else { if (seg_size == tp->iaj_size) { - /* Compute inter-arrival jitter taking this packet - * as the second packet + /* + * Compute inter-arrival jitter taking + * this packet as the second packet */ if (pktf_sw_lro_pkt) compute_iaj(tp, nlropkts, @@ -2422,12 +2668,15 @@ tcp_input(m, off0) compute_iaj(tp, 1, 0); } if (seg_size < tp->iaj_size) { - /* There is a smaller packet in the stream. - * Some times the maximum size supported on a path can - * change if there is a new link with smaller MTU. - * The receiver will not know about this change. - * If there are too many packets smaller than iaj_size, - * we try to learn the iaj_size again. + /* + * There is a smaller packet in the stream. + * Some times the maximum size supported + * on a path can change if there is a new + * link with smaller MTU. The receiver will + * not know about this change. If there + * are too many packets smaller than + * iaj_size, we try to learn the iaj_size + * again. 
*/ TCP_INC_VAR(tp->iaj_small_pkt, nlropkts); if (tp->iaj_small_pkt > RESET_IAJ_SIZE_THRESH) { @@ -2506,12 +2755,15 @@ tcp_input(m, off0) /* Recalculate the RTT */ tcp_compute_rtt(tp, &to, th); + VERIFY(SEQ_GEQ(th->th_ack, tp->snd_una)); acked = BYTES_ACKED(th, tp); tcpstat.tcps_rcvackpack++; tcpstat.tcps_rcvackbyte += acked; - /* Handle an ack that is in sequence during congestion - * avoidance phase. The calculations in this function + /* + * Handle an ack that is in sequence during + * congestion avoidance phase. The + * calculations in this function * assume that snd_una is not updated yet. */ if (CC_ALGO(tp)->congestion_avd != NULL) @@ -2559,6 +2811,10 @@ tcp_input(m, off0) OFFSET_FROM_START(tp, tp->t_rxtcur); } + if (!SLIST_EMPTY(&tp->t_rxt_segments) && + !TCP_DSACK_SEQ_IN_WINDOW(tp, + tp->t_dsack_lastuna, tp->snd_una)) + tcp_rxtseg_clean(tp); if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 && tp->t_bwmeas != NULL) @@ -2568,6 +2824,8 @@ tcp_input(m, off0) (void) tcp_output(tp); } + tcp_tfo_rcv_ack(tp, th); + tcp_check_timer_state(tp); tcp_unlock(so, 1, 0); KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0); @@ -2687,6 +2945,9 @@ tcp_input(m, off0) tcp_adaptive_rwtimo_check(tp, tlen); + if (tlen > 0) + tcp_tfo_rcv_data(tp); + tcp_check_timer_state(tp); tcp_unlock(so, 1, 0); KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0); @@ -2769,7 +3030,9 @@ tcp_input(m, off0) } else #endif { - lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert( + &((struct inpcb *)so->so_pcb)->inpcb_mtx, + LCK_MTX_ASSERT_OWNED); MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_NOWAIT); if (sin == NULL) @@ -2791,15 +3054,12 @@ tcp_input(m, off0) FREE(sin, M_SONAME); } - tcp_dooptions(tp, optp, optlen, th, &to, ifscope); + tcp_dooptions(tp, optp, optlen, th, &to); + tcp_finalize_options(tp, &to, ifscope); + + if (tfo_enabled(tp) && tcp_tfo_syn(tp, &to)) + isconnected = TRUE; - if (SACK_ENABLED(tp)) { - if (!(to.to_flags & 
TOF_SACK)) - tp->t_flagsext &= ~(TF_SACK_ENABLE); - else - tp->t_flags |= TF_SACK_PERMIT; - } - if (iss) tp->iss = iss; else { @@ -2855,8 +3115,8 @@ tcp_input(m, off0) } /* - * If the state is SYN_RECEIVED: - * if seg contains an ACK, but not for our SYN/ACK, send a RST. + * If the state is SYN_RECEIVED and the seg contains an ACK, + * but not for our SYN/ACK, send a RST. */ case TCPS_SYN_RECEIVED: if ((thflags & TH_ACK) && @@ -2874,6 +3134,7 @@ tcp_input(m, off0) * lower if we assume scaling and the other end does not. */ if ((thflags & TH_SYN) && + (tp->irs == th->th_seq) && !(to.to_flags & TOF_SCALE)) tp->t_flags &= ~TF_RCVD_SCALE; break; @@ -2928,9 +3189,12 @@ tcp_input(m, off0) if ((thflags & (TH_ECE | TH_CWR)) == (TH_ECE)) { /* ECN-setup SYN-ACK */ tp->ecn_flags |= TE_SETUPRECEIVED; - tcpstat.tcps_ecn_setup++; - } - else { + if (TCP_ECN_ENABLED(tp)) + tcpstat.tcps_ecn_client_success++; + } else { + if (tp->ecn_flags & TE_SETUPSENT && + tp->t_rxtshift == 0) + tcpstat.tcps_ecn_not_supported++; /* non-ECN-setup SYN-ACK */ tp->ecn_flags &= ~TE_SENDIPECT; } @@ -2941,13 +3205,25 @@ tcp_input(m, off0) /* XXXMAC: SOCK_UNLOCK(so); */ #endif /* Do window scaling on this connection? */ - if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == - (TF_RCVD_SCALE|TF_REQ_SCALE)) { + if (TCP_WINDOW_SCALE_ENABLED(tp)) { tp->snd_scale = tp->requested_s_scale; tp->rcv_scale = tp->request_r_scale; } + tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN << tp->rcv_scale); tp->snd_una++; /* SYN is acked */ + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + + /* + * We have sent more in the SYN than what is being + * acked. (e.g., TFO) + * We should restart the sending from what the receiver + * has acknowledged immediately. + */ + if (SEQ_GT(tp->snd_nxt, th->th_ack)) + tp->snd_nxt = th->th_ack; + /* * If there's data, delay ACK; if there's also a FIN * ACKNOW will be turned on later. 
@@ -2971,19 +3247,24 @@ tcp_input(m, off0) tp->t_starttime = tcp_now; tcp_sbrcv_tstmp_check(tp); if (tp->t_flags & TF_NEEDFIN) { - DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, int32_t, TCPS_FIN_WAIT_1); + DTRACE_TCP4(state__change, void, NULL, + struct inpcb *, inp, + struct tcpcb *, tp, int32_t, + TCPS_FIN_WAIT_1); tp->t_state = TCPS_FIN_WAIT_1; tp->t_flags &= ~TF_NEEDFIN; thflags &= ~TH_SYN; } else { - DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, int32_t, TCPS_ESTABLISHED); + DTRACE_TCP4(state__change, void, NULL, + struct inpcb *, inp, struct tcpcb *, + tp, int32_t, TCPS_ESTABLISHED); tp->t_state = TCPS_ESTABLISHED; - tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, - TCP_CONN_KEEPIDLE(tp)); + tp->t_timer[TCPT_KEEP] = + OFFSET_FROM_START(tp, + TCP_CONN_KEEPIDLE(tp)); if (nstat_collect) - nstat_route_connect_success(tp->t_inpcb->inp_route.ro_rt); + nstat_route_connect_success( + tp->t_inpcb->inp_route.ro_rt); } #if MPTCP /* @@ -3001,6 +3282,19 @@ tcp_input(m, off0) } else #endif /* MPTCP */ isconnected = TRUE; + + if (tp->t_tfo_flags & (TFO_F_COOKIE_REQ | TFO_F_COOKIE_SENT)) { + tcp_tfo_synack(tp, &to); + + if ((tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) && + SEQ_LT(tp->snd_una, th->th_ack)) { + tp->t_tfo_stats |= TFO_S_SYN_DATA_ACKED; + tcpstat.tcps_tfo_syn_data_acked++; + + if (!(tp->t_tfo_flags & TFO_F_NO_RCVPROBING)) + tcp_tfo_rcv_probe(tp, tlen); + } + } } else { /* * Received initial SYN in SYN-SENT[*] state => simul- @@ -3016,6 +3310,12 @@ tcp_input(m, off0) struct tcpcb *, tp, int32_t, TCPS_SYN_RECEIVED); tp->t_state = TCPS_SYN_RECEIVED; + /* + * During simultaneous open, TFO should not be used. + * So, we disable it here, to prevent that data gets + * sent on the SYN/ACK. 
+ */ + tcp_disable_tfo(tp); } trimthenstep6: @@ -3062,8 +3362,8 @@ tcp_input(m, off0) * or recovers by adjusting its sequence numberering */ case TCPS_ESTABLISHED: - if (thflags & TH_SYN) - goto dropafterack; + if (thflags & TH_SYN) + goto dropafterack; break; } @@ -3216,7 +3516,7 @@ tcp_input(m, off0) rxbytes, tlen); tp->t_stat.rxduplicatebytes += tlen; } - if (tlen) + if (tlen > 0) goto dropafterack; goto drop; } @@ -3275,6 +3575,16 @@ tcp_input(m, off0) tcpstat.tcps_rcvpartduppack++; tcpstat.tcps_rcvpartdupbyte += todrop; } + + if (TCP_DSACK_ENABLED(tp) && todrop > 1) { + /* + * Note the duplicate data sequence space so that + * it can be reported in DSACK option. + */ + tp->t_dsack_lseq = th->th_seq; + tp->t_dsack_rseq = th->th_seq + todrop; + tp->t_flags |= TF_ACKNOW; + } if (nstat_collect) { nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, todrop, NSTAT_RX_FLAG_DUPLICATE); @@ -3294,15 +3604,19 @@ tcp_input(m, off0) } /* - * If new data are received on a connection after the user processes - * are gone, then RST the other end. Note that an MPTCP subflow socket - * would have SS_NOFDREF set by default, so check to make sure that - * we test for SOF_MP_SUBFLOW socket flag (which would be cleared when - * the socket is closed.) + * If new data are received on a connection after the user + * processes are gone, then RST the other end. + * Send also a RST when we received a data segment after we've + * sent our FIN when the socket is defunct. + * Note that an MPTCP subflow socket would have SS_NOFDREF set + * by default so check to make sure that we test for SOF_MP_SUBFLOW + * socket flag (which would be cleared when the socket is closed.) 
*/ - if (!(so->so_flags & SOF_MP_SUBFLOW) && - (so->so_state & SS_NOFDREF) && - tp->t_state > TCPS_CLOSE_WAIT && tlen) { + if (!(so->so_flags & SOF_MP_SUBFLOW) && tlen && + (((so->so_state & SS_NOFDREF) && + tp->t_state > TCPS_CLOSE_WAIT) || + ((so->so_flags & SOF_DEFUNCT) && + tp->t_state > TCPS_FIN_WAIT_1))) { tp = tcp_close(tp); tcpstat.tcps_rcvafterclose++; rstreason = BANDLIM_UNLIMITED; @@ -3397,9 +3711,34 @@ tcp_input(m, off0) */ if ((thflags & TH_ACK) == 0) { if (tp->t_state == TCPS_SYN_RECEIVED || - (tp->t_flags & TF_NEEDSYN)) + (tp->t_flags & TF_NEEDSYN)) { + if ((tfo_enabled(tp))) { + /* + * So, we received a valid segment while in + * SYN-RECEIVED (TF_NEEDSYN is actually never + * set, so this is dead code). + * As this cannot be an RST (see that if a bit + * higher), and it does not have the ACK-flag + * set, we want to retransmit the SYN/ACK. + * Thus, we have to reset snd_nxt to snd_una to + * trigger the going back to sending of the + * SYN/ACK. This is more consistent with the + * behavior of tcp_output(), which expects + * to send the segment that is pointed to by + * snd_nxt. + */ + tp->snd_nxt = tp->snd_una; + + /* + * We need to make absolutely sure that we are + * going to reply upon a duplicate SYN-segment. + */ + if (th->th_flags & TH_SYN) + needoutput = 1; + } + goto step6; - else if (tp->t_flags & TF_ACKNOW) + } else if (tp->t_flags & TF_ACKNOW) goto dropafterack; else goto drop; @@ -3421,8 +3760,7 @@ tcp_input(m, off0) tcpstat.tcps_connects++; /* Do window scaling? 
*/ - if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == - (TF_RCVD_SCALE|TF_REQ_SCALE)) { + if (TCP_WINDOW_SCALE_ENABLED(tp)) { tp->snd_scale = tp->requested_s_scale; tp->rcv_scale = tp->request_r_scale; tp->snd_wnd = th->th_win << tp->snd_scale; @@ -3436,18 +3774,21 @@ tcp_input(m, off0) tp->t_starttime = tcp_now; tcp_sbrcv_tstmp_check(tp); if (tp->t_flags & TF_NEEDFIN) { - DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, int32_t, TCPS_FIN_WAIT_1); + DTRACE_TCP4(state__change, void, NULL, + struct inpcb *, inp, + struct tcpcb *, tp, int32_t, TCPS_FIN_WAIT_1); tp->t_state = TCPS_FIN_WAIT_1; tp->t_flags &= ~TF_NEEDFIN; } else { - DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, int32_t, TCPS_ESTABLISHED); + DTRACE_TCP4(state__change, void, NULL, + struct inpcb *, inp, + struct tcpcb *, tp, int32_t, TCPS_ESTABLISHED); tp->t_state = TCPS_ESTABLISHED; tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_CONN_KEEPIDLE(tp)); if (nstat_collect) - nstat_route_connect_success(tp->t_inpcb->inp_route.ro_rt); + nstat_route_connect_success( + tp->t_inpcb->inp_route.ro_rt); } /* * If segment contains data or ACK, will call tcp_reass() @@ -3458,7 +3799,6 @@ tcp_input(m, off0) NULL, ifp); tp->snd_wl1 = th->th_seq - 1; - /* FALLTHROUGH */ #if MPTCP /* * Do not send the connect notification for additional subflows @@ -3470,6 +3810,55 @@ tcp_input(m, off0) } else #endif /* MPTCP */ isconnected = TRUE; + if ((tp->t_tfo_flags & TFO_F_COOKIE_VALID)) { + /* Done this when receiving the SYN */ + isconnected = FALSE; + + OSDecrementAtomic(&tcp_tfo_halfcnt); + + /* Panic if something has gone terribly wrong. */ + VERIFY(tcp_tfo_halfcnt >= 0); + + tp->t_tfo_flags &= ~TFO_F_COOKIE_VALID; + } + + /* + * In case there is data in the send-queue (e.g., TFO is being + * used, or connectx+data has been done), then if we would + * "FALLTHROUGH", we would handle this ACK as if data has been + * acknowledged. 
But, we have to prevent this. And this + * can be prevented by increasing snd_una by 1, so that the + * SYN is not considered as data (snd_una++ is actually also + * done in SYN_SENT-state as part of the regular TCP stack). + * + * In case there is data on this ack as well, the data will be + * handled by the label "dodata" right after step6. + */ + if (so->so_snd.sb_cc) { + tp->snd_una++; /* SYN is acked */ + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + + /* + * No duplicate-ACK handling is needed. So, we + * directly advance to processing the ACK (aka, + * updating the RTT estimation,...) + * + * But, we first need to handle eventual SACKs, + * because TFO will start sending data with the + * SYN/ACK, so it might be that the client + * includes a SACK with its ACK. + */ + if (SACK_ENABLED(tp) && + (to.to_nsacks > 0 || + !TAILQ_EMPTY(&tp->snd_holes))) + tcp_sack_doack(tp, &to, th, + &sack_bytes_acked); + + goto process_ACK; + } + + /* FALLTHROUGH */ /* * In ESTABLISHED state: drop duplicate ACKs; ACK out of range @@ -3490,6 +3879,21 @@ tcp_input(m, off0) tcpstat.tcps_rcvacktoomuch++; goto dropafterack; } + if (SACK_ENABLED(tp) && to.to_nsacks > 0) { + recvd_dsack = tcp_sack_process_dsack(tp, &to, th); + /* + * If DSACK is received and this packet has no + * other SACK information, it can be dropped. + * We do not want to treat it as a duplicate ack. 
+ */ + if (recvd_dsack && + SEQ_LEQ(th->th_ack, tp->snd_una) && + to.to_nsacks == 0) { + tcp_bad_rexmt_check(tp, th, &to); + goto drop; + } + } + if (SACK_ENABLED(tp) && (to.to_nsacks > 0 || !TAILQ_EMPTY(&tp->snd_holes))) tcp_sack_doack(tp, &to, th, &sack_bytes_acked); @@ -3506,9 +3910,11 @@ tcp_input(m, off0) tp->t_mpflags |= TMPF_MPTCP_TRUE; so->so_flags |= SOF_MPTCP_TRUE; - if (mptcp_dbg >= MP_ERR_DEBUG) - printf("MPTCP SUCCESS" - " %s \n",__func__); + mptcplog((LOG_DEBUG, "MPTCP " + "Sockets: %s \n",__func__), + MPTCP_SOCKET_DBG, + MPTCP_LOGLVL_LOG); + tp->t_timer[TCPT_JACK_RXMT] = 0; tp->t_mprxtshift = 0; isconnected = TRUE; @@ -3522,6 +3928,9 @@ tcp_input(m, off0) } } #endif /* MPTCP */ + + tcp_tfo_rcv_ack(tp, th); + /* * If we have outstanding data (other than * a window probe), this is a completely @@ -3538,11 +3947,10 @@ tcp_input(m, off0) * instead of the dupack */ if ((thflags & TH_FIN) && - (tp->t_flags & TF_SENTFIN) && - !TCPS_HAVERCVDFIN(tp->t_state) && - (th->th_ack + 1) == tp->snd_max) { + (tp->t_flags & TF_SENTFIN) && + !TCPS_HAVERCVDFIN(tp->t_state) && + (th->th_ack + 1) == tp->snd_max) break; - } process_dupack: #if MPTCP /* @@ -3554,8 +3962,10 @@ tcp_input(m, off0) } if ((isconnected) && (tp->t_mpflags & TMPF_JOINED_FLOW)) { - if (mptcp_dbg >= MP_ERR_DEBUG) - printf("%s: bypass ack recovery\n",__func__); + mptcplog((LOG_DEBUG, "MPTCP " + "Sockets: bypass ack recovery\n"), + MPTCP_SOCKET_DBG, + MPTCP_LOGLVL_VERBOSE); break; } #endif /* MPTCP */ @@ -3683,8 +4093,10 @@ tcp_input(m, off0) break; } } - - tp->snd_recover = tp->snd_max; + if (tp->t_flags & TF_SENTFIN) + tp->snd_recover = tp->snd_max - 1; + else + tp->snd_recover = tp->snd_max; tp->t_timer[TCPT_PTO] = 0; tp->t_rtttime = 0; @@ -3700,7 +4112,8 @@ tcp_input(m, off0) == TF_PKTS_REORDERED && !IN_FASTRECOVERY(tp) && tp->t_reorderwin > 0 && - tp->t_state == TCPS_ESTABLISHED) { + (tp->t_state == TCPS_ESTABLISHED || + tp->t_state == TCPS_FIN_WAIT_1)) { tp->t_timer[TCPT_DELAYFR] = 
OFFSET_FROM_START(tp, tp->t_reorderwin); @@ -3711,6 +4124,7 @@ tcp_input(m, off0) break; } + tcp_rexmt_save_state(tp); /* * If the current tcp cc module has * defined a hook for tasks to run @@ -3720,35 +4134,29 @@ tcp_input(m, off0) CC_ALGO(tp)->pre_fr(tp); ENTER_FASTRECOVERY(tp); tp->t_timer[TCPT_REXMT] = 0; - if ((tp->ecn_flags & TE_ECN_ON) - == TE_ECN_ON) + if (TCP_ECN_ENABLED(tp)) tp->ecn_flags |= TE_SENDCWR; if (SACK_ENABLED(tp)) { tcpstat.tcps_sack_recovery_episode++; tp->sack_newdata = tp->snd_nxt; tp->snd_cwnd = tp->t_maxseg; - - /* - * Enable probe timeout to detect - * a tail loss in the recovery - * window. - */ - tp->t_timer[TCPT_PTO] = - OFFSET_FROM_START(tp, - max(10, (tp->t_srtt >> TCP_RTT_SHIFT))); - + tp->t_flagsext &= + ~TF_CWND_NONVALIDATED; tcp_ccdbg_trace(tp, th, TCP_CC_ENTER_FASTRECOVERY); - (void) tcp_output(tp); goto drop; } tp->snd_nxt = th->th_ack; tp->snd_cwnd = tp->t_maxseg; (void) tcp_output(tp); - tp->snd_cwnd = tp->snd_ssthresh + - tp->t_maxseg * tp->t_dupacks; + if (tp->t_flagsext & TF_CWND_NONVALIDATED) { + tcp_cc_adjust_nonvalidated_cwnd(tp); + } else { + tp->snd_cwnd = tp->snd_ssthresh + + tp->t_maxseg * tp->t_dupacks; + } if (SEQ_GT(onxt, tp->snd_nxt)) tp->snd_nxt = onxt; tcp_ccdbg_trace(tp, th, @@ -3801,7 +4209,8 @@ tcp_input(m, off0) EXIT_FASTRECOVERY(tp); if (CC_ALGO(tp)->post_fr != NULL) CC_ALGO(tp)->post_fr(tp, th); - + tp->t_pipeack = 0; + tcp_clear_pipeack_state(tp); tcp_ccdbg_trace(tp, th, TCP_CC_EXIT_FASTRECOVERY); } @@ -3849,14 +4258,14 @@ tcp_input(m, off0) tp->t_flags &= ~TF_NEEDSYN; tp->snd_una++; /* Do window scaling? 
*/ - if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == - (TF_RCVD_SCALE|TF_REQ_SCALE)) { + if (TCP_WINDOW_SCALE_ENABLED(tp)) { tp->snd_scale = tp->requested_s_scale; tp->rcv_scale = tp->request_r_scale; } } process_ACK: + VERIFY(SEQ_GEQ(th->th_ack, tp->snd_una)); acked = BYTES_ACKED(th, tp); tcpstat.tcps_rcvackpack++; tcpstat.tcps_rcvackbyte += acked; @@ -3895,9 +4304,21 @@ tcp_input(m, off0) if (acked == 0) goto step6; + /* + * When outgoing data has been acked (except the SYN+data), we + * mark this connection as "sending good" for TFO. + */ + if ((tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) && + !(tp->t_tfo_flags & TFO_F_NO_SNDPROBING) && + !(th->th_flags & TH_SYN)) + tcp_heuristic_tfo_snd_good(tp); - if ((thflags & TH_ECE) != 0 && - ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON)) { + /* + * If TH_ECE is received, make sure that ECN is enabled + * on that connection and we have sent ECT on data packets. + */ + if ((thflags & TH_ECE) != 0 && TCP_ECN_ENABLED(tp) && + (tp->ecn_flags & TE_SENDIPECT)) { /* * Reduce the congestion window if we haven't * done so. @@ -3905,6 +4326,12 @@ tcp_input(m, off0) if (!IN_FASTRECOVERY(tp)) { tcp_reduce_congestion_window(tp); tp->ecn_flags |= (TE_INRECOVERY|TE_SENDCWR); + /* + * Also note that the connection received + * ECE atleast once + */ + tp->ecn_flags |= TE_RECV_ECN_ECE; + tcpstat.tcps_ecn_recv_ece++; tcp_ccdbg_trace(tp, th, TCP_CC_ECN_RCVD); } } @@ -3957,6 +4384,10 @@ tcp_input(m, off0) } if (SEQ_LT(tp->snd_nxt, tp->snd_una)) tp->snd_nxt = tp->snd_una; + if (!SLIST_EMPTY(&tp->t_rxt_segments) && + !TCP_DSACK_SEQ_IN_WINDOW(tp, tp->t_dsack_lastuna, + tp->snd_una)) + tcp_rxtseg_clean(tp); if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 && tp->t_bwmeas != NULL) tcp_bwmeas_check(tp); @@ -4181,9 +4612,15 @@ tcp_input(m, off0) * is presented to the user (this happens in tcp_usrreq.c, * case PRU_RCVD). If a FIN has already been received on this * connection then we just ignore the text. 
+ * + * If we are in SYN-received state and got a valid TFO cookie, we want + * to process the data. */ if ((tlen || (thflags & TH_FIN)) && - TCPS_HAVERCVDFIN(tp->t_state) == 0) { + TCPS_HAVERCVDFIN(tp->t_state) == 0 && + (TCPS_HAVEESTABLISHED(tp->t_state) || + (tp->t_state == TCPS_SYN_RECEIVED && + (tp->t_tfo_flags & TFO_F_COOKIE_VALID)))) { tcp_seq save_start = th->th_seq; tcp_seq save_end = th->th_seq + tlen; m_adj(m, drop_hdrlen); /* delayed header drop */ @@ -4199,9 +4636,7 @@ tcp_input(m, off0) * immediately when segments are out of order (so * fast retransmit can work). */ - if (th->th_seq == tp->rcv_nxt && - LIST_EMPTY(&tp->t_segq) && - TCPS_HAVEESTABLISHED(tp->t_state)) { + if (th->th_seq == tp->rcv_nxt && LIST_EMPTY(&tp->t_segq)) { TCP_INC_VAR(tp->t_unacksegs, nlropkts); /* * Calculate the RTT on the receiver only if the @@ -4255,6 +4690,9 @@ tcp_input(m, off0) tcp_adaptive_rwtimo_check(tp, tlen); + if (tlen > 0) + tcp_tfo_rcv_data(tp); + if (tp->t_flags & TF_DELACK) { #if INET6 @@ -4497,17 +4935,12 @@ tcp_input(m, off0) return; } -static void -tcp_dooptions(tp, cp, cnt, th, to, input_ifscope) /* * Parse TCP options and place in tcpopt. 
*/ - struct tcpcb *tp; - u_char *cp; - int cnt; - struct tcphdr *th; - struct tcpopt *to; - unsigned int input_ifscope; +static void +tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcphdr *th, + struct tcpopt *to) { u_short mss = 0; int opt, optlen; @@ -4537,6 +4970,8 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope) continue; bcopy((char *) cp + 2, (char *) &mss, sizeof(mss)); NTOHS(mss); + to->to_mss = mss; + to->to_flags |= TOF_MSS; break; case TCPOPT_WINDOW: @@ -4545,8 +4980,7 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope) if (!(th->th_flags & TH_SYN)) continue; to->to_flags |= TOF_SCALE; - tp->t_flags |= TF_RCVD_SCALE; - tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT); + to->to_requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT); break; case TCPOPT_TIMESTAMP: @@ -4559,15 +4993,10 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope) bcopy((char *)cp + 6, (char *)&to->to_tsecr, sizeof(to->to_tsecr)); NTOHL(to->to_tsecr); - /* - * A timestamp received in a SYN makes - * it ok to send timestamp requests and replies. 
- */ - if (th->th_flags & TH_SYN) { - tp->t_flags |= TF_RCVD_TSTMP; - tp->ts_recent = to->to_tsval; - tp->ts_recent_age = tcp_now; - } + /* Re-enable sending Timestamps if we received them */ + if (!(tp->t_flags & TF_REQ_TSTMP) && + tcp_do_rfc1323 == 1) + tp->t_flags |= TF_REQ_TSTMP; break; case TCPOPT_SACK_PERMITTED: if (!tcp_do_sack || @@ -4584,7 +5013,26 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope) tcpstat.tcps_sack_rcv_blocks++; break; - + case TCPOPT_FASTOPEN: + if (optlen == TCPOLEN_FASTOPEN_REQ) { + if (tp->t_state != TCPS_LISTEN) + continue; + + to->to_flags |= TOF_TFOREQ; + } else { + if (optlen < TCPOLEN_FASTOPEN_REQ || + (optlen - TCPOLEN_FASTOPEN_REQ) > TFO_COOKIE_LEN_MAX || + (optlen - TCPOLEN_FASTOPEN_REQ) < TFO_COOKIE_LEN_MIN) + continue; + if (tp->t_state != TCPS_LISTEN && + tp->t_state != TCPS_SYN_SENT) + continue; + + to->to_flags |= TOF_TFO; + to->to_tfo = cp + 1; + } + + break; #if MPTCP case TCPOPT_MULTIPATH: tcp_do_mptcp_options(tp, cp, th, to, optlen); @@ -4592,8 +5040,33 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope) #endif /* MPTCP */ } } - if (th->th_flags & TH_SYN) - tcp_mss(tp, mss, input_ifscope); /* sets t_maxseg */ +} + +static void +tcp_finalize_options(struct tcpcb *tp, struct tcpopt *to, unsigned int ifscope) +{ + if (to->to_flags & TOF_TS) { + tp->t_flags |= TF_RCVD_TSTMP; + tp->ts_recent = to->to_tsval; + tp->ts_recent_age = tcp_now; + + } + if (to->to_flags & TOF_MSS) + tcp_mss(tp, to->to_mss, ifscope); + if (SACK_ENABLED(tp)) { + if (!(to->to_flags & TOF_SACK)) + tp->t_flagsext &= ~(TF_SACK_ENABLE); + else + tp->t_flags |= TF_SACK_PERMIT; + } + if (to->to_flags & TOF_SCALE) { + tp->t_flags |= TF_RCVD_SCALE; + tp->requested_s_scale = to->to_requested_s_scale; + + /* Re-enable window scaling, if the option is received */ + if (tp->request_r_scale > 0) + tp->t_flags |= TF_REQ_SCALE; + } } /* @@ -4693,15 +5166,38 @@ update_base_rtt(struct tcpcb *tp, uint32_t rtt) static void tcp_compute_rtt(struct tcpcb *tp, struct 
tcpopt *to, struct tcphdr *th) { + int rtt = 0; VERIFY(to != NULL && th != NULL); + if (tp->t_rtttime != 0 && SEQ_GT(th->th_ack, tp->t_rtseq)) { + u_int32_t pipe_ack_val; + rtt = tcp_now - tp->t_rtttime; + /* + * Compute pipe ack -- the amount of data acknowledged + * in the last RTT + */ + if (SEQ_GT(th->th_ack, tp->t_pipeack_lastuna)) { + pipe_ack_val = th->th_ack - tp->t_pipeack_lastuna; + /* Update the sample */ + tp->t_pipeack_sample[tp->t_pipeack_ind++] = + pipe_ack_val; + tp->t_pipeack_ind %= TCP_PIPEACK_SAMPLE_COUNT; + + /* Compute the max of the pipeack samples */ + pipe_ack_val = tcp_get_max_pipeack(tp); + tp->t_pipeack = (pipe_ack_val > + TCP_CC_CWND_INIT_BYTES) ? + pipe_ack_val : 0; + } + /* start another measurement */ + tp->t_rtttime = 0; + } if (((to->to_flags & TOF_TS) != 0) && (to->to_tsecr != 0) && TSTMP_GEQ(tcp_now, to->to_tsecr)) { - tcp_xmit_timer(tp, tcp_now - to->to_tsecr, + tcp_xmit_timer(tp, (tcp_now - to->to_tsecr), to->to_tsecr, th->th_ack); - } else if (tp->t_rtttime != 0 && SEQ_GT(th->th_ack, tp->t_rtseq)) { - tcp_xmit_timer(tp, tcp_now - tp->t_rtttime, 0, - th->th_ack); + } else if (rtt > 0) { + tcp_xmit_timer(tp, rtt, 0, th->th_ack); } } @@ -4800,7 +5296,6 @@ tcp_xmit_timer(register struct tcpcb *tp, int rtt, compute_rto: nstat_route_rtt(tp->t_inpcb->inp_route.ro_rt, tp->t_srtt, tp->t_rttvar); - tp->t_rtttime = 0; tp->t_rxtshift = 0; tp->t_rxtstart = 0; @@ -4848,10 +5343,9 @@ static inline unsigned int tcp_maxmtu6(struct rtentry *rt) { unsigned int maxmtu; - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = NULL; RT_LOCK_ASSERT_HELD(rt); - lck_rw_lock_shared(nd_if_rwlock); if ((ndi = ND_IFINFO(rt->rt_ifp)) != NULL && !ndi->initialized) ndi = NULL; if (ndi != NULL) @@ -4862,7 +5356,6 @@ tcp_maxmtu6(struct rtentry *rt) maxmtu = MIN(rt->rt_rmx.rmx_mtu, IN6_LINKMTU(rt->rt_ifp)); if (ndi != NULL) lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); return (maxmtu); } @@ -5005,6 +5498,12 @@ tcp_mss(tp, offer, input_ifscope) #else mss = 
tcp_maxmtu(rt); #endif + +#if NECP + // At this point, the mss is just the MTU. Adjust if necessary. + mss = necp_socket_get_effective_mtu(inp, mss); +#endif /* NECP */ + mss -= min_protoh; if (rt->rt_rmx.rmx_mtu == 0) { @@ -5163,6 +5662,12 @@ tcp_mssopt(tp) #endif /* Route locked during lookup above */ RT_UNLOCK(rt); + +#if NECP + // At this point, the mss is just the MTU. Adjust if necessary. + mss = necp_socket_get_effective_mtu(tp->t_inpcb, mss); +#endif /* NECP */ + return (mss - min_protoh); } @@ -5477,7 +5982,7 @@ inp_fc_unthrottle_tcp(struct inpcb *inp) CC_ALGO(tp)->pre_fr(tp); tp->snd_cwnd = tp->snd_ssthresh; - + tp->t_flagsext &= ~TF_CWND_NONVALIDATED; /* * Restart counting for ABC as we changed the * congestion window just now. @@ -5490,6 +5995,7 @@ inp_fc_unthrottle_tcp(struct inpcb *inp) * to backoff retransmit timer. */ tp->t_rxtshift = 0; + tp->t_rtttime = 0; /* * Start the output stream again. Since we are @@ -5699,8 +6205,8 @@ tcp_input_checksum(int af, struct mbuf *m, struct tcphdr *th, int off, int tlen) } SYSCTL_PROC(_net_inet_tcp, TCPCTL_STATS, stats, - CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, - tcp_getstat, "S,tcpstat", "TCP statistics (struct tcpstat, netinet/tcp_var.h)"); + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, tcp_getstat, + "S,tcpstat", "TCP statistics (struct tcpstat, netinet/tcp_var.h)"); static int sysctl_rexmtthresh SYSCTL_HANDLER_ARGS @@ -5727,5 +6233,7 @@ sysctl_rexmtthresh SYSCTL_HANDLER_ARGS return (0); } -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmt_thresh, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, - &tcprexmtthresh, 0, &sysctl_rexmtthresh, "I", "Duplicate ACK Threshold for Fast Retransmit"); +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmt_thresh, CTLTYPE_INT | CTLFLAG_RW | + CTLFLAG_LOCKED, &tcprexmtthresh, 0, &sysctl_rexmtthresh, "I", + "Duplicate ACK Threshold for Fast Retransmit"); + diff --git a/bsd/netinet/tcp_ledbat.c b/bsd/netinet/tcp_ledbat.c index 09f594baf..aa2d32dbc 100644 --- 
a/bsd/netinet/tcp_ledbat.c +++ b/bsd/netinet/tcp_ledbat.c @@ -358,9 +358,6 @@ tcp_ledbat_after_idle(struct tcpcb *tp) { /* Reset the congestion window */ tp->snd_cwnd = tp->t_maxseg * bg_ss_fltsz; - - /* If stretch ack was auto disabled, re-evaluate the situation */ - tcp_cc_after_idle_stretchack(tp); } /* Function to change the congestion window when the retransmit diff --git a/bsd/netinet/tcp_newreno.c b/bsd/netinet/tcp_newreno.c index a1e590a0a..f2de1c010 100644 --- a/bsd/netinet/tcp_newreno.c +++ b/bsd/netinet/tcp_newreno.c @@ -134,9 +134,6 @@ int tcp_newreno_cleanup(struct tcpcb *tp) { void tcp_newreno_cwnd_init_or_reset(struct tcpcb *tp) { tcp_cc_cwnd_init_or_reset(tp); - - /* If stretch ack was auto disabled, re-evaluate the situation */ - tcp_cc_after_idle_stretchack(tp); } diff --git a/bsd/netinet/tcp_output.c b/bsd/netinet/tcp_output.c index b693e0512..e348fadde 100644 --- a/bsd/netinet/tcp_output.c +++ b/bsd/netinet/tcp_output.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -101,6 +101,7 @@ #endif #include #define TCPOUTFLAGS +#include #include #include #include @@ -128,6 +129,8 @@ #include #endif +#include + #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETTCP, 1) #define DBG_LAYER_END NETDBG_CODE(DBG_NETTCP, 3) #define DBG_FNC_TCP_OUTPUT NETDBG_CODE(DBG_NETTCP, (4 << 8) | 1) @@ -245,6 +248,137 @@ static int tcp_ip_output(struct socket *, struct tcpcb *, struct mbuf *, int, static struct mbuf* tcp_send_lroacks(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th); static int tcp_recv_throttle(struct tcpcb *tp); +static int32_t tcp_tfo_check(struct tcpcb *tp, int32_t len) +{ + struct socket *so = tp->t_inpcb->inp_socket; + unsigned int optlen = 0; + unsigned int cookie_len; + + if (tp->t_flags & TF_NOOPT) + goto fallback; + + if (!tcp_heuristic_do_tfo(tp)) + goto fallback; + + optlen += TCPOLEN_MAXSEG; + + if (tp->t_flags & TF_REQ_SCALE) + optlen += 4; + +#if MPTCP + if ((so->so_flags & SOF_MP_SUBFLOW) && mptcp_enable && + tp->t_rxtshift <= mptcp_mpcap_retries) + optlen += sizeof(struct mptcp_mpcapable_opt_common) + sizeof(mptcp_key_t); +#endif /* MPTCP */ + + if (tp->t_flags & TF_REQ_TSTMP) + optlen += TCPOLEN_TSTAMP_APPA; + + if (SACK_ENABLED(tp)) + optlen += TCPOLEN_SACK_PERMITTED; + + /* Now, decide whether to use TFO or not */ + + /* Don't even bother trying if there is no space at all... */ + if (MAX_TCPOPTLEN - optlen < TCPOLEN_FASTOPEN_REQ) + goto fallback; + + cookie_len = tcp_cache_get_cookie_len(tp); + if (cookie_len == 0) + /* No cookie, so we request one */ + return (0); + + /* Do not send SYN+data if there is more in the queue than MSS */ + if (so->so_snd.sb_cc > (tp->t_maxopd - MAX_TCPOPTLEN)) + goto fallback; + + /* Ok, everything looks good. 
We can go on and do TFO */ + return (len); + +fallback: + tp->t_flagsext &= ~TF_FASTOPEN; + return (0); +} + +/* Returns the number of bytes written to the TCP option-space */ +static unsigned +tcp_tfo_write_cookie_rep(struct tcpcb *tp, unsigned optlen, u_char *opt) +{ + u_char out[CCAES_BLOCK_SIZE]; + unsigned ret = 0; + u_char *bp; + + if ((MAX_TCPOPTLEN - optlen) < + (TCPOLEN_FASTOPEN_REQ + TFO_COOKIE_LEN_DEFAULT)) + return (ret); + + tcp_tfo_gen_cookie(tp->t_inpcb, out, sizeof(out)); + + bp = opt + optlen; + + *bp++ = TCPOPT_FASTOPEN; + *bp++ = 2 + TFO_COOKIE_LEN_DEFAULT; + memcpy(bp, out, TFO_COOKIE_LEN_DEFAULT); + ret += 2 + TFO_COOKIE_LEN_DEFAULT; + + tp->t_tfo_stats |= TFO_S_COOKIE_SENT; + tcpstat.tcps_tfo_cookie_sent++; + + return (ret); +} + +static unsigned +tcp_tfo_write_cookie(struct tcpcb *tp, unsigned optlen, int32_t *len, + u_char *opt) +{ + u_int8_t tfo_len = MAX_TCPOPTLEN - optlen - TCPOLEN_FASTOPEN_REQ; + unsigned ret = 0; + int res; + u_char *bp; + + bp = opt + optlen; + + /* + * The cookie will be copied in the appropriate place within the + * TCP-option space. That way we avoid the need for an intermediate + * variable. 
+ */ + res = tcp_cache_get_cookie(tp, bp + TCPOLEN_FASTOPEN_REQ, &tfo_len); + if (res == 0) { + *bp++ = TCPOPT_FASTOPEN; + *bp++ = TCPOLEN_FASTOPEN_REQ; + ret += TCPOLEN_FASTOPEN_REQ; + + tp->t_tfo_flags |= TFO_F_COOKIE_REQ; + + tp->t_tfo_stats |= TFO_S_COOKIE_REQ; + tcpstat.tcps_tfo_cookie_req++; + } else { + *bp++ = TCPOPT_FASTOPEN; + *bp++ = TCPOLEN_FASTOPEN_REQ + tfo_len; + + ret += TCPOLEN_FASTOPEN_REQ + tfo_len; + + tp->t_tfo_flags |= TFO_F_COOKIE_SENT; + + /* If there is some data, let's track it */ + if (*len) { + tp->t_tfo_stats |= TFO_S_SYN_DATA_SENT; + tcpstat.tcps_tfo_syn_data_sent++; + } + } + + return (ret); +} + +static inline bool +tcp_send_ecn_flags_on_syn(struct tcpcb *tp, struct socket *so) +{ + return(!((tp->ecn_flags & TE_SETUPSENT) || + (so->so_flags & SOF_MP_SUBFLOW) || + (tp->t_flagsext & TF_FASTOPEN))); +} + /* * Tcp output routine: figure out what should be sent and send it. * @@ -291,6 +425,7 @@ tcp_output(struct tcpcb *tp) int i, sack_rxmit; int tso = 0; int sack_bytes_rxmt; + tcp_seq old_snd_nxt = 0; struct sackhole *p; #if IPSEC unsigned ipsec_optlen = 0; @@ -319,6 +454,7 @@ tcp_output(struct tcpcb *tp) boolean_t cell = FALSE; boolean_t wifi = FALSE; boolean_t wired = FALSE; + boolean_t sack_rescue_rxt = FALSE; /* * Determine length of data that should be transmitted, @@ -333,9 +469,22 @@ tcp_output(struct tcpcb *tp) */ idle_time = tcp_now - tp->t_rcvtime; if (idle && idle_time >= TCP_IDLETIMEOUT(tp)) { - if (CC_ALGO(tp)->after_idle != NULL) + if (CC_ALGO(tp)->after_idle != NULL && + (tp->tcp_cc_index != TCP_CC_ALGO_CUBIC_INDEX || + idle_time >= TCP_CC_CWND_NONVALIDATED_PERIOD)) { CC_ALGO(tp)->after_idle(tp); - tcp_ccdbg_trace(tp, NULL, TCP_CC_IDLE_TIMEOUT); + tcp_ccdbg_trace(tp, NULL, TCP_CC_IDLE_TIMEOUT); + } + + /* + * Do some other tasks that need to be done after + * idle time + */ + if (!SLIST_EMPTY(&tp->t_rxt_segments)) + tcp_rxtseg_clean(tp); + + /* If stretch ack was auto-disabled, re-evaluate it */ + 
tcp_cc_after_idle_stretchack(tp); } tp->t_flags &= ~TF_LASTIDLE; if (idle) { @@ -631,11 +780,16 @@ tcp_output(struct tcpcb *tp) * in which case len is already set. */ if (sack_rxmit == 0) { - if (sack_bytes_rxmt == 0) + if (sack_bytes_rxmt == 0) { len = min(so->so_snd.sb_cc, sendwin) - off; - else { + } else { int32_t cwin; + cwin = tp->snd_cwnd - + (tp->snd_nxt - tp->sack_newdata) - + sack_bytes_rxmt; + if (cwin < 0) + cwin = 0; /* * We are inside of a SACK recovery episode and are * sending new data, having retransmitted all the @@ -652,15 +806,37 @@ tcp_output(struct tcpcb *tp) * of len is bungled by the optimizer. */ if (len > 0) { - cwin = tp->snd_cwnd - - (tp->snd_nxt - tp->sack_newdata) - - sack_bytes_rxmt; - if (cwin < 0) - cwin = 0; len = imin(len, cwin); - } - else + } else { len = 0; + } + /* + * At this point SACK recovery can not send any + * data from scoreboard or any new data. Check + * if we can do a rescue retransmit towards the + * tail end of recovery window. + */ + if (len == 0 && cwin > 0 && + SEQ_LT(tp->snd_fack, tp->snd_recover) && + !(tp->t_flagsext & TF_RESCUE_RXT)) { + len = min((tp->snd_recover - tp->snd_fack), + tp->t_maxseg); + len = imin(len, cwin); + old_snd_nxt = tp->snd_nxt; + sack_rescue_rxt = TRUE; + tp->snd_nxt = tp->snd_recover - len; + /* + * If FIN has been sent, snd_max + * must have been advanced to cover it. + */ + if ((tp->t_flags & TF_SENTFIN) && + tp->snd_max == tp->snd_recover) + tp->snd_nxt--; + + off = tp->snd_nxt - tp->snd_una; + sendalot = 0; + tp->t_flagsext |= TF_RESCUE_RXT; + } } } @@ -686,7 +862,7 @@ tcp_output(struct tcpcb *tp) * know that foreign host supports TAO, suppress sending segment. 
*/ if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { - if (tp->t_state != TCPS_SYN_RECEIVED) + if (tp->t_state != TCPS_SYN_RECEIVED || tfo_enabled(tp)) flags &= ~TH_SYN; off--, len++; if (len > 0 && tp->t_state == TCPS_SYN_SENT) { @@ -731,12 +907,19 @@ tcp_output(struct tcpcb *tp) * Be careful not to send data and/or FIN on SYN segments. * This measure is needed to prevent interoperability problems * with not fully conformant TCP implementations. + * + * In case of TFO, we handle the setting of the len in + * tcp_tfo_check. In case TFO is not enabled, never ever send + * SYN+data. */ - if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) { + if ((flags & TH_SYN) && !tfo_enabled(tp)) { len = 0; flags &= ~TH_FIN; } + if ((flags & TH_SYN) && tp->t_state <= TCPS_SYN_SENT && tfo_enabled(tp)) + len = tcp_tfo_check(tp, len); + /* * The check here used to be (len < 0). Some times len is zero * when the congestion window is closed and we need to check @@ -872,7 +1055,8 @@ tcp_output(struct tcpcb *tp) (tp->t_state > TCPS_CLOSED) && ((tp->t_mpflags & TMPF_SND_MPPRIO) || (tp->t_mpflags & TMPF_SND_REM_ADDR) || - (tp->t_mpflags & TMPF_SND_MPFAIL))) { + (tp->t_mpflags & TMPF_SND_MPFAIL) || + (tp->t_mpflags & TMPF_MPCAP_RETRANSMIT))) { if (len > 0) { len = 0; } @@ -1125,7 +1309,7 @@ tcp_output(struct tcpcb *tp) return (0); send: - /* + /* * Set TF_MAXSEGSNT flag if the segment size is greater than * the max segment size. */ @@ -1178,101 +1362,9 @@ tcp_output(struct tcpcb *tp) } #endif /* MPTCP */ } - } - - /* - * RFC 3168 states that: - * - If you ever sent an ECN-setup SYN/SYN-ACK you must be prepared - * to handle the TCP ECE flag, even if you also later send a - * non-ECN-setup SYN/SYN-ACK. - * - If you ever send a non-ECN-setup SYN/SYN-ACK, you must not set - * the ip ECT flag. - * - * It is not clear how the ECE flag would ever be set if you never - * set the IP ECT flag on outbound packets. 
All the same, we use - * the TE_SETUPSENT to indicate that we have committed to handling - * the TCP ECE flag correctly. We use the TE_SENDIPECT to indicate - * whether or not we should set the IP ECT flag on outbound packet - * - * For a SYN-ACK, send an ECN setup SYN-ACK - */ - if ((tcp_ecn_inbound || (tp->t_flags & TF_ENABLE_ECN)) - && (flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { - if ((tp->ecn_flags & TE_SETUPRECEIVED) != 0) { - if ((tp->ecn_flags & TE_SETUPSENT) == 0) { - /* Setting TH_ECE makes this an ECN-setup SYN-ACK */ - flags |= TH_ECE; - - /* - * Record that we sent the ECN-setup and - * default to setting IP ECT. - */ - tp->ecn_flags |= (TE_SETUPSENT|TE_SENDIPECT); - tcpstat.tcps_ecn_setup++; - } else { - /* - * We sent an ECN-setup SYN-ACK but it was - * dropped. Fallback to non-ECN-setup - * SYN-ACK and clear flag to indicate that - * we should not send data with IP ECT set - * - * Pretend we didn't receive an - * ECN-setup SYN. - */ - tp->ecn_flags &= ~TE_SETUPRECEIVED; - /* - * We already incremented the counter - * assuming that the ECN setup will - * succeed. Decrementing here to - * correct it. - */ - tcpstat.tcps_ecn_setup--; - } - } - } else if ((tcp_ecn_outbound || (tp->t_flags & TF_ENABLE_ECN)) - && (flags & (TH_SYN | TH_ACK)) == TH_SYN) { - if ((tp->ecn_flags & TE_SETUPSENT) == 0) { - /* Setting TH_ECE and TH_CWR makes this an ECN-setup SYN */ - flags |= (TH_ECE | TH_CWR); - - /* - * Record that we sent the ECN-setup and default to - * setting IP ECT. - */ - tp->ecn_flags |= (TE_SETUPSENT | TE_SENDIPECT); - } else { - /* - * We sent an ECN-setup SYN but it was dropped. - * Fall back to no ECN and clear flag indicating - * we should send data with IP ECT set. - */ - tp->ecn_flags &= ~TE_SENDIPECT; - } - } - - /* - * Check if we should set the TCP CWR flag. - * CWR flag is sent when we reduced the congestion window because - * we received a TCP ECE or we performed a fast retransmit. 
We - * never set the CWR flag on retransmitted packets. We only set - * the CWR flag on data packets. Pure acks don't have this set. - */ - if ((tp->ecn_flags & TE_SENDCWR) != 0 && len != 0 && - !SEQ_LT(tp->snd_nxt, tp->snd_max) && !sack_rxmit) { - flags |= TH_CWR; - tp->ecn_flags &= ~TE_SENDCWR; - tcpstat.tcps_sent_cwr++; - } - - /* - * Check if we should set the TCP ECE flag. - */ - if ((tp->ecn_flags & TE_SENDECE) != 0 && len == 0) { - flags |= TH_ECE; - tcpstat.tcps_sent_ece++; } - /* + /* * Send a timestamp and echo-reply if this is a SYN and our side * wants to use timestamps (TF_REQ_TSTMP is set) or both our side * and our peer have sent timestamps in our SYN's. @@ -1339,6 +1431,15 @@ tcp_output(struct tcpcb *tp) } #endif /* MPTCP */ + if (tfo_enabled(tp) && !(tp->t_flags & TF_NOOPT) && + (flags & (TH_SYN | TH_ACK)) == TH_SYN) + optlen += tcp_tfo_write_cookie(tp, optlen, &len, opt); + + if (tfo_enabled(tp) && + (flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK) && + (tp->t_tfo_flags & TFO_F_OFFER_COOKIE)) + optlen += tcp_tfo_write_cookie_rep(tp, optlen, opt); + if (SACK_ENABLED(tp) && ((tp->t_flags & TF_NOOPT) == 0)) { /* * Send SACKs if necessary. This should be the last @@ -1354,14 +1455,16 @@ tcp_output(struct tcpcb *tp) * 10 bytes for SACK options 40 - (12 + 18). */ if (TCPS_HAVEESTABLISHED(tp->t_state) && - (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0 && + (tp->t_flags & TF_SACK_PERMIT) && + (tp->rcv_numsacks > 0 || TCP_SEND_DSACK_OPT(tp)) && MAX_TCPOPTLEN - optlen - 2 >= TCPOLEN_SACK) { int nsack, padlen; u_char *bp = (u_char *)opt + optlen; u_int32_t *lp; nsack = (MAX_TCPOPTLEN - optlen - 2) / TCPOLEN_SACK; - nsack = min(nsack, tp->rcv_numsacks); + nsack = min(nsack, (tp->rcv_numsacks + + (TCP_SEND_DSACK_OPT(tp) ? 
1 : 0))); sackoptlen = (2 + nsack * TCPOLEN_SACK); /* @@ -1378,6 +1481,21 @@ tcp_output(struct tcpcb *tp) *bp++ = TCPOPT_SACK; *bp++ = sackoptlen; lp = (u_int32_t *)(void *)bp; + + /* + * First block of SACK option should represent + * DSACK. Prefer to send SACK information if there + * is space for only one SACK block. This will + * allow for faster recovery. + */ + if (TCP_SEND_DSACK_OPT(tp) && nsack > 0 && + (tp->rcv_numsacks == 0 || nsack > 1)) { + *lp++ = htonl(tp->t_dsack_lseq); + *lp++ = htonl(tp->t_dsack_rseq); + tcpstat.tcps_dsack_sent++; + nsack--; + } + VERIFY(nsack == 0 || tp->rcv_numsacks >= nsack); for (i = 0; i < nsack; i++) { struct sackblk sack = tp->sackblks[i]; *lp++ = htonl(sack.start); @@ -1399,8 +1517,119 @@ tcp_output(struct tcpcb *tp) } } + /* + * RFC 3168 states that: + * - If you ever sent an ECN-setup SYN/SYN-ACK you must be prepared + * to handle the TCP ECE flag, even if you also later send a + * non-ECN-setup SYN/SYN-ACK. + * - If you ever send a non-ECN-setup SYN/SYN-ACK, you must not set + * the ip ECT flag. + * + * It is not clear how the ECE flag would ever be set if you never + * set the IP ECT flag on outbound packets. All the same, we use + * the TE_SETUPSENT to indicate that we have committed to handling + * the TCP ECE flag correctly. We use the TE_SENDIPECT to indicate + * whether or not we should set the IP ECT flag on outbound packet + * + * For a SYN-ACK, send an ECN setup SYN-ACK + */ + if ((tcp_ecn_inbound || (tp->t_flags & TF_ENABLE_ECN)) + && (flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { + if (tp->ecn_flags & TE_SETUPRECEIVED) { + if (tcp_send_ecn_flags_on_syn(tp, so)) { + /* + * Setting TH_ECE makes this an ECN-setup + * SYN-ACK + */ + flags |= TH_ECE; + + /* + * Record that we sent the ECN-setup and + * default to setting IP ECT. 
+ */ + tp->ecn_flags |= (TE_SETUPSENT|TE_SENDIPECT); + tcpstat.tcps_ecn_server_setup++; + tcpstat.tcps_ecn_server_success++; + } else { + /* + * We sent an ECN-setup SYN-ACK but it was + * dropped. Fallback to non-ECN-setup + * SYN-ACK and clear flag to indicate that + * we should not send data with IP ECT set + * + * Pretend we didn't receive an + * ECN-setup SYN. + * + * We already incremented the counter + * assuming that the ECN setup will + * succeed. Decrementing here + * tcps_ecn_server_success to correct it. + */ + if (tp->ecn_flags & TE_SETUPSENT) { + tcpstat.tcps_ecn_lost_synack++; + tcpstat.tcps_ecn_server_success--; + } + + tp->ecn_flags &= + ~(TE_SETUPRECEIVED | TE_SENDIPECT | + TE_SENDCWR); + } + } + } else if ((tcp_ecn_outbound || (tp->t_flags & TF_ENABLE_ECN)) + && (flags & (TH_SYN | TH_ACK)) == TH_SYN) { + if (tcp_send_ecn_flags_on_syn(tp, so)) { + /* + * Setting TH_ECE and TH_CWR makes this an + * ECN-setup SYN + */ + flags |= (TH_ECE | TH_CWR); + tcpstat.tcps_ecn_client_setup++; + + /* + * Record that we sent the ECN-setup and default to + * setting IP ECT. + */ + tp->ecn_flags |= (TE_SETUPSENT | TE_SENDIPECT); + } else { + /* + * We sent an ECN-setup SYN but it was dropped. + * Fall back to non-ECN and clear flag indicating + * we should send data with IP ECT set. + */ + if (tp->ecn_flags & TE_SETUPSENT) + tcpstat.tcps_ecn_lost_syn++; + tp->ecn_flags &= ~TE_SENDIPECT; + } + } + + /* + * Check if we should set the TCP CWR flag. + * CWR flag is sent when we reduced the congestion window because + * we received a TCP ECE or we performed a fast retransmit. We + * never set the CWR flag on retransmitted packets. We only set + * the CWR flag on data packets. Pure acks don't have this set. + */ + if ((tp->ecn_flags & TE_SENDCWR) != 0 && len != 0 && + !SEQ_LT(tp->snd_nxt, tp->snd_max) && !sack_rxmit) { + flags |= TH_CWR; + tp->ecn_flags &= ~TE_SENDCWR; + } + + /* + * Check if we should set the TCP ECE flag. 
+ */ + if ((tp->ecn_flags & TE_SENDECE) != 0 && len == 0) { + flags |= TH_ECE; + tcpstat.tcps_ecn_sent_ece++; + } + + hdrlen += optlen; + /* Reset DSACK sequence numbers */ + tp->t_dsack_lseq = 0; + tp->t_dsack_rseq = 0; + #if INET6 if (isipv6) ipoptlen = ip6_optlen(inp); @@ -1501,6 +1730,7 @@ tcp_output(struct tcpcb *tp) * the template for sends on this connection. */ if (len) { + tp->t_pmtud_lastseg_size = len + optlen + ipoptlen; if ((tp->t_flagsext & TF_FORCE) && len == 1) tcpstat.tcps_sndprobe++; else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) { @@ -1636,8 +1866,10 @@ tcp_output(struct tcpcb *tp) * (This will keep happy those implementations which only * give data to the user when a buffer fills or * a PUSH comes in.) + * + * On SYN-segments we should not add the PUSH-flag. */ - if (off + len == so->so_snd.sb_cc) + if (off + len == so->so_snd.sb_cc && !(flags & TH_SYN)) flags |= TH_PUSH; } else { if (tp->t_flags & TF_ACKNOW) @@ -1696,8 +1928,9 @@ tcp_output(struct tcpcb *tp) /* this picks up the pseudo header (w/o the length) */ tcp_fillheaders(tp, ip, th); if ((tp->ecn_flags & TE_SENDIPECT) != 0 && len && - !SEQ_LT(tp->snd_nxt, tp->snd_max) && !sack_rxmit) { - ip->ip_tos = IPTOS_ECN_ECT0; + !SEQ_LT(tp->snd_nxt, tp->snd_max) && + !sack_rxmit && !(flags & TH_SYN)) { + ip->ip_tos |= IPTOS_ECN_ECT0; } #if PF_ECN m->m_pkthdr.pf_mtag.pftag_hdr = (void *)ip; @@ -1710,7 +1943,7 @@ tcp_output(struct tcpcb *tp) * window for use in delaying messages about window sizes. * If resending a FIN, be sure not to use a new sequence number. */ - if (flags & TH_FIN && (tp->t_flags & TF_SENTFIN) && + if ((flags & TH_FIN) && (tp->t_flags & TF_SENTFIN) && tp->snd_nxt == tp->snd_max) tp->snd_nxt--; /* @@ -1725,16 +1958,30 @@ tcp_output(struct tcpcb *tp) * right edge of the window, so use snd_nxt in that * case, since we know we aren't doing a retransmission. * (retransmit and persist are mutually exclusive...) 
+ * + * Note the state of this retransmit segment to detect spurious + * retransmissions. */ if (sack_rxmit == 0) { - if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST]) + if (len || (flags & (TH_SYN|TH_FIN)) || + tp->t_timer[TCPT_PERSIST]) { th->th_seq = htonl(tp->snd_nxt); - else + if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { + if (SACK_ENABLED(tp) && len > 1) { + tcp_rxtseg_insert(tp, tp->snd_nxt, + (tp->snd_nxt + len - 1)); + } + m->m_pkthdr.pkt_flags |= PKTF_TCP_REXMT; + } + } else { th->th_seq = htonl(tp->snd_max); + } } else { th->th_seq = htonl(p->rxmit); + tcp_rxtseg_insert(tp, p->rxmit, (p->rxmit + len - 1)); p->rxmit += len; tp->sackhint.sack_bytes_rexmit += len; + m->m_pkthdr.pkt_flags |= PKTF_TCP_REXMT; } th->th_ack = htonl(tp->rcv_nxt); tp->last_ack_sent = tp->rcv_nxt; @@ -1873,7 +2120,13 @@ tcp_output(struct tcpcb *tp) } if (sack_rxmit) goto timer; - tp->snd_nxt += len; + if (sack_rescue_rxt == TRUE) { + tp->snd_nxt = old_snd_nxt; + sack_rescue_rxt = FALSE; + tcpstat.tcps_pto_in_recovery++; + } else { + tp->snd_nxt += len; + } if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { tp->snd_max = tp->snd_nxt; /* @@ -1884,6 +2137,9 @@ tcp_output(struct tcpcb *tp) tp->t_rtttime = tcp_now; tp->t_rtseq = startseq; tcpstat.tcps_segstimed++; + + /* update variables related to pipe ack */ + tp->t_pipeack_lastuna = tp->snd_una; } } @@ -2031,20 +2287,21 @@ tcp_output(struct tcpcb *tp) #endif /* INET6 */ if (path_mtu_discovery && (tp->t_flags & TF_PMTUD)) ip->ip_off |= IP_DF; - + #if NECP { necp_kernel_policy_id policy_id; - if (!necp_socket_is_allowed_to_send_recv(inp, &policy_id)) { + u_int32_t route_rule_id; + if (!necp_socket_is_allowed_to_send_recv(inp, &policy_id, &route_rule_id)) { m_freem(m); error = EHOSTUNREACH; goto out; } - necp_mark_packet_from_socket(m, inp, policy_id); + necp_mark_packet_from_socket(m, inp, policy_id, route_rule_id); } #endif /* NECP */ - + #if IPSEC if (inp->inp_sp != NULL) ipsec_setsocket(m, so); diff --git a/bsd/netinet/tcp_sack.c 
b/bsd/netinet/tcp_sack.c index e3b339360..7d8b715ed 100644 --- a/bsd/netinet/tcp_sack.c +++ b/bsd/netinet/tcp_sack.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2014 Apple Inc. All rights reserved. + * Copyright (c) 2004-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -122,8 +122,29 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalholes, CTLFLAG_RD | CTLFLAG_LOCKE &tcp_sack_globalholes, 0, "Global number of TCP SACK holes currently allocated"); +static int tcp_detect_reordering = 1; +static int tcp_dsack_ignore_hw_duplicates = 0; + +#if (DEVELOPMENT || DEBUG) +SYSCTL_INT(_net_inet_tcp, OID_AUTO, detect_reordering, + CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_detect_reordering, 0, ""); + +SYSCTL_INT(_net_inet_tcp, OID_AUTO, ignore_hw_duplicates, + CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_dsack_ignore_hw_duplicates, 0, ""); +#endif /* (DEVELOPMENT || DEBUG) */ + extern struct zone *sack_hole_zone; +#define TCP_VALIDATE_SACK_SEQ_NUMBERS(_tp_, _sb_, _ack_) \ + (SEQ_GT((_sb_)->end, (_sb_)->start) && \ + SEQ_GT((_sb_)->start, (_tp_)->snd_una) && \ + SEQ_GT((_sb_)->start, (_ack_)) && \ + SEQ_LT((_sb_)->start, (_tp_)->snd_max) && \ + SEQ_GT((_sb_)->end, (_tp_)->snd_una) && \ + SEQ_LEQ((_sb_)->end, (_tp_)->snd_max)) + /* * This function is called upon receipt of new valid data (while not in header * prediction mode), and it updates the ordered list of sacks. 
@@ -294,7 +315,7 @@ tcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end, if (tp->sackhint.nexthole == NULL) tp->sackhint.nexthole = hole; - return hole; + return(hole); } /* @@ -349,7 +370,8 @@ tcp_sack_detect_reordering(struct tcpcb *tp, struct sackhole *s, } if (reordered) { - if (!(tp->t_flagsext & TF_PKTS_REORDERED)) { + if (tcp_detect_reordering == 1 && + !(tp->t_flagsext & TF_PKTS_REORDERED)) { tp->t_flagsext |= TF_PKTS_REORDERED; tcpstat.tcps_detect_reordering++; } @@ -415,12 +437,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, &sack, sizeof(sack)); sack.start = ntohl(sack.start); sack.end = ntohl(sack.end); - if (SEQ_GT(sack.end, sack.start) && - SEQ_GT(sack.start, tp->snd_una) && - SEQ_GT(sack.start, th_ack) && - SEQ_LT(sack.start, tp->snd_max) && - SEQ_GT(sack.end, tp->snd_una) && - SEQ_LEQ(sack.end, tp->snd_max)) + if (TCP_VALIDATE_SACK_SEQ_NUMBERS(tp, &sack, th_ack)) sack_blocks[num_sack_blks++] = sack; } @@ -651,7 +668,20 @@ tcp_sack_partialack(tp, th) num_segs * tp->t_maxseg); if (tp->snd_cwnd > tp->snd_ssthresh) tp->snd_cwnd = tp->snd_ssthresh; - tp->t_flags |= TF_ACKNOW; + if (SEQ_LT(tp->snd_fack, tp->snd_recover) && + tp->snd_fack == th->th_ack && TAILQ_EMPTY(&tp->snd_holes)) { + struct sackhole *temp; + /* + * we received a partial ack but there is no sack_hole + * that will cover the remaining seq space. In this case, + * create a hole from snd_fack to snd_recover so that + * the sack recovery will continue. + */ + temp = tcp_sackhole_insert(tp, tp->snd_fack, + tp->snd_recover, NULL); + if (temp != NULL) + tp->snd_fack = tp->snd_recover; + } (void) tcp_output(tp); } @@ -762,7 +792,7 @@ tcp_sack_adjust(struct tcpcb *tp) } /* - * This function returns true if more than (tcprexmtthresh - 1) * SMSS + * This function returns TRUE if more than (tcprexmtthresh - 1) * SMSS * bytes with sequence numbers greater than snd_una have been SACKed. 
*/ boolean_t @@ -785,3 +815,131 @@ tcp_sack_byte_islost(struct tcpcb *tp) return ((unacked_bytes - sndhole_bytes) > ((tcprexmtthresh - 1) * tp->t_maxseg)); } + +/* + * Process any DSACK options that might be present on an input packet + */ + +boolean_t +tcp_sack_process_dsack(struct tcpcb *tp, struct tcpopt *to, + struct tcphdr *th) +{ + struct sackblk first_sack, second_sack; + struct tcp_rxt_seg *rxseg; + + bcopy(to->to_sacks, &first_sack, sizeof(first_sack)); + first_sack.start = ntohl(first_sack.start); + first_sack.end = ntohl(first_sack.end); + + if (to->to_nsacks > 1) { + bcopy((to->to_sacks + TCPOLEN_SACK), &second_sack, + sizeof(second_sack)); + second_sack.start = ntohl(second_sack.start); + second_sack.end = ntohl(second_sack.end); + } + + if (SEQ_LT(first_sack.start, th->th_ack) && + SEQ_LEQ(first_sack.end, th->th_ack)) { + /* + * There is a dsack option reporting a duplicate segment + * also covered by cumulative acknowledgement. + * + * Validate the sequence numbers before looking at dsack + * option. The duplicate notification can come after + * snd_una moves forward. In order to set a window of valid + * sequence numbers to look for, we set a maximum send + * window within which the DSACK option will be processed. + */ + if (!(TCP_DSACK_SEQ_IN_WINDOW(tp, first_sack.start, th->th_ack) && + TCP_DSACK_SEQ_IN_WINDOW(tp, first_sack.end, th->th_ack))) { + to->to_nsacks--; + to->to_sacks += TCPOLEN_SACK; + tcpstat.tcps_dsack_recvd_old++; + + /* + * returning true here so that the ack will not be + * treated as duplicate ack. + */ + return (TRUE); + } + } else if (to->to_nsacks > 1 && + SEQ_LEQ(second_sack.start, first_sack.start) && + SEQ_GEQ(second_sack.end, first_sack.end)) { + /* + * there is a dsack option in the first block not + * covered by the cumulative acknowledgement but covered + * by the second sack block. + * + * verify the sequence numbers on the second sack block + * before processing the DSACK option.
Returning false + * here will treat the ack as a duplicate ack. + */ + if (!TCP_VALIDATE_SACK_SEQ_NUMBERS(tp, &second_sack, + th->th_ack)) { + to->to_nsacks--; + to->to_sacks += TCPOLEN_SACK; + tcpstat.tcps_dsack_recvd_old++; + return (TRUE); + } + } else { + /* no dsack options, proceed with processing the sack */ + return (FALSE); + } + + /* Update the tcpopt pointer to exclude dsack block */ + to->to_nsacks--; + to->to_sacks += TCPOLEN_SACK; + tcpstat.tcps_dsack_recvd++; + + /* ignore DSACK option, if DSACK is disabled */ + if (tp->t_flagsext & TF_DISABLE_DSACK) + return (TRUE); + + /* If the DSACK is for TLP mark it as such */ + if ((tp->t_flagsext & TF_SENT_TLPROBE) && + first_sack.end == tp->t_tlphighrxt) { + if ((rxseg = tcp_rxtseg_find(tp, first_sack.start, + (first_sack.end - 1))) != NULL) + rxseg->rx_flags |= TCP_RXT_DSACK_FOR_TLP; + } + /* Update the sender's retransmit segment state */ + if (((tp->t_rxtshift == 1 && first_sack.start == tp->snd_una) || + ((tp->t_flagsext & TF_SENT_TLPROBE) && + first_sack.end == tp->t_tlphighrxt)) && + TAILQ_EMPTY(&tp->snd_holes) && + SEQ_GT(th->th_ack, tp->snd_una)) { + /* + * If the dsack is for a retransmitted packet and one of + * the two cases is true, it indicates ack loss: + * - retransmit timeout and first_sack.start == snd_una + * - TLP probe and first_sack.end == tlphighrxt + * + * Ignore dsack and do not update state when there is + * ack loss + */ + tcpstat.tcps_dsack_ackloss++; + + return (TRUE); + } else if ((rxseg = tcp_rxtseg_find(tp, first_sack.start, + (first_sack.end - 1))) == NULL) { + /* + * Duplicate notification was not triggered by a + * retransmission. This might be due to network duplication, + * disable further DSACK processing. + */ + if (!tcp_dsack_ignore_hw_duplicates) { + tp->t_flagsext |= TF_DISABLE_DSACK; + tcpstat.tcps_dsack_disable++; + } + } else { + /* + * If the segment was retransmitted only once, mark it as + * spurious. Otherwise ignore the duplicate notification. 
+ */ + if (rxseg->rx_count == 1) + rxseg->rx_flags |= TCP_RXT_SPURIOUS; + else + rxseg->rx_flags &= ~TCP_RXT_SPURIOUS; + } + return (TRUE); +} diff --git a/bsd/netinet/tcp_subr.c b/bsd/netinet/tcp_subr.c index e68e181da..6fafa0f5f 100644 --- a/bsd/netinet/tcp_subr.c +++ b/bsd/netinet/tcp_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -117,6 +117,7 @@ #include #include #include +#include #include #if INET6 @@ -145,6 +146,8 @@ #include #endif /* MAC_NET */ +#include +#include #include #include #include @@ -171,6 +174,28 @@ SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, extern int tcp_do_autorcvbuf; +int tcp_sysctl_fastopenkey(struct sysctl_oid *, void *, int , + struct sysctl_req *); +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, fastopen_key, + CTLTYPE_STRING | CTLFLAG_WR, + 0 , 0, tcp_sysctl_fastopenkey, "S", "TCP Fastopen key"); + +/* Current count of half-open TFO connections */ +int tcp_tfo_halfcnt = 0; + +/* Maximum of half-open TFO connection backlog */ +int tcp_tfo_backlog = 10; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, fastopen_backlog, CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_tfo_backlog, 0, "Backlog queue for half-open TFO connections"); + +int tcp_fastopen = TCP_FASTOPEN_CLIENT | TCP_FASTOPEN_SERVER; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_fastopen, 0, "Enable TCP Fastopen (RFC 7413)"); + +int tcp_tfo_fallback_min = 10; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, fastopen_fallback_min, CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_tfo_fallback_min, 0, "Mininum number of trials without TFO when in fallback mode"); + /* * Minimum MSS we accept and use. 
This prevents DoS attacks where * we are forced to a ridiculous low MSS like 20 and send hundreds @@ -182,10 +207,12 @@ extern int tcp_do_autorcvbuf; int tcp_minmss = TCP_MINMSS; SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_minmss , 0, "Minmum TCP Maximum Segment Size"); - -static int tcp_do_rfc1323 = 1; -SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions"); +int tcp_do_rfc1323 = 1; +#if (DEVELOPMENT || DEBUG) +SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_rfc1323 , 0, + "Enable rfc1323 (high performance TCP) extensions"); +#endif /* (DEVELOPMENT || DEBUG) */ // Not used static int tcp_do_rfc1644 = 0; @@ -208,15 +235,16 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW | CTLFLAG_LOCKED, & "Certain ICMP unreachable messages may abort connections in SYN_SENT"); static int tcp_strict_rfc1948 = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, strict_rfc1948, CTLFLAG_RW | CTLFLAG_LOCKED, +static int tcp_isn_reseed_interval = 0; +#if (DEVELOPMENT || DEBUG) +SYSCTL_INT(_net_inet_tcp, OID_AUTO, strict_rfc1948, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_strict_rfc1948, 0, "Determines if RFC1948 is followed exactly"); -static int tcp_isn_reseed_interval = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW | CTLFLAG_LOCKED, +SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_isn_reseed_interval, 0, "Seconds between reseeding of ISN secret"); -static int tcp_background_io_enabled = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_io_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_background_io_enabled, 0, "Background IO Enabled"); +#endif /* (DEVELOPMENT || DEBUG) */ int tcp_TCPTV_MIN = 100; /* 100ms minimum RTT */ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rtt_min, CTLFLAG_RW | CTLFLAG_LOCKED, @@ -231,7 +259,8 @@ SYSCTL_INT(_net_inet_tcp, 
OID_AUTO, randomize_ports, CTLFLAG_RW | CTLFLAG_LOCKED &tcp_use_randomport, 0, "Randomize TCP port numbers"); __private_extern__ int tcp_win_scale = 3; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, win_scale_factor, CTLFLAG_RW | CTLFLAG_LOCKED, +SYSCTL_INT(_net_inet_tcp, OID_AUTO, win_scale_factor, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_win_scale, 0, "Window scaling factor"); static void tcp_cleartaocache(void); @@ -240,6 +269,7 @@ static void tcp_notify(struct inpcb *, int); struct zone *sack_hole_zone; struct zone *tcp_reass_zone; struct zone *tcp_bwmeas_zone; +struct zone *tcp_rxt_seg_zone; extern int slowlink_wsize; /* window correction for slow links */ extern int path_mtu_discovery; @@ -292,6 +322,71 @@ static lck_grp_t *tcp_uptime_mtx_grp = NULL; /* mutex group definition */ static lck_grp_attr_t *tcp_uptime_mtx_grp_attr = NULL; /* mutex group attributes */ int tcp_notsent_lowat_check(struct socket *so); +static aes_encrypt_ctx tfo_ctx; /* Crypto-context for TFO */ + +void +tcp_tfo_gen_cookie(struct inpcb *inp, u_char *out, size_t blk_size) +{ + u_char in[CCAES_BLOCK_SIZE]; +#if INET6 + int isipv6 = inp->inp_vflag & INP_IPV6; +#endif + + VERIFY(blk_size == CCAES_BLOCK_SIZE); + + bzero(&in[0], CCAES_BLOCK_SIZE); + bzero(&out[0], CCAES_BLOCK_SIZE); + +#if INET6 + if (isipv6) + memcpy(in, &inp->in6p_faddr, sizeof(struct in6_addr)); + else +#endif /* INET6 */ + memcpy(in, &inp->inp_faddr, sizeof(struct in_addr)); + + aes_encrypt_cbc(in, NULL, 1, out, &tfo_ctx); +} + +__private_extern__ int +tcp_sysctl_fastopenkey(__unused struct sysctl_oid *oidp, __unused void *arg1, + __unused int arg2, struct sysctl_req *req) +{ + int error = 0; + /* TFO-key is expressed as a string in hex format (+1 to account for \0 char) */ + char keystring[TCP_FASTOPEN_KEYLEN * 2 + 1]; + u_int32_t key[TCP_FASTOPEN_KEYLEN / sizeof(u_int32_t)]; + int i; + + /* -1, because newlen is len without the terminating \0 character */ + if (req->newlen != (sizeof(keystring) - 1)) { + error = EINVAL; + goto exit; 
+ } + + /* sysctl_io_string copies keystring into the oldptr of the sysctl_req. + * Make sure everything is zero, to avoid putting garbage in there or + * leaking the stack. + */ + bzero(keystring, sizeof(keystring)); + + error = sysctl_io_string(req, keystring, sizeof(keystring), 0, NULL); + if (error) + goto exit; + + for (i = 0; i < (TCP_FASTOPEN_KEYLEN / sizeof(u_int32_t)); i++) { + /* We jump over the keystring in 8-character (4 byte in hex) steps */ + if (sscanf(&keystring[i * 8], "%8x", &key[i]) != 1) { + error = EINVAL; + goto exit; + } + } + + aes_encrypt_key128((u_char *)key, &tfo_ctx); + +exit: + return (error); +} + int get_inpcb_str_size(void) { return sizeof(struct inpcb); @@ -342,6 +437,15 @@ static int scale_to_powerof2(int size) { return ret; } +static void +tcp_tfo_init() +{ + u_char key[TCP_FASTOPEN_KEYLEN]; + + read_random(key, sizeof(key)); + aes_encrypt_key128(key, &tfo_ctx); +} + /* * Tcp initialization */ @@ -373,6 +477,8 @@ tcp_init(struct protosw *pp, struct domain *dp) read_random(&tcp_now, sizeof(tcp_now)); tcp_now = tcp_now & 0x3fffffff; /* Starts tcp internal clock at a random value */ + tcp_tfo_init(); + LIST_INIT(&tcb); tcbinfo.ipi_listhead = &tcb; @@ -421,6 +527,7 @@ tcp_init(struct protosw *pp, struct domain *dp) zone_change(tcbinfo.ipi_zone, Z_EXPAND, TRUE); tcbinfo.ipi_gc = tcp_gc; + tcbinfo.ipi_timer = tcp_itimer; in_pcbinfo_attach(&tcbinfo); str_size = P2ROUNDUP(sizeof(struct sackhole), sizeof(u_int64_t)); @@ -452,6 +559,12 @@ tcp_init(struct protosw *pp, struct domain *dp) zone_change(tcp_cc_zone, Z_CALLERACCT, FALSE); zone_change(tcp_cc_zone, Z_EXPAND, TRUE); + str_size = P2ROUNDUP(sizeof(struct tcp_rxt_seg), sizeof(u_int64_t)); + tcp_rxt_seg_zone = zinit(str_size, 10000 * str_size, 0, + "tcp_rxt_seg_zone"); + zone_change(tcp_rxt_seg_zone, Z_CALLERACCT, FALSE); + zone_change(tcp_rxt_seg_zone, Z_EXPAND, TRUE); + #if INET6 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) #else /* INET6 */ @@ -494,6 
+607,9 @@ tcp_init(struct protosw *pp, struct domain *dp) /* Initialize TCP LRO data structures */ tcp_lro_init(); + /* Initialize TCP Cache */ + tcp_cache_init(); + /* * If more than 60 MB of mbuf pool is available, increase the * maximum allowed receive and send socket buffer size. @@ -776,9 +892,9 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, #endif #if NECP - necp_mark_packet_from_socket(m, tp ? tp->t_inpcb : NULL, 0); + necp_mark_packet_from_socket(m, tp ? tp->t_inpcb : NULL, 0, 0); #endif /* NECP */ - + #if IPSEC if (tp != NULL && tp->t_inpcb->inp_sp != NULL && ipsec_setsocket(m, tp ? tp->t_inpcb->inp_socket : NULL) != 0) { @@ -888,7 +1004,7 @@ tcp_newtcpcb(inp) calculate_tcp_clock(); - if (!so->cached_in_sock_layer) { + if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) { it = (struct inp_tp *)(void *)inp; tp = &it->tcb; } else { @@ -909,6 +1025,7 @@ tcp_newtcpcb(inp) tp->t_flagsext |= TF_SACK_ENABLE; TAILQ_INIT(&tp->snd_holes); + SLIST_INIT(&tp->t_rxt_segments); tp->t_inpcb = inp; /* XXX */ /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no @@ -1199,12 +1316,29 @@ tcp_close(tp) /* free the reassembly queue, if any */ (void) tcp_freeq(tp); + if (TCP_ECN_ENABLED(tp)) { + if (tp->ecn_flags & TE_RECV_ECN_CE) + tcpstat.tcps_ecn_conn_recv_ce++; + if (tp->ecn_flags & TE_RECV_ECN_ECE) + tcpstat.tcps_ecn_conn_recv_ece++; + if (tp->ecn_flags & (TE_RECV_ECN_CE | TE_RECV_ECN_ECE)) { + if (tp->t_stat.txretransmitbytes > 0 || + tp->t_stat.rxoutoforderbytes > 0) + tcpstat.tcps_ecn_conn_pl_ce++; + else + tcpstat.tcps_ecn_conn_nopl_ce++; + } else { + if (tp->t_stat.txretransmitbytes > 0 || + tp->t_stat.rxoutoforderbytes > 0) + tcpstat.tcps_ecn_conn_plnoce++; + } + } tcp_free_sackholes(tp); if (tp->t_bwmeas != NULL) { tcp_bwmeas_free(tp); } - + tcp_rxtseg_clean(tp); /* Free the packet list */ if (tp->t_pktlist_head != NULL) m_freem_list(tp->t_pktlist_head); @@ -1220,7 +1354,7 @@ tcp_close(tp) tp->t_mptcb = NULL; #endif 
/* MPTCP */ - if (so->cached_in_sock_layer) + if (so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) inp->inp_saved_ppcb = (caddr_t) tp; tp->t_state = TCPS_CLOSED; @@ -1259,6 +1393,16 @@ tcp_close(tp) } tp->tcp_cc_index = TCP_CC_ALGO_NONE; + /* Can happen if we close the socket before receiving the third ACK */ + if ((tp->t_tfo_flags & TFO_F_COOKIE_VALID)) { + OSDecrementAtomic(&tcp_tfo_halfcnt); + + /* Panic if something has gone terribly wrong. */ + VERIFY(tcp_tfo_halfcnt >= 0); + + tp->t_tfo_flags &= ~TFO_F_COOKIE_VALID; + } + #if INET6 if (SOCK_CHECK_DOM(so, PF_INET6)) in6_pcbdetach(inp); @@ -1292,39 +1436,44 @@ tcp_freeq(tp) return (rv); } + +/* + * Walk the tcpbs, if existing, and flush the reassembly queue, + * if there is one when do_tcpdrain is enabled + * Also defunct the extended background idle socket + * Do it next time if the pcbinfo lock is in use + */ void tcp_drain() { - if (do_tcpdrain) - { - struct inpcb *inp; - struct tcpcb *tp; - /* - * Walk the tcpbs, if existing, and flush the reassembly queue, - * if there is one... 
- * Do it next time if the pcbinfo lock is in use - */ - if (!lck_rw_try_lock_exclusive(tcbinfo.ipi_lock)) - return; + struct inpcb *inp; + struct tcpcb *tp; - LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) { - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != - WNT_STOPUSING) { - tcp_lock(inp->inp_socket, 1, 0); - if (in_pcb_checkstate(inp, WNT_RELEASE, 1) - == WNT_STOPUSING) { - /* lost a race, try the next one */ - tcp_unlock(inp->inp_socket, 1, 0); - continue; - } - tp = intotcpcb(inp); - tcp_freeq(tp); + if (!lck_rw_try_lock_exclusive(tcbinfo.ipi_lock)) + return; + + LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) { + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != + WNT_STOPUSING) { + tcp_lock(inp->inp_socket, 1, 0); + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) + == WNT_STOPUSING) { + /* lost a race, try the next one */ tcp_unlock(inp->inp_socket, 1, 0); - } - } - lck_rw_done(tcbinfo.ipi_lock); + continue; + } + tp = intotcpcb(inp); + + if (do_tcpdrain) + tcp_freeq(tp); + so_drain_extended_bk_idle(inp->inp_socket); + + tcp_unlock(inp->inp_socket, 1, 0); + } } + lck_rw_done(tcbinfo.ipi_lock); + } /* @@ -1813,6 +1962,9 @@ tcp_ctlinput(cmd, sa, vip) if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; + if ((unsigned)cmd >= PRC_NCMDS) + return; + if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc; else if (icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || @@ -1826,7 +1978,7 @@ tcp_ctlinput(cmd, sa, vip) /* Source quench is deprecated */ else if (cmd == PRC_QUENCH) return; - else if ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0) + else if (inetctlerrmap[cmd] == 0) return; if (ip) { struct tcphdr th; @@ -1931,10 +2083,12 @@ tcp6_ctlinput(cmd, sa, d) sa->sa_len != sizeof(struct sockaddr_in6)) return; + if ((unsigned)cmd >= PRC_NCMDS) + return; + if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc; - else if (!PRC_IS_REDIRECT(cmd) && - ((unsigned)cmd > PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) + else if (!PRC_IS_REDIRECT(cmd) && (inet6ctlerrmap[cmd] == 0)) 
return; /* Source quench is deprecated */ else if (cmd == PRC_QUENCH) @@ -2117,6 +2271,7 @@ tcp_mtudisc( struct socket *so = inp->inp_socket; int offered; int mss; + u_int32_t mtu; #if INET6 int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ @@ -2142,7 +2297,17 @@ tcp_mtudisc( } taop = rmx_taop(rt->rt_rmx); offered = taop->tao_mssopt; - mss = rt->rt_rmx.rmx_mtu - + mtu = rt->rt_rmx.rmx_mtu; + + /* Route locked during lookup above */ + RT_UNLOCK(rt); + +#if NECP + // Adjust MTU if necessary. + mtu = necp_socket_get_effective_mtu(inp, mtu); +#endif /* NECP */ + + mss = mtu - #if INET6 (isipv6 ? sizeof(struct ip6_hdr) + sizeof(struct tcphdr) : @@ -2153,9 +2318,6 @@ tcp_mtudisc( #endif /* INET6 */ ; - /* Route locked during lookup above */ - RT_UNLOCK(rt); - if (offered) mss = min(mss, offered); /* @@ -2592,14 +2754,16 @@ tcp_getlock( } } -/* Determine if we can grow the recieve socket buffer to avoid sending +/* + * Determine if we can grow the recieve socket buffer to avoid sending * a zero window update to the peer. We allow even socket buffers that * have fixed size (set by the application) to grow if the resource * constraints are met. They will also be trimmed after the application * reads data. 
*/ static void -tcp_sbrcv_grow_rwin(struct tcpcb *tp, struct sockbuf *sb) { +tcp_sbrcv_grow_rwin(struct tcpcb *tp, struct sockbuf *sb) +{ u_int32_t rcvbufinc = tp->t_maxseg << 4; u_int32_t rcvbuf = sb->sb_hiwat; struct socket *so = tp->t_inpcb->inp_socket; @@ -2614,6 +2778,7 @@ tcp_sbrcv_grow_rwin(struct tcpcb *tp, struct sockbuf *sb) { if (tcp_do_autorcvbuf == 1 && tcp_cansbgrow(sb) && (tp->t_flags & TF_SLOWLINK) == 0 && + (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) == 0 && (rcvbuf - sb->sb_cc) < rcvbufinc && rcvbuf < tcp_autorcvbuf_max && (sb->sb_idealsize > 0 && @@ -2783,6 +2948,10 @@ calculate_tcp_clock() void tcp_set_max_rwinscale(struct tcpcb *tp, struct socket *so) { u_int32_t maxsockbufsize; + if (!tcp_do_rfc1323) { + tp->request_r_scale = 0; + return; + } tp->request_r_scale = max(tcp_win_scale, tp->request_r_scale); maxsockbufsize = ((so->so_rcv.sb_flags & SB_USRSIZE) != 0) ? @@ -2827,5 +2996,197 @@ tcp_notsent_lowat_check(struct socket *so) { return(0); } +void +tcp_rxtseg_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end) { + struct tcp_rxt_seg *rxseg = NULL, *prev = NULL, *next = NULL; + u_int32_t rxcount = 0; + + if (SLIST_EMPTY(&tp->t_rxt_segments)) + tp->t_dsack_lastuna = tp->snd_una; + /* + * First check if there is a segment already existing for this + * sequence space. + */ + + SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) { + if (SEQ_GT(rxseg->rx_start, start)) + break; + prev = rxseg; + } + next = rxseg; + + /* check if prev seg is for this sequence */ + if (prev != NULL && SEQ_LEQ(prev->rx_start, start) && + SEQ_GEQ(prev->rx_end, end)) { + prev->rx_count++; + return; + } + + /* + * There are a couple of possibilities at this point. + * 1. prev overlaps with the beginning of this sequence + * 2. next overlaps with the end of this sequence + * 3. there is no overlap. 
+ */ + + if (prev != NULL && SEQ_GT(prev->rx_end, start)) { + if (prev->rx_start == start && SEQ_GT(end, prev->rx_end)) { + start = prev->rx_end + 1; + prev->rx_count++; + } else { + prev->rx_end = (start - 1); + rxcount = prev->rx_count; + } + } + + if (next != NULL && SEQ_LT(next->rx_start, end)) { + if (SEQ_LEQ(next->rx_end, end)) { + end = next->rx_start - 1; + next->rx_count++; + } else { + next->rx_start = end + 1; + rxcount = next->rx_count; + } + } + if (!SEQ_LT(start, end)) + return; + + rxseg = (struct tcp_rxt_seg *) zalloc(tcp_rxt_seg_zone); + if (rxseg == NULL) { + return; + } + bzero(rxseg, sizeof(*rxseg)); + rxseg->rx_start = start; + rxseg->rx_end = end; + rxseg->rx_count = rxcount + 1; + + if (prev != NULL) { + SLIST_INSERT_AFTER(prev, rxseg, rx_link); + } else { + SLIST_INSERT_HEAD(&tp->t_rxt_segments, rxseg, rx_link); + } + return; +} + +struct tcp_rxt_seg * +tcp_rxtseg_find(struct tcpcb *tp, tcp_seq start, tcp_seq end) +{ + struct tcp_rxt_seg *rxseg; + if (SLIST_EMPTY(&tp->t_rxt_segments)) + return (NULL); + + SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) { + if (SEQ_LEQ(rxseg->rx_start, start) && + SEQ_GEQ(rxseg->rx_end, end)) + return (rxseg); + if (SEQ_GT(rxseg->rx_start, start)) + break; + } + return (NULL); +} + +void +tcp_rxtseg_clean(struct tcpcb *tp) +{ + struct tcp_rxt_seg *rxseg, *next; + + SLIST_FOREACH_SAFE(rxseg, &tp->t_rxt_segments, rx_link, next) { + SLIST_REMOVE(&tp->t_rxt_segments, rxseg, + tcp_rxt_seg, rx_link); + zfree(tcp_rxt_seg_zone, rxseg); + } + tp->t_dsack_lastuna = tp->snd_max; +} + +boolean_t +tcp_rxtseg_detect_bad_rexmt(struct tcpcb *tp, tcp_seq th_ack) +{ + boolean_t bad_rexmt; + struct tcp_rxt_seg *rxseg; + + if (SLIST_EMPTY(&tp->t_rxt_segments)) + return (FALSE); + + /* + * If all of the segments in this window are not cumulatively + * acknowledged, then there can still be undetected packet loss. + * Do not restore congestion window in that case. 
+ */ + if (SEQ_LT(th_ack, tp->snd_recover)) + return (FALSE); + + bad_rexmt = TRUE; + SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) { + if (rxseg->rx_count > 1 || + !(rxseg->rx_flags & TCP_RXT_SPURIOUS)) { + bad_rexmt = FALSE; + break; + } + } + return (bad_rexmt); +} + +boolean_t +tcp_rxtseg_dsack_for_tlp(struct tcpcb *tp) +{ + boolean_t dsack_for_tlp = FALSE; + struct tcp_rxt_seg *rxseg; + if (SLIST_EMPTY(&tp->t_rxt_segments)) + return (FALSE); + + SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) { + if (rxseg->rx_count == 1 && + SLIST_NEXT(rxseg,rx_link) == NULL && + (rxseg->rx_flags & TCP_RXT_DSACK_FOR_TLP)) { + dsack_for_tlp = TRUE; + break; + } + } + return (dsack_for_tlp); +} + +u_int32_t +tcp_rxtseg_total_size(struct tcpcb *tp) { + struct tcp_rxt_seg *rxseg; + u_int32_t total_size = 0; + + SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) { + total_size += (rxseg->rx_end - rxseg->rx_start) + 1; + } + return (total_size); +} + +void +tcp_get_connectivity_status(struct tcpcb *tp, + struct tcp_conn_status *connstatus) +{ + if (tp == NULL || connstatus == NULL) + return; + bzero(connstatus, sizeof(*connstatus)); + if (tp->t_rxtshift >= TCP_CONNECTIVITY_PROBES_MAX) { + if (TCPS_HAVEESTABLISHED(tp->t_state)) { + connstatus->write_probe_failed = 1; + } else { + connstatus->conn_probe_failed = 1; + } + } + if (tp->t_rtimo_probes >= TCP_CONNECTIVITY_PROBES_MAX) + connstatus->read_probe_failed = 1; + if (tp->t_inpcb != NULL && tp->t_inpcb->inp_last_outifp != NULL + && (tp->t_inpcb->inp_last_outifp->if_eflags & IFEF_PROBE_CONNECTIVITY)) + connstatus->probe_activated = 1; + return; +} + +boolean_t +tfo_enabled(const struct tcpcb *tp) +{ + return !!(tp->t_flagsext & TF_FASTOPEN); +} + +void +tcp_disable_tfo(struct tcpcb *tp) +{ + tp->t_flagsext &= ~TF_FASTOPEN; +} -/* DSEP Review Done pl-20051213-v02 @3253,@3391,@3400 */ diff --git a/bsd/netinet/tcp_timer.c b/bsd/netinet/tcp_timer.c index aa2317164..046163f7b 100644 --- a/bsd/netinet/tcp_timer.c +++ 
b/bsd/netinet/tcp_timer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -89,6 +89,7 @@ #endif #include #include +#include #include #include #include @@ -131,6 +132,12 @@ /* Max number of times a stretch ack can be delayed on a connection */ #define TCP_STRETCHACK_DELAY_THRESHOLD 5 +/* + * If the host processor has been sleeping for too long, this is the threshold + * used to avoid sending stale retransmissions. + */ +#define TCP_SLEEP_TOO_LONG (10 * 60 * 1000) /* 10 minutes in ms */ + /* tcp timer list */ struct tcptimerlist tcp_timer_list; @@ -220,17 +227,9 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_fastmode_idlemax, * SYN retransmits. Setting it to 0 disables the dropping off of those * two options. */ -static int tcp_broken_peer_syn_rxmit_thres = 7; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rxmit_thres, - CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_broken_peer_syn_rxmit_thres, 0, - "Number of retransmitted SYNs before " - "TCP disables rfc1323 and rfc1644 during the rest of attempts"); - -/* A higher threshold on local connections for disabling RFC 1323 options */ -static int tcp_broken_peer_syn_rxmit_thres_local = 10; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rexmit_thres_local, - CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_broken_peer_syn_rxmit_thres_local, 0, +static int tcp_broken_peer_syn_rxmit_thres = 10; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rexmit_thres, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_broken_peer_syn_rxmit_thres, 0, "Number of retransmitted SYNs before disabling RFC 1323 " "options on local connections"); @@ -254,6 +253,14 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_pmtud_black_hole_mss, 0, "Path MTU Discovery Black Hole Detection lowered MSS"); +#define TCP_REPORT_STATS_INTERVAL 43200 /* 12 hours, in seconds */ +int 
tcp_report_stats_interval = TCP_REPORT_STATS_INTERVAL; +#if (DEVELOPMENT || DEBUG) +SYSCTL_INT(_net_inet_tcp, OID_AUTO, report_stats_interval, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_report_stats_interval, 0, + "Report stats interval"); +#endif /* (DEVELOPMENT || DEBUG) */ + /* performed garbage collection of "used" sockets */ static boolean_t tcp_gc_done = FALSE; @@ -274,28 +281,66 @@ int tcp_delack = TCP_RETRANSHZ / 10; int tcp_jack_rxmt = TCP_RETRANSHZ / 2; #endif /* MPTCP */ +static boolean_t tcp_itimer_done = FALSE; + static void tcp_remove_timer(struct tcpcb *tp); static void tcp_sched_timerlist(uint32_t offset); -static u_int32_t tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *mode); +static u_int32_t tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *mode, + u_int16_t probe_if_index); static void tcp_sched_timers(struct tcpcb *tp); static inline void tcp_set_lotimer_index(struct tcpcb *); -static void tcp_rexmt_save_state(struct tcpcb *tp); __private_extern__ void tcp_remove_from_time_wait(struct inpcb *inp); __private_extern__ void tcp_report_stats(void); /* - * Macro to compare two timers. If there is a reset of the sign bit, - * it is safe to assume that the timer has wrapped around. By doing - * signed comparision, we take care of wrap around such that the value + * Macro to compare two timers. If there is a reset of the sign bit, + * it is safe to assume that the timer has wrapped around. By doing + * signed comparision, we take care of wrap around such that the value * with the sign bit reset is actually ahead of the other. */ inline int32_t -timer_diff(uint32_t t1, uint32_t toff1, uint32_t t2, uint32_t toff2) { +timer_diff(uint32_t t1, uint32_t toff1, uint32_t t2, uint32_t toff2) { return (int32_t)((t1 + toff1) - (t2 + toff2)); }; static u_int64_t tcp_last_report_time; -#define TCP_REPORT_STATS_INTERVAL 345600 /* 4 days, in seconds */ + +/* + * Structure to store previously reported stats so that we can send + * incremental changes in each report interval. 
+ */ +struct tcp_last_report_stats { + u_int32_t tcps_connattempt; + u_int32_t tcps_accepts; + u_int32_t tcps_ecn_client_setup; + u_int32_t tcps_ecn_server_setup; + u_int32_t tcps_ecn_client_success; + u_int32_t tcps_ecn_server_success; + u_int32_t tcps_ecn_not_supported; + u_int32_t tcps_ecn_lost_syn; + u_int32_t tcps_ecn_lost_synack; + u_int32_t tcps_ecn_recv_ce; + u_int32_t tcps_ecn_recv_ece; + u_int32_t tcps_ecn_sent_ece; + u_int32_t tcps_ecn_conn_recv_ce; + u_int32_t tcps_ecn_conn_recv_ece; + u_int32_t tcps_ecn_conn_plnoce; + u_int32_t tcps_ecn_conn_pl_ce; + u_int32_t tcps_ecn_conn_nopl_ce; + + /* TFO-related statistics */ + u_int32_t tcps_tfo_syn_data_rcv; + u_int32_t tcps_tfo_cookie_req_rcv; + u_int32_t tcps_tfo_cookie_sent; + u_int32_t tcps_tfo_cookie_invalid; + u_int32_t tcps_tfo_cookie_req; + u_int32_t tcps_tfo_cookie_rcv; + u_int32_t tcps_tfo_syn_data_sent; + u_int32_t tcps_tfo_syn_data_acked; + u_int32_t tcps_tfo_syn_loss; + u_int32_t tcps_tfo_blackhole; +}; + /* Returns true if the timer is on the timer list */ #define TIMER_IS_ON_LIST(tp) ((tp)->t_flags & TF_TIMER_ONLIST) @@ -350,6 +395,9 @@ add_to_time_wait(struct tcpcb *tp, uint32_t delay) if (tp->t_inpcb->inp_socket->so_options & SO_NOWAKEFROMSLEEP) socket_post_kev_msg_closed(tp->t_inpcb->inp_socket); + /* 19182803: Notify nstat that connection is closing before waiting. 
*/ + nstat_pcb_detach(tp->t_inpcb); + if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) { tcp_unlock(tp->t_inpcb->inp_socket, 0, 0); lck_rw_lock_exclusive(pcbinfo->ipi_lock); @@ -592,7 +640,7 @@ int tcp_backoff[TCP_MAXRXTSHIFT + 1] = static int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ -static void tcp_rexmt_save_state(struct tcpcb *tp) +void tcp_rexmt_save_state(struct tcpcb *tp) { u_int32_t fsize; if (TSTMP_SUPPORTED(tp)) { @@ -669,6 +717,8 @@ tcp_timers(tp, timer) #if INET6 int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV4) == 0; #endif /* INET6 */ + u_int64_t accsleep_ms; + u_int32_t last_sleep_ms = 0; so = tp->t_inpcb->inp_socket; idle_time = tcp_now - tp->t_rcvtime; @@ -702,6 +752,9 @@ tcp_timers(tp, timer) * to a longer retransmit interval and retransmit one segment. */ case TCPT_REXMT: + accsleep_ms = mach_absolutetime_asleep / 1000000UL; + if (accsleep_ms > tp->t_accsleep_ms) + last_sleep_ms = accsleep_ms - tp->t_accsleep_ms; /* * Drop a connection in the retransmit timer * 1. 
If we have retransmitted more than TCP_MAXRXTSHIFT @@ -714,14 +767,15 @@ tcp_timers(tp, timer) * receiving an ack */ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT || - (tp->t_rxt_conndroptime > 0 - && tp->t_rxtstart > 0 && - (tcp_now - tp->t_rxtstart) >= tp->t_rxt_conndroptime) - || ((tp->t_flagsext & TF_RXTFINDROP) != 0 && - (tp->t_flags & TF_SENTFIN) != 0 && - tp->t_rxtshift >= 4)) { + (tp->t_rxt_conndroptime > 0 && tp->t_rxtstart > 0 && + (tcp_now - tp->t_rxtstart) >= tp->t_rxt_conndroptime) || + ((tp->t_flagsext & TF_RXTFINDROP) != 0 && + (tp->t_flags & TF_SENTFIN) != 0 && tp->t_rxtshift >= 4) || + (tp->t_rxtshift > 4 && last_sleep_ms >= TCP_SLEEP_TOO_LONG)) { if ((tp->t_flagsext & TF_RXTFINDROP) != 0) { tcpstat.tcps_rxtfindrop++; + } else if (last_sleep_ms >= TCP_SLEEP_TOO_LONG) { + tcpstat.tcps_drop_after_sleep++; } else { tcpstat.tcps_timeoutdrop++; } @@ -736,6 +790,7 @@ tcp_timers(tp, timer) } tcpstat.tcps_rexmttimeo++; + tp->t_accsleep_ms = accsleep_ms; if (tp->t_rxtshift == 1 && tp->t_state == TCPS_ESTABLISHED) { @@ -788,9 +843,41 @@ tcp_timers(tp, timer) tp->t_flagsext &= ~(TF_DELAY_RECOVERY); } + if (tp->t_state == TCPS_SYN_RECEIVED) + tcp_disable_tfo(tp); + + if ((tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) && + !(tp->t_tfo_flags & TFO_F_NO_SNDPROBING) && + ((tp->t_state != TCPS_SYN_SENT && tp->t_rxtshift > 1) || + tp->t_rxtshift > 2)) { + /* + * For regular retransmissions, a first one is being + * done for tail-loss probe. + * Thus, if rxtshift > 1, this means we have sent the segment + * a total of 3 times. + * + * If we are in SYN-SENT state, then there is no tail-loss + * probe thus we have to let rxtshift go up to 3. 
+ */ + tcp_heuristic_tfo_middlebox(tp); + + so->so_error = ENODATA; + sorwakeup(so); + sowwakeup(so); + } + if (tp->t_state == TCPS_SYN_SENT) { rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; tp->t_stat.synrxtshift = tp->t_rxtshift; + + /* When retransmitting, disable TFO */ + if (tfo_enabled(tp)) { + tp->t_flagsext &= ~TF_FASTOPEN; + tp->t_tfo_flags |= TFO_F_SYN_LOSS; + + tp->t_tfo_stats |= TFO_S_SYN_LOSS; + tcpstat.tcps_tfo_syn_loss++; + } } else { rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; } @@ -810,9 +897,10 @@ tcp_timers(tp, timer) if (tcp_pmtud_black_hole_detect && !(tp->t_flagsext & TF_NOBLACKHOLE_DETECTION) && (tp->t_state == TCPS_ESTABLISHED)) { - if (((tp->t_flags & (TF_PMTUD|TF_MAXSEGSNT)) - == (TF_PMTUD|TF_MAXSEGSNT)) && - (tp->t_rxtshift == 2)) { + if ((tp->t_flags & TF_PMTUD) && + ((tp->t_flags & TF_MAXSEGSNT) + || tp->t_pmtud_lastseg_size > tcp_pmtud_black_hole_mss) && + tp->t_rxtshift == 2) { /* * Enter Path MTU Black-hole Detection mechanism: * - Disable Path MTU Discovery (IP "DF" bit). @@ -874,10 +962,7 @@ tcp_timers(tp, timer) * Do this only on non-local connections. */ if (tp->t_state == TCPS_SYN_SENT && - ((!(tp->t_flags & TF_LOCAL) && - tp->t_rxtshift == tcp_broken_peer_syn_rxmit_thres) || - ((tp->t_flags & TF_LOCAL) && - tp->t_rxtshift == tcp_broken_peer_syn_rxmit_thres_local))) + tp->t_rxtshift == tcp_broken_peer_syn_rxmit_thres) tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC); /* @@ -923,17 +1008,23 @@ tcp_timers(tp, timer) * least once, the value of ssthresh is held constant */ if (tp->t_rxtshift == 1 && - CC_ALGO(tp)->after_timeout != NULL) + CC_ALGO(tp)->after_timeout != NULL) { CC_ALGO(tp)->after_timeout(tp); + /* + * CWR notifications are to be sent on new data + * right after Fast Retransmits and ECE + * notification receipts. 
+ */ + if (TCP_ECN_ENABLED(tp)) + tp->ecn_flags |= TE_SENDCWR; + } EXIT_FASTRECOVERY(tp); - /* CWR notifications are to be sent on new data right after - * RTOs, Fast Retransmits and ECE notification receipts. - */ - if ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON) { - tp->ecn_flags |= TE_SENDCWR; - } + /* Exit cwnd non validated phase */ + tp->t_flagsext &= ~TF_CWND_NONVALIDATED; + + fc_output: tcp_ccdbg_trace(tp, NULL, TCP_CC_REXMT_TIMEOUT); @@ -999,7 +1090,8 @@ tcp_timers(tp, timer) goto dropit; if ((always_keepalive || (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) || - (tp->t_flagsext & TF_DETECT_READSTALL)) && + (tp->t_flagsext & TF_DETECT_READSTALL) || + (tp->t_tfo_probe_state == TFO_PROBE_PROBING)) && (tp->t_state <= TCPS_CLOSING || tp->t_state == TCPS_FIN_WAIT_2)) { if (idle_time >= TCP_CONN_KEEPIDLE(tp) + TCP_CONN_MAXIDLE(tp)) goto dropit; @@ -1037,12 +1129,14 @@ tcp_timers(tp, timer) tp->t_rtimo_probes++; } tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, - TCP_CONN_KEEPINTVL(tp)); + TCP_CONN_KEEPINTVL(tp)); } else { tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, - TCP_CONN_KEEPIDLE(tp)); + TCP_CONN_KEEPIDLE(tp)); } if (tp->t_flagsext & TF_DETECT_READSTALL) { + struct ifnet *outifp = tp->t_inpcb->inp_last_outifp; + bool reenable_probe = false; /* * The keep alive packets sent to detect a read * stall did not get a response from the @@ -1050,17 +1144,54 @@ tcp_timers(tp, timer) * If the number of probes sent reaches the limit, * generate an event. 
*/ - if (tp->t_rtimo_probes > tp->t_adaptive_rtimo) { - /* Generate an event */ - soevent(so, - (SO_FILT_HINT_LOCKED| - SO_FILT_HINT_ADAPTIVE_RTIMO)); - tcp_keepalive_reset(tp); + if (tp->t_adaptive_rtimo > 0) { + if (tp->t_rtimo_probes > tp->t_adaptive_rtimo) { + /* Generate an event */ + soevent(so, + (SO_FILT_HINT_LOCKED | + SO_FILT_HINT_ADAPTIVE_RTIMO)); + tcp_keepalive_reset(tp); + } else { + reenable_probe = true; + } + } else if (outifp != NULL && + (outifp->if_eflags & IFEF_PROBE_CONNECTIVITY) && + tp->t_rtimo_probes <= TCP_CONNECTIVITY_PROBES_MAX) { + reenable_probe = true; } else { + tp->t_flagsext &= ~TF_DETECT_READSTALL; + } + if (reenable_probe) { + int ind = min(tp->t_rtimo_probes, + TCP_MAXRXTSHIFT); tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START( - tp, TCP_REXMTVAL(tp)); + tp, tcp_backoff[ind] * TCP_REXMTVAL(tp)); } } + if (tp->t_tfo_probe_state == TFO_PROBE_PROBING) { + int ind; + + tp->t_tfo_probes++; + ind = min(tp->t_tfo_probes, TCP_MAXRXTSHIFT); + + /* + * We take the minimum among the time set by true + * keepalive (see above) and the backoff'd RTO. That + * way we backoff in case of packet-loss but will never + * timeout slower than regular keepalive due to the + * backing off. + */ + tp->t_timer[TCPT_KEEP] = min(OFFSET_FROM_START( + tp, tcp_backoff[ind] * TCP_REXMTVAL(tp)), + tp->t_timer[TCPT_KEEP]); + } else if (tp->t_tfo_probe_state == TFO_PROBE_WAIT_DATA) { + /* Still no data! Let's assume a TFO-error and err out... 
*/ + tcp_heuristic_tfo_middlebox(tp); + + so->so_error = ENODATA; + sorwakeup(so); + tcpstat.tcps_tfo_blackhole++; + } break; case TCPT_DELACK: if (tcp_delack_enabled && (tp->t_flags & TF_DELACK)) { @@ -1138,10 +1269,7 @@ tcp_timers(tp, timer) case TCPT_PTO: { - tcp_seq old_snd_nxt; int32_t snd_len; - boolean_t rescue_rxt = FALSE; - tp->t_flagsext &= ~(TF_SENT_TLPROBE); /* @@ -1149,50 +1277,34 @@ tcp_timers(tp, timer) * send a probe */ if (tp->t_state != TCPS_ESTABLISHED || - tp->t_rxtshift > 0 || tp->snd_max == tp->snd_una || - !SACK_ENABLED(tp) || TAILQ_EMPTY(&tp->snd_holes) || - (IN_FASTRECOVERY(tp) && - (SEQ_GEQ(tp->snd_fack, tp->snd_recover) || - SEQ_GT(tp->snd_nxt, tp->sack_newdata)))) + (tp->t_rxtshift > 0 && !(tp->t_flagsext & TF_PROBING)) + || tp->snd_max == tp->snd_una || + !SACK_ENABLED(tp) || !TAILQ_EMPTY(&tp->snd_holes) || + IN_FASTRECOVERY(tp)) break; + /* + * If there is no new data to send or if the + * connection is limited by receive window then + * retransmit the last segment, otherwise send + * new data. + */ + snd_len = min(so->so_snd.sb_cc, tp->snd_wnd) + - (tp->snd_max - tp->snd_una); + if (snd_len > 0) { + tp->snd_nxt = tp->snd_max; + } else { + snd_len = min((tp->snd_max - tp->snd_una), + tp->t_maxseg); + tp->snd_nxt = tp->snd_max - snd_len; + } + tcpstat.tcps_pto++; + if (tp->t_flagsext & TF_PROBING) + tcpstat.tcps_probe_if++; /* If timing a segment in this window, stop the timer */ tp->t_rtttime = 0; - - if (IN_FASTRECOVERY(tp)) { - /* - * Send a probe to detect tail loss in a - * recovery window when the connection is in - * fast_recovery. 
- */ - old_snd_nxt = tp->snd_nxt; - rescue_rxt = TRUE; - VERIFY(SEQ_GEQ(tp->snd_fack, tp->snd_una)); - snd_len = min((tp->snd_recover - tp->snd_fack), - tp->t_maxseg); - tp->snd_nxt = tp->snd_recover - snd_len; - tcpstat.tcps_pto_in_recovery++; - tcp_ccdbg_trace(tp, NULL, TCP_CC_TLP_IN_FASTRECOVERY); - } else { - /* - * If there is no new data to send or if the - * connection is limited by receive window then - * retransmit the last segment, otherwise send - * new data. - */ - snd_len = min(so->so_snd.sb_cc, tp->snd_wnd) - - (tp->snd_max - tp->snd_una); - if (snd_len > 0) { - tp->snd_nxt = tp->snd_max; - } else { - snd_len = min((tp->snd_max - tp->snd_una), - tp->t_maxseg); - tp->snd_nxt = tp->snd_max - snd_len; - } - } - /* Note that tail loss probe is being sent */ tp->t_flagsext |= TF_SENT_TLPROBE; tp->t_tlpstart = tcp_now; @@ -1202,14 +1314,6 @@ tcp_timers(tp, timer) tp->snd_cwnd -= tp->t_maxseg; tp->t_tlphighrxt = tp->snd_nxt; - - /* - * If a tail loss probe was sent after entering recovery, - * restore the old snd_nxt value so that other packets - * will get retransmitted correctly. - */ - if (rescue_rxt) - tp->snd_nxt = old_snd_nxt; break; } case TCPT_DELAYFR: @@ -1227,11 +1331,13 @@ tcp_timers(tp, timer) break; VERIFY(SACK_ENABLED(tp)); - if (CC_ALGO(tp)->pre_fr != NULL) + tcp_rexmt_save_state(tp); + if (CC_ALGO(tp)->pre_fr != NULL) { CC_ALGO(tp)->pre_fr(tp); + if (TCP_ECN_ENABLED(tp)) + tp->ecn_flags |= TE_SENDCWR; + } ENTER_FASTRECOVERY(tp); - if ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON) - tp->ecn_flags |= TE_SENDCWR; tp->t_timer[TCPT_REXMT] = 0; tcpstat.tcps_sack_recovery_episode++; @@ -1332,7 +1438,6 @@ need_to_resched_timerlist(u_int32_t runtime, u_int16_t mode) void tcp_sched_timerlist(uint32_t offset) { - uint64_t deadline = 0; struct tcptimerlist *listp = &tcp_timer_list; @@ -1361,8 +1466,9 @@ tcp_sched_timerlist(uint32_t offset) * timers for this connection. 
*/ u_int32_t -tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *te_mode) { - +tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *te_mode, + u_int16_t probe_if_index) +{ struct socket *so; u_int16_t i = 0, index = TCPT_NONE, lo_index = TCPT_NONE; u_int32_t timer_val, offset = 0, lo_timer = 0; @@ -1390,6 +1496,18 @@ tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *te_mode) { goto done; } + /* + * If this connection is over an interface that needs to + * be probed, send probe packets to reinitiate communication. + */ + if (probe_if_index > 0 && tp->t_inpcb->inp_last_outifp != NULL && + tp->t_inpcb->inp_last_outifp->if_index == probe_if_index) { + tp->t_flagsext |= TF_PROBING; + tcp_timers(tp, TCPT_PTO); + tp->t_timer[TCPT_PTO] = 0; + tp->t_flagsext &= ~TF_PROBING; + } + /* * Since the timer thread needs to wait for tcp lock, it may race * with another thread that can cancel or reschedule the timer @@ -1551,7 +1669,8 @@ tcp_run_timerlist(void * arg1, void * arg2) { lck_mtx_unlock(listp->mtx); - offset = tcp_run_conn_timer(tp, &te_mode); + offset = tcp_run_conn_timer(tp, &te_mode, + listp->probe_if_index); lck_mtx_lock(listp->mtx); @@ -1617,12 +1736,13 @@ tcp_run_timerlist(void * arg1, void * arg2) { listp->running = FALSE; listp->pref_mode = 0; listp->pref_offset = 0; + listp->probe_if_index = 0; lck_mtx_unlock(listp->mtx); } /* - * Function to check if the timerlist needs to be reschduled to run this + * Function to check if the timerlist needs to be rescheduled to run this * connection's timers correctly.
*/ void @@ -1745,7 +1865,8 @@ tcp_sched_timers(struct tcpcb *tp) } static inline void -tcp_set_lotimer_index(struct tcpcb *tp) { +tcp_set_lotimer_index(struct tcpcb *tp) +{ uint16_t i, lo_index = TCPT_NONE, mode = 0; uint32_t lo_timer = 0; for (i = 0; i < TCPT_NTIMERS; ++i) { @@ -1770,8 +1891,8 @@ tcp_set_lotimer_index(struct tcpcb *tp) { } void -tcp_check_timer_state(struct tcpcb *tp) { - +tcp_check_timer_state(struct tcpcb *tp) +{ lck_mtx_assert(&tp->t_inpcb->inpcb_mtx, LCK_MTX_ASSERT_OWNED); if (tp->t_inpcb->inp_flags2 & INP2_TIMEWAIT) @@ -1783,6 +1904,19 @@ tcp_check_timer_state(struct tcpcb *tp) { return; } +static inline void +tcp_cumulative_stat(u_int32_t cur, u_int32_t *prev, u_int32_t *dest) +{ + /* handle wrap around */ + int32_t diff = (int32_t) (cur - *prev); + if (diff > 0) + *dest = diff; + else + *dest = 0; + *prev = cur; + return; +} + __private_extern__ void tcp_report_stats(void) { @@ -1790,11 +1924,12 @@ tcp_report_stats(void) struct sockaddr_in dst; struct sockaddr_in6 dst6; struct rtentry *rt = NULL; + static struct tcp_last_report_stats prev; u_int64_t var, uptime; #define stat data.u.tcp_stats if (((uptime = net_uptime()) - tcp_last_report_time) < - TCP_REPORT_STATS_INTERVAL) + tcp_report_stats_interval) return; tcp_last_report_time = uptime; @@ -1869,7 +2004,270 @@ tcp_report_stats(void) (var * 100) / tcpstat.tcps_sndpack; } + if (tcp_ecn_outbound == 1) + stat.ecn_client_enabled = 1; + if (tcp_ecn_inbound == 1) + stat.ecn_server_enabled = 1; + tcp_cumulative_stat(tcpstat.tcps_connattempt, + &prev.tcps_connattempt, &stat.connection_attempts); + tcp_cumulative_stat(tcpstat.tcps_accepts, + &prev.tcps_accepts, &stat.connection_accepts); + tcp_cumulative_stat(tcpstat.tcps_ecn_client_setup, + &prev.tcps_ecn_client_setup, &stat.ecn_client_setup); + tcp_cumulative_stat(tcpstat.tcps_ecn_server_setup, + &prev.tcps_ecn_server_setup, &stat.ecn_server_setup); + tcp_cumulative_stat(tcpstat.tcps_ecn_client_success, + &prev.tcps_ecn_client_success, 
&stat.ecn_client_success); + tcp_cumulative_stat(tcpstat.tcps_ecn_server_success, + &prev.tcps_ecn_server_success, &stat.ecn_server_success); + tcp_cumulative_stat(tcpstat.tcps_ecn_not_supported, + &prev.tcps_ecn_not_supported, &stat.ecn_not_supported); + tcp_cumulative_stat(tcpstat.tcps_ecn_lost_syn, + &prev.tcps_ecn_lost_syn, &stat.ecn_lost_syn); + tcp_cumulative_stat(tcpstat.tcps_ecn_lost_synack, + &prev.tcps_ecn_lost_synack, &stat.ecn_lost_synack); + tcp_cumulative_stat(tcpstat.tcps_ecn_recv_ce, + &prev.tcps_ecn_recv_ce, &stat.ecn_recv_ce); + tcp_cumulative_stat(tcpstat.tcps_ecn_recv_ece, + &prev.tcps_ecn_recv_ece, &stat.ecn_recv_ece); + tcp_cumulative_stat(tcpstat.tcps_ecn_sent_ece, + &prev.tcps_ecn_sent_ece, &stat.ecn_sent_ece); + tcp_cumulative_stat(tcpstat.tcps_ecn_conn_recv_ce, + &prev.tcps_ecn_conn_recv_ce, &stat.ecn_conn_recv_ce); + tcp_cumulative_stat(tcpstat.tcps_ecn_conn_recv_ece, + &prev.tcps_ecn_conn_recv_ece, &stat.ecn_conn_recv_ece); + tcp_cumulative_stat(tcpstat.tcps_ecn_conn_plnoce, + &prev.tcps_ecn_conn_plnoce, &stat.ecn_conn_plnoce); + tcp_cumulative_stat(tcpstat.tcps_ecn_conn_pl_ce, + &prev.tcps_ecn_conn_pl_ce, &stat.ecn_conn_pl_ce); + tcp_cumulative_stat(tcpstat.tcps_ecn_conn_nopl_ce, + &prev.tcps_ecn_conn_nopl_ce, &stat.ecn_conn_nopl_ce); + tcp_cumulative_stat(tcpstat.tcps_tfo_syn_data_rcv, + &prev.tcps_tfo_syn_data_rcv, &stat.tfo_syn_data_rcv); + tcp_cumulative_stat(tcpstat.tcps_tfo_cookie_req_rcv, + &prev.tcps_tfo_cookie_req_rcv, &stat.tfo_cookie_req_rcv); + tcp_cumulative_stat(tcpstat.tcps_tfo_cookie_sent, + &prev.tcps_tfo_cookie_sent, &stat.tfo_cookie_sent); + tcp_cumulative_stat(tcpstat.tcps_tfo_cookie_invalid, + &prev.tcps_tfo_cookie_invalid, &stat.tfo_cookie_invalid); + tcp_cumulative_stat(tcpstat.tcps_tfo_cookie_req, + 
&prev.tcps_tfo_cookie_req, &stat.tfo_cookie_req); + tcp_cumulative_stat(tcpstat.tcps_tfo_cookie_rcv, + &prev.tcps_tfo_cookie_rcv, &stat.tfo_cookie_rcv); + tcp_cumulative_stat(tcpstat.tcps_tfo_syn_data_sent, + &prev.tcps_tfo_syn_data_sent, &stat.tfo_syn_data_sent); + tcp_cumulative_stat(tcpstat.tcps_tfo_syn_data_acked, + &prev.tcps_tfo_syn_data_acked, &stat.tfo_syn_data_acked); + tcp_cumulative_stat(tcpstat.tcps_tfo_syn_loss, + &prev.tcps_tfo_syn_loss, &stat.tfo_syn_loss); + tcp_cumulative_stat(tcpstat.tcps_tfo_blackhole, + &prev.tcps_tfo_blackhole, &stat.tfo_blackhole); + nstat_sysinfo_send_data(&data); #undef stat } + +void +tcp_interface_send_probe(u_int16_t probe_if_index) +{ + int32_t offset = 0; + struct tcptimerlist *listp = &tcp_timer_list; + + /* Make sure TCP clock is up to date */ + calculate_tcp_clock(); + + lck_mtx_lock(listp->mtx); + if (listp->probe_if_index > 0) { + tcpstat.tcps_probe_if_conflict++; + goto done; + } + + listp->probe_if_index = probe_if_index; + if (listp->running) + goto done; + + /* + * Reschedule the timerlist to run within the next 10ms, which is + * the fastest that we can do. 
+ */ + offset = TCP_TIMER_10MS_QUANTUM; + if (listp->scheduled) { + int32_t diff; + diff = timer_diff(listp->runtime, 0, tcp_now, offset); + if (diff <= 0) { + /* The timer will fire sooner than what's needed */ + goto done; + } + } + listp->mode = TCP_TIMERLIST_10MS_MODE; + listp->idleruns = 0; + + tcp_sched_timerlist(offset); + +done: + lck_mtx_unlock(listp->mtx); + return; +} + +/* + * Enable read probes on this connection, if: + * - it is in established state + * - doesn't have any data outstanding + * - the outgoing ifp matches + * - we have not already sent any read probes + */ +static void +tcp_enable_read_probe(struct tcpcb *tp, struct ifnet *ifp) +{ + if (tp->t_state == TCPS_ESTABLISHED && + tp->snd_max == tp->snd_una && + tp->t_inpcb->inp_last_outifp == ifp && + !(tp->t_flagsext & TF_DETECT_READSTALL) && + tp->t_rtimo_probes == 0) { + tp->t_flagsext |= TF_DETECT_READSTALL; + tp->t_rtimo_probes = 0; + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, + TCP_TIMER_10MS_QUANTUM); + if (tp->tentry.index == TCPT_NONE) { + tp->tentry.index = TCPT_KEEP; + tp->tentry.runtime = tcp_now + + TCP_TIMER_10MS_QUANTUM; + } else { + int32_t diff = 0; + + /* Reset runtime to be in next 10ms */ + diff = timer_diff(tp->tentry.runtime, 0, + tcp_now, TCP_TIMER_10MS_QUANTUM); + if (diff > 0) { + tp->tentry.index = TCPT_KEEP; + tp->tentry.runtime = tcp_now + + TCP_TIMER_10MS_QUANTUM; + if (tp->tentry.runtime == 0) + tp->tentry.runtime++; + } + } + } +} + +/* + * Disable read probe and reset the keep alive timer + */ +static void +tcp_disable_read_probe(struct tcpcb *tp) +{ + if (tp->t_adaptive_rtimo == 0 && + ((tp->t_flagsext & TF_DETECT_READSTALL) || + tp->t_rtimo_probes > 0)) { + tcp_keepalive_reset(tp); + } +} + +/* + * Reschedule the tcp timerlist in the next 10ms to re-enable read/write + * probes on connections going over a particular interface. 
+ */ +void +tcp_probe_connectivity(struct ifnet *ifp, u_int32_t enable) +{ + int32_t offset; + struct tcptimerlist *listp = &tcp_timer_list; + struct inpcbinfo *pcbinfo = &tcbinfo; + struct inpcb *inp, *nxt; + + if (ifp == NULL) + return; + + /* update clock */ + calculate_tcp_clock(); + + /* + * Enable keep alive timer on all connections that are + * active/established on this interface. + */ + lck_rw_lock_shared(pcbinfo->ipi_lock); + + LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, nxt) { + struct tcpcb *tp = NULL; + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == + WNT_STOPUSING) + continue; + + /* Acquire lock to look at the state of the connection */ + tcp_lock(inp->inp_socket, 1, 0); + + /* Release the want count */ + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { + tcp_unlock(inp->inp_socket, 1, 0); + continue; + } + + tp = intotcpcb(inp); + if (enable) + tcp_enable_read_probe(tp, ifp); + else + tcp_disable_read_probe(tp); + + tcp_unlock(inp->inp_socket, 1, 0); + } + lck_rw_done(pcbinfo->ipi_lock); + + lck_mtx_lock(listp->mtx); + if (listp->running) { + listp->pref_mode |= TCP_TIMERLIST_10MS_MODE; + goto done; + } + + /* Reschedule within the next 10ms */ + offset = TCP_TIMER_10MS_QUANTUM; + if (listp->scheduled) { + int32_t diff; + diff = timer_diff(listp->runtime, 0, tcp_now, offset); + if (diff <= 0) { + /* The timer will fire sooner than what's needed */ + goto done; + } + } + listp->mode = TCP_TIMERLIST_10MS_MODE; + listp->idleruns = 0; + + tcp_sched_timerlist(offset); +done: + lck_mtx_unlock(listp->mtx); + return; +} + +void +tcp_itimer(struct inpcbinfo *ipi) +{ + struct inpcb *inp, *nxt; + + if (lck_rw_try_lock_exclusive(ipi->ipi_lock) == FALSE) { + if (tcp_itimer_done == TRUE) { + tcp_itimer_done = FALSE; + atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1); + return; + } + /* Upgrade failed, lost lock now take it again exclusive */ + lck_rw_lock_exclusive(ipi->ipi_lock); + } + tcp_itimer_done = TRUE; + + LIST_FOREACH_SAFE(inp, 
&tcb, inp_list, nxt) { + struct socket *so; + + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) + continue; + so = inp->inp_socket; + tcp_lock(so, 1, 0); + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { + tcp_unlock(so, 1, 0); + continue; + } + so_check_extended_bk_idle_time(so); + tcp_unlock(so, 1, 0); + } + + lck_rw_done(ipi->ipi_lock); +} + diff --git a/bsd/netinet/tcp_timer.h b/bsd/netinet/tcp_timer.h index 0e7a43f11..177cd162c 100644 --- a/bsd/netinet/tcp_timer.h +++ b/bsd/netinet/tcp_timer.h @@ -262,6 +262,7 @@ struct tcptimerlist { uint32_t pref_offset; /* Preferred offset set by a connection */ uint32_t idleruns; /* Number of times the list has been idle in fast mode */ struct tcptimerentry *next_te; /* next timer entry pointer to process */ + u_int16_t probe_if_index; /* Interface index that needs to send probes */ }; diff --git a/bsd/netinet/tcp_usrreq.c b/bsd/netinet/tcp_usrreq.c index 350884ae1..96b17ba5a 100644 --- a/bsd/netinet/tcp_usrreq.c +++ b/bsd/netinet/tcp_usrreq.c @@ -121,6 +121,8 @@ void tcp_fill_info(struct tcpcb *, struct tcp_info *); errno_t tcp_fill_info_for_info_tuple(struct info_tuple *, struct tcp_info *); int tcp_sysctl_info(struct sysctl_oid *, void *, int , struct sysctl_req *); +static void tcp_connection_fill_info(struct tcpcb *tp, + struct tcp_connection_info *tci); /* * TCP protocol interface to socket abstraction. 
@@ -395,6 +397,27 @@ tcp6_usr_listen(struct socket *so, struct proc *p) } #endif /* INET6 */ +static int +tcp_connect_complete(struct socket *so) +{ + struct tcpcb *tp = sototcpcb(so); + int error = 0; + + /* TFO delays the tcp_output until later, when the app calls write() */ + if (so->so_flags1 & SOF1_PRECONNECT_DATA) { + if (!necp_socket_is_allowed_to_send_recv(sotoinpcb(so), NULL, NULL)) + return (EHOSTUNREACH); + + /* Initialize enough state so that we can actually send data */ + tcp_mss(tp, -1, IFSCOPE_NONE); + tp->snd_wnd = tp->t_maxseg; + } else { + error = tcp_output(tp); + } + + return (error); +} + /* * Initiate connection to peer. * Create a template for use in transmissions on this connection. @@ -433,6 +456,9 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) } else { error = ENETDOWN; } + + /* Disable PRECONNECT_DATA, as we don't need to send a SYN anymore. */ + so->so_flags1 &= ~SOF1_PRECONNECT_DATA; return error; } #endif /* FLOW_DIVERT */ @@ -463,15 +489,18 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) if ((error = tcp_connect(tp, nam, p)) != 0) goto out; - error = tcp_output(tp); + + error = tcp_connect_complete(so); + COMMON_END(PRU_CONNECT); } static int tcp_usr_connectx_common(struct socket *so, int af, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, - struct proc *p, uint32_t ifscope, associd_t aid, connid_t *pcid, - uint32_t flags, void *arg, uint32_t arglen) + struct proc *p, uint32_t ifscope, sae_associd_t aid, sae_connid_t *pcid, + uint32_t flags, void *arg, uint32_t arglen, struct uio *auio, + user_ssize_t *bytes_written) { #pragma unused(aid) #if !MPTCP @@ -480,6 +509,7 @@ tcp_usr_connectx_common(struct socket *so, int af, struct sockaddr_entry *src_se = NULL, *dst_se = NULL; struct inpcb *inp = sotoinpcb(so); int error; + user_ssize_t datalen = 0; if (inp == NULL) return (EINVAL); @@ -499,7 +529,11 @@ tcp_usr_connectx_common(struct socket *so, int af, #if NECP 
inp_update_necp_policy(inp, src_se ? src_se->se_addr : NULL, dst_se ? dst_se->se_addr : NULL, ifscope); #endif /* NECP */ - + + if ((so->so_flags1 & SOF1_DATA_IDEMPOTENT) && + (tcp_fastopen & TCP_FASTOPEN_CLIENT)) + sototcpcb(so)->t_flagsext |= TF_FASTOPEN; + /* * We get here for 2 cases: * @@ -513,7 +547,7 @@ tcp_usr_connectx_common(struct socket *so, int af, * bind to source address and/or interface as necessary. */ #if MPTCP - if (flags & TCP_CONNREQF_MPTCP) { + if (flags & CONNREQF_MPTCP) { struct mptsub_connreq *mpcr = arg; /* Check to make sure this came down from MPTCP */ @@ -559,8 +593,37 @@ tcp_usr_connectx_common(struct socket *so, int af, /* NOTREACHED */ } + if (error != 0) + return (error); + + /* if there is data, copy it */ + if (auio != NULL) { + socket_unlock(so, 0); + + VERIFY(bytes_written != NULL); + + datalen = uio_resid(auio); + error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, + (uio_t)auio, NULL, NULL, 0); + socket_lock(so, 0); + + if (error == 0 || error == EWOULDBLOCK) + *bytes_written = datalen - uio_resid(auio); + + /* + * sosend returns EWOULDBLOCK if it's a non-blocking + * socket or a timeout occurred (this allows returning + * the amount of queued data through sendit()). + * + * However, connectx() returns EINPROGRESS in case of a + * blocking socket. So we change the return value here. 
+ */ + if (error == EWOULDBLOCK) + error = EINPROGRESS; + } + if (error == 0 && pcid != NULL) - *pcid = 1; /* there is only 1 connection for a TCP */ + *pcid = 1; /* there is only one connection in regular TCP */ return (error); } @@ -568,11 +631,12 @@ tcp_usr_connectx_common(struct socket *so, int af, static int tcp_usr_connectx(struct socket *so, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid, uint32_t flags, void *arg, - uint32_t arglen) + sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written) { return (tcp_usr_connectx_common(so, AF_INET, src_sl, dst_sl, - p, ifscope, aid, pcid, flags, arg, arglen)); + p, ifscope, aid, pcid, flags, arg, arglen, uio, + bytes_written)); } #if INET6 @@ -648,27 +712,28 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) inp->inp_vflag &= ~INP_IPV6; if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0) goto out; - error = tcp_output(tp); + + error = tcp_connect_complete(so); goto out; } inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((error = tcp6_connect(tp, nam, p)) != 0) goto out; - error = tcp_output(tp); - if (error) - goto out; + + error = tcp_connect_complete(so); COMMON_END(PRU_CONNECT); } static int tcp6_usr_connectx(struct socket *so, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid, uint32_t flags, void *arg, - uint32_t arglen) + sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written) { return (tcp_usr_connectx_common(so, AF_INET6, src_sl, dst_sl, - p, ifscope, aid, pcid, flags, arg, arglen)); + p, ifscope, aid, pcid, flags, arg, arglen, uio, + bytes_written)); } #endif /* INET6 */ @@ -704,10 +769,10 @@ tcp_usr_disconnect(struct socket *so) * User-protocol 
pru_disconnectx callback. */ static int -tcp_usr_disconnectx(struct socket *so, associd_t aid, connid_t cid) +tcp_usr_disconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid) { #pragma unused(cid) - if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) return (EINVAL); return (tcp_usr_disconnect(so)); @@ -871,7 +936,13 @@ tcp_usr_rcvd(struct socket *so, __unused int flags) goto out; tcp_sbrcv_trim(tp, &so->so_rcv); - tcp_output(tp); + /* + * This tcp_output is solely there to trigger window-updates. + * However, we really do not want these window-updates while we + * are still in SYN_SENT or SYN_RECEIVED. + */ + if (TCPS_HAVEESTABLISHED(tp->t_state)) + tcp_output(tp); #if CONTENT_FILTER cfil_sock_buf_update(&so->so_rcv); @@ -995,7 +1066,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, VERIFY(!(so->so_flags & SOF_MP_SUBFLOW) || (so->so_snd.sb_flags & SB_NOCOMPRESS)); - if(!(flags & PRUS_OOB)) { + if(!(flags & PRUS_OOB) || (so->so_flags1 & SOF1_PRECONNECT_DATA)) { /* Call msg send if message delivery is enabled */ if (so->so_flags & SOF_ENABLE_MSGS) sbappendmsg_snd(&so->so_snd, m); @@ -1003,6 +1074,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, sbappendstream(&so->so_snd, m); if (nam && tp->t_state < TCPS_SYN_SENT) { + /* * Do implied connect if not yet connected, * initialize window to default value, and @@ -1076,6 +1148,19 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, error = tcp_output(tp); tp->t_flagsext &= ~TF_FORCE; } + + + /* + * We wait for the socket to successfully connect before returning. + * This allows us to signal a timeout to the application. + */ + if (so->so_state & SS_ISCONNECTING) { + if (so->so_state & SS_NBIO) + error = EWOULDBLOCK; + else + error = sbwait(&so->so_snd); + } + COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? 
PRU_SEND_EOF : PRU_SEND)); } @@ -1133,6 +1218,17 @@ tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) COMMON_END(PRU_RCVOOB); } +static int +tcp_usr_preconnect(struct socket *so) +{ + int error = tcp_output(sototcpcb(so)); + + /* One read has been done. This was enough. Get back to "normal" behavior. */ + so->so_flags1 &= ~SOF1_PRECONNECT_DATA; + + COMMON_END(PRU_PRECONNECT); +} + /* xxx - should be const */ struct pr_usrreqs tcp_usrreqs = { .pru_abort = tcp_usr_abort, @@ -1154,6 +1250,7 @@ struct pr_usrreqs tcp_usrreqs = { .pru_sockaddr = in_getsockaddr, .pru_sosend = sosend, .pru_soreceive = soreceive, + .pru_preconnect = tcp_usr_preconnect, }; #if INET6 @@ -1177,6 +1274,7 @@ struct pr_usrreqs tcp6_usrreqs = { .pru_sockaddr = in6_mapped_sockaddr, .pru_sosend = sosend, .pru_soreceive = soreceive, + .pru_preconnect = tcp_usr_preconnect, }; #endif /* INET6 */ @@ -1462,21 +1560,19 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) ti->tcpi_state = tp->t_state; if (tp->t_state > TCPS_LISTEN) { - if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) + if (TSTMP_SUPPORTED(tp)) ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; - if (tp->t_flags & TF_SACK_PERMIT) + if (SACK_ENABLED(tp)) ti->tcpi_options |= TCPI_OPT_SACK; - if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { + if (TCP_WINDOW_SCALE_ENABLED(tp)) { ti->tcpi_options |= TCPI_OPT_WSCALE; ti->tcpi_snd_wscale = tp->snd_scale; ti->tcpi_rcv_wscale = tp->rcv_scale; } /* Are we in retranmission episode */ - if (tp->snd_max != tp->snd_nxt) + if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0) ti->tcpi_flags |= TCPI_FLAG_LOSSRECOVERY; - else - ti->tcpi_flags &= ~TCPI_FLAG_LOSSRECOVERY; ti->tcpi_rto = tp->t_timer[TCPT_REXMT] ? 
tp->t_rxtcur : 0; ti->tcpi_snd_mss = tp->t_maxseg; @@ -1489,7 +1585,7 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) ti->tcpi_snd_ssthresh = tp->snd_ssthresh; ti->tcpi_snd_cwnd = tp->snd_cwnd; - ti->tcpi_snd_sbbytes = tp->t_inpcb->inp_socket->so_snd.sb_cc; + ti->tcpi_snd_sbbytes = inp->inp_socket->so_snd.sb_cc; ti->tcpi_rcv_space = tp->rcv_wnd; @@ -1535,6 +1631,18 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) ti->tcpi_wired_rxbytes = inp->inp_Wstat->rxbytes; ti->tcpi_wired_txpackets = inp->inp_Wstat->txpackets; ti->tcpi_wired_txbytes = inp->inp_Wstat->txbytes; + tcp_get_connectivity_status(tp, &ti->tcpi_connstatus); + + ti->tcpi_tfo_syn_data_rcv = !!(tp->t_tfo_stats & TFO_S_SYNDATA_RCV); + ti->tcpi_tfo_cookie_req_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIEREQ_RECV); + ti->tcpi_tfo_cookie_sent = !!(tp->t_tfo_stats & TFO_S_COOKIE_SENT); + ti->tcpi_tfo_cookie_invalid = !!(tp->t_tfo_stats & TFO_S_COOKIE_INVALID); + + ti->tcpi_tfo_cookie_req = !!(tp->t_tfo_stats & TFO_S_COOKIE_REQ); + ti->tcpi_tfo_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIE_RCV); + ti->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT); + ti->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED); + ti->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS); } } @@ -1599,6 +1707,59 @@ tcp_fill_info_for_info_tuple(struct info_tuple *itpl, struct tcp_info *ti) return 0; } +static void +tcp_connection_fill_info(struct tcpcb *tp, struct tcp_connection_info *tci) +{ + struct inpcb *inp = tp->t_inpcb; + + bzero(tci, sizeof(*tci)); + tci->tcpi_state = tp->t_state; + if (tp->t_state > TCPS_LISTEN) { + if (TSTMP_SUPPORTED(tp)) + tci->tcpi_options |= TCPCI_OPT_TIMESTAMPS; + if (SACK_ENABLED(tp)) + tci->tcpi_options |= TCPCI_OPT_SACK; + if (TCP_WINDOW_SCALE_ENABLED(tp)) { + tci->tcpi_options |= TCPCI_OPT_WSCALE; + tci->tcpi_snd_wscale = tp->snd_scale; + tci->tcpi_rcv_wscale = tp->rcv_scale; + } + if (TCP_ECN_ENABLED(tp)) + tci->tcpi_options |= TCPCI_OPT_ECN; + if 
(IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0) + tci->tcpi_flags |= TCPCI_FLAG_LOSSRECOVERY; + if (tp->t_flagsext & TF_PKTS_REORDERED) + tci->tcpi_flags |= TCPCI_FLAG_REORDERING_DETECTED; + tci->tcpi_rto = (tp->t_timer[TCPT_REXMT] > 0) ? + tp->t_rxtcur : 0; + tci->tcpi_maxseg = tp->t_maxseg; + tci->tcpi_snd_ssthresh = tp->snd_ssthresh; + tci->tcpi_snd_cwnd = tp->snd_cwnd; + tci->tcpi_snd_wnd = tp->snd_wnd; + tci->tcpi_snd_sbbytes = inp->inp_socket->so_snd.sb_cc; + tci->tcpi_rcv_wnd = tp->rcv_wnd; + tci->tcpi_rttcur = tp->t_rttcur; + tci->tcpi_srtt = (tp->t_srtt >> TCP_RTT_SHIFT); + tci->tcpi_rttvar = (tp->t_rttvar >> TCP_RTTVAR_SHIFT); + tci->tcpi_txpackets = inp->inp_stat->txpackets; + tci->tcpi_txbytes = inp->inp_stat->txbytes; + tci->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes; + tci->tcpi_rxpackets = inp->inp_stat->rxpackets; + tci->tcpi_rxbytes = inp->inp_stat->rxbytes; + tci->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes; + + tci->tcpi_tfo_syn_data_rcv = !!(tp->t_tfo_stats & TFO_S_SYNDATA_RCV); + tci->tcpi_tfo_cookie_req_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIEREQ_RECV); + tci->tcpi_tfo_cookie_sent = !!(tp->t_tfo_stats & TFO_S_COOKIE_SENT); + tci->tcpi_tfo_cookie_invalid = !!(tp->t_tfo_stats & TFO_S_COOKIE_INVALID); + tci->tcpi_tfo_cookie_req = !!(tp->t_tfo_stats & TFO_S_COOKIE_REQ); + tci->tcpi_tfo_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIE_RCV); + tci->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT); + tci->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED); + tci->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS); + } +} + __private_extern__ int tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) @@ -1978,11 +2139,11 @@ tcp_ctloutput(so, sopt) break; case TCP_ADAPTIVE_READ_TIMEOUT: error = sooptcopyin(sopt, &optval, sizeof (optval), - sizeof(optval)); + sizeof(optval)); if (error) break; if (optval < 0 || - optval > 
TCP_ADAPTIVE_TIMEOUT_MAX) { + optval > TCP_ADAPTIVE_TIMEOUT_MAX) { error = EINVAL; break; } else if (optval == 0) { @@ -1994,11 +2155,11 @@ tcp_ctloutput(so, sopt) break; case TCP_ADAPTIVE_WRITE_TIMEOUT: error = sooptcopyin(sopt, &optval, sizeof (optval), - sizeof (optval)); + sizeof (optval)); if (error) break; if (optval < 0 || - optval > TCP_ADAPTIVE_TIMEOUT_MAX) { + optval > TCP_ADAPTIVE_TIMEOUT_MAX) { error = EINVAL; break; } else { @@ -2076,6 +2237,29 @@ tcp_ctloutput(so, sopt) tcp_pmtud_revert_segment_size(tp); } break; + case TCP_FASTOPEN: + if (!(tcp_fastopen & TCP_FASTOPEN_SERVER)) { + error = ENOTSUP; + break; + } + + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + break; + if (optval < 0 || optval > 1) { + error = EINVAL; + break; + } + if (tp->t_state != TCPS_LISTEN) { + error = EINVAL; + break; + } + if (optval) + tp->t_flagsext |= TF_FASTOPEN; + else + tcp_disable_tfo(tp); + break; case SO_FLUSH: if ((error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval))) != 0) @@ -2145,6 +2329,14 @@ tcp_ctloutput(so, sopt) case TCP_NOTIMEWAIT: optval = (tp->t_flagsext & TF_NOTIMEWAIT) ? 
1 : 0; break; + case TCP_FASTOPEN: + if (tp->t_state != TCPS_LISTEN || + !(tcp_fastopen & TCP_FASTOPEN_SERVER)) { + error = ENOTSUP; + break; + } + optval = tfo_enabled(tp); + break; case TCP_MEASURE_SND_BW: optval = tp->t_flagsext & TF_MEASURESNDBW; break; @@ -2156,6 +2348,13 @@ tcp_ctloutput(so, sopt) goto done; /* NOT REACHED */ } + case TCP_CONNECTION_INFO: { + struct tcp_connection_info tci; + tcp_connection_fill_info(tp, &tci); + error = sooptcopyout(sopt, &tci, + sizeof(struct tcp_connection_info)); + goto done; + } case TCP_MEASURE_BW_BURST: { struct tcp_measure_bw_burst out; if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 || @@ -2271,6 +2470,14 @@ SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_recvspace , 0, &sysctl_tcp_sospace, "IU", "Maximum incoming TCP datagram size"); +/* Sysctl for testing and tuning the connectx with data api */ +#define TCP_PRECONNECT_SBSZ_MAX 1460 +#define TCP_PRECONNECT_SBSZ_MIN (TCP_MSS) +#define TCP_PRECONNECT_SBSZ_DEF (TCP6_MSS) +static int tcp_preconnect_sbspace = TCP_PRECONNECT_SBSZ_DEF; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, preconn_sbsz, CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_preconnect_sbspace, 0, "Maximum preconnect space"); + /* * Attach TCP protocol to socket, allocating @@ -2306,6 +2513,12 @@ tcp_attach(so, p) if (error) return (error); } + + if (so->so_snd.sb_preconn_hiwat == 0) { + soreserve_preconnect(so, imin(TCP_PRECONNECT_SBSZ_MAX, + imax(tcp_preconnect_sbspace, TCP_PRECONNECT_SBSZ_MIN))); + } + if ((so->so_rcv.sb_flags & SB_USRSIZE) == 0) so->so_rcv.sb_flags |= SB_AUTOSIZE; if ((so->so_snd.sb_flags & SB_USRSIZE) == 0) diff --git a/bsd/netinet/tcp_var.h b/bsd/netinet/tcp_var.h index bf9fb3f20..1ec0559ee 100644 --- a/bsd/netinet/tcp_var.h +++ b/bsd/netinet/tcp_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. 
All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -63,9 +63,11 @@ #ifndef _NETINET_TCP_VAR_H_ #define _NETINET_TCP_VAR_H_ +#include #include #include #include +#include #include #if defined(__LP64__) @@ -145,7 +147,9 @@ struct name { \ * The maximum value of adaptive timeout is set to 10 which will allow * transmission of enough number of probes to the peer. */ -#define TCP_ADAPTIVE_TIMEOUT_MAX 10 +#define TCP_ADAPTIVE_TIMEOUT_MAX 10 + +#define TCP_CONNECTIVITY_PROBES_MAX 5 /* * Kernel variables for tcp. @@ -178,6 +182,16 @@ struct sackhint { int sack_bytes_rexmit; }; +struct tcp_rxt_seg { + tcp_seq rx_start; + tcp_seq rx_end; + u_int16_t rx_count; + u_int16_t rx_flags; +#define TCP_RXT_SPURIOUS 0x1 /* received DSACK notification */ +#define TCP_RXT_DSACK_FOR_TLP 0x2 + SLIST_ENTRY(tcp_rxt_seg) rx_link; +}; + struct tcptemp { u_char tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */ struct tcphdr tt_t; @@ -235,11 +249,11 @@ struct tcp_ccstate { struct tcpcb { struct tsegqe_head t_segq; int t_dupacks; /* consecutive dup acks recd */ + int t_state; /* state of this connection */ uint32_t t_timer[TCPT_NTIMERS]; /* tcp timers */ struct tcptimerentry tentry; /* entry in timer list */ struct inpcb *t_inpcb; /* back pointer to internet pcb */ - int t_state; /* state of this connection */ uint32_t t_flags; #define TF_ACKNOW 0x00001 /* ack peer immediately */ #define TF_DELACK 0x00002 /* ack, but try to delay it */ @@ -291,9 +305,9 @@ struct tcpcb { u_int32_t rcv_wnd; /* receive window */ tcp_seq rcv_up; /* receive urgent pointer */ - u_int32_t snd_wnd; /* send window */ - u_int32_t snd_cwnd; /* congestion-controlled window */ - u_int32_t snd_ssthresh; /* snd_cwnd size threshold for + u_int32_t snd_wnd; /* send window */ + u_int32_t snd_cwnd; /* congestion-controlled window */ + u_int32_t snd_ssthresh; /* snd_cwnd size threshold for * for slow start exponential to * linear switch */ @@ -314,15 +328,16 @@ struct tcpcb { int t_srtt; /* 
smoothed round-trip time */ int t_rttvar; /* variance in round-trip time */ + u_int64_t t_accsleep_ms; /* accumulated sleep time since last boot */ u_int16_t t_reassqlen; /* length of reassembly queue */ u_int16_t t_rxtshift; /* log(2) of rexmt exp. backoff */ - u_int t_rttmin; /* minimum rtt allowed */ - u_int t_rttbest; /* best rtt we've seen */ - u_int t_rttcur; /* most recent value of rtt */ - u_int32_t t_rttupdated; /* number of times rtt sampled */ + u_int32_t t_rttmin; /* minimum rtt allowed */ + u_int32_t t_rttbest; /* best rtt we've seen */ + u_int32_t t_rttcur; /* most recent value of rtt */ + u_int32_t t_rttupdated; /* number of times rtt sampled */ u_int32_t t_rxt_conndroptime; /* retxmt conn gets dropped after this time, when set */ - u_int32_t t_rxtstart; /* time at which retransmission started */ - u_int32_t max_sndwnd; /* largest window peer has offered */ + u_int32_t t_rxtstart; /* time at which retransmission started */ + u_int32_t max_sndwnd; /* largest window peer has offered */ int t_softerror; /* possible error not yet reported */ /* out-of-band data */ @@ -331,9 +346,9 @@ struct tcpcb { #define TCPOOB_HAVEDATA 0x01 #define TCPOOB_HADDATA 0x02 /* RFC 1323 variables */ - u_int8_t snd_scale; /* window scaling for send window */ - u_int8_t rcv_scale; /* window scaling for recv window */ - u_int8_t request_r_scale; /* pending window scaling */ + u_int8_t snd_scale; /* window scaling for send window */ + u_int8_t rcv_scale; /* window scaling for recv window */ + u_int8_t request_r_scale; /* pending window scaling */ u_int8_t requested_s_scale; u_int8_t tcp_cc_index; /* index of congestion control algorithm */ u_int8_t t_adaptive_rtimo; /* Read timeout used as a multiple of RTT */ @@ -344,21 +359,21 @@ struct tcpcb { u_int16_t t_early_rexmt_count; /* count of early rexmts */ u_int32_t t_early_rexmt_win; /* window for limiting early rexmts */ - u_int32_t ts_recent; /* timestamp echo data */ + u_int32_t ts_recent; /* timestamp echo data */ - u_int32_t 
ts_recent_age; /* when last updated */ + u_int32_t ts_recent_age; /* when last updated */ tcp_seq last_ack_sent; /* RFC 1644 variables */ tcp_cc cc_send; /* send connection count */ tcp_cc cc_recv; /* receive connection count */ /* RFC 3465 variables */ - u_int32_t t_bytes_acked; /* ABC "bytes_acked" parameter */ + u_int32_t t_bytes_acked; /* ABC "bytes_acked" parameter */ int t_lastchain; /* amount of packets chained last time around */ u_int16_t t_unacksegs; /* received but unacked segments for delaying acks */ u_int8_t t_rexmtthresh; /* duplicate ack threshold for entering fast recovery */ u_int8_t t_rtimo_probes; /* number of adaptive rtimo probes sent */ - u_int32_t t_persist_timeout; /* ZWP persistence limit as set by PERSIST_TIMEOUT */ + u_int32_t t_persist_timeout; /* ZWP persistence limit as set by PERSIST_TIMEOUT */ u_int32_t t_persist_stop; /* persistence limit deadline if triggered by ZWP */ u_int32_t t_notsent_lowat; /* Low water for not sent data */ @@ -374,6 +389,8 @@ struct tcpcb { #define TE_SENDCWR 0x08 /* Indicate that the next non-retransmit should have the TCP CWR flag set */ #define TE_SENDECE 0x10 /* Indicate that the next packet should have the TCP ECE flag set */ #define TE_INRECOVERY 0x20 /* connection entered recovery after receiving ECE */ +#define TE_RECV_ECN_CE 0x40 /* Received IPTOS_ECN_CE marking at least once */ +#define TE_RECV_ECN_ECE 0x80 /* Received ECE marking at least once */ #define TE_ECN_ON (TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection) */ /* state for bad retransmit recovery */ @@ -389,18 +406,18 @@ struct tcpcb { /* SACK related state */ int16_t snd_numholes; /* number of holes seen by sender */ + tcp_seq sack_newdata; /* New data xmitted in this recovery + episode starts at this seq number */ TAILQ_HEAD(sackhole_head, sackhole) snd_holes; /* SACK scoreboard (sorted) */ tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/ int rcv_numsacks; /* # distinct sack blks present 
*/ struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */ - tcp_seq sack_newdata; /* New data xmitted in this recovery - episode starts at this seq number */ struct sackhint sackhint; /* SACK scoreboard hint */ - u_int32_t t_pktlist_sentlen; /* total bytes in transmit chain */ struct mbuf *t_pktlist_head; /* First packet in transmit chain */ struct mbuf *t_pktlist_tail; /* Last packet in transmit chain */ + u_int32_t t_pktlist_sentlen; /* total bytes in transmit chain */ u_int32_t t_keepidle; /* keepalive idle timer (override global if > 0) */ u_int32_t t_keepinit; /* connection timeout, i.e. idle time @@ -409,7 +426,8 @@ struct tcpcb { u_int32_t t_keepcnt; /* number of keepalives before close */ u_int32_t tso_max_segment_size; /* TSO maximum segment unit for NIC */ - u_int32_t t_pmtud_saved_maxopd; /* MSS saved before performing PMTU-D BlackHole detection */ + u_int16_t t_pmtud_lastseg_size; /* size of the last sent segment */ + u_int16_t t_pmtud_saved_maxopd; /* MSS saved before performing PMTU-D BlackHole detection */ u_int32_t t_pmtud_start_ts; /* Time of PMTUD blackhole detection */ struct @@ -445,12 +463,18 @@ struct tcpcb { #define TF_FORCE 0x8000 /* force 1 byte out */ #define TF_DISABLE_STRETCHACK 0x10000 /* auto-disable stretch ack */ #define TF_NOBLACKHOLE_DETECTION 0x20000 /* Disable PMTU blackhole detection */ +#define TF_DISABLE_DSACK 0x40000 /* Ignore DSACK due to n/w duplication */ +#define TF_RESCUE_RXT 0x80000 /* SACK rescue retransmit */ +#define TF_CWND_NONVALIDATED 0x100000 /* cwnd non validated */ +#define TF_PROBING 0x200000 /* Trigger probe timeout */ +#define TF_FASTOPEN 0x400000 /* TCP Fastopen is enabled */ #if TRAFFIC_MGT /* Inter-arrival jitter related state */ uint32_t iaj_rcv_ts; /* tcp clock when the first packet was received */ uint16_t iaj_size; /* Size of packet for iaj measurement */ - uint16_t iaj_small_pkt; /* Count of packets smaller than iaj_size */ + uint8_t iaj_small_pkt; /* Count of packets smaller than 
iaj_size */ + uint8_t t_pipeack_ind; /* index for next pipeack sample */ uint16_t iaj_pktcnt; /* packet count, to avoid throttling initially */ uint16_t acc_iaj; /* Accumulated iaj */ tcp_seq iaj_rwintop; /* recent max advertised window */ @@ -465,6 +489,19 @@ struct tcpcb { /* Tail loss probe related state */ tcp_seq t_tlphighrxt; /* snd_nxt after PTO */ u_int32_t t_tlpstart; /* timestamp at PTO */ +/* DSACK data receiver state */ + tcp_seq t_dsack_lseq; /* DSACK left sequence */ + tcp_seq t_dsack_rseq; /* DSACK right sequence */ +/* DSACK data sender state */ + SLIST_HEAD(tcp_rxt_seghead, tcp_rxt_seg) t_rxt_segments; + tcp_seq t_dsack_lastuna; /* snd_una when last recovery episode started */ +/* state for congestion window validation (draft-ietf-tcpm-newcwv-07) */ +#define TCP_PIPEACK_SAMPLE_COUNT 3 + u_int32_t t_pipeack_sample[TCP_PIPEACK_SAMPLE_COUNT]; /* pipeack, bytes acked within RTT */ + tcp_seq t_pipeack_lastuna; /* una when pipeack measurement started */ + u_int32_t t_pipeack; + u_int32_t t_lossflightsize; + #if MPTCP u_int32_t t_mpflags; /* flags for multipath TCP */ @@ -492,14 +529,64 @@ struct tcpcb { #define TMPF_SND_MPFAIL 0x00200000 /* Received mapping csum failure */ #define TMPF_FASTJOIN_SEND 0x00400000 /* Fast join early data send */ #define TMPF_FASTJOINBY2_SEND 0x00800000 /* Fast join send after 3 WHS */ +#define TMPF_MPCAP_RETRANSMIT 0x01000000 /* Retransmission of 3rd ACK */ - void *t_mptcb; /* pointer to MPTCP TCB */ tcp_seq t_mpuna; /* unacknowledged sequence */ + void *t_mptcb; /* pointer to MPTCP TCB */ struct mpt_dsn_map t_rcv_map; /* Receive mapping list */ u_int8_t t_local_aid; /* Addr Id for authentication */ u_int8_t t_rem_aid; /* Addr ID of another subflow */ u_int8_t t_mprxtshift; /* join retransmission */ #endif /* MPTCP */ + +#define TFO_F_OFFER_COOKIE 0x01 /* We will offer a cookie */ +#define TFO_F_COOKIE_VALID 0x02 /* The received cookie is valid */ +#define TFO_F_COOKIE_REQ 0x04 /* Client requested a new cookie */ +#define 
TFO_F_COOKIE_SENT 0x08 /* Client did send a cookie in the SYN */ +#define TFO_F_SYN_LOSS 0x10 /* A SYN-loss triggered a fallback to regular TCP on the client-side */ +#define TFO_F_NO_RCVPROBING 0x20 /* This network is guaranteed to support TFO in the downstream direction */ +#define TFO_F_NO_SNDPROBING 0x40 /* This network is guaranteed to support TFO in the upstream direction */ + u_int8_t t_tfo_flags; +#define TFO_S_SYNDATA_RCV 0x01 /* SYN+data has been received */ +#define TFO_S_COOKIEREQ_RECV 0x02 /* TFO-cookie request received */ +#define TFO_S_COOKIE_SENT 0x04 /* TFO-cookie announced in SYN/ACK */ +#define TFO_S_COOKIE_INVALID 0x08 /* Received TFO-cookie is invalid */ +#define TFO_S_COOKIE_REQ 0x10 /* TFO-cookie requested within the SYN */ +#define TFO_S_COOKIE_RCV 0x20 /* TFO-cookie received in SYN/ACK */ +#define TFO_S_SYN_DATA_SENT 0x40 /* SYN+data sent */ +#define TFO_S_SYN_DATA_ACKED 0x80 /* SYN+data has been acknowledged in SYN/ACK */ +#define TFO_S_SYN_LOSS 0x0100 /* SYN+TFO has been lost - fallback to regular TCP */ + u_int16_t t_tfo_stats; + + u_int8_t t_tfo_probes; /* TFO-probes we did send */ +/* + * This here is the TFO-probing state-machine. Transitions are as follows: + * + * Current state: PROBE_NONE + * Event: SYN+DATA acknowledged + * Action: Transition to PROBE_PROBING and set keepalive-timer + * + * Current state: PROBE_PROBING (initial state) + * Event: Receive data + * Action: Transition to PROBE_NONE and cancel keepalive-timer + * Event: Receive ACK that does not indicate a hole + * Action: Transition to PROBE_NONE and cancel keepalive-timer + * Event: Receive ACK that indicates a hole + * Action: Transition to PROBE_WAIT_DATA and set a short timer + * to wait for the final segment. 
+ * Event: Keepalive-timeout (did not receive any segment) + * Action: Signal ETIMEDOUT as with regular keepalive-timers + * + * Current state: PROBE_WAIT_DATA + * Event: Receive data + * Action: Transition to PROBE_NONE and cancel keepalive-timer + * Event: Data-timeout (did not receive the expected data) + * Action: Signal ENODATA up to the app and close everything. + */ +#define TFO_PROBE_NONE 0 /* Not probing now */ +#define TFO_PROBE_PROBING 1 /* Sending out TCP-keepalives waiting for reply */ +#define TFO_PROBE_WAIT_DATA 2 /* Received reply, waiting for data */ + u_int8_t t_tfo_probe_state; }; #define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY) @@ -516,6 +603,8 @@ struct tcpcb { (_tp_)->t_flags |= TF_FASTRECOVERY; \ if (INP_IS_FLOW_CONTROLLED((_tp_)->t_inpcb)) \ inp_reset_fc_state((_tp_)->t_inpcb); \ + if (!SLIST_EMPTY(&tp->t_rxt_segments)) \ + tcp_rxtseg_clean(tp); \ } while(0) #define EXIT_FASTRECOVERY(_tp_) do { \ @@ -525,6 +614,8 @@ struct tcpcb { (_tp_)->t_bytes_acked = 0; \ (_tp_)->ecn_flags &= ~TE_INRECOVERY; \ (_tp_)->t_timer[TCPT_PTO] = 0; \ + (_tp_)->t_flagsext &= ~TF_RESCUE_RXT; \ + (_tp_)->t_lossflightsize = 0; \ } while(0) /* @@ -546,34 +637,72 @@ extern int tcprexmtthresh; (((_tp_)->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) == \ (TF_REQ_TSTMP|TF_RCVD_TSTMP)) +/* + * This condition is true if window scale option is supported + * on a connection + */ +#define TCP_WINDOW_SCALE_ENABLED(_tp_) \ + (((_tp_)->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == \ + (TF_RCVD_SCALE|TF_REQ_SCALE)) + +/* Is ECN enabled end-to-end */ +#define TCP_ECN_ENABLED(_tp_) \ + (((_tp_)->ecn_flags & (TE_ECN_ON)) == (TE_ECN_ON)) + /* * Gives number of bytes acked by this ack */ #define BYTES_ACKED(_th_, _tp_) \ ((_th_)->th_ack - (_tp_)->snd_una) +/* Returns true if a DSACK option should be sent */ +#define TCP_SEND_DSACK_OPT(_tp_) \ + ((_tp_)->t_dsack_lseq > 0 && (_tp_)->t_dsack_rseq > 0) + +/* Check if DSACK option should be processed */ +#define TCP_DSACK_ENABLED(tp) 
(tcp_dsack_enable == 1 && \ + !(tp->t_flagsext & TF_DISABLE_DSACK)) + +/* + * Returns true if a DSACK sequence is within the max send window that will + * be accepted. In order to set a window to validate sequence numbers, the + * max send window within which a DSACK option is processed is limited. + * + * We need to choose a maximum window to check if the sequence number is + * within the window. One arbitrary choice is 256 * MSS because if the + * window is as large as 256 segments it might be big enough to ignore the + * DSACK option. Choosing a much larger limit means that the memory for + * retransmit segments can be held for a longer time. + */ +#define TCP_DSACK_MAX_SEND_WINDOW(_tp_) ((_tp_)->t_maxseg << 8) +#define TCP_DSACK_SEQ_IN_WINDOW(_tp_, _seq_, _una_) \ + (SEQ_LEQ((_seq_), (_tp_)->snd_max) && \ + SEQ_GEQ((_seq_), ((_una_) - TCP_DSACK_MAX_SEND_WINDOW(_tp_)))) + + enum tcp_cc_event { - TCP_CC_CWND_INIT, - TCP_CC_INSEQ_ACK_RCVD, - TCP_CC_ACK_RCVD, - TCP_CC_ENTER_FASTRECOVERY, - TCP_CC_IN_FASTRECOVERY, - TCP_CC_EXIT_FASTRECOVERY, - TCP_CC_PARTIAL_ACK, - TCP_CC_IDLE_TIMEOUT, - TCP_CC_REXMT_TIMEOUT, - TCP_CC_ECN_RCVD, - TCP_CC_BAD_REXMT_RECOVERY, - TCP_CC_OUTPUT_ERROR, - TCP_CC_CHANGE_ALGO, - TCP_CC_FLOW_CONTROL, - TCP_CC_SUSPEND, - TCP_CC_LIMITED_TRANSMIT, - TCP_CC_EARLY_RETRANSMIT, - TCP_CC_TLP_RECOVERY, - TCP_CC_TLP_RECOVER_LASTPACKET, - TCP_CC_DELAY_FASTRECOVERY, - TCP_CC_TLP_IN_FASTRECOVERY + TCP_CC_CWND_INIT, /* 0 */ + TCP_CC_INSEQ_ACK_RCVD, /* 1 */ + TCP_CC_ACK_RCVD, /* 2 */ + TCP_CC_ENTER_FASTRECOVERY, /* 3 */ + TCP_CC_IN_FASTRECOVERY, /* 4 */ + TCP_CC_EXIT_FASTRECOVERY, /* 5 */ + TCP_CC_PARTIAL_ACK, /* 6 */ + TCP_CC_IDLE_TIMEOUT, /* 7 */ + TCP_CC_REXMT_TIMEOUT, /* 8 */ + TCP_CC_ECN_RCVD, /* 9 */ + TCP_CC_BAD_REXMT_RECOVERY, /* 10 */ + TCP_CC_OUTPUT_ERROR, /* 11 */ + TCP_CC_CHANGE_ALGO, /* 12 */ + TCP_CC_FLOW_CONTROL, /* 13 */ + TCP_CC_SUSPEND, /* 14 */ + TCP_CC_LIMITED_TRANSMIT, /* 15 */ + TCP_CC_EARLY_RETRANSMIT, /* 16 */ + TCP_CC_TLP_RECOVERY, 
/* 17 */ + TCP_CC_TLP_RECOVER_LASTPACKET, /* 18 */ + TCP_CC_DELAY_FASTRECOVERY, /* 19 */ + TCP_CC_TLP_IN_FASTRECOVERY, /* 20 */ + TCP_CC_DSACK_BAD_REXMT /* 21 */ }; /* @@ -591,12 +720,15 @@ struct tcpopt { #define TOF_SIGLEN 0x0080 /* signature length valid (RFC2385) */ #define TOF_SACK 0x0100 /* Peer sent SACK option */ #define TOF_MPTCP 0x0200 /* MPTCP options to be dropped */ +#define TOF_TFO 0x0400 /* TFO cookie option present */ +#define TOF_TFOREQ 0x0800 /* TFO cookie request present */ u_int32_t to_tsval; u_int32_t to_tsecr; u_int16_t to_mss; u_int8_t to_requested_s_scale; u_int8_t to_nsacks; /* number of SACK blocks */ u_char *to_sacks; /* pointer to the first SACK blocks */ + u_char *to_tfo; /* pointer to the TFO cookie */ }; /* @@ -635,6 +767,11 @@ struct rmxp_tao { #define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 4 bits */ #define TCP_DELTA_SHIFT 2 /* see tcp_input.c */ +/* TFO-specific defines */ +#define TFO_COOKIE_LEN_MIN 4 +#define TFO_COOKIE_LEN_DEFAULT 8 +#define TFO_COOKIE_LEN_MAX 16 + /* * The initial retransmission should happen at rtt + 4 * rttvar. * Because of the way we do the smoothing, srtt and rttvar @@ -767,6 +904,9 @@ struct tcpcb { u_int32_t t_badrxtwin; /* window for retransmit recovery */ }; +#define tcps_ecn_setup tcps_ecn_client_success +#define tcps_sent_cwr tcps_ecn_recv_ece +#define tcps_sent_ece tcps_ecn_sent_ece /* * TCP statistics. 
@@ -924,17 +1064,61 @@ struct tcpstat { u_int32_t tcps_rto_after_pto; /* RTO after a probe */ u_int32_t tcps_tlp_recovery; /* TLP induced fast recovery */ u_int32_t tcps_tlp_recoverlastpkt; /* TLP recoverd last pkt */ - u_int32_t tcps_ecn_setup; /* connection negotiated ECN */ - u_int32_t tcps_sent_cwr; /* Sent CWR, ECE received */ - u_int32_t tcps_sent_ece; /* Sent ECE notification */ + u_int32_t tcps_ecn_client_success; /* client-side connection negotiated ECN */ + u_int32_t tcps_ecn_recv_ece; /* ECE received, sent CWR */ + u_int32_t tcps_ecn_sent_ece; /* Sent ECE notification */ u_int32_t tcps_detect_reordering; /* Detect pkt reordering */ u_int32_t tcps_delay_recovery; /* Delay fast recovery */ u_int32_t tcps_avoid_rxmt; /* Retransmission was avoided */ u_int32_t tcps_unnecessary_rxmt; /* Retransmission was not needed */ u_int32_t tcps_nostretchack; /* disabled stretch ack algorithm on a connection */ u_int32_t tcps_rescue_rxmt; /* SACK rescue retransmit */ - u_int32_t tcps_pto_in_recovery; /* PTO during fast recovery */ - u_int32_t tcps_pmtudbh_reverted; /* PMTU Blackhole detection, segement size reverted */ + u_int32_t tcps_pto_in_recovery; /* rescue retransmit in fast recovery */ + u_int32_t tcps_pmtudbh_reverted; /* PMTU Blackhole detection, segment size reverted */ + + /* DSACK related statistics */ + u_int32_t tcps_dsack_disable; /* DSACK disabled due to n/w duplication */ + u_int32_t tcps_dsack_ackloss; /* ignore DSACK due to ack loss */ + u_int32_t tcps_dsack_badrexmt; /* DSACK based bad rexmt recovery */ + u_int32_t tcps_dsack_sent; /* Sent DSACK notification */ + u_int32_t tcps_dsack_recvd; /* Received a valid DSACK option */ + u_int32_t tcps_dsack_recvd_old; /* Received an out of window DSACK option */ + + /* MPTCP Subflow selection stats */ + u_int32_t tcps_mp_sel_symtomsd; /* By symptomsd */ + u_int32_t tcps_mp_sel_rtt; /* By RTT comparison */ + u_int32_t tcps_mp_sel_rto; /* By RTO comparison */ + u_int32_t tcps_mp_sel_peer; /* By peer's output 
pattern */ + u_int32_t tcps_mp_num_probes; /* Number of probes sent */ + u_int32_t tcps_mp_verdowngrade; /* MPTCP version downgrade */ + u_int32_t tcps_drop_after_sleep; /* drop after long AP sleep */ + u_int32_t tcps_probe_if; /* probe packets after interface availability */ + u_int32_t tcps_probe_if_conflict; /* Can't send probe packets for interface */ + + u_int32_t tcps_ecn_client_setup; /* Attempted ECN setup from client side */ + u_int32_t tcps_ecn_server_setup; /* Attempted ECN setup from server side */ + u_int32_t tcps_ecn_server_success; /* server-side connection negotiated ECN */ + u_int32_t tcps_ecn_lost_synack; /* Lost SYN-ACK with ECN setup */ + u_int32_t tcps_ecn_lost_syn; /* Lost SYN with ECN setup */ + u_int32_t tcps_ecn_not_supported; /* Server did not support ECN setup */ + u_int32_t tcps_ecn_recv_ce; /* Received CE from the network */ + u_int32_t tcps_ecn_conn_recv_ce; /* Number of connections received CE atleast once */ + u_int32_t tcps_ecn_conn_recv_ece; /* Number of connections received ECE atleast once */ + u_int32_t tcps_ecn_conn_plnoce; /* Number of connections that received no CE and sufferred packet loss */ + u_int32_t tcps_ecn_conn_pl_ce; /* Number of connections that received CE and sufferred packet loss */ + u_int32_t tcps_ecn_conn_nopl_ce; /* Number of connections that received CE and sufferred no packet loss */ + + /* TFO-related statistics */ + u_int32_t tcps_tfo_syn_data_rcv; /* Received a SYN+data with valid cookie */ + u_int32_t tcps_tfo_cookie_req_rcv;/* Received a TFO cookie-request */ + u_int32_t tcps_tfo_cookie_sent; /* Offered a TFO-cookie to the client */ + u_int32_t tcps_tfo_cookie_invalid;/* Received an invalid TFO-cookie */ + u_int32_t tcps_tfo_cookie_req; /* Cookie requested with the SYN */ + u_int32_t tcps_tfo_cookie_rcv; /* Cookie received in a SYN/ACK */ + u_int32_t tcps_tfo_syn_data_sent; /* SYN+data+cookie sent */ + u_int32_t tcps_tfo_syn_data_acked;/* SYN+data has been acknowledged */ + u_int32_t 
tcps_tfo_syn_loss; /* SYN+TFO has been lost and we fallback */ + u_int32_t tcps_tfo_blackhole; /* TFO got blackholed by a middlebox. */ }; struct tcpstat_local { @@ -1188,21 +1372,22 @@ struct xtcpcb_n { SYSCTL_DECL(_net_inet_tcp); #endif /* SYSCTL_DECL */ -/* - * Flags for TCP's connectx(2) user-protocol request routine. - */ -#if MPTCP -#define TCP_CONNREQF_MPTCP 0x1 /* called internally by MPTCP */ -#endif /* MPTCP */ - extern struct inpcbhead tcb; /* head of queue of active tcpcb's */ extern struct inpcbinfo tcbinfo; extern struct tcpstat tcpstat; /* tcp statistics */ extern int tcp_mssdflt; /* XXX */ extern int tcp_minmss; +#define TCP_FASTOPEN_SERVER 0x01 +#define TCP_FASTOPEN_CLIENT 0x02 + +extern int tcp_tfo_halfcnt; +extern int tcp_tfo_backlog; +extern int tcp_fastopen; +extern int tcp_tfo_fallback_min; extern int ss_fltsz; extern int ss_fltsz_local; extern int tcp_do_rfc3390; /* Calculate ss_fltsz according to RFC 3390 */ +extern int tcp_do_rfc1323; extern int target_qdelay; extern u_int32_t tcp_now; /* for RFC 1323 timestamps */ extern struct timeval tcp_uptime; @@ -1213,6 +1398,10 @@ extern int tcp_do_rfc3465; extern int tcp_do_rfc3465_lim2; extern int maxseg_unacked; extern int tcp_use_newreno; +extern struct zone *tcp_reass_zone; +extern struct zone *tcp_rxt_seg_zone; +extern int tcp_ecn_outbound; +extern int tcp_ecn_inbound; #if CONFIG_IFEF_NOWINDOWSCALE @@ -1255,6 +1444,7 @@ struct rtentry * tcp_rtlookup(struct inpcb *, unsigned int); void tcp_setpersist(struct tcpcb *); void tcp_gc(struct inpcbinfo *); +void tcp_itimer(struct inpcbinfo *ipi); void tcp_check_timer_state(struct tcpcb *tp); void tcp_run_timerlist(void *arg1, void *arg2); @@ -1265,6 +1455,8 @@ void tcp_trace(int, int, struct tcpcb *, void *, struct tcphdr *, int); void tcp_sack_doack(struct tcpcb *, struct tcpopt *, struct tcphdr *, u_int32_t *); +extern boolean_t tcp_sack_process_dsack(struct tcpcb *, struct tcpopt *, + struct tcphdr *); int tcp_detect_bad_rexmt(struct tcpcb *, struct 
tcphdr *, struct tcpopt *, u_int32_t rxtime); void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); @@ -1320,8 +1512,25 @@ extern int tcp_input_checksum(int, struct mbuf *, struct tcphdr *, int, int); extern void tcp_getconninfo(struct socket *, struct conninfo_tcp *); extern void add_to_time_wait(struct tcpcb *, uint32_t delay); extern void tcp_pmtud_revert_segment_size(struct tcpcb *tp); +extern void tcp_rxtseg_insert(struct tcpcb *, tcp_seq, tcp_seq); +extern struct tcp_rxt_seg *tcp_rxtseg_find(struct tcpcb *, tcp_seq, tcp_seq); +extern void tcp_rxtseg_clean(struct tcpcb *); +extern boolean_t tcp_rxtseg_detect_bad_rexmt(struct tcpcb *, tcp_seq); +extern boolean_t tcp_rxtseg_dsack_for_tlp(struct tcpcb *); +extern u_int32_t tcp_rxtseg_total_size(struct tcpcb *tp); +extern void tcp_rexmt_save_state(struct tcpcb *tp); +extern void tcp_interface_send_probe(u_int16_t if_index_available); +extern void tcp_probe_connectivity(struct ifnet *ifp, u_int32_t enable); +extern void tcp_get_connectivity_status(struct tcpcb *, + struct tcp_conn_status *); + +extern boolean_t tfo_enabled(const struct tcpcb *tp); +extern void tcp_disable_tfo(struct tcpcb *tp); +extern void tcp_tfo_gen_cookie(struct inpcb *inp, u_char *out, size_t blk_size); +#define TCP_FASTOPEN_KEYLEN 16 + #if MPTCP -extern uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, int); +extern int mptcp_input_preproc(struct tcpcb *, struct mbuf *, int); extern void mptcp_output_csum(struct tcpcb *, struct mbuf *, int32_t, unsigned, u_int64_t, u_int32_t *); extern int mptcp_adj_mss(struct tcpcb *, boolean_t); diff --git a/bsd/netinet/udp.h b/bsd/netinet/udp.h index ef82a6f15..3f4a47346 100644 --- a/bsd/netinet/udp.h +++ b/bsd/netinet/udp.h @@ -80,4 +80,36 @@ struct udphdr { * User-settable options (used with setsockopt). 
*/ #define UDP_NOCKSUM 0x01 /* don't checksum outbound payloads */ -#endif +#ifdef PRIVATE +#define UDP_KEEPALIVE_OFFLOAD 0x02 /* Send keep-alive at a given interval */ +#endif /* PRIVATE */ + +#ifdef PRIVATE +/* + * This is a mechanism to offload keep-alive or heartbeat messages + * to the Wifi driver when the host processor is sleeping. The application + * will give a small amount of data that can be placed in the message. The + * application will also specify an interval at which these messages + * should be sent. + * + * The purpose of these messages is to detect loss of connectivity in + * peer-to-peer communication without keeping the host processor awake. + * + * The application will pass this data to the kernel using setsockopt. It + * can set the interval to 0 to disable keepalive offload. + */ +#define UDP_KEEPALIVE_OFFLOAD_DATA_SIZE 32 + +/* Maximum keep alive interval in seconds */ +#define UDP_KEEPALIVE_INTERVAL_MAX_SECONDS 65536 + +struct udp_keepalive_offload { + u_char ka_data[UDP_KEEPALIVE_OFFLOAD_DATA_SIZE]; + u_int16_t ka_interval; /* interval in seconds */ + u_int8_t ka_data_len; /* valid length of ka_data */ + u_int8_t ka_type; /* type of application */ +#define UDP_KEEPALIVE_OFFLOAD_TYPE_AIRPLAY 0x1 +}; + +#endif /* PRIVATE */ +#endif /* _NETINET_UDP_H */ diff --git a/bsd/netinet/udp_usrreq.c b/bsd/netinet/udp_usrreq.c index af468dc31..09a5a3631 100644 --- a/bsd/netinet/udp_usrreq.c +++ b/bsd/netinet/udp_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -114,6 +114,10 @@ extern int esp_udp_encap_port; #include #endif /* NECP */ +#if FLOW_DIVERT +#include +#endif /* FLOW_DIVERT */ + #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETUDP, 0) #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETUDP, 2) #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETUDP, 1) @@ -202,11 +206,11 @@ static int udp_attach(struct socket *, int, struct proc *); static int udp_bind(struct socket *, struct sockaddr *, struct proc *); static int udp_connect(struct socket *, struct sockaddr *, struct proc *); static int udp_connectx(struct socket *, struct sockaddr_list **, - struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, - uint32_t, void *, uint32_t); + struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t, + sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *); static int udp_detach(struct socket *); static int udp_disconnect(struct socket *); -static int udp_disconnectx(struct socket *, associd_t, connid_t); +static int udp_disconnectx(struct socket *, sae_associd_t, sae_connid_t); static int udp_send(struct socket *, int, struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *); static void udp_append(struct inpcb *, struct ip *, struct mbuf *, int, @@ -237,6 +241,7 @@ struct pr_usrreqs udp_usrreqs = { .pru_sockaddr = in_getsockaddr, .pru_sosend = sosend, .pru_soreceive = soreceive, + .pru_soreceive_list = soreceive_list, }; void @@ -509,7 +514,9 @@ udp_input(struct mbuf *m, int iphlen) #if NECP skipit = 0; - if (!necp_socket_is_allowed_to_send_recv_v4(inp, uh->uh_dport, uh->uh_sport, &ip->ip_dst, &ip->ip_src, ifp, NULL)) { + if (!necp_socket_is_allowed_to_send_recv_v4(inp, + uh->uh_dport, uh->uh_sport, &ip->ip_dst, + &ip->ip_src, ifp, NULL, NULL)) { /* do not inject data to pcb */ skipit = 1; } @@ -683,7 +690,8 @@ udp_input(struct mbuf *m, int iphlen) goto bad; } #if NECP - if (!necp_socket_is_allowed_to_send_recv_v4(inp, uh->uh_dport, 
uh->uh_sport, &ip->ip_dst, &ip->ip_src, ifp, NULL)) { + if (!necp_socket_is_allowed_to_send_recv_v4(inp, uh->uh_dport, + uh->uh_sport, &ip->ip_dst, &ip->ip_src, ifp, NULL, NULL)) { udp_unlock(inp->inp_socket, 1, 0); IF_UDP_STATINC(ifp, badipsec); goto bad; @@ -944,7 +952,78 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt) else inp->inp_flags &= ~INP_UDP_NOCKSUM; break; + case UDP_KEEPALIVE_OFFLOAD: + { + struct udp_keepalive_offload ka; + /* + * If the socket is not connected, the stack will + * not know the destination address to put in the + * keepalive datagram. Return an error now instead + * of failing later. + */ + if (!(so->so_state & SS_ISCONNECTED)) { + error = EINVAL; + break; + } + if (sopt->sopt_valsize != sizeof(ka)) { + error = EINVAL; + break; + } + if ((error = sooptcopyin(sopt, &ka, sizeof(ka), + sizeof(ka))) != 0) + break; + + /* application should specify the type */ + if (ka.ka_type == 0) + return (EINVAL); + + if (ka.ka_interval == 0) { + /* + * if interval is 0, disable the offload + * mechanism + */ + if (inp->inp_keepalive_data != NULL) + FREE(inp->inp_keepalive_data, + M_TEMP); + inp->inp_keepalive_data = NULL; + inp->inp_keepalive_datalen = 0; + inp->inp_keepalive_interval = 0; + inp->inp_keepalive_type = 0; + inp->inp_flags2 &= ~INP2_KEEPALIVE_OFFLOAD; + } else { + if (inp->inp_keepalive_data != NULL) { + FREE(inp->inp_keepalive_data, + M_TEMP); + inp->inp_keepalive_data = NULL; + } + inp->inp_keepalive_datalen = min( + ka.ka_data_len, + UDP_KEEPALIVE_OFFLOAD_DATA_SIZE); + if (inp->inp_keepalive_datalen > 0) { + MALLOC(inp->inp_keepalive_data, + u_int8_t *, + inp->inp_keepalive_datalen, + M_TEMP, M_WAITOK); + if (inp->inp_keepalive_data == NULL) { + inp->inp_keepalive_datalen = 0; + error = ENOMEM; + break; + } + bcopy(ka.ka_data, + inp->inp_keepalive_data, + inp->inp_keepalive_datalen); + } else { + inp->inp_keepalive_datalen = 0; + } + inp->inp_keepalive_interval = + min(UDP_KEEPALIVE_INTERVAL_MAX_SECONDS, + ka.ka_interval); + 
inp->inp_keepalive_type = ka.ka_type; + inp->inp_flags2 |= INP2_KEEPALIVE_OFFLOAD; + } + break; + } case SO_FLUSH: if ((error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval))) != 0) @@ -1516,7 +1595,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, * Calculate data length and get a mbuf * for UDP and IP headers. */ - M_PREPEND(m, sizeof (struct udpiphdr), M_DONTWAIT); + M_PREPEND(m, sizeof (struct udpiphdr), M_DONTWAIT, 1); if (m == 0) { error = ENOBUFS; goto abort; @@ -1553,19 +1632,21 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, KERNEL_DEBUG(DBG_LAYER_OUT_END, ui->ui_dport, ui->ui_sport, ui->ui_src.s_addr, ui->ui_dst.s_addr, ui->ui_ulen); - + #if NECP { necp_kernel_policy_id policy_id; - if (!necp_socket_is_allowed_to_send_recv_v4(inp, lport, fport, &laddr, &faddr, NULL, &policy_id)) { + u_int32_t route_rule_id; + if (!necp_socket_is_allowed_to_send_recv_v4(inp, lport, fport, + &laddr, &faddr, NULL, &policy_id, &route_rule_id)) { error = EHOSTUNREACH; goto abort; } - necp_mark_packet_from_socket(m, inp, policy_id); + necp_mark_packet_from_socket(m, inp, policy_id, route_rule_id); } #endif /* NECP */ - + #if IPSEC if (inp->inp_sp != NULL && ipsec_setsocket(m, inp->inp_socket) != 0) { error = ENOBUFS; @@ -1667,8 +1748,17 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, * If the destination route is unicast, update outifp with * that of the route interface used by IP. 
*/ - if (rt != NULL && (outifp = rt->rt_ifp) != inp->inp_last_outifp) - inp->inp_last_outifp = outifp; /* no reference needed */ + if (rt != NULL && + (outifp = rt->rt_ifp) != inp->inp_last_outifp) { + inp->inp_last_outifp = outifp; /* no reference needed */ + + so->so_pktheadroom = P2ROUNDUP( + sizeof(struct udphdr) + + sizeof(struct ip) + + ifnet_hdrlen(outifp) + + ifnet_packetpreamblelen(outifp), + sizeof(u_int32_t)); + } } else { ROUTE_RELEASE(&inp->inp_route); } @@ -1794,12 +1884,8 @@ udp_bind(struct socket *so, struct sockaddr *nam, struct proc *p) return (EAFNOSUPPORT); inp = sotoinpcb(so); - if (inp == NULL -#if NECP - || (necp_socket_should_use_flow_divert(inp)) -#endif /* NECP */ - ) - return (inp == NULL ? EINVAL : EPROTOTYPE); + if (inp == NULL) + return (EINVAL); error = in_pcbbind(inp, nam, p); return (error); } @@ -1811,14 +1897,29 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p) int error; inp = sotoinpcb(so); - if (inp == NULL -#if NECP - || (necp_socket_should_use_flow_divert(inp)) -#endif /* NECP */ - ) - return (inp == NULL ? 
EINVAL : EPROTOTYPE); + if (inp == NULL) + return (EINVAL); if (inp->inp_faddr.s_addr != INADDR_ANY) return (EISCONN); + +#if NECP +#if FLOW_DIVERT + if (necp_socket_should_use_flow_divert(inp)) { + uint32_t fd_ctl_unit = + necp_socket_get_flow_divert_control_unit(inp); + if (fd_ctl_unit > 0) { + error = flow_divert_pcb_init(so, fd_ctl_unit); + if (error == 0) { + error = flow_divert_connect_out(so, nam, p); + } + } else { + error = ENETDOWN; + } + return (error); + } +#endif /* FLOW_DIVERT */ +#endif /* NECP */ + error = in_pcbconnect(inp, nam, p, IFSCOPE_NONE, NULL); if (error == 0) { soisconnected(so); @@ -1831,13 +1932,15 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p) int udp_connectx_common(struct socket *so, int af, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, - struct proc *p, uint32_t ifscope, associd_t aid, connid_t *pcid, - uint32_t flags, void *arg, uint32_t arglen) + struct proc *p, uint32_t ifscope, sae_associd_t aid, sae_connid_t *pcid, + uint32_t flags, void *arg, uint32_t arglen, + struct uio *uio, user_ssize_t *bytes_written) { #pragma unused(aid, flags, arg, arglen) struct sockaddr_entry *src_se = NULL, *dst_se = NULL; struct inpcb *inp = sotoinpcb(so); int error; + user_ssize_t datalen = 0; if (inp == NULL) return (EINVAL); @@ -1855,7 +1958,8 @@ udp_connectx_common(struct socket *so, int af, VERIFY(src_se == NULL || src_se->se_addr->sa_family == af); #if NECP - inp_update_necp_policy(inp, src_se ? src_se->se_addr : NULL, dst_se ? dst_se->se_addr : NULL, ifscope); + inp_update_necp_policy(inp, src_se ? src_se->se_addr : NULL, + dst_se ? dst_se->se_addr : NULL, ifscope); #endif /* NECP */ /* bind socket to the specified interface, if requested */ @@ -1885,8 +1989,39 @@ udp_connectx_common(struct socket *so, int af, /* NOTREACHED */ } + if (error != 0) + return (error); + + /* + * If there is data, copy it. DATA_IDEMPOTENT is ignored. + * CONNECT_RESUME_ON_READ_WRITE is ignored. 
+ */ + if (uio != NULL) { + socket_unlock(so, 0); + + VERIFY(bytes_written != NULL); + + datalen = uio_resid(uio); + error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, + (uio_t)uio, NULL, NULL, 0); + socket_lock(so, 0); + + /* If error returned is EMSGSIZE, for example, disconnect */ + if (error == 0 || error == EWOULDBLOCK) + *bytes_written = datalen - uio_resid(uio); + else + (void)so->so_proto->pr_usrreqs->pru_disconnectx(so, + SAE_ASSOCID_ANY, SAE_CONNID_ANY); + /* + * mask the EWOULDBLOCK error so that the caller + * knows that atleast the connect was successful. + */ + if (error == EWOULDBLOCK) + error = 0; + } + if (error == 0 && pcid != NULL) - *pcid = 1; /* there is only 1 connection for a UDP */ + *pcid = 1; /* there is only 1 connection for UDP */ return (error); } @@ -1894,11 +2029,11 @@ udp_connectx_common(struct socket *so, int af, static int udp_connectx(struct socket *so, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid, uint32_t flags, void *arg, - uint32_t arglen) + sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written) { return (udp_connectx_common(so, AF_INET, src_sl, dst_sl, - p, ifscope, aid, pcid, flags, arg, arglen)); + p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written)); } static int @@ -1953,10 +2088,10 @@ udp_disconnect(struct socket *so) } static int -udp_disconnectx(struct socket *so, associd_t aid, connid_t cid) +udp_disconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid) { #pragma unused(cid) - if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) return (EINVAL); return (udp_disconnect(so)); @@ -1966,22 +2101,29 @@ static int udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct proc *p) { +#ifndef FLOW_DIVERT #pragma unused(flags) +#endif /* 
!(FLOW_DIVERT) */ struct inpcb *inp; inp = sotoinpcb(so); - if (inp == NULL -#if NECP - || (necp_socket_should_use_flow_divert(inp)) -#endif /* NECP */ - ) { + if (inp == NULL) { if (m != NULL) m_freem(m); if (control != NULL) m_freem(control); - return (inp == NULL ? EINVAL : EPROTOTYPE); + return (EINVAL); } +#if NECP +#if FLOW_DIVERT + if (necp_socket_should_use_flow_divert(inp)) { + /* Implicit connect */ + return (flow_divert_implicit_data_out(so, flags, m, addr, control, p)); + } +#endif /* FLOW_DIVERT */ +#endif /* NECP */ + return (udp_output(inp, m, addr, control, p)); } @@ -2251,3 +2393,258 @@ udp_input_checksum(struct mbuf *m, struct udphdr *uh, int off, int ulen) return (0); } + +extern void +udp_fill_keepalive_offload_frames(ifnet_t ifp, + struct ifnet_keepalive_offload_frame *frames_array, + u_int32_t frames_array_count, size_t frame_data_offset, + u_int32_t *used_frames_count); + +void +udp_fill_keepalive_offload_frames(ifnet_t ifp, + struct ifnet_keepalive_offload_frame *frames_array, + u_int32_t frames_array_count, size_t frame_data_offset, + u_int32_t *used_frames_count) +{ + struct inpcb *inp; + inp_gen_t gencnt; + u_int32_t frame_index = *used_frames_count; + + if (ifp == NULL || frames_array == NULL || + frames_array_count == 0 || + frame_index >= frames_array_count || + frame_data_offset >= IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) + return; + + lck_rw_lock_shared(udbinfo.ipi_lock); + gencnt = udbinfo.ipi_gencnt; + LIST_FOREACH(inp, udbinfo.ipi_listhead, inp_list) { + struct socket *so; + u_int8_t *data; + struct ifnet_keepalive_offload_frame *frame; + struct mbuf *m = NULL; + + if (frame_index >= frames_array_count) + break; + + if (inp->inp_gencnt > gencnt || + inp->inp_state == INPCB_STATE_DEAD) + continue; + + if ((so = inp->inp_socket) == NULL || + (so->so_state & SS_DEFUNCT)) + continue; + /* + * check for keepalive offload flag without socket + * lock to avoid a deadlock + */ + if (!(inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD)) { + 
continue; + } + + udp_lock(so, 1, 0); + if (!(inp->inp_vflag & (INP_IPV4 | INP_IPV6))) { + udp_unlock(so, 1, 0); + continue; + } + if ((inp->inp_vflag & INP_IPV4) && + (inp->inp_laddr.s_addr == INADDR_ANY || + inp->inp_faddr.s_addr == INADDR_ANY)) { + udp_unlock(so, 1, 0); + continue; + } + if ((inp->inp_vflag & INP_IPV6) && + (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) || + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))) { + udp_unlock(so, 1, 0); + continue; + } + if (inp->inp_lport == 0 || inp->inp_fport == 0) { + udp_unlock(so, 1, 0); + continue; + } + if (inp->inp_last_outifp == NULL || + inp->inp_last_outifp->if_index != ifp->if_index) { + udp_unlock(so, 1, 0); + continue; + } + if ((inp->inp_vflag & INP_IPV4)) { + if ((frame_data_offset + sizeof(struct udpiphdr) + + inp->inp_keepalive_datalen) > + IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) { + udp_unlock(so, 1, 0); + continue; + } + if ((sizeof(struct udpiphdr) + + inp->inp_keepalive_datalen) > _MHLEN) { + udp_unlock(so, 1, 0); + continue; + } + } else { + if ((frame_data_offset + sizeof(struct ip6_hdr) + + sizeof(struct udphdr) + + inp->inp_keepalive_datalen) > + IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) { + udp_unlock(so, 1, 0); + continue; + } + if ((sizeof(struct ip6_hdr) + sizeof(struct udphdr) + + inp->inp_keepalive_datalen) > _MHLEN) { + udp_unlock(so, 1, 0); + continue; + } + } + MGETHDR(m, M_WAIT, MT_HEADER); + if (m == NULL) { + udp_unlock(so, 1, 0); + continue; + } + /* + * This inp has all the information that is needed to + * generate an offload frame. 
+ */ + if (inp->inp_vflag & INP_IPV4) { + struct ip *ip; + struct udphdr *udp; + + frame = &frames_array[frame_index]; + frame->length = frame_data_offset + + sizeof(struct udpiphdr) + + inp->inp_keepalive_datalen; + frame->ether_type = + IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV4; + frame->interval = inp->inp_keepalive_interval; + switch (inp->inp_keepalive_type) { + case UDP_KEEPALIVE_OFFLOAD_TYPE_AIRPLAY: + frame->type = + IFNET_KEEPALIVE_OFFLOAD_FRAME_AIRPLAY; + break; + default: + break; + } + data = mtod(m, u_int8_t *); + bzero(data, sizeof(struct udpiphdr)); + ip = (__typeof__(ip))(void *)data; + udp = (__typeof__(udp))(void *) (data + + sizeof(struct ip)); + m->m_len = sizeof(struct udpiphdr); + data = data + sizeof(struct udpiphdr); + if (inp->inp_keepalive_datalen > 0 && + inp->inp_keepalive_data != NULL) { + bcopy(inp->inp_keepalive_data, data, + inp->inp_keepalive_datalen); + m->m_len += inp->inp_keepalive_datalen; + } + m->m_pkthdr.len = m->m_len; + + ip->ip_v = IPVERSION; + ip->ip_hl = (sizeof(struct ip) >> 2); + ip->ip_p = IPPROTO_UDP; + ip->ip_len = htons(sizeof(struct udpiphdr) + + (u_short)inp->inp_keepalive_datalen); + ip->ip_ttl = inp->inp_ip_ttl; + ip->ip_tos = inp->inp_ip_tos; + ip->ip_src = inp->inp_laddr; + ip->ip_dst = inp->inp_faddr; + ip->ip_sum = in_cksum_hdr_opt(ip); + + udp->uh_sport = inp->inp_lport; + udp->uh_dport = inp->inp_fport; + udp->uh_ulen = htons(sizeof(struct udphdr) + + (u_short)inp->inp_keepalive_datalen); + + if (!(inp->inp_flags & INP_UDP_NOCKSUM)) { + udp->uh_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, + htons(sizeof(struct udphdr) + + (u_short)inp->inp_keepalive_datalen + + IPPROTO_UDP)); + m->m_pkthdr.csum_flags = CSUM_UDP; + m->m_pkthdr.csum_data = offsetof(struct udphdr, + uh_sum); + } + m->m_pkthdr.pkt_proto = IPPROTO_UDP; + in_delayed_cksum(m); + bcopy(m->m_data, frame->data + frame_data_offset, + m->m_len); + } else { + struct ip6_hdr *ip6; + struct udphdr *udp6; + + VERIFY(inp->inp_vflag & 
INP_IPV6); + frame = &frames_array[frame_index]; + frame->length = frame_data_offset + + sizeof(struct ip6_hdr) + + sizeof(struct udphdr) + + inp->inp_keepalive_datalen; + frame->ether_type = + IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV6; + frame->interval = inp->inp_keepalive_interval; + switch (inp->inp_keepalive_type) { + case UDP_KEEPALIVE_OFFLOAD_TYPE_AIRPLAY: + frame->type = + IFNET_KEEPALIVE_OFFLOAD_FRAME_AIRPLAY; + break; + default: + break; + } + data = mtod(m, u_int8_t *); + bzero(data, sizeof(struct ip6_hdr) + sizeof(struct udphdr)); + ip6 = (__typeof__(ip6))(void *)data; + udp6 = (__typeof__(udp6))(void *)(data + + sizeof(struct ip6_hdr)); + m->m_len = sizeof(struct ip6_hdr) + + sizeof(struct udphdr); + data = data + (sizeof(struct ip6_hdr) + + sizeof(struct udphdr)); + if (inp->inp_keepalive_datalen > 0 && + inp->inp_keepalive_data != NULL) { + bcopy(inp->inp_keepalive_data, data, + inp->inp_keepalive_datalen); + m->m_len += inp->inp_keepalive_datalen; + } + m->m_pkthdr.len = m->m_len; + ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_nxt = IPPROTO_UDP; + ip6->ip6_hlim = ip6_defhlim; + ip6->ip6_plen = htons(sizeof(struct udphdr) + + (u_short)inp->inp_keepalive_datalen); + ip6->ip6_src = inp->in6p_laddr; + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) + ip6->ip6_src.s6_addr16[1] = 0; + + ip6->ip6_dst = inp->in6p_faddr; + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) + ip6->ip6_dst.s6_addr16[1] = 0; + + udp6->uh_sport = inp->in6p_lport; + udp6->uh_dport = inp->in6p_fport; + udp6->uh_ulen = htons(sizeof(struct udphdr) + + (u_short)inp->inp_keepalive_datalen); + if (!(inp->inp_flags & INP_UDP_NOCKSUM)) { + udp6->uh_sum = in6_pseudo(&ip6->ip6_src, + &ip6->ip6_dst, + htonl(sizeof(struct udphdr) + + (u_short)inp->inp_keepalive_datalen + + IPPROTO_UDP)); + m->m_pkthdr.csum_flags = CSUM_UDPIPV6; + m->m_pkthdr.csum_data = offsetof(struct udphdr, + uh_sum); + } + m->m_pkthdr.pkt_proto = 
IPPROTO_UDP; + in6_delayed_cksum(m); + bcopy(m->m_data, frame->data + frame_data_offset, + m->m_len); + } + if (m != NULL) { + m_freem(m); + m = NULL; + } + frame_index++; + udp_unlock(so, 1, 0); + } + lck_rw_done(udbinfo.ipi_lock); + *used_frames_count = frame_index; +} diff --git a/bsd/netinet/udp_var.h b/bsd/netinet/udp_var.h index 42d56a3c8..c82931b50 100644 --- a/bsd/netinet/udp_var.h +++ b/bsd/netinet/udp_var.h @@ -168,8 +168,8 @@ extern int udp_ctloutput(struct socket *, struct sockopt *); extern void udp_init(struct protosw *, struct domain *); extern void udp_input(struct mbuf *, int); extern int udp_connectx_common(struct socket *, int, struct sockaddr_list **, - struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, - uint32_t, void *, uint32_t); + struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t, + sae_connid_t *, uint32_t, void *, uint32_t, struct uio*, user_ssize_t *); extern void udp_notify(struct inpcb *inp, int errno); extern int udp_shutdown(struct socket *so); extern int udp_lock(struct socket *, int, void *); diff --git a/bsd/netinet6/Makefile b/bsd/netinet6/Makefile index ae89bdbda..2e6677f6c 100644 --- a/bsd/netinet6/Makefile +++ b/bsd/netinet6/Makefile @@ -10,27 +10,34 @@ include $(MakeInc_def) DATAFILES = \ ah.h ipsec.h \ esp.h in6.h ipcomp.h raw_ip6.h \ - in6_var.h nd6.h + scope6_var.h in6_var.h nd6.h PRIVATE_DATAFILES = \ - in6_pcb.h ip6_var.h mld6_var.h ip6_fw.h + in6.h \ + in6_pcb.h \ + in6_var.h \ + ip6_fw.h \ + ip6_var.h \ + mld6_var.h \ + nd6.h \ + scope6_var.h PRIVATE_KERNELFILES = \ ah6.h esp6.h esp_rijndael.h in6_gif.h in6_ifattach.h \ ip6_ecn.h ip6protosw.h ipcomp6.h ipsec6.h \ - scope6_var.h tcp6_var.h udp6_var.h + tcp6_var.h udp6_var.h INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = netinet6 -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = ${DATAFILES} EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} 
-INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES} +INSTALL_KF_MI_LCL_LIST = $(sort ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES}) include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/netinet6/ah_core.c b/bsd/netinet6/ah_core.c index 417c67360..60a6e5b69 100644 --- a/bsd/netinet6/ah_core.c +++ b/bsd/netinet6/ah_core.c @@ -961,10 +961,9 @@ ah_hmac_sha2_384_init(state, sav) state->sav = sav; state->foo = (void *)_MALLOC(128 + 128 + sizeof(SHA384_CTX), - M_TEMP, M_NOWAIT); + M_TEMP, M_NOWAIT | M_ZERO); if (!state->foo) return ENOBUFS; - bzero(state->foo, 128 + 128 + sizeof(SHA384_CTX)); ipad = (u_char *)state->foo; opad = (u_char *)(ipad + 128); @@ -1104,10 +1103,9 @@ ah_hmac_sha2_512_init(state, sav) state->sav = sav; state->foo = (void *)_MALLOC(128 + 128 + sizeof(SHA512_CTX), - M_TEMP, M_NOWAIT); + M_TEMP, M_NOWAIT | M_ZERO); if (!state->foo) return ENOBUFS; - bzero(state->foo, 128 + 128 + sizeof(SHA512_CTX)); ipad = (u_char *)state->foo; opad = (u_char *)(ipad + 128); diff --git a/bsd/netinet6/ah_input.c b/bsd/netinet6/ah_input.c index 00967821b..28f53d5cc 100644 --- a/bsd/netinet6/ah_input.c +++ b/bsd/netinet6/ah_input.c @@ -430,7 +430,10 @@ ah4_input(struct mbuf *m, int off) } ip = mtod(m, struct ip *); /* ECN consideration. 
*/ - ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos); + if (ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos) == 0) { + IPSEC_STAT_INCREMENT(ipsecstat.in_inval); + goto fail; + } if (!key_checktunnelsanity(sav, AF_INET, (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) { ipseclog((LOG_NOTICE, "ipsec tunnel address mismatch " @@ -607,7 +610,7 @@ ah6_input(struct mbuf **mp, int *offp, int proto) struct secasvar *sav = NULL; u_int16_t nxt; size_t stripsiz = 0; - + sa_family_t ifamily; IP6_EXTHDR_CHECK(m, off, sizeof(struct ah), {return IPPROTO_DONE;}); ah = (struct ah *)(void *)(mtod(m, caddr_t) + off); @@ -816,7 +819,7 @@ ah6_input(struct mbuf **mp, int *offp, int proto) /* RFC 2402 */ stripsiz = sizeof(struct newah) + siz1; } - if (ipsec6_tunnel_validate(m, off + stripsiz, nxt, sav)) { + if (ipsec6_tunnel_validate(m, off + stripsiz, nxt, sav, &ifamily)) { ifaddr_t ifa; struct sockaddr_storage addr; @@ -829,6 +832,12 @@ ah6_input(struct mbuf **mp, int *offp, int proto) */ u_int32_t flowinfo; /*net endian*/ + if (ifamily == AF_INET) { + ipseclog((LOG_NOTICE, "ipsec tunnel protocol mismatch " + "in IPv6 AH input: %s\n", ipsec_logsastr(sav))); + goto fail; + } + flowinfo = ip6->ip6_flow; m_adj(m, off + stripsiz); if (m->m_len < sizeof(*ip6)) { @@ -844,7 +853,10 @@ ah6_input(struct mbuf **mp, int *offp, int proto) } ip6 = mtod(m, struct ip6_hdr *); /* ECN consideration. 
*/ - ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow); + if (ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow) == 0) { + IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); + goto fail; + } if (!key_checktunnelsanity(sav, AF_INET6, (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) { ipseclog((LOG_NOTICE, "ipsec tunnel address mismatch " diff --git a/bsd/netinet6/esp.h b/bsd/netinet6/esp.h index 048a792cb..b2deaa2f3 100644 --- a/bsd/netinet6/esp.h +++ b/bsd/netinet6/esp.h @@ -117,6 +117,10 @@ struct esp_algorithm { struct secasvar *, u_int8_t *, u_int8_t *); int (*blockencrypt)(const struct esp_algorithm *, struct secasvar *, u_int8_t *, u_int8_t *); + /* For Authenticated Encryption Methods */ + size_t icvlen; + int (*finalizedecrypt)(struct secasvar *, u_int8_t *, uint); + int (*finalizeencrypt)(struct secasvar *, u_int8_t *, uint); }; extern const struct esp_algorithm *esp_algorithm_lookup(int); diff --git a/bsd/netinet6/esp_core.c b/bsd/netinet6/esp_core.c index b5236fdd7..90c01fe19 100644 --- a/bsd/netinet6/esp_core.c +++ b/bsd/netinet6/esp_core.c @@ -142,37 +142,54 @@ static int esp_cbc_decrypt(struct mbuf *, size_t, struct secasvar *, const struct esp_algorithm *, int); static int esp_cbc_encrypt(struct mbuf *, size_t, size_t, struct secasvar *, const struct esp_algorithm *, int); +static int esp_gcm_mature(struct secasvar *); #define MAXIVLEN 16 +#define ESP_AESGCM_KEYLEN128 160 // 16-bytes key + 4 bytes salt +#define ESP_AESGCM_KEYLEN192 224 // 24-bytes key + 4 bytes salt +#define ESP_AESGCM_KEYLEN256 288 // 32-bytes key + 4 bytes salt + static const struct esp_algorithm des_cbc = { 8, -1, esp_descbc_mature, 64, 64, esp_des_schedlen, "des-cbc", esp_descbc_ivlen, esp_cbc_decrypt, esp_cbc_encrypt, esp_des_schedule, - esp_des_blockdecrypt, esp_des_blockencrypt, }; + esp_des_blockdecrypt, esp_des_blockencrypt, + 0, 0, 0 }; static const struct esp_algorithm des3_cbc = { 8, 8, esp_cbc_mature, 192, 192, esp_3des_schedlen, "3des-cbc", esp_common_ivlen, 
esp_cbc_decrypt, esp_cbc_encrypt, esp_3des_schedule, - esp_3des_blockdecrypt, esp_3des_blockencrypt, }; + esp_3des_blockdecrypt, esp_3des_blockencrypt, + 0, 0, 0 }; static const struct esp_algorithm null_esp = { 1, 0, esp_null_mature, 0, 2048, 0, "null", esp_common_ivlen, esp_null_decrypt, - esp_null_encrypt, NULL, NULL, NULL }; + esp_null_encrypt, NULL, NULL, NULL, + 0, 0, 0 }; static const struct esp_algorithm aes_cbc = { 16, 16, esp_cbc_mature, 128, 256, esp_aes_schedlen, "aes-cbc", esp_common_ivlen, esp_cbc_decrypt_aes, esp_cbc_encrypt_aes, esp_aes_schedule, - 0, 0 }; + 0, 0, + 0, 0, 0 }; +static const struct esp_algorithm aes_gcm = + { 4, 8, esp_gcm_mature, ESP_AESGCM_KEYLEN128, ESP_AESGCM_KEYLEN256, esp_gcm_schedlen, + "aes-gcm", + esp_common_ivlen, esp_gcm_decrypt_aes, + esp_gcm_encrypt_aes, esp_gcm_schedule, + 0, 0, + 16, esp_gcm_decrypt_finalize, esp_gcm_encrypt_finalize}; static const struct esp_algorithm *esp_algorithms[] = { &des_cbc, &des3_cbc, &null_esp, - &aes_cbc + &aes_cbc, + &aes_gcm, }; const struct esp_algorithm * @@ -188,6 +205,8 @@ esp_algorithm_lookup(idx) return &null_esp; case SADB_X_EALG_RIJNDAELCBC: return &aes_cbc; + case SADB_X_EALG_AES_GCM: + return &aes_gcm; default: return NULL; } @@ -468,6 +487,62 @@ esp_cbc_mature(sav) return 0; } +static int +esp_gcm_mature(sav) + struct secasvar *sav; +{ + int keylen; + const struct esp_algorithm *algo; + + if (sav->flags & SADB_X_EXT_OLD) { + ipseclog((LOG_ERR, + "esp_gcm_mature: algorithm incompatible with esp-old\n")); + return 1; + } + if (sav->flags & SADB_X_EXT_DERIV) { + ipseclog((LOG_ERR, + "esp_gcm_mature: algorithm incompatible with derived\n")); + return 1; + } + + if (!sav->key_enc) { + ipseclog((LOG_ERR, "esp_gcm_mature: no key is given.\n")); + return 1; + } + + algo = esp_algorithm_lookup(sav->alg_enc); + if (!algo) { + ipseclog((LOG_ERR, + "esp_gcm_mature: unsupported algorithm.\n")); + return 1; + } + + keylen = sav->key_enc->sadb_key_bits; + if (keylen < algo->keymin || 
algo->keymax < keylen) { + ipseclog((LOG_ERR, + "esp_gcm_mature %s: invalid key length %d.\n", + algo->name, sav->key_enc->sadb_key_bits)); + return 1; + } + switch (sav->alg_enc) { + case SADB_X_EALG_AES_GCM: + /* allows specific key sizes only */ + if (!(keylen == ESP_AESGCM_KEYLEN128 || keylen == ESP_AESGCM_KEYLEN192 || keylen == ESP_AESGCM_KEYLEN256)) { + ipseclog((LOG_ERR, + "esp_gcm_mature %s: invalid key length %d.\n", + algo->name, keylen)); + return 1; + } + break; + default: + ipseclog((LOG_ERR, + "esp_gcm_mature %s: invalid algo %d.\n", sav->alg_enc)); + return 1; + } + + return 0; +} + static int esp_3des_schedlen( __unused const struct esp_algorithm *algo) diff --git a/bsd/netinet6/esp_input.c b/bsd/netinet6/esp_input.c index c8a809490..277e6963e 100644 --- a/bsd/netinet6/esp_input.c +++ b/bsd/netinet6/esp_input.c @@ -138,7 +138,7 @@ extern lck_mtx_t *sadb_mutex; ? sizeof(struct newesp) : sizeof(struct esp)) static struct ip * -esp4_input_strip_UDP_encap (struct mbuf *m, int iphlen) +esp4_input_strip_udp_encap (struct mbuf *m, int iphlen) { // strip the udp header that's encapsulating ESP struct ip *ip; @@ -155,6 +155,24 @@ esp4_input_strip_UDP_encap (struct mbuf *m, int iphlen) return ip; } +static struct ip6_hdr * +esp6_input_strip_udp_encap (struct mbuf *m, int ip6hlen) +{ + // strip the udp header that's encapsulating ESP + struct ip6_hdr *ip6; + size_t stripsiz = sizeof(struct udphdr); + + ip6 = mtod(m, __typeof__(ip6)); + ovbcopy((caddr_t)ip6, (caddr_t)(((u_char *)ip6) + stripsiz), ip6hlen); + m->m_data += stripsiz; + m->m_len -= stripsiz; + m->m_pkthdr.len -= stripsiz; + ip6 = mtod(m, __typeof__(ip6)); + ip6->ip6_plen = ip6->ip6_plen - stripsiz; + ip6->ip6_nxt = IPPROTO_ESP; + return ip6; +} + void esp4_input(m, off) struct mbuf *m; @@ -257,6 +275,16 @@ esp4_input(m, off) } seq = ntohl(((struct newesp *)esp)->esp_seq); + + /* Save ICV from packet for verification later */ + size_t siz = 0; + unsigned char saved_icv[AH_MAXSUMSIZE]; + if 
(algo->finalizedecrypt) { + siz = algo->icvlen; + m_copydata(m, m->m_pkthdr.len - siz, siz, (caddr_t) saved_icv); + goto delay_icv; + } + if (!((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay && (sav->alg_auth && sav->key_auth))) goto noreplaycheck; @@ -283,7 +311,6 @@ esp4_input(m, off) u_char sum0[AH_MAXSUMSIZE] __attribute__((aligned(4))); u_char sum[AH_MAXSUMSIZE] __attribute__((aligned(4))); const struct ah_algorithm *sumalgo; - size_t siz; sumalgo = ah_algorithm_lookup(sav->alg_auth); if (!sumalgo) @@ -317,6 +344,8 @@ esp4_input(m, off) goto bad; } +delay_icv: + /* strip off the authentication data */ m_adj(m, -siz); ip = mtod(m, struct ip *); @@ -398,6 +427,23 @@ esp4_input(m, off) m->m_flags |= M_DECRYPTED; + if (algo->finalizedecrypt) + { + unsigned char tag[algo->icvlen]; + if ((*algo->finalizedecrypt)(sav, tag, algo->icvlen)) { + ipseclog((LOG_ERR, "packet decryption ICV failure\n")); + IPSEC_STAT_INCREMENT(ipsecstat.in_inval); + KERNEL_DEBUG(DBG_FNC_DECRYPT | DBG_FUNC_END, 1,0,0,0,0); + goto bad; + } + if (memcmp(saved_icv, tag, algo->icvlen)) { + ipseclog((LOG_ERR, "packet decryption ICV mismatch\n")); + IPSEC_STAT_INCREMENT(ipsecstat.in_inval); + KERNEL_DEBUG(DBG_FNC_DECRYPT | DBG_FUNC_END, 1,0,0,0,0); + goto bad; + } + } + /* * find the trailer of the ESP. */ @@ -448,7 +494,7 @@ esp4_input(m, off) sav->remote_ike_port = ntohs(encap_uh->uh_sport); } } - ip = esp4_input_strip_UDP_encap(m, off); + ip = esp4_input_strip_udp_encap(m, off); esp = (struct esp *)(void *)(((u_int8_t *)ip) + off); } @@ -491,7 +537,10 @@ esp4_input(m, off) } ip = mtod(m, struct ip *); /* ECN consideration. 
*/ - ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos); + if (ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos) == 0) { + IPSEC_STAT_INCREMENT(ipsecstat.in_inval); + goto bad; + } if (!key_checktunnelsanity(sav, AF_INET, (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) { ipseclog((LOG_ERR, "ipsec tunnel address mismatch " @@ -533,8 +582,10 @@ esp4_input(m, off) ip6 = mtod(m, struct ip6_hdr *); /* ECN consideration. */ - /* XXX To be fixed later if needed */ - // ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos); + if (ip64_ecn_egress(ip4_ipsec_ecn, &tos, &ip6->ip6_flow) == 0) { + IPSEC_STAT_INCREMENT(ipsecstat.in_inval); + goto bad; + } if (!key_checktunnelsanity(sav, AF_INET6, (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) { @@ -747,6 +798,7 @@ esp6_input(struct mbuf **mp, int *offp, int proto) #pragma unused(proto) struct mbuf *m = *mp; int off = *offp; + struct ip *ip; struct ip6_hdr *ip6; struct esp *esp; struct esptail esptail; @@ -755,9 +807,11 @@ esp6_input(struct mbuf **mp, int *offp, int proto) struct secasvar *sav = NULL; size_t taillen; u_int16_t nxt; + char *nproto; const struct esp_algorithm *algo; int ivlen; size_t esplen; + sa_family_t ifamily; /* sanity check for alignment. */ if (off % 4 != 0 || m->m_pkthdr.len % 4 != 0) { @@ -789,6 +843,14 @@ esp6_input(struct mbuf **mp, int *offp, int proto) goto bad; } + nproto = ip6_get_prevhdr(m, off); + if (nproto == NULL || (*nproto != IPPROTO_ESP && + !(*nproto == IPPROTO_UDP && off >= sizeof(struct udphdr)))) { + ipseclog((LOG_DEBUG, "IPv6 ESP input: invalid protocol type\n")); + IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); + goto bad; + } + /* find the sassoc. 
*/ spi = esp->esp_spi; @@ -832,6 +894,15 @@ esp6_input(struct mbuf **mp, int *offp, int proto) seq = ntohl(((struct newesp *)esp)->esp_seq); + /* Save ICV from packet for verification later */ + size_t siz = 0; + unsigned char saved_icv[AH_MAXSUMSIZE]; + if (algo->finalizedecrypt) { + siz = algo->icvlen; + m_copydata(m, m->m_pkthdr.len - siz, siz, (caddr_t) saved_icv); + goto delay_icv; + } + if (!((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay && (sav->alg_auth && sav->key_auth))) goto noreplaycheck; @@ -858,7 +929,6 @@ esp6_input(struct mbuf **mp, int *offp, int proto) u_char sum0[AH_MAXSUMSIZE] __attribute__((aligned(4))); u_char sum[AH_MAXSUMSIZE] __attribute__((aligned(4))); const struct ah_algorithm *sumalgo; - size_t siz; sumalgo = ah_algorithm_lookup(sav->alg_auth); if (!sumalgo) @@ -892,6 +962,8 @@ esp6_input(struct mbuf **mp, int *offp, int proto) goto bad; } +delay_icv: + /* strip off the authentication data */ m_adj(m, -siz); ip6 = mtod(m, struct ip6_hdr *); @@ -969,6 +1041,23 @@ esp6_input(struct mbuf **mp, int *offp, int proto) m->m_flags |= M_DECRYPTED; + if (algo->finalizedecrypt) + { + unsigned char tag[algo->icvlen]; + if ((*algo->finalizedecrypt)(sav, tag, algo->icvlen)) { + ipseclog((LOG_ERR, "packet decryption ICV failure\n")); + IPSEC_STAT_INCREMENT(ipsecstat.in_inval); + KERNEL_DEBUG(DBG_FNC_DECRYPT | DBG_FUNC_END, 1,0,0,0,0); + goto bad; + } + if (memcmp(saved_icv, tag, algo->icvlen)) { + ipseclog((LOG_ERR, "packet decryption ICV mismatch\n")); + IPSEC_STAT_INCREMENT(ipsecstat.in_inval); + KERNEL_DEBUG(DBG_FNC_DECRYPT | DBG_FUNC_END, 1,0,0,0,0); + goto bad; + } + } + /* * find the trailer of the ESP. */ @@ -1002,8 +1091,38 @@ esp6_input(struct mbuf **mp, int *offp, int proto) } } + if (*nproto == IPPROTO_UDP) { + // offset includes the outer ip and udp header lengths. 
+ if (m->m_len < off) { + m = m_pullup(m, off); + if (!m) { + ipseclog((LOG_DEBUG, + "IPv6 ESP input: invalid udp encapsulated ESP packet length\n")); + IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); + goto bad; + } + } + + // check the UDP encap header to detect changes in the source port, and then strip the header + off -= sizeof(struct udphdr); // off no longer includes the udphdr's size + // if peer is behind nat and this is the latest esp packet + if ((sav->flags & SADB_X_EXT_NATT_DETECTED_PEER) != 0 && + (sav->flags & SADB_X_EXT_OLD) == 0 && + seq && sav->replay && + seq >= sav->replay->lastseq) { + struct udphdr *encap_uh = (__typeof__(encap_uh))(void *)((caddr_t)ip6 + off); + if (encap_uh->uh_sport && + ntohs(encap_uh->uh_sport) != sav->remote_ike_port) { + sav->remote_ike_port = ntohs(encap_uh->uh_sport); + } + } + ip6 = esp6_input_strip_udp_encap(m, off); + esp = (struct esp *)(void *)(((u_int8_t *)ip6) + off); + } + + /* was it transmitted over the IPsec tunnel SA? */ - if (ipsec6_tunnel_validate(m, off + esplen + ivlen, nxt, sav)) { + if (ipsec6_tunnel_validate(m, off + esplen + ivlen, nxt, sav, &ifamily)) { ifaddr_t ifa; struct sockaddr_storage addr; @@ -1017,32 +1136,79 @@ esp6_input(struct mbuf **mp, int *offp, int proto) u_int32_t flowinfo; /*net endian*/ flowinfo = ip6->ip6_flow; m_adj(m, off + esplen + ivlen); - if (m->m_len < sizeof(*ip6)) { + if (ifamily == AF_INET6) { + if (m->m_len < sizeof(*ip6)) { #ifndef PULLDOWN_TEST - /* - * m_pullup is prohibited in KAME IPv6 input processing - * but there's no other way! - */ + /* + * m_pullup is prohibited in KAME IPv6 input processing + * but there's no other way! + */ #else - /* okay to pullup in m_pulldown style */ + /* okay to pullup in m_pulldown style */ #endif - m = m_pullup(m, sizeof(*ip6)); - if (!m) { + m = m_pullup(m, sizeof(*ip6)); + if (!m) { + IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); + goto bad; + } + } + ip6 = mtod(m, struct ip6_hdr *); + /* ECN consideration. 
*/ + if (ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow) == 0) { IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); goto bad; } - } - ip6 = mtod(m, struct ip6_hdr *); - /* ECN consideration. */ - ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow); - if (!key_checktunnelsanity(sav, AF_INET6, - (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) { - ipseclog((LOG_ERR, "ipsec tunnel address mismatch " - "in IPv6 ESP input: %s %s\n", - ipsec6_logpacketstr(ip6, spi), - ipsec_logsastr(sav))); - IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); - goto bad; + if (!key_checktunnelsanity(sav, AF_INET6, + (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) { + ipseclog((LOG_ERR, "ipsec tunnel address mismatch " + "in IPv6 ESP input: %s %s\n", + ipsec6_logpacketstr(ip6, spi), + ipsec_logsastr(sav))); + IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); + goto bad; + } + + if (ip6_doscopedroute) { + struct sockaddr_in6 *ip6addr; + + bzero(&addr, sizeof(addr)); + ip6addr = (__typeof__(ip6addr))&addr; + ip6addr->sin6_family = AF_INET6; + ip6addr->sin6_len = sizeof(*ip6addr); + ip6addr->sin6_addr = ip6->ip6_dst; + } + } else if (ifamily == AF_INET) { + struct sockaddr_in *ipaddr; + + if (m->m_len < sizeof(*ip)) { + m = m_pullup(m, sizeof(*ip)); + if (!m) { + IPSEC_STAT_INCREMENT(ipsecstat.in_inval); + goto bad; + } + } + ip = mtod(m, struct ip *); + /* ECN consideration. 
*/ + if (ip46_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip->ip_tos) == 0) { + IPSEC_STAT_INCREMENT(ipsecstat.in_inval); + goto bad; + } + if (!key_checktunnelsanity(sav, AF_INET, + (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) { + ipseclog((LOG_ERR, "ipsec tunnel address mismatch " + "in ESP input: %s %s\n", + ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); + IPSEC_STAT_INCREMENT(ipsecstat.in_inval); + goto bad; + } + + if (ip_doscopedroute) { + bzero(&addr, sizeof(addr)); + ipaddr = (__typeof__(ipaddr))&addr; + ipaddr->sin_family = AF_INET; + ipaddr->sin_len = sizeof(*ipaddr); + ipaddr->sin_addr = ip->ip_dst; + } } key_sa_recordxfer(sav, m); @@ -1052,15 +1218,7 @@ esp6_input(struct mbuf **mp, int *offp, int proto) goto bad; } - if (ip6_doscopedroute) { - struct sockaddr_in6 *ip6addr; - - bzero(&addr, sizeof(addr)); - ip6addr = (__typeof__(ip6addr))&addr; - ip6addr->sin6_family = AF_INET6; - ip6addr->sin6_len = sizeof(*ip6addr); - ip6addr->sin6_addr = ip6->ip6_dst; - + if (ip_doscopedroute || ip6_doscopedroute) { // update the receiving interface address based on the inner address ifa = ifa_ifwithaddr((struct sockaddr *)&addr); if (ifa) { @@ -1318,7 +1476,7 @@ esp6_ctlinput(cmd, sa, d) if (sav->state == SADB_SASTATE_MATURE || sav->state == SADB_SASTATE_DYING) valid++; - key_freesav(sav, KEY_SADB_LOCKED); + key_freesav(sav, KEY_SADB_UNLOCKED); } /* XXX Further validation? */ diff --git a/bsd/netinet6/esp_output.c b/bsd/netinet6/esp_output.c index 9f6c0e0f0..e17336346 100644 --- a/bsd/netinet6/esp_output.c +++ b/bsd/netinet6/esp_output.c @@ -127,16 +127,15 @@ extern lck_mtx_t *sadb_mutex; * compute ESP header size. 
*/ size_t -esp_hdrsiz(isr) - struct ipsecrequest *isr; +esp_hdrsiz(__unused struct ipsecrequest *isr) { +#if 0 /* sanity check */ if (isr == NULL) panic("esp_hdrsiz: NULL was passed.\n"); -#if 0 lck_mtx_lock(sadb_mutex); { struct secasvar *sav; @@ -247,13 +246,14 @@ esp_output(m, nexthdrp, md, af, sav) u_int8_t nxt = 0; size_t plen; /*payload length to be encrypted*/ size_t espoff; + size_t esphlen; /* sizeof(struct esp/newesp) + ivlen */ int ivlen; int afnumber; size_t extendsiz; int error = 0; struct ipsecstat *stat; struct udphdr *udp = NULL; - int udp_encapsulate = (sav->flags & SADB_X_EXT_NATT && af == AF_INET && + int udp_encapsulate = (sav->flags & SADB_X_EXT_NATT && (af == AF_INET || af == AF_INET6) && (esp_udp_encap_port & 0xFFFF) != 0); KERNEL_DEBUG(DBG_FNC_ESPOUT | DBG_FUNC_START, sav->ivlen,0,0,0,0); @@ -339,7 +339,6 @@ esp_output(m, nexthdrp, md, af, sav) struct ip6_hdr *ip6 = NULL; #endif size_t esplen; /* sizeof(struct esp/newesp) */ - size_t esphlen; /* sizeof(struct esp/newesp) + ivlen */ size_t hlen = 0; /* ip header len */ if (sav->flags & SADB_X_EXT_OLD) { @@ -717,6 +716,21 @@ esp_output(m, nexthdrp, md, af, sav) /* * calculate ICV if required. 
*/ + size_t siz = 0; + u_char authbuf[AH_MAXSUMSIZE] __attribute__((aligned(4))); + + if (algo->finalizeencrypt) { + siz = algo->icvlen; + if ((*algo->finalizeencrypt)(sav, authbuf, siz)) { + ipseclog((LOG_ERR, "packet encryption ICV failure\n")); + IPSEC_STAT_INCREMENT(stat->out_inval); + error = EINVAL; + KERNEL_DEBUG(DBG_FNC_ENCRYPT | DBG_FUNC_END, 1,error,0,0,0); + goto fail; + } + goto fill_icv; + } + if (!sav->replay) goto noantireplay; if (!sav->key_auth) @@ -726,12 +740,6 @@ esp_output(m, nexthdrp, md, af, sav) { const struct ah_algorithm *aalgo; - u_char authbuf[AH_MAXSUMSIZE] __attribute__((aligned(4))); - u_char *p; - size_t siz; - #if INET - struct ip *ip; - #endif aalgo = ah_algorithm_lookup(sav->alg_auth); if (!aalgo) @@ -747,7 +755,13 @@ esp_output(m, nexthdrp, md, af, sav) IPSEC_STAT_INCREMENT(stat->out_inval); goto fail; } - + } + + fill_icv: + { + struct ip *ip; + u_char *p; + n = m; while (n->m_next) n = n->m_next; @@ -803,10 +817,22 @@ esp_output(m, nexthdrp, md, af, sav) if (udp_encapsulate) { struct ip *ip; - ip = mtod(m, struct ip *); - udp->uh_ulen = htons(ntohs(ip->ip_len) - (IP_VHL_HL(ip->ip_vhl) << 2)); - } + struct ip6_hdr *ip6; + switch (af) { + case AF_INET: + ip = mtod(m, struct ip *); + udp->uh_ulen = htons(ntohs(ip->ip_len) - (IP_VHL_HL(ip->ip_vhl) << 2)); + break; + case AF_INET6: + ip6 = mtod(m, struct ip6_hdr *); + udp->uh_ulen = htons(plen + siz + extendsiz + esphlen); + udp->uh_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst, htonl(ntohs(udp->uh_ulen) + IPPROTO_UDP)); + m->m_pkthdr.csum_flags = CSUM_UDPIPV6; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + break; + } + } noantireplay: lck_mtx_lock(sadb_mutex); diff --git a/bsd/netinet6/esp_rijndael.c b/bsd/netinet6/esp_rijndael.c index 4ca425239..d05f8bf7e 100644 --- a/bsd/netinet6/esp_rijndael.c +++ b/bsd/netinet6/esp_rijndael.c @@ -83,9 +83,18 @@ #define MAX_REALIGN_LEN 2000 #define AES_BLOCKLEN 16 +#define ESP_GCM_SALT_LEN 4 // RFC 4106 Section 4 +#define 
ESP_GCM_IVLEN 8 +#define ESP_GCM_ALIGN 16 extern lck_mtx_t *sadb_mutex; +typedef struct { + ccgcm_ctx *decrypt; + ccgcm_ctx *encrypt; + ccgcm_ctx ctxt[0]; +} aes_gcm_ctx; + int esp_aes_schedlen( __unused const struct esp_algorithm *algo) @@ -535,3 +544,448 @@ esp_cbc_encrypt_aes( return 0; } + +int +esp_gcm_schedlen( + __unused const struct esp_algorithm *algo) +{ + return (sizeof(aes_gcm_ctx) + aes_decrypt_get_ctx_size_gcm() + aes_encrypt_get_ctx_size_gcm() + ESP_GCM_ALIGN); +} + +int +esp_gcm_schedule( __unused const struct esp_algorithm *algo, + struct secasvar *sav) +{ + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + aes_gcm_ctx *ctx = (aes_gcm_ctx*)P2ROUNDUP(sav->sched, ESP_GCM_ALIGN); + int rc; + + ctx->decrypt = &ctx->ctxt[0]; + ctx->encrypt = &ctx->ctxt[aes_decrypt_get_ctx_size_gcm() / sizeof(ccgcm_ctx)]; + + rc = aes_decrypt_key_gcm((const unsigned char *) _KEYBUF(sav->key_enc), _KEYLEN(sav->key_enc)-ESP_GCM_SALT_LEN, ctx->decrypt); + if (rc) { + return (rc); + } + + rc = aes_encrypt_key_gcm((const unsigned char *) _KEYBUF(sav->key_enc), _KEYLEN(sav->key_enc)-ESP_GCM_SALT_LEN, ctx->encrypt); + if (rc) { + return (rc); + } + return (rc); +} + +int +esp_gcm_encrypt_finalize(struct secasvar *sav, + unsigned char *tag, unsigned int tag_bytes) +{ + aes_gcm_ctx *ctx = (aes_gcm_ctx*)P2ROUNDUP(sav->sched, ESP_GCM_ALIGN); + return (aes_encrypt_finalize_gcm(tag, tag_bytes, ctx->encrypt)); +} + +int +esp_gcm_decrypt_finalize(struct secasvar *sav, + unsigned char *tag, unsigned int tag_bytes) +{ + aes_gcm_ctx *ctx = (aes_gcm_ctx*)P2ROUNDUP(sav->sched, ESP_GCM_ALIGN); + return (aes_decrypt_finalize_gcm(tag, tag_bytes, ctx->decrypt)); +} + +int +esp_gcm_encrypt_aes( + struct mbuf *m, + size_t off, + __unused size_t plen, + struct secasvar *sav, + const struct esp_algorithm *algo __unused, + int ivlen) +{ + struct mbuf *s; + struct mbuf *d, *d0, *dp; + int soff; /* offset from the head of chain, to head of this mbuf */ + int sn, dn; /* offset from the head of the 
mbuf, to meat */ + size_t ivoff, bodyoff; + u_int8_t *dptr, *sp, *sp_unaligned, *sp_aligned = NULL; + aes_gcm_ctx *ctx; + struct mbuf *scut; + int scutoff; + int i, len; + unsigned char nonce[ESP_GCM_SALT_LEN+ivlen]; + + if (ivlen != ESP_GCM_IVLEN) { + ipseclog((LOG_ERR, "%s: unsupported ivlen %d\n", __FUNCTION__, ivlen)); + m_freem(m); + return EINVAL; + } + + if (sav->flags & SADB_X_EXT_OLD) { + /* RFC 1827 */ + ivoff = off + sizeof(struct esp); + bodyoff = off + sizeof(struct esp) + ivlen; + } else { + ivoff = off + sizeof(struct newesp); + bodyoff = off + sizeof(struct newesp) + ivlen; + } + + m_copyback(m, ivoff, ivlen, sav->iv); + + if (m->m_pkthdr.len < bodyoff) { + ipseclog((LOG_ERR, "%s: bad len %d/%lu\n", __FUNCTION__, + m->m_pkthdr.len, (u_int32_t)bodyoff)); + m_freem(m); + return EINVAL; + } + + /* Set IV */ + memcpy(nonce, _KEYBUF(sav->key_enc)+_KEYLEN(sav->key_enc)-ESP_GCM_SALT_LEN, ESP_GCM_SALT_LEN); + memcpy(nonce+ESP_GCM_SALT_LEN, sav->iv, ivlen); + + ctx = (aes_gcm_ctx *)P2ROUNDUP(sav->sched, ESP_GCM_ALIGN); + if (aes_encrypt_set_iv_gcm(nonce, sizeof(nonce), ctx->encrypt)) { + ipseclog((LOG_ERR, "%s: failed to set IV\n", __FUNCTION__)); + m_freem(m); + bzero(nonce, sizeof(nonce)); + return EINVAL; + } + bzero(nonce, sizeof(nonce)); + + /* Set Additional Authentication Data */ + if (!(sav->flags & SADB_X_EXT_OLD)) { + struct newesp esp; + m_copydata(m, off, sizeof(esp), (caddr_t) &esp); + if (aes_encrypt_aad_gcm((unsigned char*)&esp, sizeof(esp), ctx->encrypt)) { + ipseclog((LOG_ERR, "%s: packet decryption AAD failure\n", __FUNCTION__)); + m_freem(m); + return EINVAL; + } + } + + s = m; + soff = sn = dn = 0; + d = d0 = dp = NULL; + sp = dptr = NULL; + + /* skip headers/IV */ + while (soff < bodyoff) { + if (soff + s->m_len > bodyoff) { + sn = bodyoff - soff; + break; + } + + soff += s->m_len; + s = s->m_next; + } + scut = s; + scutoff = sn; + + /* skip over empty mbuf */ + while (s && s->m_len == 0) + s = s->m_next; + + while (soff < 
m->m_pkthdr.len) { + /* source */ + sp = mtod(s, u_int8_t *) + sn; + len = s->m_len - sn; + + /* destination */ + if (!d || (dn + len > d->m_len)) { + if (d) + dp = d; + MGET(d, M_DONTWAIT, MT_DATA); + i = m->m_pkthdr.len - (soff + sn); + if (d && i > MLEN) { + MCLGET(d, M_DONTWAIT); + if ((d->m_flags & M_EXT) == 0) { + d = m_mbigget(d, M_DONTWAIT); + if ((d->m_flags & M_EXT) == 0) { + m_free(d); + d = NULL; + } + } + } + if (!d) { + m_freem(m); + if (d0) + m_freem(d0); + return ENOBUFS; + } + if (!d0) + d0 = d; + if (dp) + dp->m_next = d; + + // try to make mbuf data aligned + if (!IPSEC_IS_P2ALIGNED(d->m_data)) { + m_adj(d, IPSEC_GET_P2UNALIGNED_OFS(d->m_data)); + } + + d->m_len = M_TRAILINGSPACE(d); + + if (d->m_len > i) + d->m_len = i; + + dptr = mtod(d, u_int8_t *); + dn = 0; + } + + /* adjust len if greater than space available */ + if (len > d->m_len - dn) + len = d->m_len - dn; + + /* encrypt */ + // check input pointer alignment and use a separate aligned buffer (if sp is not aligned on 4-byte boundary). 
+ if (IPSEC_IS_P2ALIGNED(sp)) { + sp_unaligned = NULL; + } else { + sp_unaligned = sp; + if (len > MAX_REALIGN_LEN) { + return ENOBUFS; + } + if (sp_aligned == NULL) { + sp_aligned = (u_int8_t *)_MALLOC(MAX_REALIGN_LEN, M_SECA, M_DONTWAIT); + if (sp_aligned == NULL) + return ENOMEM; + } + sp = sp_aligned; + memcpy(sp, sp_unaligned, len); + } + + if (aes_encrypt_gcm(sp, len, dptr+dn, ctx->encrypt)) { + ipseclog((LOG_ERR, "%s: failed to encrypt\n", __FUNCTION__)); + m_freem(m); + return EINVAL; + } + + // update unaligned pointers + if (!IPSEC_IS_P2ALIGNED(sp_unaligned)) { + sp = sp_unaligned; + } + + /* update offsets */ + sn += len; + dn += len; + + /* find the next source block and skip empty mbufs */ + while (s && sn >= s->m_len) { + sn -= s->m_len; + soff += s->m_len; + s = s->m_next; + } + } + + /* free un-needed source mbufs and add dest mbufs to chain */ + m_freem(scut->m_next); + scut->m_len = scutoff; + scut->m_next = d0; + + // free memory + if (sp_aligned != NULL) { + FREE(sp_aligned, M_SECA); + sp_aligned = NULL; + } + + /* generate new iv */ + key_sa_stir_iv(sav); + + return 0; +} + +int +esp_gcm_decrypt_aes(m, off, sav, algo, ivlen) + struct mbuf *m; + size_t off; + struct secasvar *sav; + const struct esp_algorithm *algo __unused; + int ivlen; +{ + struct mbuf *s; + struct mbuf *d, *d0, *dp; + int soff; /* offset from the head of chain, to head of this mbuf */ + int sn, dn; /* offset from the head of the mbuf, to meat */ + size_t ivoff, bodyoff; + u_int8_t iv[ESP_GCM_IVLEN] __attribute__((aligned(4))), *dptr; + u_int8_t *sp, *sp_unaligned, *sp_aligned = NULL; + aes_gcm_ctx *ctx; + struct mbuf *scut; + int scutoff; + int i, len; + unsigned char nonce[ESP_GCM_SALT_LEN+ivlen]; + + if (ivlen != ESP_GCM_IVLEN) { + ipseclog((LOG_ERR, "%s: unsupported ivlen %d\n", __FUNCTION__, ivlen)); + m_freem(m); + return EINVAL; + } + + if (sav->flags & SADB_X_EXT_OLD) { + /* RFC 1827 */ + ivoff = off + sizeof(struct esp); + bodyoff = off + sizeof(struct esp) + ivlen; + 
} else { + ivoff = off + sizeof(struct newesp); + bodyoff = off + sizeof(struct newesp) + ivlen; + } + + if (m->m_pkthdr.len < bodyoff) { + ipseclog((LOG_ERR, "%s: bad len %d/%lu\n", __FUNCTION__, + m->m_pkthdr.len, (u_int32_t)bodyoff)); + m_freem(m); + return EINVAL; + } + + /* grab iv */ + m_copydata(m, ivoff, ivlen, (caddr_t) iv); + + /* Set IV */ + memcpy(nonce, _KEYBUF(sav->key_enc)+_KEYLEN(sav->key_enc)-ESP_GCM_SALT_LEN, ESP_GCM_SALT_LEN); + memcpy(nonce+ESP_GCM_SALT_LEN, iv, ivlen); + + ctx = (aes_gcm_ctx *)P2ROUNDUP(sav->sched, ESP_GCM_ALIGN); + if (aes_decrypt_set_iv_gcm(nonce, sizeof(nonce), ctx->decrypt)) { + ipseclog((LOG_ERR, "%s: failed to set IV\n", __FUNCTION__)); + m_freem(m); + bzero(nonce, sizeof(nonce)); + return EINVAL; + } + bzero(nonce, sizeof(nonce)); + + /* Set Additional Authentication Data */ + if (!(sav->flags & SADB_X_EXT_OLD)) { + struct newesp esp; + m_copydata(m, off, sizeof(esp), (caddr_t) &esp); + if (aes_decrypt_aad_gcm((unsigned char*)&esp, sizeof(esp), ctx->decrypt)) { + ipseclog((LOG_ERR, "%s: packet decryption AAD failure\n", __FUNCTION__)); + return EINVAL; + } + } + + s = m; + soff = sn = dn = 0; + d = d0 = dp = NULL; + sp = dptr = NULL; + + /* skip header/IV offset */ + while (soff < bodyoff) { + if (soff + s->m_len > bodyoff) { + sn = bodyoff - soff; + break; + } + + soff += s->m_len; + s = s->m_next; + } + scut = s; + scutoff = sn; + + /* skip over empty mbuf */ + while (s && s->m_len == 0) + s = s->m_next; + + while (soff < m->m_pkthdr.len) { + /* source */ + sp = mtod(s, u_int8_t *) + sn; + len = s->m_len - sn; + + /* destination */ + if (!d || (dn + len > d->m_len)) { + if (d) + dp = d; + MGET(d, M_DONTWAIT, MT_DATA); + i = m->m_pkthdr.len - (soff + sn); + if (d && i > MLEN) { + MCLGET(d, M_DONTWAIT); + if ((d->m_flags & M_EXT) == 0) { + d = m_mbigget(d, M_DONTWAIT); + if ((d->m_flags & M_EXT) == 0) { + m_free(d); + d = NULL; + } + } + } + if (!d) { + m_freem(m); + if (d0) + m_freem(d0); + return ENOBUFS; + } + if 
(!d0) + d0 = d; + if (dp) + dp->m_next = d; + + // try to make mbuf data aligned + if (!IPSEC_IS_P2ALIGNED(d->m_data)) { + m_adj(d, IPSEC_GET_P2UNALIGNED_OFS(d->m_data)); + } + + d->m_len = M_TRAILINGSPACE(d); + + if (d->m_len > i) + d->m_len = i; + + dptr = mtod(d, u_int8_t *); + dn = 0; + } + + /* adjust len if greater than space available in dest */ + if (len > d->m_len - dn) + len = d->m_len - dn; + + /* Decrypt */ + // check input pointer alignment and use a separate aligned buffer (if sp is unaligned on 4-byte boundary). + if (IPSEC_IS_P2ALIGNED(sp)) { + sp_unaligned = NULL; + } else { + sp_unaligned = sp; + if (len > MAX_REALIGN_LEN) { + return ENOBUFS; + } + if (sp_aligned == NULL) { + sp_aligned = (u_int8_t *)_MALLOC(MAX_REALIGN_LEN, M_SECA, M_DONTWAIT); + if (sp_aligned == NULL) + return ENOMEM; + } + sp = sp_aligned; + memcpy(sp, sp_unaligned, len); + } + // no need to check output pointer alignment + + if (aes_decrypt_gcm(sp, len, dptr + dn, ctx->decrypt)) { + ipseclog((LOG_ERR, "%s: failed to decrypt\n", __FUNCTION__)); + m_freem(m); + return EINVAL; + } + + // update unaligned pointers + if (!IPSEC_IS_P2ALIGNED(sp_unaligned)) { + sp = sp_unaligned; + } + + /* udpate offsets */ + sn += len; + dn += len; + + /* find the next source block */ + while (s && sn >= s->m_len) { + sn -= s->m_len; + soff += s->m_len; + s = s->m_next; + } + } + + /* free un-needed source mbufs and add dest mbufs to chain */ + m_freem(scut->m_next); + scut->m_len = scutoff; + scut->m_next = d0; + + // free memory + if (sp_aligned != NULL) { + FREE(sp_aligned, M_SECA); + sp_aligned = NULL; + } + + /* just in case */ + bzero(iv, sizeof(iv)); + + return 0; +} diff --git a/bsd/netinet6/esp_rijndael.h b/bsd/netinet6/esp_rijndael.h index 098d13321..75d92c6e8 100644 --- a/bsd/netinet6/esp_rijndael.h +++ b/bsd/netinet6/esp_rijndael.h @@ -68,4 +68,10 @@ int esp_cbc_encrypt_aes(struct mbuf *, size_t, size_t, struct secasvar *, const struct esp_algorithm *, int); +int esp_gcm_schedlen(const 
struct esp_algorithm *); +int esp_gcm_schedule(const struct esp_algorithm *, struct secasvar *); +int esp_gcm_encrypt_aes(struct mbuf *, size_t, size_t, struct secasvar *, const struct esp_algorithm *, int); +int esp_gcm_decrypt_aes(struct mbuf *, size_t, struct secasvar *, const struct esp_algorithm *, int); +int esp_gcm_encrypt_finalize(struct secasvar *, unsigned char *, unsigned int); +int esp_gcm_decrypt_finalize(struct secasvar *, unsigned char *, unsigned int); #endif /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/netinet6/frag6.c b/bsd/netinet6/frag6.c index abe6b2e21..1a0718415 100644 --- a/bsd/netinet6/frag6.c +++ b/bsd/netinet6/frag6.c @@ -234,8 +234,8 @@ frag6_icmp6_timeex_error(struct fq6_head *diq6) MBUFQ_FOREACH_SAFE(m, diq6, m_tmp) { MBUFQ_REMOVE(diq6, m); MBUFQ_NEXT(m) = NULL; - icmp6_error(m, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_REASSEMBLY, 0); + icmp6_error_flag(m, ICMP6_TIME_EXCEEDED, + ICMP6_TIME_EXCEED_REASSEMBLY, 0, 0); } } } diff --git a/bsd/netinet6/icmp6.c b/bsd/netinet6/icmp6.c index 99d92784c..fb8d179bf 100644 --- a/bsd/netinet6/icmp6.c +++ b/bsd/netinet6/icmp6.c @@ -284,7 +284,11 @@ icmp6_error2(struct mbuf *m, int type, int code, int param, * Generate an error packet of type error in response to bad IP6 packet. 
*/ void -icmp6_error(struct mbuf *m, int type, int code, int param) +icmp6_error(struct mbuf *m, int type, int code, int param) { + icmp6_error_flag(m, type, code, param, ICMP6_ERROR_RST_MRCVIF); +} + +void icmp6_error_flag (struct mbuf *m, int type, int code, int param, int flags) { struct ip6_hdr *oip6, *nip6; struct icmp6_hdr *icmp6; @@ -393,7 +397,7 @@ icmp6_error(struct mbuf *m, int type, int code, int param) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); - M_PREPEND(m, preplen, M_DONTWAIT); + M_PREPEND(m, preplen, M_DONTWAIT, 1); if (m && m->m_len < preplen) m = m_pullup(m, preplen); if (m == NULL) { @@ -420,7 +424,9 @@ icmp6_error(struct mbuf *m, int type, int code, int param) * clear m->m_pkthdr.rcvif for safety, we should have enough scope * information in ip header (nip6). */ - m->m_pkthdr.rcvif = NULL; + if (flags & ICMP6_ERROR_RST_MRCVIF) { + m->m_pkthdr.rcvif = NULL; + } icmp6stat.icp6s_outhist[type]++; icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ @@ -2140,7 +2146,7 @@ icmp6_reflect(m, off) int type, code; struct ifnet *outif = NULL; struct sockaddr_in6 sa6_src, sa6_dst; - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = NULL; u_int32_t oflow; struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR, 0 }; @@ -2291,7 +2297,6 @@ icmp6_reflect(m, off) ip6->ip6_flow |= (oflow & htonl(0x0ff00000)); } ip6->ip6_nxt = IPPROTO_ICMPV6; - lck_rw_lock_shared(nd_if_rwlock); if (outif != NULL && (ndi = ND_IFINFO(outif)) != NULL && ndi->initialized) { lck_mtx_lock(&ndi->lock); @@ -2308,23 +2313,21 @@ icmp6_reflect(m, off) } else { ip6->ip6_hlim = ip6_defhlim; } - lck_rw_done(nd_if_rwlock); /* Use the same traffic class as in the request to match IPv4 */ icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, - sizeof(struct ip6_hdr), plen); + sizeof(struct ip6_hdr), plen); /* * XXX option handling */ - m->m_flags &= 
~(M_BCAST|M_MCAST); if (outif != NULL) { ifnet_release(outif); outif = NULL; } - m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.csum_data = 0; m->m_pkthdr.csum_flags = 0; ip6_output(m, NULL, NULL, IPV6_OUTARGS, NULL, &outif, &ip6oa); diff --git a/bsd/netinet6/in6.c b/bsd/netinet6/in6.c index e0db9a422..40ece5dfc 100644 --- a/bsd/netinet6/in6.c +++ b/bsd/netinet6/in6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2014 Apple Inc. All rights reserved. + * Copyright (c) 2003-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -210,15 +210,15 @@ static int in6_to_kamescope(struct sockaddr_in6 *, struct ifnet *); static void in6_ifaddr_set_dadprogress(struct in6_ifaddr *); static int in6_getassocids(struct socket *, uint32_t *, user_addr_t); -static int in6_getconnids(struct socket *, associd_t, uint32_t *, user_addr_t); -static int in6_getconninfo(struct socket *, connid_t, uint32_t *, +static int in6_getconnids(struct socket *, sae_associd_t, uint32_t *, + user_addr_t); +static int in6_getconninfo(struct socket *, sae_connid_t, uint32_t *, uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *, uint32_t *, user_addr_t, uint32_t *); static void in6_if_up_dad_start(struct ifnet *); extern lck_mtx_t *nd6_mutex; -extern int in6_init2done; #define IN6IFA_TRACE_HIST_SIZE 32 /* size of trace history */ @@ -799,7 +799,7 @@ in6ctl_llstop(struct ifnet *ifp) pr0.ndpr_ifp = ifp; pr0.ndpr_prefix.sin6_addr.s6_addr16[0] = IPV6_ADDR_INT16_ULL; in6_setscope(&pr0.ndpr_prefix.sin6_addr, ifp, NULL); - pr = nd6_prefix_lookup(&pr0); + pr = nd6_prefix_lookup(&pr0, ND6_PREFIX_EXPIRY_UNSPEC); if (pr) { lck_mtx_lock(nd6_mutex); NDPR_LOCK(pr); @@ -1144,64 +1144,85 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, if (ifp == NULL) return (ENXIO); + /* + * Unlock the socket since ifnet_ioctl() may be invoked by + * one of the ioctl handlers below. Socket will be re-locked + * prior to returning. 
+ */ + if (so != NULL) { + socket_unlock(so, 0); + so_unlocked = TRUE; + } + /* * ioctls which require ifp but not interface address. */ switch (cmd) { case SIOCAUTOCONF_START: /* struct in6_ifreq */ - if (!privileged) - return (EPERM); - return (in6_autoconf(ifp, TRUE)); - /* NOTREACHED */ + if (!privileged) { + error = EPERM; + goto done; + } + error = in6_autoconf(ifp, TRUE); + goto done; case SIOCAUTOCONF_STOP: /* struct in6_ifreq */ - if (!privileged) - return (EPERM); - return (in6_autoconf(ifp, FALSE)); - /* NOTREACHED */ + if (!privileged) { + error = EPERM; + goto done; + } + error = in6_autoconf(ifp, FALSE); + goto done; case SIOCLL_START_32: /* struct in6_aliasreq_32 */ case SIOCLL_START_64: /* struct in6_aliasreq_64 */ - if (!privileged) - return (EPERM); - return (in6ctl_llstart(ifp, cmd, data)); - /* NOTREACHED */ + if (!privileged) { + error = EPERM; + goto done; + } + error = in6ctl_llstart(ifp, cmd, data); + goto done; case SIOCLL_STOP: /* struct in6_ifreq */ - if (!privileged) - return (EPERM); - return (in6ctl_llstop(ifp)); - /* NOTREACHED */ + if (!privileged) { + error = EPERM; + goto done; + } + error = in6ctl_llstop(ifp); + goto done; case SIOCSETROUTERMODE_IN6: /* struct in6_ifreq */ - if (!privileged) - return (EPERM); - + if (!privileged) { + error = EPERM; + goto done; + } bcopy(&((struct in6_ifreq *)(void *)data)->ifr_intval, &intval, sizeof (intval)); - return (in6_setrouter(ifp, intval)); - /* NOTREACHED */ + error = in6_setrouter(ifp, intval); + goto done; case SIOCPROTOATTACH_IN6_32: /* struct in6_aliasreq_32 */ case SIOCPROTOATTACH_IN6_64: /* struct in6_aliasreq_64 */ - if (!privileged) - return (EPERM); - return (in6_domifattach(ifp)); - /* NOTREACHED */ + if (!privileged) { + error = EPERM; + goto done; + } + error = in6_domifattach(ifp); + goto done; case SIOCPROTODETACH_IN6: /* struct in6_ifreq */ - if (!privileged) - return (EPERM); - + if (!privileged) { + error = EPERM; + goto done; + } /* Cleanup interface routes and 
addresses */ in6_purgeif(ifp); if ((error = proto_unplumb(PF_INET6, ifp))) log(LOG_ERR, "SIOCPROTODETACH_IN6: %s error=%d\n", if_name(ifp), error); - return (error); - /* NOTREACHED */ + goto done; case SIOCSNDFLUSH_IN6: /* struct in6_ifreq */ case SIOCSPFXFLUSH_IN6: /* struct in6_ifreq */ @@ -1209,8 +1230,10 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, case SIOCSDEFIFACE_IN6_32: /* struct in6_ndifreq_32 */ case SIOCSDEFIFACE_IN6_64: /* struct in6_ndifreq_64 */ case SIOCSIFINFO_FLAGS: /* struct in6_ndireq */ - if (!privileged) - return (EPERM); + if (!privileged) { + error = EPERM; + goto done; + } /* FALLTHRU */ case OSIOCGIFINFO_IN6: /* struct in6_ondireq */ case SIOCGIFINFO_IN6: /* struct in6_ondireq */ @@ -1222,8 +1245,8 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, case SIOCGNBRINFO_IN6_64: /* struct in6_nbrinfo_64 */ case SIOCGDEFIFACE_IN6_32: /* struct in6_ndifreq_32 */ case SIOCGDEFIFACE_IN6_64: /* struct in6_ndifreq_64 */ - return (nd6_ioctl(cmd, data, ifp)); - /* NOTREACHED */ + error = nd6_ioctl(cmd, data, ifp); + goto done; case SIOCSIFPREFIX_IN6: /* struct in6_prefixreq (deprecated) */ case SIOCDIFPREFIX_IN6: /* struct in6_prefixreq (deprecated) */ @@ -1234,26 +1257,27 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, log(LOG_NOTICE, "prefix ioctls are now invalidated. 
" "please use ifconfig.\n"); - return (EOPNOTSUPP); - /* NOTREACHED */ + error = EOPNOTSUPP; + goto done; case SIOCSSCOPE6: /* struct in6_ifreq (deprecated) */ case SIOCGSCOPE6: /* struct in6_ifreq (deprecated) */ case SIOCGSCOPE6DEF: /* struct in6_ifreq (deprecated) */ - return (EOPNOTSUPP); - /* NOTREACHED */ + error = EOPNOTSUPP; + goto done; case SIOCLL_CGASTART_32: /* struct in6_llstartreq_32 */ case SIOCLL_CGASTART_64: /* struct in6_llstartreq_64 */ if (!privileged) - return (EPERM); - return (in6ctl_cgastart(ifp, cmd, data)); - /* NOTREACHED */ + error = EPERM; + else + error = in6ctl_cgastart(ifp, cmd, data); + goto done; case SIOCGIFSTAT_IN6: /* struct in6_ifreq */ case SIOCGIFSTAT_ICMP6: /* struct in6_ifreq */ - return (in6ctl_gifstat(ifp, cmd, ifr)); - /* NOTREACHED */ + error = in6ctl_gifstat(ifp, cmd, ifr); + goto done; } /* @@ -1268,13 +1292,15 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, * on a single interface, SIOCSIFxxx ioctls are deprecated. */ /* we decided to obsolete this command (20000704) */ - return (EOPNOTSUPP); - /* NOTREACHED */ + error = EOPNOTSUPP; + goto done; case SIOCAIFADDR_IN6_32: /* struct in6_aliasreq_32 */ case SIOCAIFADDR_IN6_64: /* struct in6_aliasreq_64 */ - if (!privileged) - return (EPERM); + if (!privileged) { + error = EPERM; + goto done; + } /* * Convert user ifra to the kernel form, when appropriate. 
* This allows the conversion between different data models @@ -1289,8 +1315,10 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, case SIOCDIFADDR_IN6: /* struct in6_ifreq */ case SIOCSIFALIFETIME_IN6: /* struct in6_ifreq */ - if (!privileged) - return (EPERM); + if (!privileged) { + error = EPERM; + goto done; + } /* FALLTHRU */ case SIOCGIFADDR_IN6: /* struct in6_ifreq */ case SIOCGIFDSTADDR_IN6: /* struct in6_ifreq */ @@ -1323,12 +1351,15 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, htons(ifp->if_index); } else if (sa6->sin6_addr.s6_addr16[1] != htons(ifp->if_index)) { - return (EINVAL); /* link ID contradicts */ + error = EINVAL; /* link ID contradicts */ + goto done; } if (sa6->sin6_scope_id) { if (sa6->sin6_scope_id != - (u_int32_t)ifp->if_index) - return (EINVAL); + (u_int32_t)ifp->if_index) { + error = EINVAL; + goto done; + } sa6->sin6_scope_id = 0; /* XXX: good way? */ } } @@ -1346,8 +1377,10 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, */ switch (cmd) { case SIOCDIFADDR_IN6: /* struct in6_ifreq */ - if (ia == NULL) - return (EADDRNOTAVAIL); + if (ia == NULL) { + error = EADDRNOTAVAIL; + goto done; + } /* FALLTHROUGH */ case SIOCAIFADDR_IN6_32: /* struct in6_aliasreq_32 */ case SIOCAIFADDR_IN6_64: /* struct in6_aliasreq_64 */ @@ -1365,16 +1398,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, break; } - /* - * Unlock the socket since ifnet_ioctl() may be invoked by - * one of the ioctl handlers below. Socket will be re-locked - * prior to returning. - */ - if (so != NULL) { - socket_unlock(so, 0); - so_unlocked = TRUE; - } - /* * And finally process address-related ioctls. */ @@ -1508,8 +1531,8 @@ in6ctl_aifaddr(struct ifnet *ifp, struct in6_aliasreq *ifra) pr0.ndpr_stateflags |= NDPRF_STATIC; lck_mtx_init(&pr0.ndpr_lock, ifa_mtx_grp, ifa_mtx_attr); - /* add the prefix if there's one. 
*/ - if ((pr = nd6_prefix_lookup(&pr0)) == NULL) { + /* add the prefix if there's none. */ + if ((pr = nd6_prefix_lookup(&pr0, ND6_PREFIX_EXPIRY_NEVER)) == NULL) { /* * nd6_prelist_add will install the corresponding interface * route. @@ -1530,7 +1553,7 @@ in6ctl_aifaddr(struct ifnet *ifp, struct in6_aliasreq *ifra) /* if this is a new autoconfed addr */ addtmp = FALSE; - if ((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 && ia->ia6_ndpr == NULL) { + if (ia->ia6_ndpr == NULL) { NDPR_LOCK(pr); ++pr->ndpr_addrcnt; VERIFY(pr->ndpr_addrcnt != 0); @@ -1541,7 +1564,11 @@ in6ctl_aifaddr(struct ifnet *ifp, struct in6_aliasreq *ifra) * If this is the first autoconf address from the prefix, * create a temporary address as well (when specified). */ - addtmp = (ip6_use_tempaddr && pr->ndpr_addrcnt == 1); + if ((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 && + ip6_use_tempaddr && + pr->ndpr_addrcnt == 1) { + addtmp = true; + } NDPR_UNLOCK(pr); } @@ -1606,21 +1633,11 @@ in6ctl_difaddr(struct ifnet *ifp, struct in6_ifaddr *ia) ia->ia_prefixmask.sin6_addr.s6_addr32[i]; } IFA_UNLOCK(&ia->ia_ifa); - /* - * The logic of the following condition is a bit complicated. - * We expire the prefix when - * 1. the address obeys autoconfiguration and it is the - * only owner of the associated prefix, or - * 2. the address does not obey autoconf and there is no - * other owner of the prefix. 
- */ - if ((pr = nd6_prefix_lookup(&pr0)) != NULL) { + + if ((pr = nd6_prefix_lookup(&pr0, ND6_PREFIX_EXPIRY_UNSPEC)) != NULL) { IFA_LOCK(&ia->ia_ifa); NDPR_LOCK(pr); - if (((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 && - pr->ndpr_addrcnt == 1) || - ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0 && - pr->ndpr_addrcnt == 0)) { + if (pr->ndpr_addrcnt == 1) { /* XXX: just for expiration */ pr->ndpr_expire = 1; } @@ -1714,9 +1731,8 @@ in6_setrouter(struct ifnet *ifp, int enable) return (ENODEV); if (enable) { - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = NULL; - lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifp); if (ndi != NULL && ndi->initialized) { lck_mtx_lock(&ndi->lock); @@ -1724,14 +1740,10 @@ in6_setrouter(struct ifnet *ifp, int enable) /* No proxy if we are an advertising router */ ndi->flags &= ~ND6_IFF_PROXY_PREFIXES; lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); (void) nd6_if_prproxy(ifp, FALSE); } else { lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); } - } else { - lck_rw_done(nd_if_rwlock); } } @@ -1789,7 +1801,7 @@ in6_ifaupdate_aux(struct in6_ifaddr *ia, struct ifnet *ifp, int ifaupflags) struct in6_multi *in6m_sol; struct in6_multi_mship *imm; struct rtentry *rt; - int delay, error; + int delay, error = 0; VERIFY(ifp != NULL && ia != NULL); ifa = &ia->ia_ifa; @@ -1985,15 +1997,6 @@ in6_ifaupdate_aux(struct in6_ifaddr *ia, struct ifnet *ifp, int ifaupflags) } #undef MLTMASK_LEN - /* - * Make sure to initialize ND6 information. this is to workaround - * issues with interfaces with IPv6 addresses, which have never brought - * up. We are assuming that it is safe to nd6_ifattach multiple times. 
- * NOTE: this is how stf0 gets initialized - */ - if ((error = nd6_ifattach(ifp)) != 0) - goto unwind; - /* Ensure nd6_service() is scheduled as soon as it's convenient */ ++nd6_sched_timeout_want; @@ -2430,39 +2433,36 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp) } /* - * When an autoconfigured address is being removed, release the - * reference to the base prefix. Also, since the release might - * affect the status of other (detached) addresses, call + * When IPv6 address is being removed, release the + * reference to the base prefix. + * Also, since the release might, affect the status + * of other (detached) addresses, call * pfxlist_onlink_check(). */ ifa = &oia->ia_ifa; IFA_LOCK(ifa); + if (oia->ia6_ndpr == NULL) { + log(LOG_NOTICE, "in6_unlink_ifa: IPv6 address " + "0x%llx has no prefix\n", + (uint64_t)VM_KERNEL_ADDRPERM(oia)); + } else { + struct nd_prefix *pr = oia->ia6_ndpr; + oia->ia6_flags &= ~IN6_IFF_AUTOCONF; + oia->ia6_ndpr = NULL; + NDPR_LOCK(pr); + VERIFY(pr->ndpr_addrcnt != 0); + pr->ndpr_addrcnt--; + NDPR_UNLOCK(pr); + NDPR_REMREF(pr); /* release addr reference */ + } + IFA_UNLOCK(ifa); + lck_rw_done(&in6_ifaddr_rwlock); + if ((oia->ia6_flags & IN6_IFF_AUTOCONF) != 0) { - if (oia->ia6_ndpr == NULL) { - log(LOG_NOTICE, "in6_unlink_ifa: autoconf'ed address " - "0x%llx has no prefix\n", - (uint64_t)VM_KERNEL_ADDRPERM(oia)); - } else { - struct nd_prefix *pr = oia->ia6_ndpr; - - oia->ia6_flags &= ~IN6_IFF_AUTOCONF; - oia->ia6_ndpr = NULL; - NDPR_LOCK(pr); - VERIFY(pr->ndpr_addrcnt != 0); - pr->ndpr_addrcnt--; - NDPR_UNLOCK(pr); - NDPR_REMREF(pr); /* release addr reference */ - } - IFA_UNLOCK(ifa); - lck_rw_done(&in6_ifaddr_rwlock); lck_mtx_lock(nd6_mutex); pfxlist_onlink_check(); lck_mtx_unlock(nd6_mutex); - } else { - IFA_UNLOCK(ifa); - lck_rw_done(&in6_ifaddr_rwlock); } - /* * release another refcnt for the link from in6_ifaddrs. 
* Do this only if it's not already unlinked in the event that we lost @@ -3481,9 +3481,8 @@ in6_setmaxmtu(void) ifnet_head_lock_shared(); TAILQ_FOREACH(ifp, &ifnet_head, if_list) { - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = NULL; - lck_rw_lock_shared(nd_if_rwlock); if ((ndi = ND_IFINFO(ifp)) != NULL && !ndi->initialized) ndi = NULL; if (ndi != NULL) @@ -3493,7 +3492,6 @@ in6_setmaxmtu(void) maxmtu = IN6_LINKMTU(ifp); if (ndi != NULL) lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); } ifnet_head_done(); if (maxmtu) /* update only when maxmtu is positive */ @@ -3854,9 +3852,8 @@ in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia) if ((ifp->if_eflags & IFEF_IPV6_ROUTER) != 0) { optdad = 0; } else { - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = NULL; - lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifp); VERIFY (ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); @@ -3864,7 +3861,6 @@ in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia) optdad = 0; } lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); } } @@ -3880,6 +3876,19 @@ in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia) } else if (ia->ia6_flags & IN6_IFF_SECURED) { if (optdad & ND6_OPTIMISTIC_DAD_SECURED) flags = IN6_IFF_OPTIMISTIC; + } else { + /* + * Keeping the behavior for temp and CGA + * SLAAC addresses to have a knob for optimistic + * DAD. + * Other than that if ND6_OPTIMISTIC_DAD_AUTOCONF + * is set, we should default to optimistic + * DAD. + * For now this means SLAAC addresses with interface + * identifier derived from modified EUI-64 bit + * identifiers. 
+ */ + flags = IN6_IFF_OPTIMISTIC; } } else if ((optdad & ND6_OPTIMISTIC_DAD_DYNAMIC) && (ia->ia6_flags & IN6_IFF_DYNAMIC)) { @@ -3920,13 +3929,13 @@ static int in6_getassocids(struct socket *so, uint32_t *cnt, user_addr_t aidp) { struct in6pcb *in6p = sotoin6pcb(so); - associd_t aid; + sae_associd_t aid; if (in6p == NULL || in6p->inp_state == INPCB_STATE_DEAD) return (EINVAL); /* IN6PCB has no concept of association */ - aid = ASSOCID_ANY; + aid = SAE_ASSOCID_ANY; *cnt = 0; /* just asking how many there are? */ @@ -3940,16 +3949,16 @@ in6_getassocids(struct socket *so, uint32_t *cnt, user_addr_t aidp) * Handle SIOCGCONNIDS ioctl for PF_INET6 domain. */ static int -in6_getconnids(struct socket *so, associd_t aid, uint32_t *cnt, +in6_getconnids(struct socket *so, sae_associd_t aid, uint32_t *cnt, user_addr_t cidp) { struct in6pcb *in6p = sotoin6pcb(so); - connid_t cid; + sae_connid_t cid; if (in6p == NULL || in6p->inp_state == INPCB_STATE_DEAD) return (EINVAL); - if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) return (EINVAL); /* if connected, return 1 connection count */ @@ -3960,7 +3969,7 @@ in6_getconnids(struct socket *so, associd_t aid, uint32_t *cnt, return (0); /* if IN6PCB is connected, assign it connid 1 */ - cid = ((*cnt != 0) ? 1 : CONNID_ANY); + cid = ((*cnt != 0) ? 1 : SAE_CONNID_ANY); return (copyout(&cid, cidp, sizeof (cid))); } @@ -3969,7 +3978,7 @@ in6_getconnids(struct socket *so, associd_t aid, uint32_t *cnt, * Handle SIOCGCONNINFO ioctl for PF_INET6 domain. 
*/ static int -in6_getconninfo(struct socket *so, connid_t cid, uint32_t *flags, +in6_getconninfo(struct socket *so, sae_connid_t cid, uint32_t *flags, uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len, user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type, user_addr_t aux_data, uint32_t *aux_len) @@ -3990,7 +3999,7 @@ in6_getconninfo(struct socket *so, connid_t cid, uint32_t *flags, goto out; } - if (cid != CONNID_ANY && cid != CONNID_ALL && cid != 1) { + if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL && cid != 1) { error = EINVAL; goto out; } diff --git a/bsd/netinet6/in6.h b/bsd/netinet6/in6.h index d8d71fe3e..bdda50103 100644 --- a/bsd/netinet6/in6.h +++ b/bsd/netinet6/in6.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2013 Apple Inc. All rights reserved. + * Copyright (c) 2008-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/bsd/netinet6/in6_gif.c b/bsd/netinet6/in6_gif.c index 6142092c5..7058b3976 100644 --- a/bsd/netinet6/in6_gif.c +++ b/bsd/netinet6/in6_gif.c @@ -162,7 +162,7 @@ in6_gif_output( } /* prepend new IP header */ - M_PREPEND(m, sizeof (struct ip6_hdr), M_DONTWAIT); + M_PREPEND(m, sizeof (struct ip6_hdr), M_DONTWAIT, 1); if (m && mbuf_len(m) < sizeof (struct ip6_hdr)) m = m_pullup(m, sizeof (struct ip6_hdr)); if (m == NULL) { @@ -185,7 +185,7 @@ in6_gif_output( m_freem(m); return (ENETUNREACH); } - ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE, + ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? 
ECN_NORMAL : ECN_NOCARE, &otos, &itos); ip6->ip6_flow &= ~htonl(0xff << 20); ip6->ip6_flow |= htonl((u_int32_t)otos << 20); @@ -245,6 +245,7 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto) struct ip6_hdr *ip6; int af = 0; u_int32_t otos; + int egress_success = 0; ip6 = mtod(m, struct ip6_hdr *); @@ -274,9 +275,9 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto) } ip = mtod(m, struct ip *); if (gifp->if_flags & IFF_LINK1) - ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos); + egress_success = ip_ecn_egress(ECN_NORMAL, &otos8, &ip->ip_tos); else - ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos); + egress_success = ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos); break; } #endif /* INET */ @@ -291,9 +292,9 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto) } ip6 = mtod(m, struct ip6_hdr *); if (gifp->if_flags & IFF_LINK1) - ip6_ecn_egress(ECN_ALLOWED, &otos, &ip6->ip6_flow); + egress_success = ip6_ecn_egress(ECN_NORMAL, &otos, &ip6->ip6_flow); else - ip6_ecn_egress(ECN_NOCARE, &otos, &ip6->ip6_flow); + egress_success = ip6_ecn_egress(ECN_NOCARE, &otos, &ip6->ip6_flow); break; } #endif @@ -303,6 +304,12 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto) return (IPPROTO_DONE); } + if (egress_success == 0) { + ip6stat.ip6s_nogif++; + m_freem(m); + return (IPPROTO_DONE); + } + /* Replace the rcvif by gifp for ifnet_input to route it correctly */ if (m->m_pkthdr.rcvif) m->m_pkthdr.rcvif = gifp; diff --git a/bsd/netinet6/in6_ifattach.c b/bsd/netinet6/in6_ifattach.c index e2a232a90..8b5379f53 100644 --- a/bsd/netinet6/in6_ifattach.c +++ b/bsd/netinet6/in6_ifattach.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2014 Apple Inc. All rights reserved. + * Copyright (c) 2003-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -518,7 +518,7 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct in6_aliasreq *ifra) * address, and then reconfigure another one, the prefix is still * valid with referring to the old link-local address. */ - if ((pr = nd6_prefix_lookup(&pr0)) == NULL) { + if ((pr = nd6_prefix_lookup(&pr0, ND6_PREFIX_EXPIRY_UNSPEC)) == NULL) { if ((error = nd6_prelist_add(&pr0, NULL, &pr, TRUE)) != 0) { IFA_REMREF(&ia->ia_ifa); lck_mtx_destroy(&pr0.ndpr_lock, ifa_mtx_grp); @@ -685,6 +685,7 @@ in6_ifattach_prelim(struct ifnet *ifp) struct in6_ifextra *ext; void **pbuf, *base; int error = 0; + struct in6_ifaddr *ia6 = NULL; VERIFY(ifp != NULL); @@ -720,13 +721,13 @@ in6_ifattach_prelim(struct ifnet *ifp) #endif if (ifp->if_inet6data == NULL) { - ext = (struct in6_ifextra *)_MALLOC(in6_extra_size, M_IFADDR, + ext = (struct in6_ifextra *)_MALLOC(in6_extra_bufsize, M_IFADDR, M_WAITOK|M_ZERO); if (!ext) return (ENOMEM); base = (void *)P2ROUNDUP((intptr_t)ext + sizeof(uint64_t), sizeof(uint64_t)); - VERIFY(((intptr_t)base + in6_extra_size) <= + VERIFY(((intptr_t)base + in6_extra_size) <= ((intptr_t)ext + in6_extra_bufsize)); pbuf = (void **)((intptr_t)base - sizeof(void *)); *pbuf = ext; @@ -734,7 +735,7 @@ in6_ifattach_prelim(struct ifnet *ifp) VERIFY(IS_P2ALIGNED(ifp->if_inet6data, sizeof(uint64_t))); } else { /* - * Since the structure is never freed, we need to zero out + * Since the structure is never freed, we need to zero out * some of its members. We avoid zeroing out the scope6 * structure on purpose because other threads might be * using its contents. 
@@ -743,12 +744,28 @@ in6_ifattach_prelim(struct ifnet *ifp) sizeof(IN6_IFEXTRA(ifp)->icmp6_ifstat)); bzero(&IN6_IFEXTRA(ifp)->in6_ifstat, sizeof(IN6_IFEXTRA(ifp)->in6_ifstat)); + /* + * XXX When recycling, nd_ifinfo gets initialized, other + * than the lock, inside nd6_ifattach + */ } - /* initialize NDP variables */ - if ((error = nd6_ifattach(ifp)) != 0) - return (error); - + /* + * XXX Only initialize NDP ifinfo for the interface + * if interface has not yet been configured with + * link local IPv6 address. + * Could possibly be optimized with an interface flag if need + * be. For now using in6ifa_ifpforlinklocal. + */ + ia6 = in6ifa_ifpforlinklocal(ifp, 0); + if (ia6 == NULL) { + /* initialize NDP variables */ + nd6_ifattach(ifp); + } else { + VERIFY(ND_IFINFO(ifp)->initialized); + IFA_REMREF(&ia6->ia_ifa); + ia6 = NULL; + } scope6_ifattach(ifp); /* initialize loopback interface address */ @@ -873,8 +890,8 @@ int in6_ifattach_llstartreq(struct ifnet *ifp, struct in6_llstartreq *llsr) { struct in6_aliasreq ifra; - struct in6_ifaddr *ia6; - struct nd_ifinfo *ndi; + struct in6_ifaddr *ia6 = NULL; + struct nd_ifinfo *ndi = NULL; int error; VERIFY(llsr != NULL); @@ -889,14 +906,11 @@ in6_ifattach_llstartreq(struct ifnet *ifp, struct in6_llstartreq *llsr) if (nd6_send_opstate == ND6_SEND_OPMODE_DISABLED) return (ENXIO); - lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifp); VERIFY(ndi != NULL && ndi->initialized); if ((ndi->flags & ND6_IFF_INSECURE) != 0) { - lck_rw_done(nd_if_rwlock); return (ENXIO); } - lck_rw_done(nd_if_rwlock); /* assign a link-local address, only if there isn't one here already. 
*/ ia6 = in6ifa_ifpforlinklocal(ifp, 0); @@ -1142,10 +1156,8 @@ in6_iid_mktmp(struct ifnet *ifp, u_int8_t *retbuf, const u_int8_t *baseid, int generate) { u_int8_t nullbuf[8]; - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = ND_IFINFO(ifp); - lck_rw_lock_shared(nd_if_rwlock); - ndi = ND_IFINFO(ifp); VERIFY(ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); bzero(nullbuf, sizeof (nullbuf)); @@ -1164,28 +1176,27 @@ in6_iid_mktmp(struct ifnet *ifp, u_int8_t *retbuf, const u_int8_t *baseid, bcopy(ndi->randomid, retbuf, 8); lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); } void in6_tmpaddrtimer(void *arg) { #pragma unused(arg) - int i; - struct nd_ifinfo *ndi; + struct ifnet *ifp = NULL; + struct nd_ifinfo *ndi = NULL; u_int8_t nullbuf[8]; timeout(in6_tmpaddrtimer, (caddr_t)0, (ip6_temp_preferred_lifetime - ip6_desync_factor - ip6_temp_regen_advance) * hz); - lck_rw_lock_shared(nd_if_rwlock); bzero(nullbuf, sizeof (nullbuf)); - for (i = 1; i < if_index + 1; i++) { - if (!nd_ifinfo || i >= nd_ifinfo_indexlim) - break; - ndi = &nd_ifinfo[i]; - if (!ndi->initialized) + ifnet_head_lock_shared(); + for (ifp = ifnet_head.tqh_first; ifp; + ifp = ifp->if_link.tqe_next) { + ndi = ND_IFINFO(ifp); + if ((NULL == ndi) || (FALSE == ndi->initialized)) { continue; + } lck_mtx_lock(&ndi->lock); if (bcmp(ndi->randomid, nullbuf, sizeof (nullbuf)) != 0) { /* @@ -1197,5 +1208,5 @@ in6_tmpaddrtimer(void *arg) } lck_mtx_unlock(&ndi->lock); } - lck_rw_done(nd_if_rwlock); + ifnet_head_done(); } diff --git a/bsd/netinet6/in6_mcast.c b/bsd/netinet6/in6_mcast.c index a76ce4fe0..74dd6496c 100644 --- a/bsd/netinet6/in6_mcast.c +++ b/bsd/netinet6/in6_mcast.c @@ -1655,7 +1655,7 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt) if (error) return (error); /* we never use msfr.msfr_srcs; */ - memcpy(&msfr, &msfr64, sizeof(msfr)); + memcpy(&msfr, &msfr64, sizeof(msfr64)); } else { error = sooptcopyin(sopt, &msfr32, sizeof(struct __msfilterreq32), @@ -1663,7 +1663,7 
@@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt) if (error) return (error); /* we never use msfr.msfr_srcs; */ - memcpy(&msfr, &msfr32, sizeof(msfr)); + memcpy(&msfr, &msfr32, sizeof(msfr32)); } if (msfr.msfr_group.ss_family != AF_INET6 || @@ -1735,7 +1735,6 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt) IM6O_UNLOCK(imo); return (ENOBUFS); } - bzero(tss, (size_t) msfr.msfr_nsrcs * sizeof(*tss)); } /* @@ -1784,7 +1783,7 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt) msfr32.msfr_ifindex = msfr.msfr_ifindex; msfr32.msfr_fmode = msfr.msfr_fmode; msfr32.msfr_nsrcs = msfr.msfr_nsrcs; - memcpy(&msfr64.msfr_group, &msfr.msfr_group, + memcpy(&msfr32.msfr_group, &msfr.msfr_group, sizeof(struct sockaddr_storage)); error = sooptcopyout(sopt, &msfr32, sizeof(struct __msfilterreq32)); diff --git a/bsd/netinet6/in6_pcb.c b/bsd/netinet6/in6_pcb.c index 63beb9a91..660f8da4f 100644 --- a/bsd/netinet6/in6_pcb.c +++ b/bsd/netinet6/in6_pcb.c @@ -857,7 +857,7 @@ in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, u_int fport_arg, u_int32_t flowinfo; int errno; - if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET6) + if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) return; sa6_dst = (struct sockaddr_in6 *)(void *)dst; @@ -1041,16 +1041,9 @@ void in6_losing(struct inpcb *in6p) { struct rtentry *rt; - struct rt_addrinfo info; if ((rt = in6p->in6p_route.ro_rt) != NULL) { RT_LOCK(rt); - bzero((caddr_t)&info, sizeof (info)); - info.rti_info[RTAX_DST] = - (struct sockaddr *)&in6p->in6p_route.ro_dst; - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; - info.rti_info[RTAX_NETMASK] = rt_mask(rt); - rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); if (rt->rt_flags & RTF_DYNAMIC) { /* * Prevent another thread from modifying rt_key, diff --git a/bsd/netinet6/in6_proto.c b/bsd/netinet6/in6_proto.c index 53e362c89..3aedbea2f 100644 --- a/bsd/netinet6/in6_proto.c +++ b/bsd/netinet6/in6_proto.c @@ -178,7 +178,7 @@ 
struct ip6protosw inet6sw[] = { .pr_type = SOCK_DGRAM, .pr_protocol = IPPROTO_UDP, .pr_flags = PR_ATOMIC|PR_ADDR|PR_PROTOLOCK|PR_PCBLOCK| - PR_EVCONNINFO, + PR_EVCONNINFO|PR_PRECONN_WRITE, .pr_input = udp6_input, .pr_ctlinput = udp6_ctlinput, .pr_ctloutput = ip6_ctloutput, @@ -194,7 +194,8 @@ struct ip6protosw inet6sw[] = { .pr_type = SOCK_STREAM, .pr_protocol = IPPROTO_TCP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_PCBLOCK| - PR_PROTOLOCK|PR_DISPOSE|PR_EVCONNINFO, + PR_PROTOLOCK|PR_DISPOSE|PR_EVCONNINFO| + PR_PRECONN_WRITE|PR_DATA_IDEMPOTENT, .pr_input = tcp6_input, .pr_ctlinput = tcp6_ctlinput, .pr_ctloutput = tcp_ctloutput, diff --git a/bsd/netinet6/in6_src.c b/bsd/netinet6/in6_src.c index 86b703bf5..fda321bb7 100644 --- a/bsd/netinet6/in6_src.c +++ b/bsd/netinet6/in6_src.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -133,6 +133,16 @@ SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcif_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcif_debug, 0, "log source interface selection debug info"); +static int ip6_select_srcaddr_debug = 0; +SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcaddr_debug, + CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcaddr_debug, 0, + "log source address selection debug info"); + +static int ip6_select_src_expensive_secondary_if = 0; +SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_src_expensive_secondary_if, + CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_src_expensive_secondary_if, 0, + "allow source interface selection to use expensive secondaries"); + #define ADDR_LABEL_NOTAPP (-1) struct in6_addrpolicy defaultaddrpolicy; @@ -164,6 +174,18 @@ static int dump_addrsel_policyent(const struct in6_addrpolicy *, void *); static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *); void addrsel_policy_init(void); +#define SASEL_DO_DBG(inp) \ + (ip6_select_srcaddr_debug && (inp) != NULL && \ + 
(inp)->inp_socket != NULL && \ + ((inp)->inp_socket->so_options & SO_DEBUG)) + +#define SASEL_LOG(fmt, ...) \ +do { \ + if (SASEL_DO_DBG(inp)) \ + printf("%s:%d " fmt "\n",\ + __FUNCTION__, __LINE__, ##__VA_ARGS__); \ +} while (0); \ + /* * Return an IPv6 address, which is the most appropriate for a given * destination and user specified options. @@ -171,21 +193,22 @@ void addrsel_policy_init(void); * an entry to the caller for later use. */ #define REPLACE(r) do {\ - if ((r) < sizeof (ip6stat.ip6s_sources_rule) / \ - sizeof (ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ - ip6stat.ip6s_sources_rule[(r)]++; \ + SASEL_LOG("REPLACE r %d ia %s ifp1 %s\n", \ + (r), s_src, ifp1->if_xname); \ + srcrule = (r); \ goto replace; \ } while (0) + #define NEXTSRC(r) do {\ - if ((r) < sizeof (ip6stat.ip6s_sources_rule) / \ - sizeof (ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ - ip6stat.ip6s_sources_rule[(r)]++; \ + SASEL_LOG("NEXTSRC r %d ia %s ifp1 %s\n", \ + (r), s_src, ifp1->if_xname); \ goto next; /* XXX: we can't use 'continue' here */ \ } while (0) + #define BREAK(r) do { \ - if ((r) < sizeof (ip6stat.ip6s_sources_rule) / \ - sizeof (ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ - ip6stat.ip6s_sources_rule[(r)]++; \ + SASEL_LOG("BREAK r %d ia %s ifp1 %s\n", \ + (r), s_src, ifp1->if_xname); \ + srcrule = (r); \ goto out; /* XXX: we can't use 'break' here */ \ } while (0) @@ -212,6 +235,9 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_out_args ip6oa = { ifscope, { 0 }, IP6OAF_SELECT_SRCIF, 0 }; boolean_t islocal = FALSE; uint64_t secs = net_uptime(); + char s_src[MAX_IPv6_STR_LEN], s_dst[MAX_IPv6_STR_LEN]; + const struct in6_addr *tmp; + int bestrule = IP6S_SRCRULE_0; dst = dstsock->sin6_addr; /* make a copy for local operation */ *errorp = 0; @@ -313,6 +339,17 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, goto done; } + if (SASEL_DO_DBG(inp)) { + (void) inet_ntop(AF_INET6, &dst, 
s_dst, sizeof (s_src)); + + tmp = &in6addr_any; + (void) inet_ntop(AF_INET6, tmp, s_src, sizeof (s_src)); + + printf("%s out src %s dst %s ifscope %d ifp %s\n", + __func__, s_src, s_dst, ifscope, + ifp ? ifp->if_xname : "NULL"); + } + *errorp = in6_setscope(&dst, ifp, &odstzone); if (*errorp != 0) { src_storage = NULL; @@ -326,6 +363,11 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, u_int32_t srczone, osrczone, dstzone; struct in6_addr src; struct ifnet *ifp1 = ia->ia_ifp; + int srcrule; + + if (SASEL_DO_DBG(inp)) + (void) inet_ntop(AF_INET6, &ia->ia_addr.sin6_addr, + s_src, sizeof (s_src)); IFA_LOCK(&ia->ia_ifa); /* @@ -335,27 +377,37 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * XXX: we should probably use sin6_scope_id here. */ if (in6_setscope(&dst, ifp1, &dstzone) || - odstzone != dstzone) + odstzone != dstzone) { + SASEL_LOG("NEXT ia %s ifp1 %s odstzone %d != dstzone %d\n", + s_src, ifp1->if_xname, odstzone, dstzone); goto next; - + } src = ia->ia_addr.sin6_addr; if (in6_setscope(&src, ifp, &osrczone) || in6_setscope(&src, ifp1, &srczone) || - osrczone != srczone) + osrczone != srczone) { + SASEL_LOG("NEXT ia %s ifp1 %s osrczone %d != srczone %d\n", + s_src, ifp1->if_xname, osrczone, srczone); goto next; - + } /* avoid unusable addresses */ if ((ia->ia6_flags & - (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) + (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { + SASEL_LOG("NEXT ia %s ifp1 %s ia6_flags 0x%x\n", + s_src, ifp1->if_xname, ia->ia6_flags); goto next; - - if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia, secs)) + } + if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia, secs)) { + SASEL_LOG("NEXT ia %s ifp1 %s IFA6_IS_DEPRECATED\n", + s_src, ifp1->if_xname); goto next; - + } if (!nd6_optimistic_dad && - (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0) + (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0) { + SASEL_LOG("NEXT ia %s ifp1 %s IN6_IFF_OPTIMISTIC\n", + s_src, ifp1->if_xname); goto 
next; - + } /* Rule 1: Prefer same address */ if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) BREAK(IP6S_SRCRULE_1); /* there should be no better candidate */ @@ -530,16 +582,39 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * Last resort: just keep the current candidate. * Or, do we need more rules? */ + if (ifp1 != ifp && (ifp1->if_eflags & IFEF_EXPENSIVE) && + ip6_select_src_expensive_secondary_if == 0) { + SASEL_LOG("NEXT ia %s ifp1 %s IFEF_EXPENSIVE\n", + s_src, ifp1->if_xname); + ip6stat.ip6s_sources_skip_expensive_secondary_if++; + goto next; + } + SASEL_LOG("NEXT ia %s ifp1 %s last resort\n", + s_src, ifp1->if_xname); IFA_UNLOCK(&ia->ia_ifa); continue; replace: + /* + * Ignore addresses on secondary interfaces that are marked + * expensive + */ + if (ifp1 != ifp && (ifp1->if_eflags & IFEF_EXPENSIVE) && + ip6_select_src_expensive_secondary_if == 0) { + SASEL_LOG("NEXT ia %s ifp1 %s IFEF_EXPENSIVE\n", + s_src, ifp1->if_xname); + ip6stat.ip6s_sources_skip_expensive_secondary_if++; + goto next; + } + bestrule = srcrule; best_scope = (new_scope >= 0 ? new_scope : in6_addrscope(&ia->ia_addr.sin6_addr)); best_policy = (new_policy ? new_policy : in6_addrsel_lookup_policy(&ia->ia_addr)); best_matchlen = (new_matchlen >= 0 ? new_matchlen : in6_matchlen(&ia->ia_addr.sin6_addr, &dst)); + SASEL_LOG("NEXT ia %s ifp1 %s best_scope %d new_scope %d dst_scope %d\n", + s_src, ifp1->if_xname, best_scope, new_scope, dst_scope); IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for ia_best */ IFA_UNLOCK(&ia->ia_ifa); if (ia_best != NULL) @@ -577,10 +652,21 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } IFA_LOCK_SPIN(&ia->ia_ifa); + if (bestrule < IP6S_SRCRULE_COUNT) + ip6stat.ip6s_sources_rule[bestrule]++; *src_storage = satosin6(&ia->ia_addr)->sin6_addr; IFA_UNLOCK(&ia->ia_ifa); IFA_REMREF(&ia->ia_ifa); done: + if (SASEL_DO_DBG(inp)) { + (void) inet_ntop(AF_INET6, &dst, s_dst, sizeof (s_src)); + + tmp = (src_storage != NULL) ? 
src_storage : &in6addr_any; + (void) inet_ntop(AF_INET6, tmp, s_src, sizeof (s_src)); + + printf("%s out src %s dst %s ifscope %d dst_scope %d best_scope %d\n", + __func__, s_src, s_dst, ifscope, dst_scope, best_scope); + } if (ifpp != NULL) { /* if ifp is non-NULL, refcnt held in in6_selectif() */ *ifpp = ifp; @@ -668,6 +754,10 @@ selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock, select_srcif = (ip6_doscopedroute && srcsock != NULL && !IN6_IS_ADDR_UNSPECIFIED(&srcsock->sin6_addr)); + if (ip6_select_srcif_debug) { + printf("%s src %s dst %s ifscope %d select_srcif %d\n", + __func__, s_src, s_dst, ifscope, select_srcif); + } /* * If Scoped Routing is disabled, ignore the given ifscope. * Otherwise even if source selection won't be performed, @@ -792,12 +882,15 @@ selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock, if (ip6_select_srcif_debug && ifa != NULL) { if (ro->ro_rt != NULL) { - printf("%s->%s ifscope %d->%d ifa_if %s " - "ro_if %s\n", s_src, s_dst, ifscope, + printf("%s %s->%s ifscope %d->%d ifa_if %s " + "ro_if %s\n", + __func__, + s_src, s_dst, ifscope, scope, if_name(ifa->ifa_ifp), if_name(rt_ifp)); } else { - printf("%s->%s ifscope %d->%d ifa_if %s\n", + printf("%s %s->%s ifscope %d->%d ifa_if %s\n", + __func__, s_src, s_dst, ifscope, scope, if_name(ifa->ifa_ifp)); } @@ -827,10 +920,14 @@ selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock, ifa = (struct ifaddr *)ifa_foraddr6(&srcsock->sin6_addr); if (ip6_select_srcif_debug && ifa != NULL) { - printf("%s->%s ifscope %d ifa_if %s\n", + printf("%s %s->%s ifscope %d ifa_if %s\n", + __func__, s_src, s_dst, ifscope, if_name(ifa->ifa_ifp)); + } else if (ip6_select_srcif_debug) { + printf("%s %s->%s ifscope %d ifa_if NULL\n", + __func__, + s_src, s_dst, ifscope); } - } getroute: @@ -1119,8 +1216,10 @@ selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock, if (error == 0) { if (retrt != NULL && route != NULL) *retrt = route->ro_rt; /* 
ro_rt may be NULL */ - } else if (select_srcif && ip6_select_srcif_debug) { - printf("%s->%s ifscope %d ifa_if %s ro_if %s (error=%d)\n", + } + if (ip6_select_srcif_debug) { + printf("%s %s->%s ifscope %d ifa_if %s ro_if %s (error=%d)\n", + __func__, s_src, s_dst, ifscope, (ifa != NULL) ? if_name(ifa->ifa_ifp) : "NONE", (ifp != NULL) ? if_name(ifp) : "NONE", error); @@ -1137,7 +1236,7 @@ selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock, * caller provides a non-NULL retifp. The caller is responsible for checking * if the returned ifp is valid and release its reference at all times. */ -static int +int in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, struct ip6_out_args *ip6oa, struct ifnet **retifp) @@ -1235,25 +1334,19 @@ in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp) { if (in6p && in6p->in6p_hops >= 0) { return (in6p->in6p_hops); - } else { - lck_rw_lock_shared(nd_if_rwlock); - if (ifp && ifp->if_index < nd_ifinfo_indexlim) { - u_int8_t chlim; - struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; - - if (ndi->initialized) { - /* access chlim without lock, for performance */ - chlim = ndi->chlim; - } else { - chlim = ip6_defhlim; - } - lck_rw_done(nd_if_rwlock); - return (chlim); + } else if (NULL != ifp) { + u_int8_t chlim; + struct nd_ifinfo *ndi = ND_IFINFO(ifp); + if (ndi && ndi->initialized) { + /* access chlim without lock, for performance */ + chlim = ndi->chlim; } else { - lck_rw_done(nd_if_rwlock); - return (ip6_defhlim); + chlim = ip6_defhlim; } + return (chlim); } + + return (ip6_defhlim); } /* diff --git a/bsd/netinet6/in6_var.h b/bsd/netinet6/in6_var.h index 7157b4c04..07cc9e16f 100644 --- a/bsd/netinet6/in6_var.h +++ b/bsd/netinet6/in6_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -97,8 +97,8 @@ #ifdef BSD_KERNEL_PRIVATE #include #include -#include #endif /* BSD_KERNEL_PRIVATE */ +#include #include #include @@ -311,18 +311,6 @@ struct icmp6_ifstat { u_quad_t ifs6_out_mlddone; }; -#ifdef BSD_KERNEL_PRIVATE -/* - * Per-interface IPv6 structures. - */ -struct in6_ifextra { - struct scope6_id scope6_id; - struct in6_ifstat in6_ifstat; - struct icmp6_ifstat icmp6_ifstat; -}; -#define IN6_IFEXTRA(_ifp) ((struct in6_ifextra *)(_ifp->if_inet6data)) -#endif /* BSD_KERNEL_PRIVATE */ - struct in6_ifreq { char ifr_name[IFNAMSIZ]; union { @@ -336,7 +324,7 @@ struct in6_ifreq { struct in6_addrlifetime ifru_lifetime; struct in6_ifstat ifru_stat; struct icmp6_ifstat ifru_icmp6stat; - u_int32_t ifru_scope_id[16]; + u_int32_t ifru_scope_id[SCOPE6_ID_MAX]; } ifr_ifru; }; @@ -849,6 +837,22 @@ struct in6_multi_mship { LIST_ENTRY(in6_multi_mship) i6mm_chain; /* multicast options chain */ }; +#ifdef BSD_KERNEL_PRIVATE +#include +/* + * * Per-interface IPv6 structures. + * */ +struct in6_ifextra { + struct scope6_id scope6_id; + struct in6_ifstat in6_ifstat; + struct icmp6_ifstat icmp6_ifstat; + struct nd_ifinfo nd_ifinfo; + uint32_t netsig_len; + u_int8_t netsig[IFNET_SIGNATURELEN]; +}; +#define IN6_IFEXTRA(_ifp) ((struct in6_ifextra *)(_ifp->if_inet6data)) +#endif /* BSD_KERNEL_PRIVATE */ + struct mld_ifinfo; /* diff --git a/bsd/netinet6/ip6_ecn.h b/bsd/netinet6/ip6_ecn.h index 2506dd3af..cfad473a6 100644 --- a/bsd/netinet6/ip6_ecn.h +++ b/bsd/netinet6/ip6_ecn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2013 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013, 2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -66,5 +66,9 @@ #ifdef BSD_KERNEL_PRIVATE extern void ip6_ecn_ingress(int, u_int32_t *, const u_int32_t *); -extern void ip6_ecn_egress(int, const u_int32_t *, u_int32_t *); +extern int ip6_ecn_egress(int, const u_int32_t *, u_int32_t *); +extern void ip46_ecn_ingress(int, u_int32_t *, const u_int8_t *); +extern int ip46_ecn_egress(int, const u_int32_t *, u_int8_t *); +extern void ip64_ecn_ingress(int, u_int8_t *, const u_int32_t *); +extern int ip64_ecn_egress(int, const u_int8_t *, u_int32_t *); #endif /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/netinet6/ip6_forward.c b/bsd/netinet6/ip6_forward.c index 4f3c61ee0..2f5ab8fbf 100644 --- a/bsd/netinet6/ip6_forward.c +++ b/bsd/netinet6/ip6_forward.c @@ -223,8 +223,8 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, if (ip6->ip6_hlim <= IPV6_HLIMDEC) { /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ - icmp6_error(m, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_TRANSIT, 0); + icmp6_error_flag(m, ICMP6_TIME_EXCEEDED, + ICMP6_TIME_EXCEED_TRANSIT, 0, 0); return (NULL); } diff --git a/bsd/netinet6/ip6_input.c b/bsd/netinet6/ip6_input.c index 4cc199ca8..e9521ba3c 100644 --- a/bsd/netinet6/ip6_input.c +++ b/bsd/netinet6/ip6_input.c @@ -121,6 +121,7 @@ #include #include #include +#include #include #include @@ -204,6 +205,9 @@ static lck_grp_attr_t *ip6_mutex_grp_attr; extern int loopattach_done; extern void addrsel_policy_init(void); +static int sysctl_reset_ip6_input_stats SYSCTL_HANDLER_ARGS; +static int sysctl_ip6_input_measure_bins SYSCTL_HANDLER_ARGS; +static int sysctl_ip6_input_getperf SYSCTL_HANDLER_ARGS; static void ip6_init_delayed(void); static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); @@ -223,6 +227,23 @@ SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, adj_clear_hwcksum, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_adj_clear_hwcksum, 0, "Invalidate hwcksum info when adjusting length"); +static int ip6_input_measure = 0; 
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, input_perf, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &ip6_input_measure, 0, sysctl_reset_ip6_input_stats, "I", "Do time measurement"); + +static uint64_t ip6_input_measure_bins = 0; +SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, input_perf_bins, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_input_measure_bins, 0, + sysctl_ip6_input_measure_bins, "I", + "bins for chaining performance data histogram"); + +static net_perf_t net_perf; +SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, input_perf_data, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_ip6_input_getperf, "S,net_perf", + "IP6 input performance data (struct net_perf, net/net_perf.h)"); + /* * On platforms which require strict alignment (currently for anything but * i386 or x86_64), check if the IP header pointer is 32-bit aligned; if not, @@ -263,7 +284,18 @@ static void ip6_proto_input(protocol_family_t protocol, mbuf_t packet) { #pragma unused(protocol) +#if INET + struct timeval start_tv; + if (ip6_input_measure) + net_perf_start_time(&net_perf, &start_tv); +#endif /* INET */ ip6_input(packet); +#if INET + if (ip6_input_measure) { + net_perf_measure_time(&net_perf, &start_tv, 1); + net_perf_histogram(&net_perf, 1); + } +#endif /* INET */ } /* @@ -605,6 +637,7 @@ ip6_input(struct mbuf *m) } ip6stat.ip6s_nxthist[ip6->ip6_nxt]++; + /* * Check against address spoofing/corruption. */ @@ -670,20 +703,20 @@ ip6_input(struct mbuf *m) } #endif #if IPFW2 - /* - * Check with the firewall... - */ - if (ip6_fw_enable && ip6_fw_chk_ptr) { - u_short port = 0; - /* If ipfw says divert, we have to just drop packet */ - /* use port as a dummy argument */ - if ((*ip6_fw_chk_ptr)(&ip6, NULL, &port, &m)) { - m_freem(m); - m = NULL; - } - if (!m) - goto done; - } + /* + * Check with the firewall... 
+ */ + if (ip6_fw_enable && ip6_fw_chk_ptr) { + u_short port = 0; + /* If ipfw says divert, we have to just drop packet */ + /* use port as a dummy argument */ + if ((*ip6_fw_chk_ptr)(&ip6, NULL, &port, &m)) { + m_freem(m); + m = NULL; + } + if (!m) + goto done; + } #endif /* IPFW2 */ /* @@ -1697,7 +1730,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) } break; case IPPROTO_ROUTING: - if (!in6p->inp_flags & IN6P_RTHDR) + if (!(in6p->inp_flags & IN6P_RTHDR)) break; mp = sbcreatecontrol_mbuf((caddr_t)ip6e, elen, @@ -1994,3 +2027,57 @@ u_char inet6ctlerrmap[PRC_NCMDS] = { 0, 0, 0, 0, ENOPROTOOPT }; + +static int +sysctl_reset_ip6_input_stats SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error, i; + + i = ip6_input_measure; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* impose bounds */ + if (i < 0 || i > 1) { + error = EINVAL; + goto done; + } + if (ip6_input_measure != i && i == 1) { + net_perf_initialize(&net_perf, ip6_input_measure_bins); + } + ip6_input_measure = i; +done: + return (error); +} + +static int +sysctl_ip6_input_measure_bins SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error; + uint64_t i; + + i = ip6_input_measure_bins; + error = sysctl_handle_quad(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* validate data */ + if (!net_perf_validate_bins(i)) { + error = EINVAL; + goto done; + } + ip6_input_measure_bins = i; +done: + return (error); +} + +static int +sysctl_ip6_input_getperf SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + if (req->oldptr == USER_ADDR_NULL) + req->oldlen = (size_t)sizeof (struct ipstat); + + return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen))); +} diff --git a/bsd/netinet6/ip6_output.c b/bsd/netinet6/ip6_output.c index 812bf2b3a..7767822d7 100644 --- a/bsd/netinet6/ip6_output.c +++ b/bsd/netinet6/ip6_output.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 
Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -119,6 +119,7 @@ #include #include #include +#include #include #include @@ -158,6 +159,9 @@ extern int ipsec_bypass; #include #endif /* PF */ +static int sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS; +static int sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS; +static int sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS; static int ip6_copyexthdr(struct mbuf **, caddr_t, int); static void ip6_out_cksum_stats(int, u_int32_t); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); @@ -176,6 +180,34 @@ static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int, static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); static void ip6_output_checksum(struct ifnet *, uint32_t, struct mbuf *, int, uint32_t, uint32_t); +extern int udp_ctloutput(struct socket *, struct sockopt *); +static int ip6_do_fragmentation(struct mbuf **morig, + uint32_t optlen, struct ifnet *ifp, uint32_t unfragpartlen, + struct ip6_hdr *ip6, struct ip6_exthdrs *exthdrsp, uint32_t mtu, + int nxt0); +static int ip6_fragment_packet(struct mbuf **m, + struct ip6_pktopts *opt, struct ip6_exthdrs *exthdrsp, struct ifnet *ifp, + uint32_t mtu, boolean_t alwaysfrag, uint32_t unfragpartlen, + struct route_in6 *ro_pmtu, int nxt0, uint32_t optlen); + +SYSCTL_DECL(_net_inet6_ip6); + +static int ip6_output_measure = 0; +SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &ip6_output_measure, 0, sysctl_reset_ip6_output_stats, "I", "Do time measurement"); + +static uint64_t ip6_output_measure_bins = 0; +SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_bins, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_output_measure_bins, 0, + sysctl_ip6_output_measure_bins, "I", + "bins for chaining performance data histogram"); + +static net_perf_t net_perf; +SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, 
output_perf_data, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_ip6_output_getperf, "S,net_perf", + "IP6 output performance data (struct net_perf, net/net_perf.h)"); #define IM6O_TRACE_HIST_SIZE 32 /* size of trace history */ @@ -210,47 +242,19 @@ static struct zone *im6o_zone; /* zone for ip6_moptions */ #define IM6O_ZONE_MAX 64 /* maximum elements in zone */ #define IM6O_ZONE_NAME "ip6_moptions" /* zone name */ -SYSCTL_DECL(_net_inet6_ip6); - -static int ip6_maxchainsent = 0; -SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent, - CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxchainsent, 0, - "use dlil_output_list"); - /* - * XXX we don't handle mbuf chains yet in nd6_output() so ip6_output_list() only - * walks through the packet chain and sends each mbuf separately. + * ip6_output() calls ip6_output_list() to do the work */ int -ip6_output_list(struct mbuf *m0, int packetlist, struct ip6_pktopts *opt, +ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, struct ip6_out_args *ip6oa) { -#pragma unused(packetlist) - struct mbuf *m = m0, *nextpkt; - int error = 0; - - while (m != NULL) { - /* - * Break the chain before calling ip6_output() and free the - * mbufs if there was an error. - */ - nextpkt = m->m_nextpkt; - m->m_nextpkt = NULL; - error = ip6_output(m, opt, ro, flags, im6o, ifpp, ip6oa); - if (error != 0) { - if (nextpkt != NULL) - m_freem_list(nextpkt); - return (error); - } - m = nextpkt; - } - - return (error); + return ip6_output_list(m0, 0, opt, ro, flags, im6o, ifpp, ip6oa); } /* - * IP6 output. The packet in mbuf chain m contains a skeletal IP6 + * IP6 output. Each packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. 
@@ -265,15 +269,18 @@ ip6_output_list(struct mbuf *m0, int packetlist, struct ip6_pktopts *opt, * which is rt_rmx.rmx_mtu. */ int -ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, - int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, - struct ip6_out_args *ip6oa) +ip6_output_list(struct mbuf *m0, int packetchain, struct ip6_pktopts *opt, + struct route_in6 *ro, int flags, struct ip6_moptions *im6o, + struct ifnet **ifpp, struct ip6_out_args *ip6oa) { struct ip6_hdr *ip6; u_char *nexthdrp; struct ifnet *ifp = NULL, *origifp = NULL; /* refcnt'd */ + struct ifnet **ifpp_save = ifpp; struct mbuf *m, *mprev; - int hlen, tlen, len, off, nxt0; + struct mbuf *sendchain = NULL, *sendchain_last = NULL; + struct mbuf *inputchain = NULL; + int nxt0; struct route_in6 *ro_pmtu = NULL; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; @@ -287,6 +294,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, ipfilter_t inject_filter_ref; struct ipf_pktopts *ippo = NULL; struct flowadv *adv = NULL; + uint32_t pktcnt = 0; + uint32_t packets_processed = 0; + struct timeval start_tv; #if DUMMYNET struct m_tag *tag; struct ip6_out_args saved_ip6oa; @@ -331,6 +341,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, struct { boolean_t select_srcif : 1; boolean_t hdrsplit : 1; + boolean_t route_selected : 1; boolean_t dontfrag : 1; #if IPSEC boolean_t needipsec : 1; @@ -340,6 +351,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, uint32_t raw; } ip6obf = { .raw = 0 }; + if (ip6_output_measure) + net_perf_start_time(&net_perf, &start_tv); + VERIFY(m0->m_flags & M_PKTHDR); /* zero out {saved_route, saved_ro_pmtu, ip6route, exthdrs, args} */ @@ -354,6 +368,13 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) { struct dn_pkt_tag *dn_tag; + /* + * ip6_output_list() cannot handle chains of packets 
reinjected + * by dummynet. The same restriction applies to + * ip_output_list(). + */ + VERIFY(0 == packetchain); + dn_tag = (struct dn_pkt_tag *)(tag+1); args.fwa_pf_rule = dn_tag->dn_pf_rule; @@ -388,7 +409,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, #endif /* DUMMYNET */ m = m0; - m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO); #if IPSEC if (ipsec_bypass == 0) { @@ -412,10 +432,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, } #endif /* IPSEC */ - ip6 = mtod(m, struct ip6_hdr *); - nxt0 = ip6->ip6_nxt; - finaldst = ip6->ip6_dst; - inject_filter_ref = ipf_get_inject_filter(m); ippo = &ipf_pktopts; if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) { @@ -455,6 +471,14 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, ip6oa->ip6oa_retflags = 0; } + /* + * Clear out ifpp to be filled in after determining route. ifpp_save is + * used to keep old value to release reference properly and dtrace + * ipsec tunnel traffic properly. + */ + if (ifpp != NULL && *ifpp != NULL) + *ifpp = NULL; + #if DUMMYNET if (args.fwa_pf_rule) { ip6 = mtod(m, struct ip6_hdr *); @@ -463,6 +487,43 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, } #endif /* DUMMYNET */ +#if NECP + /* + * Since all packets are assumed to come from same socket, necp lookup + * only needs to happen once per function entry. + */ + necp_matched_policy_id = necp_ip6_output_find_policy_match(m, flags, + (flags & IPV6_OUTARGS) ? ip6oa : NULL, &necp_result, + &necp_result_parameter); +#endif /* NECP */ + + /* + * If a chain was passed in, prepare for ther first iteration. For all + * other iterations, this work will be done at evaluateloop: label. + */ + if (packetchain) { + /* + * Remove m from the chain during processing to avoid + * accidental frees on entire list. 
+ */ + inputchain = m->m_nextpkt; + m->m_nextpkt = NULL; + } + +loopit: + packets_processed++; + m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO); + ip6 = mtod(m, struct ip6_hdr *); + nxt0 = ip6->ip6_nxt; + finaldst = ip6->ip6_dst; + ip6obf.hdrsplit = FALSE; + ro_pmtu = NULL; + + if (!SLIST_EMPTY(&m->m_pkthdr.tags)) + inject_filter_ref = ipf_get_inject_filter(m); + else + inject_filter_ref = NULL; + #define MAKE_EXTHDR(hp, mp) do { \ if (hp != NULL) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ @@ -499,46 +560,65 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, #undef MAKE_EXTHDR #if NECP - necp_matched_policy_id = necp_ip6_output_find_policy_match (m, flags, (flags & IPV6_OUTARGS) ? ip6oa : NULL, - &necp_result, &necp_result_parameter); if (necp_matched_policy_id) { necp_mark_packet_from_ip(m, necp_matched_policy_id); + switch (necp_result) { - case NECP_KERNEL_POLICY_RESULT_PASS: + case NECP_KERNEL_POLICY_RESULT_PASS: + goto skip_ipsec; + case NECP_KERNEL_POLICY_RESULT_DROP: + case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT: + /* + * Flow divert packets should be blocked at the IP + * layer. 
+ */ + error = EHOSTUNREACH; + goto freehdrs; + case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: { + /* + * Verify that the packet is being routed to the tunnel + */ + struct ifnet *policy_ifp = + necp_get_ifnet_from_result_parameter( + &necp_result_parameter); + + if (policy_ifp == ifp) { goto skip_ipsec; - case NECP_KERNEL_POLICY_RESULT_DROP: - case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT: - /* Flow divert packets should be blocked at the IP layer */ - error = EHOSTUNREACH; - goto bad; - case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: { - /* Verify that the packet is being routed to the tunnel */ - struct ifnet *policy_ifp = necp_get_ifnet_from_result_parameter(&necp_result_parameter); - if (policy_ifp == ifp) { + } else { + if (necp_packet_can_rebind_to_ifnet(m, + policy_ifp, (struct route *)&necp_route, + AF_INET6)) { + /* + * Set scoped index to the tunnel + * interface, since it is compatible + * with the packet. This will only work + * for callers who pass IPV6_OUTARGS, + * but that covers all of the clients + * we care about today. 
+ */ + if (flags & IPV6_OUTARGS) { + ip6oa->ip6oa_boundif = + policy_ifp->if_index; + ip6oa->ip6oa_flags |= + IP6OAF_BOUND_IF; + } + if (opt != NULL + && opt->ip6po_pktinfo != NULL) { + opt->ip6po_pktinfo-> + ipi6_ifindex = + policy_ifp->if_index; + } + ro = &necp_route; goto skip_ipsec; } else { - if (necp_packet_can_rebind_to_ifnet(m, policy_ifp, (struct route *)&necp_route, AF_INET6)) { - /* Set scoped index to the tunnel interface, since it is compatible with the packet */ - /* This will only work for callers who pass IPV6_OUTARGS, but that covers all of the - clients we care about today */ - if (flags & IPV6_OUTARGS) { - ip6oa->ip6oa_boundif = policy_ifp->if_index; - ip6oa->ip6oa_flags |= IP6OAF_BOUND_IF; - } - if (opt != NULL && opt->ip6po_pktinfo != NULL) { - opt->ip6po_pktinfo->ipi6_ifindex = policy_ifp->if_index; - } - ro = &necp_route; - goto skip_ipsec; - } else { - error = ENETUNREACH; - goto bad; - } + error = ENETUNREACH; + goto freehdrs; } - break; } - default: - break; + break; + } + default: + break; } } #endif /* NECP */ @@ -715,6 +795,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); + /* It is no longer safe to free the pointers in exthdrs. 
*/ + exthdrs.merged = TRUE; + #undef MAKE_CHAIN #if IPSEC @@ -761,7 +844,10 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, (mbuf_t *)&m, ippo); if (result == EJUSTRETURN) { ipf_unref(); - goto done; + if (m != NULL) + m_freem(m); + m = NULL; + goto evaluateloop; } if (result != 0) { ipf_unref(); @@ -898,7 +984,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, ro = &ip6route; bzero((caddr_t)ro, sizeof (*ro)); } - VERIFY(ro_pmtu == NULL); /* must not get here if dummynet */ ro_pmtu = ro; if (opt != NULL && opt->ip6po_rthdr) ro = &opt->ip6po_route; @@ -958,7 +1043,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, #if IPSEC if (ip6obf.needipsec && needipsectun) { #if CONFIG_DTRACE - struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL; + struct ifnet *trace_ifp = (ifpp_save != NULL) ? (*ifpp_save) : NULL; #endif /* CONFIG_DTRACE */ /* * All the extension headers will become inaccessible @@ -983,15 +1068,16 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, error = ipsec6_output_tunnel(&ipsec_state, sp, flags); /* tunneled in IPv4? packet is gone */ - if (ipsec_state.tunneled == 4) - goto done; + if (ipsec_state.tunneled == 4) { + m = NULL; + goto evaluateloop; + } m = ipsec_state.m; ipsec_saved_route = ro; ro = (struct route_in6 *)&ipsec_state.ro; dst = SIN6(ipsec_state.dst); if (error) { /* mbuf is already reclaimed in ipsec6_output_tunnel. */ - m0 = m = NULL; m = NULL; switch (error) { case EHOSTUNREACH: @@ -1028,10 +1114,12 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, } #endif /* IPSEC */ - /* for safety */ + /* + * ifp should only be filled in for dummy net packets which will jump + * to check_with_pf label. 
+ */ if (ifp != NULL) { - ifnet_release(ifp); - ifp = NULL; + VERIFY(ip6obf.route_selected); } /* adjust pointer */ @@ -1049,24 +1137,32 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, dst_sa.sin6_addr = ip6->ip6_dst; /* + * Only call in6_selectroute() on first iteration to avoid taking + * multiple references on ifp and rt. + * * in6_selectroute() might return an ifp with its reference held * even in the error case, so make sure to release its reference. * ip6oa may be NULL if IPV6_OUTARGS isn't set. */ - if ((error = in6_selectroute(ip6obf.select_srcif ? &src_sa : NULL, - &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa)) != 0) { - switch (error) { - case EHOSTUNREACH: - ip6stat.ip6s_noroute++; - break; - case EADDRNOTAVAIL: - default: - break; /* XXX statistics? */ + if (!ip6obf.route_selected) { + error = in6_selectroute( ip6obf.select_srcif ? &src_sa : NULL, + &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa); + + if (error != 0) { + switch (error) { + case EHOSTUNREACH: + ip6stat.ip6s_noroute++; + break; + case EADDRNOTAVAIL: + default: + break; /* XXX statistics? */ + } + if (ifp != NULL) + in6_ifstat_inc(ifp, ifs6_out_discard); + /* ifp (if non-NULL) will be released at the end */ + goto bad; } - if (ifp != NULL) - in6_ifstat_inc(ifp, ifs6_out_discard); - /* ifp (if non-NULL) will be released at the end */ - goto bad; + ip6obf.route_selected = TRUE; } if (rt == NULL) { /* @@ -1076,6 +1172,14 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, *dst = dst_sa; /* XXX */ } +#if NECP + /* Catch-all to check if the interface is allowed */ + if (!necp_packet_is_allowed_over_interface(m, ifp)) { + error = EHOSTUNREACH; + goto bad; + } +#endif /* NECP */ + /* * then rt (for unicast) and ifp must be non-NULL valid values. 
*/ @@ -1084,9 +1188,11 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, } if (rt != NULL) { RT_LOCK(rt); - ia = (struct in6_ifaddr *)(rt->rt_ifa); - if (ia != NULL) - IFA_ADDREF(&ia->ia_ifa); + if (ia == NULL) { + ia = (struct in6_ifaddr *)(rt->rt_ifa); + if (ia != NULL) + IFA_ADDREF(&ia->ia_ifa); + } rt->rt_use++; RT_UNLOCK(rt); } @@ -1229,8 +1335,11 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, */ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { - m_freem(m); - goto done; + /* remove m from the packetchain and continue looping */ + if (m != NULL) + m_freem(m); + m = NULL; + goto evaluateloop; } } @@ -1238,10 +1347,8 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, * Fill the outgoing inteface to tell the upper layer * to increment per-interface statistics. */ - if (ifpp != NULL) { + if (ifpp != NULL && *ifpp == NULL) { ifnet_reference(ifp); /* for caller */ - if (*ifpp != NULL) - ifnet_release(*ifpp); *ifpp = ifp; } @@ -1289,13 +1396,15 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, u_short port = 0; m->m_pkthdr.rcvif = NULL; /* XXX */ /* If ipfw says divert, we have to just drop packet */ - if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) { - m_freem(m); - goto done; - } - if (m == NULL) { - error = EACCES; - goto done; + if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m) || m == NULL) { + if (m != NULL) { + m_freem(m); + m = NULL; + goto evaluateloop; + } else { + error = EACCES; + goto bad; + } } } #endif /* IPFW2 */ @@ -1324,9 +1433,13 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof (struct ip6_hbh), &dummy, &oplen) < 0) { - /* m was already freed at this point */ + /* + * m was already freed at this point. Set to NULL so it + * is not re-freed at end of ip6_output_list. 
+ */ + m = NULL; error = EINVAL; /* better error? */ - goto done; + goto bad; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; @@ -1338,6 +1451,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, #if PF if (PF_IS_ENABLED) { #if DUMMYNET + /* * TODO: Need to save opt->ip6po_flags for reinjection * rdar://10434993 @@ -1362,58 +1476,206 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, #endif /* !DUMMYNET */ if (error != 0 || m == NULL) { - /* - * Note that if we ever handle packet chain, we will - * have to restore the linkage from the previous - * packet to the next like in ip_outout_list() - */ if (m != NULL) { panic("%s: unexpected packet %p\n", __func__, m); /* NOTREACHED */ } - /* Already freed by callee */ - goto done; + /* m was already freed by callee and is now NULL. */ + goto evaluateloop; } ip6 = mtod(m, struct ip6_hdr *); } #endif /* PF */ +#ifdef IPSEC + /* clean ipsec history before fragmentation */ + ipsec_delaux(m); +#endif /* IPSEC */ + /* - * Send the packet to the outgoing interface. - * If necessary, do IPv6 fragmentation before sending. - * - * the logic here is rather complex: - * 1: normal case (dontfrag == 0, alwaysfrag == 0) - * 1-a: send as is if tlen <= path mtu - * 1-b: fragment if tlen > path mtu - * - * 2: if user asks us not to fragment (dontfrag == 1) - * 2-a: send as is if tlen <= interface mtu - * 2-b: error if tlen > interface mtu - * - * 3: if we always need to attach fragment header (alwaysfrag == 1) - * always fragment - * - * 4: if dontfrag == 1 && alwaysfrag == 1 - * error, as we cannot handle this conflicting request + * Determine whether fragmentation is necessary. If so, m is passed + * back as a chain of packets and original mbuf is freed. Otherwise, m + * is unchanged. 
*/ - tlen = m->m_pkthdr.len; + error = ip6_fragment_packet(&m, opt, + &exthdrs, ifp, mtu, alwaysfrag, unfragpartlen, ro_pmtu, nxt0, + optlen); - if (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG)) - ip6obf.dontfrag = TRUE; - else - ip6obf.dontfrag = FALSE; - if (ip6obf.dontfrag && alwaysfrag) { /* case 4 */ - /* conflicting request - can't transmit */ - error = EMSGSIZE; + if (error) goto bad; + +/* + * The evaluateloop label is where we decide whether to continue looping over + * packets or call into nd code to send. + */ +evaluateloop: + + /* + * m may be NULL when we jump to the evaluateloop label from PF or + * other code that can drop packets. + */ + if (m != NULL) { + /* + * If we already have a chain to send, tack m onto the end. + * Otherwise make m the start and end of the to-be-sent chain. + */ + if (sendchain != NULL) { + sendchain_last->m_nextpkt = m; + } else { + sendchain = m; + } + + /* Fragmentation may mean m is a chain. Find the last packet. */ + while (m->m_nextpkt) + m = m->m_nextpkt; + sendchain_last = m; + pktcnt++; + } + + /* Fill in next m from inputchain as appropriate. */ + m = inputchain; + if (m != NULL) { + /* Isolate m from rest of input chain. */ + inputchain = m->m_nextpkt; + m->m_nextpkt = NULL; + + /* + * Clear exthdrs and ipsec_state so stale contents are not + * reused. Note this also clears the exthdrs.merged flag. + */ + bzero(&exthdrs, sizeof(exthdrs)); + bzero(&ipsec_state, sizeof(ipsec_state)); + + /* Continue looping. */ + goto loopit; + } + + /* + * If we get here, there's no more mbufs in inputchain, so send the + * sendchain if there is one. + */ + if (pktcnt > 0) { + error = nd6_output_list(ifp, origifp, sendchain, dst, + ro->ro_rt, adv); + /* + * Fall through to done label even in error case because + * nd6_output_list frees packetchain in both success and + * failure cases. 
+ */ + } + +done: + if (ifpp_save != NULL && *ifpp_save != NULL) { + ifnet_release(*ifpp_save); + *ifpp_save = NULL; + } + ROUTE_RELEASE(&ip6route); +#if IPSEC + ROUTE_RELEASE(&ipsec_state.ro); + if (sp != NULL) + key_freesp(sp, KEY_SADB_UNLOCKED); +#endif /* IPSEC */ +#if NECP + ROUTE_RELEASE(&necp_route); +#endif /* NECP */ +#if DUMMYNET + ROUTE_RELEASE(&saved_route); + ROUTE_RELEASE(&saved_ro_pmtu); +#endif /* DUMMYNET */ + + if (ia != NULL) + IFA_REMREF(&ia->ia_ifa); + if (src_ia != NULL) + IFA_REMREF(&src_ia->ia_ifa); + if (ifp != NULL) + ifnet_release(ifp); + if (origifp != NULL) + ifnet_release(origifp); + if (ip6_output_measure) { + net_perf_measure_time(&net_perf, &start_tv, packets_processed); + net_perf_histogram(&net_perf, packets_processed); + } + return (error); + +freehdrs: + if (exthdrs.ip6e_hbh != NULL) { + if (exthdrs.merged) + panic("Double free of ip6e_hbh"); + m_freem(exthdrs.ip6e_hbh); + } + if (exthdrs.ip6e_dest1 != NULL) { + if (exthdrs.merged) + panic("Double free of ip6e_dest1"); + m_freem(exthdrs.ip6e_dest1); + } + if (exthdrs.ip6e_rthdr != NULL) { + if (exthdrs.merged) + panic("Double free of ip6e_rthdr"); + m_freem(exthdrs.ip6e_rthdr); + } + if (exthdrs.ip6e_dest2 != NULL) { + if (exthdrs.merged) + panic("Double free of ip6e_dest2"); + m_freem(exthdrs.ip6e_dest2); + } + /* FALLTHRU */ +bad: + if (inputchain != NULL) + m_freem_list(inputchain); + if (sendchain != NULL) + m_freem_list(sendchain); + if (m != NULL) + m_freem(m); + + goto done; + +#undef ipf_pktopts +#undef exthdrs +#undef ip6route +#undef ipsec_state +#undef saved_route +#undef saved_ro_pmtu +#undef args +} + +/* ip6_fragment_packet + * + * The fragmentation logic is rather complex: + * 1: normal case (dontfrag == 0, alwaysfrag == 0) + * 1-a: send as is if tlen <= path mtu + * 1-b: fragment if tlen > path mtu + * + * 2: if user asks us not to fragment (dontfrag == 1) + * 2-a: send as is if tlen <= interface mtu + * 2-b: error if tlen > interface mtu + * + * 3: if we always 
need to attach fragment header (alwaysfrag == 1) + * always fragment + * + * 4: if dontfrag == 1 && alwaysfrag == 1 + * error, as we cannot handle this conflicting request + */ + +static int +ip6_fragment_packet(struct mbuf **mptr, struct ip6_pktopts *opt, + struct ip6_exthdrs *exthdrsp, struct ifnet *ifp, uint32_t mtu, + boolean_t alwaysfrag, uint32_t unfragpartlen, struct route_in6 *ro_pmtu, + int nxt0, uint32_t optlen) +{ + VERIFY(NULL != mptr); + struct mbuf *m = *mptr; + int error = 0; + size_t tlen = m->m_pkthdr.len; + boolean_t dontfrag = (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG)); + + if (dontfrag && alwaysfrag) { /* case 4 */ + /* conflicting request - can't transmit */ + return EMSGSIZE; } - lck_rw_lock_shared(nd_if_rwlock); /* Access without acquiring nd_ifinfo lock for performance */ - if (ip6obf.dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */ - lck_rw_done(nd_if_rwlock); + if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */ /* * Even if the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the @@ -1429,51 +1691,71 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, bzero(&ip6cp, sizeof (ip6cp)); ip6cp.ip6c_cmdarg = (void *)&mtu32; pfctlinput2(PRC_MSGSIZE, SA(&ro_pmtu->ro_dst), (void *)&ip6cp); - error = EMSGSIZE; - goto bad; - } else { - lck_rw_done(nd_if_rwlock); + return EMSGSIZE; } /* * transmit packet without fragmentation */ - if (ip6obf.dontfrag || (!alwaysfrag && /* case 1-a and 2-a */ + if (dontfrag || (!alwaysfrag && /* case 1-a and 2-a */ (tlen <= mtu || TSO_IPV6_OK(ifp, m) || (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) { -#ifdef IPSEC - /* clean ipsec history once it goes out of the node */ - ipsec_delaux(m); -#endif /* IPSEC */ - + /* + * mppn not updated in this case because no new chain is formed + * and inserted + */ ip6_output_checksum(ifp, mtu, m, nxt0, tlen, optlen); - - if (ro->ro_rt) - RT_LOCK_ASSERT_NOTHELD(ro->ro_rt); - error = 
nd6_output(ifp, origifp, m, dst, ro->ro_rt, adv); - goto done; + } else { + /* + * time to fragment - cases 1-b and 3 are handled inside + * ip6_do_fragmentation(). + * mppn is passed down to be updated to point at fragment chain. + */ + error = ip6_do_fragmentation(mptr, optlen, ifp, + unfragpartlen, mtod(m, struct ip6_hdr *), exthdrsp, mtu, nxt0); } + return error; +} + +/* + * ip6_do_fragmentation() is called by ip6_fragment_packet() after determining + * the packet needs to be fragmented. on success, morig is freed and a chain + * of fragments is linked into the packet chain where morig existed. Otherwise, + * an errno is returned. + */ +static int +ip6_do_fragmentation(struct mbuf **mptr, uint32_t optlen, struct ifnet *ifp, + uint32_t unfragpartlen, struct ip6_hdr *ip6, struct ip6_exthdrs *exthdrsp, + uint32_t mtu, int nxt0) +{ + VERIFY(NULL != mptr); + int error = 0; + + struct mbuf *morig = *mptr; + struct mbuf *first_mbufp = NULL; + struct mbuf *last_mbufp = NULL; + + size_t tlen = morig->m_pkthdr.len; + /* * try to fragment the packet. 
case 1-b and 3 */ - if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) { + if ((morig->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) { /* TSO and fragment aren't compatible */ - error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); - goto bad; + return EMSGSIZE; } else if (mtu < IPV6_MMTU) { /* path MTU cannot be less than IPV6_MMTU */ - error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); - goto bad; + return EMSGSIZE; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ - error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); - goto bad; + return EMSGSIZE; } else { - struct mbuf **mnext, *m_frgpart; + size_t hlen, len, off; + struct mbuf **mnext = NULL; struct ip6_frag *ip6f; u_int32_t id = htonl(ip6_randomid()); u_char nextproto; @@ -1489,84 +1771,95 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, len = (mtu - hlen - sizeof (struct ip6_frag)) & ~7; if (len < 8) { - error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); - goto bad; + return EMSGSIZE; } - mnext = &m->m_nextpkt; - /* * Change the next header field of the last header in the * unfragmentable part. 
*/ - if (exthdrs.ip6e_rthdr != NULL) { - nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); - *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; - } else if (exthdrs.ip6e_dest1 != NULL) { - nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); - *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; - } else if (exthdrs.ip6e_hbh != NULL) { - nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); - *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; + if (exthdrsp->ip6e_rthdr != NULL) { + nextproto = *mtod(exthdrsp->ip6e_rthdr, u_char *); + *mtod(exthdrsp->ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; + } else if (exthdrsp->ip6e_dest1 != NULL) { + nextproto = *mtod(exthdrsp->ip6e_dest1, u_char *); + *mtod(exthdrsp->ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; + } else if (exthdrsp->ip6e_hbh != NULL) { + nextproto = *mtod(exthdrsp->ip6e_hbh, u_char *); + *mtod(exthdrsp->ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } - if (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) - in6_delayed_cksum_offset(m, 0, optlen, nxt0); + if (morig->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) + in6_delayed_cksum_offset(morig, 0, optlen, nxt0); /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto * chain. 
*/ - m0 = m; for (off = hlen; off < tlen; off += len) { - struct ip6_hdr *mhip6; + struct ip6_hdr *new_mhip6; + struct mbuf *new_m; + struct mbuf *m_frgpart; - MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */ - if (m == NULL) { + MGETHDR(new_m, M_DONTWAIT, MT_HEADER); /* MAC-OK */ + if (new_m == NULL) { error = ENOBUFS; ip6stat.ip6s_odropped++; - goto sendorfree; + break; + } + new_m->m_pkthdr.rcvif = NULL; + new_m->m_flags = morig->m_flags & M_COPYFLAGS; + + if (first_mbufp != NULL) { + /* Every pass through loop but first */ + *mnext = new_m; + last_mbufp = new_m; + } else { + /* This is the first element of the fragment chain */ + first_mbufp = new_m; + last_mbufp = new_m; } - m->m_pkthdr.rcvif = NULL; - m->m_flags = m0->m_flags & M_COPYFLAGS; - *mnext = m; - mnext = &m->m_nextpkt; - m->m_data += max_linkhdr; - mhip6 = mtod(m, struct ip6_hdr *); - *mhip6 = *ip6; - m->m_len = sizeof (*mhip6); - error = ip6_insertfraghdr(m0, m, hlen, &ip6f); + mnext = &new_m->m_nextpkt; + + new_m->m_data += max_linkhdr; + new_mhip6 = mtod(new_m, struct ip6_hdr *); + *new_mhip6 = *ip6; + new_m->m_len = sizeof (*new_mhip6); + + error = ip6_insertfraghdr(morig, new_m, hlen, &ip6f); if (error) { ip6stat.ip6s_odropped++; - goto sendorfree; + break; } + ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + len >= tlen) len = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; - mhip6->ip6_plen = htons((u_short)(len + hlen + + new_mhip6->ip6_plen = htons((u_short)(len + hlen + sizeof (*ip6f) - sizeof (struct ip6_hdr))); - if ((m_frgpart = m_copy(m0, off, len)) == NULL) { + + if ((m_frgpart = m_copy(morig, off, len)) == NULL) { error = ENOBUFS; ip6stat.ip6s_odropped++; - goto sendorfree; + break; } - m_cat(m, m_frgpart); - m->m_pkthdr.len = len + hlen + sizeof (*ip6f); - m->m_pkthdr.rcvif = NULL; + m_cat(new_m, m_frgpart); + new_m->m_pkthdr.len = len + hlen + sizeof (*ip6f); + new_m->m_pkthdr.rcvif = NULL; - M_COPY_CLASSIFIER(m, m0); - M_COPY_PFTAG(m, m0); + 
M_COPY_CLASSIFIER(new_m, morig); + M_COPY_PFTAG(new_m, morig); #ifdef notyet #if CONFIG_MACF_NET - mac_create_fragment(m0, m); + mac_create_fragment(morig, new_m); #endif /* CONFIG_MACF_NET */ #endif /* notyet */ @@ -1577,81 +1870,23 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, in6_ifstat_inc(ifp, ifs6_out_fragcreat); } - in6_ifstat_inc(ifp, ifs6_out_fragok); - } - - /* - * Remove leading garbages. - */ -sendorfree: - m = m0->m_nextpkt; - m0->m_nextpkt = NULL; - m_freem(m0); - for (m0 = m; m != NULL; m = m0) { - m0 = m->m_nextpkt; - m->m_nextpkt = NULL; - if (error == 0) { -#if IPSEC - /* clean ipsec history once it goes out of the node */ - ipsec_delaux(m); -#endif /* IPSEC */ - error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, - adv); + if (error) { + /* free all the fragments created */ + if (first_mbufp != NULL) { + m_freem_list(first_mbufp); + first_mbufp = NULL; + } + last_mbufp = NULL; } else { - m_freem(m); + /* successful fragmenting */ + m_freem(morig); + *mptr = first_mbufp; + last_mbufp->m_nextpkt = NULL; + ip6stat.ip6s_fragmented++; + in6_ifstat_inc(ifp, ifs6_out_fragok); } } - - if (error == 0) - ip6stat.ip6s_fragmented++; - -done: - ROUTE_RELEASE(&ip6route); -#if IPSEC - ROUTE_RELEASE(&ipsec_state.ro); - if (sp != NULL) - key_freesp(sp, KEY_SADB_UNLOCKED); -#endif /* IPSEC */ -#if NECP - ROUTE_RELEASE(&necp_route); -#endif /* NECP */ -#if DUMMYNET - ROUTE_RELEASE(&saved_route); - ROUTE_RELEASE(&saved_ro_pmtu); -#endif /* DUMMYNET */ - - if (ia != NULL) - IFA_REMREF(&ia->ia_ifa); - if (src_ia != NULL) - IFA_REMREF(&src_ia->ia_ifa); - if (ifp != NULL) - ifnet_release(ifp); - if (origifp != NULL) - ifnet_release(origifp); - return (error); - -freehdrs: - if (exthdrs.ip6e_hbh != NULL) - m_freem(exthdrs.ip6e_hbh); - if (exthdrs.ip6e_dest1 != NULL) - m_freem(exthdrs.ip6e_dest1); - if (exthdrs.ip6e_rthdr != NULL) - m_freem(exthdrs.ip6e_rthdr); - if (exthdrs.ip6e_dest2 != NULL) - m_freem(exthdrs.ip6e_dest2); - /* FALLTHRU 
*/ -bad: - if (m != NULL) - m_freem(m); - goto done; - -#undef ipf_pktopts -#undef exthdrs -#undef ip6route -#undef ipsec_state -#undef saved_route -#undef saved_ro_pmtu -#undef args + return error; } static int @@ -2010,10 +2245,8 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, if (ifp == NULL) ifp = ro_pmtu->ro_rt->rt_ifp; - lck_rw_lock_shared(nd_if_rwlock); /* Access without acquiring nd_ifinfo lock for performance */ ifmtu = IN6_LINKMTU(ifp); - lck_rw_done(nd_if_rwlock); /* * Access rmx_mtu without holding the route entry lock, @@ -2050,10 +2283,8 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, } } else { if (ifp) { - lck_rw_lock_shared(nd_if_rwlock); /* Don't hold nd_ifinfo lock for performance */ mtu = IN6_LINKMTU(ifp); - lck_rw_done(nd_if_rwlock); } else { error = EHOSTUNREACH; /* XXX */ } @@ -2722,6 +2953,8 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) } break; } + } else if (level == IPPROTO_UDP) { + error = udp_ctloutput(so, sopt); } else { error = EINVAL; } @@ -3894,3 +4127,58 @@ ip6_optlen(struct in6pcb *in6p) return (len); #undef elen } + +static int +sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error, i; + + i = ip6_output_measure; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* impose bounds */ + if (i < 0 || i > 1) { + error = EINVAL; + goto done; + } + if (ip6_output_measure != i && i == 1) { + net_perf_initialize(&net_perf, ip6_output_measure_bins); + } + ip6_output_measure = i; +done: + return (error); +} + +static int +sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error; + uint64_t i; + + i = ip6_output_measure_bins; + error = sysctl_handle_quad(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* validate data */ + if (!net_perf_validate_bins(i)) { + error = EINVAL; + goto done; + } + ip6_output_measure_bins = i; +done: + 
return (error); +} + +static int +sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + if (req->oldptr == USER_ADDR_NULL) + req->oldlen = (size_t)sizeof (struct ipstat); + + return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen))); +} + diff --git a/bsd/netinet6/ip6_var.h b/bsd/netinet6/ip6_var.h index 30926d2dc..dc2b4399d 100644 --- a/bsd/netinet6/ip6_var.h +++ b/bsd/netinet6/ip6_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -182,6 +182,7 @@ struct ip6_exthdrs { struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; + boolean_t merged; }; /* @@ -253,6 +254,9 @@ struct ip6_pktopts { */ #endif /* BSD_KERNEL_PRIVATE */ +#define IP6S_SRCRULE_COUNT 16 +#include + struct ip6stat { u_quad_t ip6s_total; /* total packets received */ u_quad_t ip6s_tooshort; /* packet too short */ @@ -289,32 +293,35 @@ struct ip6stat { /* * statistics for improvement of the source address selection * algorithm: - * XXX: hardcoded 16 = # of ip6 multicast scope types + 1 */ /* number of times that address selection fails */ u_quad_t ip6s_sources_none; /* number of times that an address on the outgoing I/F is chosen */ - u_quad_t ip6s_sources_sameif[16]; + u_quad_t ip6s_sources_sameif[SCOPE6_ID_MAX]; /* number of times that an address on a non-outgoing I/F is chosen */ - u_quad_t ip6s_sources_otherif[16]; + u_quad_t ip6s_sources_otherif[SCOPE6_ID_MAX]; /* * number of times that an address that has the same scope * from the destination is chosen. */ - u_quad_t ip6s_sources_samescope[16]; + u_quad_t ip6s_sources_samescope[SCOPE6_ID_MAX]; /* * number of times that an address that has a different scope * from the destination is chosen. 
*/ - u_quad_t ip6s_sources_otherscope[16]; + u_quad_t ip6s_sources_otherscope[SCOPE6_ID_MAX]; /* number of times that a deprecated address is chosen */ - u_quad_t ip6s_sources_deprecated[16]; + u_quad_t ip6s_sources_deprecated[SCOPE6_ID_MAX]; u_quad_t ip6s_forward_cachehit; u_quad_t ip6s_forward_cachemiss; /* number of times that each rule of source selection is applied. */ - u_quad_t ip6s_sources_rule[16]; + u_quad_t ip6s_sources_rule[IP6S_SRCRULE_COUNT]; + + /* number of times we ignored address on expensive secondary interfaces */ + u_quad_t ip6s_sources_skip_expensive_secondary_if; + /* pkt dropped, no mbufs for control data */ u_quad_t ip6s_pktdropcntrl; diff --git a/bsd/netinet6/ipsec.c b/bsd/netinet6/ipsec.c index 90920d436..43259eea9 100644 --- a/bsd/netinet6/ipsec.c +++ b/bsd/netinet6/ipsec.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2014 Apple Inc. All rights reserved. + * Copyright (c) 2008-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -155,7 +155,7 @@ int ip4_esp_net_deflev = IPSEC_LEVEL_USE; int ip4_ah_trans_deflev = IPSEC_LEVEL_USE; int ip4_ah_net_deflev = IPSEC_LEVEL_USE; struct secpolicy ip4_def_policy; -int ip4_ipsec_ecn = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ +int ip4_ipsec_ecn = ECN_COMPATIBILITY; /* ECN ignore(-1)/compatibility(0)/normal(1) */ int ip4_esp_randpad = -1; int esp_udp_encap_port = 0; static int sysctl_def_policy SYSCTL_HANDLER_ARGS; @@ -214,7 +214,7 @@ int ip6_esp_net_deflev = IPSEC_LEVEL_USE; int ip6_ah_trans_deflev = IPSEC_LEVEL_USE; int ip6_ah_net_deflev = IPSEC_LEVEL_USE; struct secpolicy ip6_def_policy; -int ip6_ipsec_ecn = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ +int ip6_ipsec_ecn = ECN_COMPATIBILITY; /* ECN ignore(-1)/compatibility(0)/normal(1) */ int ip6_esp_randpad = -1; /* net.inet6.ipsec6 */ @@ -262,12 +262,14 @@ static void vshiftl(unsigned char *, int, int); static int ipsec_in_reject(struct secpolicy *, struct mbuf *); #if INET6 static int 
ipsec64_encapsulate(struct mbuf *, struct secasvar *); +static int ipsec6_update_routecache_and_output(struct ipsec_output_state *state, struct secasvar *sav); +static int ipsec46_encapsulate(struct ipsec_output_state *state, struct secasvar *sav); #endif static struct ipsec_tag *ipsec_addaux(struct mbuf *); static struct ipsec_tag *ipsec_findaux(struct mbuf *); static void ipsec_optaux(struct mbuf *, struct ipsec_tag *); int ipsec_send_natt_keepalive(struct secasvar *sav); -bool ipsec_fill_offload_frame(ifnet_t ifp, struct secasvar *sav, struct ipsec_offload_frame *frame, size_t frame_data_offset); +bool ipsec_fill_offload_frame(ifnet_t ifp, struct secasvar *sav, struct ifnet_keepalive_offload_frame *frame, size_t frame_data_offset); static int sysctl_def_policy SYSCTL_HANDLER_ARGS @@ -1505,10 +1507,9 @@ ipsec_deepcopy_policy(struct secpolicy *src) q = &newchain; for (p = src->req; p; p = p->next) { *q = (struct ipsecrequest *)_MALLOC(sizeof(struct ipsecrequest), - M_SECA, M_WAITOK); + M_SECA, M_WAITOK | M_ZERO); if (*q == NULL) goto fail; - bzero(*q, sizeof(**q)); (*q)->next = NULL; (*q)->saidx.proto = p->saidx.proto; @@ -2573,10 +2574,6 @@ ipsec64_encapsulate(m, sav) m->m_pkthdr.len += sizeof(struct ip); ip6i = mtod(m->m_next, struct ip6_hdr *); } - /* construct new IPv4 header. see RFC 2401 5.1.2.1 */ - /* ECN consideration. */ - /* XXX To be fixed later if needed */ - // ip_ecn_ingress(ip4_ipsec_ecn, &ip->ip_tos, &oip->ip_tos); bcopy(ip6, ip6i, sizeof(struct ip6_hdr)); ip = mtod(m, struct ip *); @@ -2593,6 +2590,11 @@ ipsec64_encapsulate(m, sav) ip->ip_off = 0; ip->ip_ttl = hlim; ip->ip_p = IPPROTO_IPV6; + + /* construct new IPv4 header. see RFC 2401 5.1.2.1 */ + /* ECN consideration. 
*/ + ip64_ecn_ingress(ip4_ipsec_ecn, &ip->ip_tos, &ip6->ip6_flow); + if (plen + sizeof(struct ip) < IP_MAXPACKET) ip->ip_len = htons(plen + sizeof(struct ip)); else { @@ -2671,6 +2673,281 @@ ipsec6_encapsulate_utun_esp_keepalive(m_ptr, sav) return 0; } + +int +ipsec6_update_routecache_and_output(state, sav) + struct ipsec_output_state *state; + struct secasvar *sav; +{ + struct sockaddr_in6* dst6; + struct route *ro6; + struct ip6_hdr *ip6; + errno_t error = 0; + + int plen; + struct ip6_out_args ip6oa; + struct route_in6 ro6_new; + struct flowadv *adv = NULL; + + if (!state->m) { + return EINVAL; + } + ip6 = mtod(state->m, struct ip6_hdr *); + + // grab sadb_mutex, before updating sah's route cache + lck_mtx_lock(sadb_mutex); + ro6 = &sav->sah->sa_route; + dst6 = (struct sockaddr_in6 *)(void *)&ro6->ro_dst; + if (ro6->ro_rt) { + RT_LOCK(ro6->ro_rt); + } + if (ROUTE_UNUSABLE(ro6) || + !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst)) { + if (ro6->ro_rt != NULL) + RT_UNLOCK(ro6->ro_rt); + ROUTE_RELEASE(ro6); + } + if (ro6->ro_rt == 0) { + bzero(dst6, sizeof(*dst6)); + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof(*dst6); + dst6->sin6_addr = ip6->ip6_dst; + rtalloc(ro6); + if (ro6->ro_rt) { + RT_LOCK(ro6->ro_rt); + } + } + if (ro6->ro_rt == 0) { + ip6stat.ip6s_noroute++; + IPSEC_STAT_INCREMENT(ipsec6stat.out_noroute); + error = EHOSTUNREACH; + // release sadb_mutex, after updating sah's route cache + lck_mtx_unlock(sadb_mutex); + return error; + } + + /* + * adjust state->dst if tunnel endpoint is offlink + * + * XXX: caching rt_gateway value in the state is + * not really good, since it may point elsewhere + * when the gateway gets modified to a larger + * sockaddr via rt_setgate(). This is currently + * addressed by SA_SIZE roundup in that routine. 
+ */ + if (ro6->ro_rt->rt_flags & RTF_GATEWAY) + dst6 = (struct sockaddr_in6 *)(void *)ro6->ro_rt->rt_gateway; + RT_UNLOCK(ro6->ro_rt); + ROUTE_RELEASE(&state->ro); + route_copyout(&state->ro, ro6, sizeof(state->ro)); + state->dst = (struct sockaddr *)dst6; + state->tunneled = 6; + // release sadb_mutex, after updating sah's route cache + lck_mtx_unlock(sadb_mutex); + + state->m = ipsec6_splithdr(state->m); + if (!state->m) { + IPSEC_STAT_INCREMENT(ipsec6stat.out_nomem); + error = ENOMEM; + return error; + } + + ip6 = mtod(state->m, struct ip6_hdr *); + switch (sav->sah->saidx.proto) { + case IPPROTO_ESP: +#if IPSEC_ESP + error = esp6_output(state->m, &ip6->ip6_nxt, state->m->m_next, sav); +#else + m_freem(state->m); + error = EINVAL; +#endif + break; + case IPPROTO_AH: + error = ah6_output(state->m, &ip6->ip6_nxt, state->m->m_next, sav); + break; + case IPPROTO_IPCOMP: + /* XXX code should be here */ + /*FALLTHROUGH*/ + default: + ipseclog((LOG_ERR, "%s: unknown ipsec protocol %d\n", __FUNCTION__, sav->sah->saidx.proto)); + m_freem(state->m); + IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); + error = EINVAL; + break; + } + if (error) { + // If error, packet already freed by above output routines + state->m = NULL; + return error; + } + + plen = state->m->m_pkthdr.len - sizeof(struct ip6_hdr); + if (plen > IPV6_MAXPACKET) { + ipseclog((LOG_ERR, "%s: IPsec with IPv6 jumbogram is not supported\n", __FUNCTION__)); + IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); + error = EINVAL;/*XXX*/ + return error; + } + ip6 = mtod(state->m, struct ip6_hdr *); + ip6->ip6_plen = htons(plen); + + ipsec_set_pkthdr_for_interface(sav->sah->ipsec_if, state->m, AF_INET6); + + /* Increment statistics */ + ifnet_stat_increment_out(sav->sah->ipsec_if, 1, mbuf_pkthdr_len(state->m), 0); + + /* Send to ip6_output */ + bzero(&ro6_new, sizeof(ro6_new)); + bzero(&ip6oa, sizeof(ip6oa)); + ip6oa.ip6oa_flowadv.code = 0; + ip6oa.ip6oa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR; + if 
(state->outgoing_if) { + ip6oa.ip6oa_boundif = state->outgoing_if; + ip6oa.ip6oa_flags |= IPOAF_BOUND_IF; + } + + adv = &ip6oa.ip6oa_flowadv; + (void) ip6_output(state->m, NULL, &ro6_new, IPV6_OUTARGS, NULL, NULL, &ip6oa); + + if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) { + error = ENOBUFS; + ifnet_disable_output(sav->sah->ipsec_if); + return error; + } + + return 0; +} + +int +ipsec46_encapsulate(state, sav) + struct secasvar *sav; + struct ipsec_output_state *state; +{ + struct mbuf *m; + struct ip6_hdr *ip6; + struct ip *oip; + struct ip *ip; + size_t hlen; + size_t plen; + + m = state->m; + if (!m) { + return EINVAL; + } + + /* can't tunnel between different AFs */ + if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family + != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family + || ((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET6) { + m_freem(m); + return EINVAL; + } +#if 0 + /* XXX if the dst is myself, perform nothing. */ + if (key_ismyaddr((struct sockaddr *)&sav->sah->saidx.dst)) { + m_freem(m); + return EINVAL; + } +#endif + + if (m->m_len < sizeof(*ip)) { + panic("ipsec46_encapsulate: assumption failed (first mbuf length)"); + return EINVAL; + } + + ip = mtod(m, struct ip *); +#ifdef _IP_VHL + hlen = _IP_VHL_HL(ip->ip_vhl) << 2; +#else + hlen = ip->ip_hl << 2; +#endif + + if (m->m_len != hlen) { + panic("ipsec46_encapsulate: assumption failed (first mbuf length)"); + return EINVAL; + } + + /* generate header checksum */ + ip->ip_sum = 0; +#ifdef _IP_VHL + ip->ip_sum = in_cksum(m, hlen); +#else + ip->ip_sum = in_cksum(m, hlen); +#endif + + plen = m->m_pkthdr.len; // save original IPv4 packet len, this will be ipv6 payload len + + /* + * First move the IPv4 header to the second mbuf in the chain + */ + if (M_LEADINGSPACE(m->m_next) < hlen) { + struct mbuf *n; + MGET(n, M_DONTWAIT, MT_DATA); + if (!n) { + m_freem(m); + return ENOBUFS; + } + n->m_len = hlen; + n->m_next = m->m_next; + m->m_next = n; + 
m->m_pkthdr.len += sizeof(struct ip6_hdr); + oip = mtod(n, struct ip *); + } else { + m->m_next->m_len += hlen; + m->m_next->m_data -= hlen; + m->m_pkthdr.len += sizeof(struct ip6_hdr); + oip = mtod(m->m_next, struct ip *); + } + ip = mtod(m, struct ip *); + ovbcopy((caddr_t)ip, (caddr_t)oip, hlen); + + /* + * Grow the first mbuf to accomodate the new IPv6 header. + */ + if (M_LEADINGSPACE(m) < sizeof(struct ip6_hdr) - hlen) { + struct mbuf *n; + MGETHDR(n, M_DONTWAIT, MT_HEADER); + if (!n) { + m_freem(m); + return ENOBUFS; + } + M_COPY_PKTHDR(n, m); + MH_ALIGN(n, sizeof(struct ip6_hdr)); + n->m_len = sizeof(struct ip6_hdr); + n->m_next = m->m_next; + m->m_next = NULL; + m_freem(m); + state->m = n; + m = state->m; + } else { + m->m_len += (sizeof(struct ip6_hdr) - hlen); + m->m_data -= (sizeof(struct ip6_hdr) - hlen); + } + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + + /* construct new IPv6 header. see RFC 2401 5.1.2.2 */ + /* ECN consideration. */ + ip46_ecn_ingress(ip6_ipsec_ecn, &ip6->ip6_flow, &ip->ip_tos); + if (plen < IPV6_MAXPACKET - sizeof(struct ip6_hdr)) + ip6->ip6_plen = htons(plen); + else { + /* ip6->ip6_plen will be updated in ip6_output() */ + } + + ip6->ip6_nxt = IPPROTO_IPV4; + ip6->ip6_hlim = IPV6_DEFHLIM; + + bcopy(&((struct sockaddr_in6 *)&sav->sah->saidx.src)->sin6_addr, + &ip6->ip6_src, sizeof(ip6->ip6_src)); + bcopy(&((struct sockaddr_in6 *)&sav->sah->saidx.dst)->sin6_addr, + &ip6->ip6_dst, sizeof(ip6->ip6_dst)); + + return 0; +} + #endif /*INET6*/ /* @@ -3052,73 +3329,83 @@ ipsec4_output_internal(struct ipsec_output_state *state, struct secasvar *sav) /* * build IPsec tunnel. 
*/ - /* XXX should be processed with other familiy */ - if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET) { - ipseclog((LOG_ERR, "ipsec4_output: " - "family mismatched between inner and outer spi=%u\n", - (u_int32_t)ntohl(sav->spi))); - error = EAFNOSUPPORT; - goto bad; - } - state->m = ipsec4_splithdr(state->m); if (!state->m) { error = ENOMEM; goto bad; } - error = ipsec4_encapsulate(state->m, sav); - if (error) { - state->m = NULL; - goto bad; - } - ip = mtod(state->m, struct ip *); - // grab sadb_mutex, before updating sah's route cache - lck_mtx_lock(sadb_mutex); - ro4= &sav->sah->sa_route; - dst4 = (struct sockaddr_in *)(void *)&ro4->ro_dst; - if (ro4->ro_rt != NULL) { - RT_LOCK(ro4->ro_rt); - } - if (ROUTE_UNUSABLE(ro4) || - dst4->sin_addr.s_addr != ip->ip_dst.s_addr) { - if (ro4->ro_rt != NULL) - RT_UNLOCK(ro4->ro_rt); - ROUTE_RELEASE(ro4); - } - if (ro4->ro_rt == 0) { - dst4->sin_family = AF_INET; - dst4->sin_len = sizeof(*dst4); - dst4->sin_addr = ip->ip_dst; - rtalloc(ro4); - if (ro4->ro_rt == 0) { - OSAddAtomic(1, &ipstat.ips_noroute); - error = EHOSTUNREACH; - // release sadb_mutex, after updating sah's route cache - lck_mtx_unlock(sadb_mutex); + if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family == AF_INET6) { + error = ipsec46_encapsulate(state, sav); + if (error) { + // packet already freed by encapsulation error handling + state->m = NULL; + return error; + } + + error = ipsec6_update_routecache_and_output(state, sav); + return error; + + } else if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family == AF_INET) { + error = ipsec4_encapsulate(state->m, sav); + if (error) { + state->m = NULL; goto bad; } - RT_LOCK(ro4->ro_rt); - } + ip = mtod(state->m, struct ip *); - /* - * adjust state->dst if tunnel endpoint is offlink - * - * XXX: caching rt_gateway value in the state is - * not really good, since it may point elsewhere - * when the gateway gets modified to a larger - * sockaddr via rt_setgate(). 
This is currently - * addressed by SA_SIZE roundup in that routine. - */ - if (ro4->ro_rt->rt_flags & RTF_GATEWAY) - dst4 = (struct sockaddr_in *)(void *)ro4->ro_rt->rt_gateway; - RT_UNLOCK(ro4->ro_rt); - ROUTE_RELEASE(&state->ro); - route_copyout(&state->ro, ro4, sizeof(state->ro)); - state->dst = (struct sockaddr *)dst4; - state->tunneled = 4; - // release sadb_mutex, after updating sah's route cache - lck_mtx_unlock(sadb_mutex); + // grab sadb_mutex, before updating sah's route cache + lck_mtx_lock(sadb_mutex); + ro4= &sav->sah->sa_route; + dst4 = (struct sockaddr_in *)(void *)&ro4->ro_dst; + if (ro4->ro_rt != NULL) { + RT_LOCK(ro4->ro_rt); + } + if (ROUTE_UNUSABLE(ro4) || + dst4->sin_addr.s_addr != ip->ip_dst.s_addr) { + if (ro4->ro_rt != NULL) + RT_UNLOCK(ro4->ro_rt); + ROUTE_RELEASE(ro4); + } + if (ro4->ro_rt == 0) { + dst4->sin_family = AF_INET; + dst4->sin_len = sizeof(*dst4); + dst4->sin_addr = ip->ip_dst; + rtalloc(ro4); + if (ro4->ro_rt == 0) { + OSAddAtomic(1, &ipstat.ips_noroute); + error = EHOSTUNREACH; + // release sadb_mutex, after updating sah's route cache + lck_mtx_unlock(sadb_mutex); + goto bad; + } + RT_LOCK(ro4->ro_rt); + } + + /* + * adjust state->dst if tunnel endpoint is offlink + * + * XXX: caching rt_gateway value in the state is + * not really good, since it may point elsewhere + * when the gateway gets modified to a larger + * sockaddr via rt_setgate(). This is currently + * addressed by SA_SIZE roundup in that routine. 
+ */ + if (ro4->ro_rt->rt_flags & RTF_GATEWAY) + dst4 = (struct sockaddr_in *)(void *)ro4->ro_rt->rt_gateway; + RT_UNLOCK(ro4->ro_rt); + ROUTE_RELEASE(&state->ro); + route_copyout(&state->ro, ro4, sizeof(state->ro)); + state->dst = (struct sockaddr *)dst4; + state->tunneled = 4; + // release sadb_mutex, after updating sah's route cache + lck_mtx_unlock(sadb_mutex); + } else { + ipseclog((LOG_ERR, "%s: family mismatched between inner and outer spi=%u\n", + __FUNCTION__, (u_int32_t)ntohl(sav->spi))); + error = EAFNOSUPPORT; + goto bad; + } } state->m = ipsec4_splithdr(state->m); @@ -4201,11 +4488,12 @@ ipsec4_tunnel_validate(m, off, nxt0, sav, ifamily) #if INET6 /* validate inbound IPsec tunnel packet. */ int -ipsec6_tunnel_validate(m, off, nxt0, sav) +ipsec6_tunnel_validate(m, off, nxt0, sav, ifamily) struct mbuf *m; /* no pullup permitted, m->m_len >= ip */ int off; u_int nxt0; struct secasvar *sav; + sa_family_t *ifamily; { u_int8_t nxt = nxt0 & 0xff; struct sockaddr_in6 *sin6; @@ -4219,8 +4507,9 @@ ipsec6_tunnel_validate(m, off, nxt0, sav) if (m->m_len < sizeof(struct ip6_hdr)) panic("too short mbuf on ipsec6_tunnel_validate"); #endif - if (nxt != IPPROTO_IPV6) + if (nxt != IPPROTO_IPV4 && nxt != IPPROTO_IPV6) return 0; + if (m->m_pkthdr.len < off + sizeof(struct ip6_hdr)) return 0; /* do not decapsulate if the SA is for transport mode only */ @@ -4235,8 +4524,16 @@ ipsec6_tunnel_validate(m, off, nxt0, sav) if (!IN6_ARE_ADDR_EQUAL(&oip6->ip6_dst, &sin6->sin6_addr)) return 0; - if (sav->utun_in_fn) { - // the utun SAs don't have a policy (yet). + if (sav->utun_in_fn || + sav->sah->ipsec_if != NULL) { + // the ipsec/utun interface SAs don't have a policies. 
+ if (nxt == IPPROTO_IPV4) { + *ifamily = AF_INET; + } else if (nxt == IPPROTO_IPV6) { + *ifamily = AF_INET6; + } else { + return 0; + } return 1; } @@ -4246,7 +4543,7 @@ ipsec6_tunnel_validate(m, off, nxt0, sav) bzero(&isrc, sizeof(isrc)); bzero(&idst, sizeof(idst)); osrc.sin6_family = odst.sin6_family = isrc.sin6_family = - idst.sin6_family = AF_INET6; + idst.sin6_family = *ifamily = AF_INET6; osrc.sin6_len = odst.sin6_len = isrc.sin6_len = idst.sin6_len = sizeof(struct sockaddr_in6); osrc.sin6_addr = oip6->ip6_src; @@ -4640,7 +4937,7 @@ ipsec_send_natt_keepalive( __private_extern__ bool ipsec_fill_offload_frame(ifnet_t ifp, struct secasvar *sav, - struct ipsec_offload_frame *frame, + struct ifnet_keepalive_offload_frame *frame, size_t frame_data_offset) { u_int8_t *data = NULL; @@ -4656,12 +4953,13 @@ ipsec_fill_offload_frame(ifnet_t ifp, sav->flags & SADB_X_EXT_ESP_KEEPALIVE || (esp_udp_encap_port & 0xFFFF) == 0 || sav->remote_ike_port == 0 || - (natt_keepalive_interval == 0 && sav->natt_interval == 0)) { + (natt_keepalive_interval == 0 && sav->natt_interval == 0 && sav->natt_offload_interval == 0)) { /* SA is not eligible for keepalive offload on this interface */ return (FALSE); } - if (frame_data_offset + sizeof(struct udpiphdr) + 1 > IPSEC_OFFLOAD_FRAME_DATA_SIZE) { + if (frame_data_offset + sizeof(struct udpiphdr) + 1 > + IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) { /* Not enough room in this data frame */ return (FALSE); } @@ -4671,7 +4969,10 @@ ipsec_fill_offload_frame(ifnet_t ifp, uh = (__typeof__(uh))(void *)(data + frame_data_offset + sizeof(*ip)); frame->length = frame_data_offset + sizeof(struct udpiphdr) + 1; - bzero(data, IPSEC_OFFLOAD_FRAME_DATA_SIZE); + frame->type = IFNET_KEEPALIVE_OFFLOAD_FRAME_IPSEC; + frame->ether_type = IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV4; + + bzero(data, IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(struct ip) >> 2; @@ -4706,7 +5007,9 @@ ipsec_fill_offload_frame(ifnet_t ifp, 
uh->uh_sum = 0; *(u_int8_t*)(data + frame_data_offset + sizeof(*ip) + sizeof(*uh)) = 0xFF; - if (sav->natt_interval != 0) { + if (sav->natt_offload_interval != 0) { + frame->interval = sav->natt_offload_interval; + } else if (sav->natt_interval != 0) { frame->interval = sav->natt_interval; } else { frame->interval = natt_keepalive_interval; diff --git a/bsd/netinet6/ipsec.h b/bsd/netinet6/ipsec.h index 9c452d26f..7a422a690 100644 --- a/bsd/netinet6/ipsec.h +++ b/bsd/netinet6/ipsec.h @@ -52,7 +52,7 @@ extern lck_mtx_t *sadb_stat_mutex; #define IPSEC_STAT_INCREMENT(x) \ - {lck_mtx_lock(sadb_stat_mutex); (x)++; lck_mtx_unlock(sadb_stat_mutex);} + OSIncrementAtomic64((SInt64 *)&x) struct secpolicyaddrrange { struct sockaddr_storage start; /* Start (low values) of address range */ @@ -204,32 +204,32 @@ struct secspacq { /* statistics for ipsec processing */ struct ipsecstat { - u_quad_t in_success; /* succeeded inbound process */ - u_quad_t in_polvio; + u_quad_t in_success __attribute__ ((aligned (8))); /* succeeded inbound process */ + u_quad_t in_polvio __attribute__ ((aligned (8))); /* security policy violation for inbound process */ - u_quad_t in_nosa; /* inbound SA is unavailable */ - u_quad_t in_inval; /* inbound processing failed due to EINVAL */ - u_quad_t in_nomem; /* inbound processing failed due to ENOBUFS */ - u_quad_t in_badspi; /* failed getting a SPI */ - u_quad_t in_ahreplay; /* AH replay check failed */ - u_quad_t in_espreplay; /* ESP replay check failed */ - u_quad_t in_ahauthsucc; /* AH authentication success */ - u_quad_t in_ahauthfail; /* AH authentication failure */ - u_quad_t in_espauthsucc; /* ESP authentication success */ - u_quad_t in_espauthfail; /* ESP authentication failure */ - u_quad_t in_esphist[256]; - u_quad_t in_ahhist[256]; - u_quad_t in_comphist[256]; - u_quad_t out_success; /* succeeded outbound process */ - u_quad_t out_polvio; + u_quad_t in_nosa __attribute__ ((aligned (8))); /* inbound SA is unavailable */ + u_quad_t in_inval 
__attribute__ ((aligned (8))); /* inbound processing failed due to EINVAL */ + u_quad_t in_nomem __attribute__ ((aligned (8))); /* inbound processing failed due to ENOBUFS */ + u_quad_t in_badspi __attribute__ ((aligned (8))); /* failed getting a SPI */ + u_quad_t in_ahreplay __attribute__ ((aligned (8))); /* AH replay check failed */ + u_quad_t in_espreplay __attribute__ ((aligned (8))); /* ESP replay check failed */ + u_quad_t in_ahauthsucc __attribute__ ((aligned (8))); /* AH authentication success */ + u_quad_t in_ahauthfail __attribute__ ((aligned (8))); /* AH authentication failure */ + u_quad_t in_espauthsucc __attribute__ ((aligned (8))); /* ESP authentication success */ + u_quad_t in_espauthfail __attribute__ ((aligned (8))); /* ESP authentication failure */ + u_quad_t in_esphist[256] __attribute__ ((aligned (8))); + u_quad_t in_ahhist[256] __attribute__ ((aligned (8))); + u_quad_t in_comphist[256] __attribute__ ((aligned (8))); + u_quad_t out_success __attribute__ ((aligned (8))); /* succeeded outbound process */ + u_quad_t out_polvio __attribute__ ((aligned (8))); /* security policy violation for outbound process */ - u_quad_t out_nosa; /* outbound SA is unavailable */ - u_quad_t out_inval; /* outbound process failed due to EINVAL */ - u_quad_t out_nomem; /* inbound processing failed due to ENOBUFS */ - u_quad_t out_noroute; /* there is no route */ - u_quad_t out_esphist[256]; - u_quad_t out_ahhist[256]; - u_quad_t out_comphist[256]; + u_quad_t out_nosa __attribute__ ((aligned (8))); /* outbound SA is unavailable */ + u_quad_t out_inval __attribute__ ((aligned (8))); /* outbound process failed due to EINVAL */ + u_quad_t out_nomem __attribute__ ((aligned (8))); /* inbound processing failed due to ENOBUFS */ + u_quad_t out_noroute __attribute__ ((aligned (8))); /* there is no route */ + u_quad_t out_esphist[256] __attribute__ ((aligned (8))); + u_quad_t out_ahhist[256] __attribute__ ((aligned (8))); + u_quad_t out_comphist[256] __attribute__ ((aligned 
(8))); }; #ifdef BSD_KERNEL_PRIVATE diff --git a/bsd/netinet6/ipsec6.h b/bsd/netinet6/ipsec6.h index b5b065526..018afa4d7 100644 --- a/bsd/netinet6/ipsec6.h +++ b/bsd/netinet6/ipsec6.h @@ -81,6 +81,6 @@ extern int ipsec6_output_trans(struct ipsec_output_state *, u_char *, extern int ipsec6_output_tunnel(struct ipsec_output_state *, struct secpolicy *, int); extern int ipsec6_tunnel_validate(struct mbuf *, int, u_int, - struct secasvar *); + struct secasvar *, sa_family_t *); #endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_IPSEC6_H_ */ diff --git a/bsd/netinet6/mld6.c b/bsd/netinet6/mld6.c index 228767199..4dda3d82e 100644 --- a/bsd/netinet6/mld6.c +++ b/bsd/netinet6/mld6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -2312,16 +2312,20 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, VERIFY(mli->mli_ifp == ifp); /* - * Groups joined on loopback or marked as 'not reported', - * enter the MLD_SILENT_MEMBER state and - * are never reported in any protocol exchanges. + * Avoid MLD if group is : + * 1. Joined on loopback, OR + * 2. On a link that is marked MLIF_SILENT + * 3. rdar://problem/19227650 Is link local scoped and + * on cellular interface + * 4. Is a type that should not be reported (node local + * or all node link local multicast. * All other groups enter the appropriate state machine * for the version in use on this link. - * A link marked as MLIF_SILENT causes MLD to be completely - * disabled for the link. 
*/ if ((ifp->if_flags & IFF_LOOPBACK) || (mli->mli_flags & MLIF_SILENT) || + (IFNET_IS_CELLULAR(ifp) && + IN6_IS_ADDR_MC_LINKLOCAL(&inm->in6m_addr)) || !mld_is_addr_reported(&inm->in6m_addr)) { MLD_PRINTF(("%s: not kicking state machine for silent group\n", __func__)); diff --git a/bsd/netinet6/nd6.c b/bsd/netinet6/nd6.c index 15faf1316..8fe0d4d9e 100644 --- a/bsd/netinet6/nd6.c +++ b/bsd/netinet6/nd6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -168,15 +168,9 @@ struct llinfo_nd6 llinfo_nd6 = { .ln_prev = &llinfo_nd6, }; -/* Protected by nd_if_rwlock */ -size_t nd_ifinfo_indexlim = 32; /* increased for 5589193 */ -struct nd_ifinfo *nd_ifinfo = NULL; - -static lck_grp_attr_t *nd_if_lock_grp_attr; -static lck_grp_t *nd_if_lock_grp; -static lck_attr_t *nd_if_lock_attr; -decl_lck_rw_data(, nd_if_rwlock_data); -lck_rw_t *nd_if_rwlock = &nd_if_rwlock_data; +static lck_grp_attr_t *nd_if_lock_grp_attr = NULL; +static lck_grp_t *nd_if_lock_grp = NULL; +static lck_attr_t *nd_if_lock_attr = NULL; /* Protected by nd6_mutex */ struct nd_drhead nd_defrouter; @@ -216,6 +210,7 @@ static void nd6_llinfo_free(void *); static void nd6_llinfo_purge(struct rtentry *); static void nd6_llinfo_get_ri(struct rtentry *, struct rt_reach_info *); static void nd6_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *); +static void nd6_llinfo_refresh(struct rtentry *); static uint64_t ln_getexpire(struct llinfo_nd6 *); static void nd6_service(void *); @@ -267,6 +262,13 @@ SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, nd6_sysctl_prlist, "S,in6_defrouter", ""); +SYSCTL_DECL(_net_inet6_ip6); + +static int ip6_maxchainsent = 0; +SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent, + CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxchainsent, 0, + "use dlil_output_list"); + void 
nd6_init(void) { @@ -285,7 +287,6 @@ nd6_init(void) nd_if_lock_grp_attr = lck_grp_attr_alloc_init(); nd_if_lock_grp = lck_grp_alloc_init("nd_if_lock", nd_if_lock_grp_attr); nd_if_lock_attr = lck_attr_alloc_init(); - lck_rw_init(nd_if_rwlock, nd_if_lock_grp, nd_if_lock_attr); llinfo_nd6_zone = zinit(sizeof (struct llinfo_nd6), LLINFO_ND6_ZONE_MAX * sizeof (struct llinfo_nd6), 0, @@ -331,7 +332,7 @@ nd6_llinfo_free(void *arg) /* Just in case there's anything there, free it */ if (ln->ln_hold != NULL) { - m_freem(ln->ln_hold); + m_freem_list(ln->ln_hold); ln->ln_hold = NULL; } @@ -403,6 +404,31 @@ nd6_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri) } } +static void +nd6_llinfo_refresh(struct rtentry *rt) +{ + struct llinfo_nd6 *ln = rt->rt_llinfo; + uint64_t timenow = net_uptime(); + /* + * Can't refresh permanent, static or entries that are + * not direct host entries + */ + if (!ln || ln->ln_expire == 0 || + (rt->rt_flags & RTF_STATIC) || + !(rt->rt_flags & RTF_LLINFO)) { + return; + } + + if ((ln->ln_state > ND6_LLINFO_INCOMPLETE) && + (ln->ln_state < ND6_LLINFO_PROBE)) { + if (ln->ln_expire > timenow) { + ln->ln_expire = timenow; + ln->ln_state = ND6_LLINFO_PROBE; + } + } + return; +} + void ln_setexpire(struct llinfo_nd6 *ln, uint64_t expiry) { @@ -437,13 +463,10 @@ ln_getexpire(struct llinfo_nd6 *ln) void nd6_ifreset(struct ifnet *ifp) { - struct nd_ifinfo *ndi; - - lck_rw_assert(nd_if_rwlock, LCK_RW_ASSERT_HELD); - VERIFY(ifp != NULL && ifp->if_index < nd_ifinfo_indexlim); - ndi = &nd_ifinfo[ifp->if_index]; - + struct nd_ifinfo *ndi = ND_IFINFO(ifp); + VERIFY(NULL != ndi); VERIFY(ndi->initialized); + lck_mtx_assert(&ndi->lock, LCK_MTX_ASSERT_OWNED); ndi->linkmtu = ifp->if_mtu; ndi->chlim = IPV6_DEFHLIM; @@ -452,54 +475,12 @@ nd6_ifreset(struct ifnet *ifp) ndi->retrans = RETRANS_TIMER; } -int +void nd6_ifattach(struct ifnet *ifp) { - size_t newlim; - struct nd_ifinfo *ndi; - - /* - * We have some arrays that should be indexed by if_index. 
- * since if_index will grow dynamically, they should grow too. - */ - lck_rw_lock_shared(nd_if_rwlock); - newlim = nd_ifinfo_indexlim; - if (nd_ifinfo == NULL || if_index >= newlim) { - if (!lck_rw_lock_shared_to_exclusive(nd_if_rwlock)) - lck_rw_lock_exclusive(nd_if_rwlock); - lck_rw_assert(nd_if_rwlock, LCK_RW_ASSERT_EXCLUSIVE); - - newlim = nd_ifinfo_indexlim; - if (nd_ifinfo == NULL || if_index >= newlim) { - size_t n; - caddr_t q; - - while (if_index >= newlim) - newlim <<= 1; - - /* grow nd_ifinfo */ - n = newlim * sizeof (struct nd_ifinfo); - q = (caddr_t)_MALLOC(n, M_IP6NDP, M_WAITOK); - if (q == NULL) { - lck_rw_done(nd_if_rwlock); - return (ENOBUFS); - } - bzero(q, n); - if (nd_ifinfo != NULL) { - bcopy((caddr_t)nd_ifinfo, q, n/2); - /* - * We might want to pattern fill the old - * array to catch use-after-free cases. - */ - FREE((caddr_t)nd_ifinfo, M_IP6NDP); - } - nd_ifinfo = (struct nd_ifinfo *)(void *)q; - nd_ifinfo_indexlim = newlim; - } - } + struct nd_ifinfo *ndi = ND_IFINFO(ifp); - VERIFY(ifp != NULL); - ndi = &nd_ifinfo[ifp->if_index]; + VERIFY(NULL != ndi); if (!ndi->initialized) { lck_mtx_init(&ndi->lock, nd_if_lock_grp, nd_if_lock_attr); ndi->flags = ND6_IFF_PERFORMNUD; @@ -508,42 +489,39 @@ nd6_ifattach(struct ifnet *ifp) lck_mtx_lock(&ndi->lock); - if (!(ifp->if_flags & IFF_MULTICAST)) + if (!(ifp->if_flags & IFF_MULTICAST)) { ndi->flags |= ND6_IFF_IFDISABLED; + } nd6_ifreset(ifp); lck_mtx_unlock(&ndi->lock); - - lck_rw_done(nd_if_rwlock); - nd6_setmtu(ifp); - - return (0); + return; } +#if 0 /* - * Reset ND level link MTU. This function is called when the physical MTU - * changes, which means we might have to adjust the ND level MTU. + * XXX Look more into this. Especially since we recycle ifnets and do delayed + * cleanup */ +void +nd6_ifdetach(struct nd_ifinfo *nd) +{ + /* XXX destroy nd's lock? 
*/ + FREE(nd, M_IP6NDP); +} +#endif + void nd6_setmtu(struct ifnet *ifp) { - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = ND_IFINFO(ifp); u_int32_t oldmaxmtu, maxmtu; - /* - * Make sure IPv6 is enabled for the interface first, - * because this can be called directly from SIOCSIFMTU for IPv4 - */ - lck_rw_lock_shared(nd_if_rwlock); - if (ifp->if_index >= nd_ifinfo_indexlim || - !nd_ifinfo[ifp->if_index].initialized) { - lck_rw_done(nd_if_rwlock); - return; /* nd_ifinfo out of bound, or not yet initialized */ + if ((NULL == ndi) || (FALSE == ndi->initialized)) { + return; } - ndi = &nd_ifinfo[ifp->if_index]; - VERIFY(ndi->initialized); lck_mtx_lock(&ndi->lock); oldmaxmtu = ndi->maxmtu; @@ -573,11 +551,11 @@ nd6_setmtu(struct ifnet *ifp) } ndi->linkmtu = ifp->if_mtu; lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); /* also adjust in6_maxmtu if necessary. */ - if (maxmtu > in6_maxmtu) + if (maxmtu > in6_maxmtu) { in6_setmaxmtu(); + } } void @@ -749,6 +727,7 @@ nd6_service(void *arg) struct ifnet *ifp = NULL; struct in6_ifaddr *ia6, *nia6; uint64_t timenow; + bool send_nc_failure_kev = false; lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); /* @@ -771,6 +750,50 @@ nd6_service(void *arg) net_update_uptime(); timenow = net_uptime(); again: + /* + * send_nc_failure_kev gets set when default router's IPv6 address + * can't be resolved. + * That can happen either: + * 1. When the entry has resolved once but can't be + * resolved later and the neighbor cache entry for gateway is deleted + * after max probe attempts. + * + * 2. When the entry is in ND6_LLINFO_INCOMPLETE but can not be resolved + * after max neighbor address resolution attempts. + * + * Both set send_nc_failure_kev to true. ifp is also set to the previous + * neighbor cache entry's route's ifp. + * Once we are done sending the notification, set send_nc_failure_kev + * to false to stop sending false notifications for non default router + * neighbors. 
+ * + * We may to send more information like Gateway's IP that could not be + * resolved, however right now we do not install more than one default + * route per interface in the routing table. + */ + if (send_nc_failure_kev && ifp->if_addrlen == IF_LLREACH_MAXLEN) { + struct kev_msg ev_msg; + struct kev_nd6_ndfailure nd6_ndfailure; + bzero(&ev_msg, sizeof(ev_msg)); + bzero(&nd6_ndfailure, sizeof(nd6_ndfailure)); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_ND6_SUBCLASS; + ev_msg.event_code = KEV_ND6_NDFAILURE; + + nd6_ndfailure.link_data.if_family = ifp->if_family; + nd6_ndfailure.link_data.if_unit = ifp->if_unit; + strlcpy(nd6_ndfailure.link_data.if_name, + ifp->if_name, + sizeof(nd6_ndfailure.link_data.if_name)); + ev_msg.dv[0].data_ptr = &nd6_ndfailure; + ev_msg.dv[0].data_length = + sizeof(nd6_ndfailure); + kev_post_msg(&ev_msg); + } + + send_nc_failure_kev = false; + ifp = NULL; /* * The global list llinfo_nd6 is modified by nd6_request() and is * therefore protected by rnh_lock. For obvious reasons, we cannot @@ -791,6 +814,7 @@ nd6_service(void *arg) struct sockaddr_in6 *dst; struct llinfo_nd6 *next; u_int32_t retrans, flags; + struct nd_ifinfo *ndi = NULL; /* ln_next/prev/rt is protected by rnh_lock */ next = ln->ln_next; @@ -864,37 +888,10 @@ nd6_service(void *arg) continue; } - lck_rw_lock_shared(nd_if_rwlock); - if (ifp->if_index >= nd_ifinfo_indexlim) { - /* - * In the event the nd_ifinfo[] array is not in synch - * by now, we don't want to hold on to the llinfo entry - * forever; just purge it rather than have it consume - * resources. That's better than transmitting out of - * the interface as the rest of the layers may not be - * ready as well. - * - * We can retire this logic once we get rid of the - * separate array and utilize a per-ifnet structure. 
- */ - retrans = RETRANS_TIMER; - flags = ND6_IFF_PERFORMNUD; - if (ln->ln_expire != 0) { - ln->ln_state = ND6_LLINFO_PURGE; - log (LOG_ERR, "%s: purging rt(0x%llx) " - "ln(0x%llx) dst %s, if_index %d >= %d\n", - __func__, (uint64_t)VM_KERNEL_ADDRPERM(rt), - (uint64_t)VM_KERNEL_ADDRPERM(ln), - ip6_sprintf(&dst->sin6_addr), ifp->if_index, - nd_ifinfo_indexlim); - } - } else { - struct nd_ifinfo *ndi = ND_IFINFO(ifp); - VERIFY(ndi->initialized); - retrans = ndi->retrans; - flags = ndi->flags; - } - lck_rw_done(nd_if_rwlock); + ndi = ND_IFINFO(ifp); + VERIFY(ndi->initialized); + retrans = ndi->retrans; + flags = ndi->flags; RT_LOCK_ASSERT_HELD(rt); @@ -920,20 +917,21 @@ nd6_service(void *arg) } else { struct mbuf *m = ln->ln_hold; ln->ln_hold = NULL; + send_nc_failure_kev = (rt->rt_flags & RTF_ROUTER) ? true : false; if (m != NULL) { - /* - * Fake rcvif to make ICMP error - * more helpful in diagnosing - * for the receiver. - * XXX: should we consider - * older rcvif? - */ - m->m_pkthdr.rcvif = ifp; RT_ADDREF_LOCKED(rt); RT_UNLOCK(rt); lck_mtx_unlock(rnh_lock); - icmp6_error(m, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_ADDR, 0); + + struct mbuf *mnext; + while (m) { + mnext = m->m_nextpkt; + m->m_nextpkt = NULL; + m->m_pkthdr.rcvif = ifp; + icmp6_error_flag(m, ICMP6_DST_UNREACH, + ICMP6_DST_UNREACH_ADDR, 0, 0); + m = mnext; + } } else { RT_ADDREF_LOCKED(rt); RT_UNLOCK(rt); @@ -1008,6 +1006,7 @@ nd6_service(void *arg) ap->aging++; lck_mtx_lock(rnh_lock); } else { + send_nc_failure_kev = (rt->rt_flags & RTF_ROUTER) ? true : false; RT_ADDREF_LOCKED(rt); RT_UNLOCK(rt); lck_mtx_unlock(rnh_lock); @@ -2184,6 +2183,7 @@ nd6_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa) rt->rt_llinfo_get_iflri = nd6_llinfo_get_iflri; rt->rt_llinfo_purge = nd6_llinfo_purge; rt->rt_llinfo_free = nd6_llinfo_free; + rt->rt_llinfo_refresh = nd6_llinfo_refresh; rt->rt_flags |= RTF_LLINFO; ln->ln_rt = rt; /* this is required for "ndp" command. 
- shin */ @@ -2368,7 +2368,7 @@ nd6_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa) rt->rt_flags &= ~RTF_LLINFO; if (ln->ln_hold != NULL) { - m_freem(ln->ln_hold); + m_freem_list(ln->ln_hold); ln->ln_hold = NULL; } } @@ -2586,10 +2586,9 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) struct nd_defrouter *dr; struct nd_prefix *pr; struct rtentry *rt; - int i, error = 0; + int error = 0; VERIFY(ifp != NULL); - i = ifp->if_index; switch (cmd) { case SIOCGDRLST_IN6_32: /* struct in6_drlist_32 */ @@ -2621,59 +2620,58 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) * SIOCGIFINFO_IN6 ioctl is encoded with in6_ondireq * instead of in6_ndireq, so we treat it as such. */ - lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifp); - if (!nd_ifinfo || i >= nd_ifinfo_indexlim || - !ndi->initialized) { - lck_rw_done(nd_if_rwlock); + if ((NULL == ndi) || (FALSE == ndi->initialized)){ error = EINVAL; break; } lck_mtx_lock(&ndi->lock); linkmtu = IN6_LINKMTU(ifp); bcopy(&linkmtu, &ondi->ndi.linkmtu, sizeof (linkmtu)); - bcopy(&nd_ifinfo[i].maxmtu, &ondi->ndi.maxmtu, + bcopy(&ndi->maxmtu, &ondi->ndi.maxmtu, sizeof (u_int32_t)); - bcopy(&nd_ifinfo[i].basereachable, &ondi->ndi.basereachable, + bcopy(&ndi->basereachable, &ondi->ndi.basereachable, sizeof (u_int32_t)); - bcopy(&nd_ifinfo[i].reachable, &ondi->ndi.reachable, + bcopy(&ndi->reachable, &ondi->ndi.reachable, sizeof (u_int32_t)); - bcopy(&nd_ifinfo[i].retrans, &ondi->ndi.retrans, + bcopy(&ndi->retrans, &ondi->ndi.retrans, sizeof (u_int32_t)); - bcopy(&nd_ifinfo[i].flags, &ondi->ndi.flags, + bcopy(&ndi->flags, &ondi->ndi.flags, sizeof (u_int32_t)); - bcopy(&nd_ifinfo[i].recalctm, &ondi->ndi.recalctm, + bcopy(&ndi->recalctm, &ondi->ndi.recalctm, sizeof (int)); - ondi->ndi.chlim = nd_ifinfo[i].chlim; + ondi->ndi.chlim = ndi->chlim; ondi->ndi.receivedra = 0; lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); break; } case SIOCSIFINFO_FLAGS: { /* struct in6_ndireq */ + /* + * XXX BSD has a bunch of 
checks here to ensure + * that interface disabled flag is not reset if + * link local address has failed DAD. + * Investigate that part. + */ struct in6_ndireq *cndi = (struct in6_ndireq *)(void *)data; u_int32_t oflags, flags; - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = ND_IFINFO(ifp); /* XXX: almost all other fields of cndi->ndi is unused */ - lck_rw_lock_shared(nd_if_rwlock); - ndi = ND_IFINFO(ifp); - if (!nd_ifinfo || i >= nd_ifinfo_indexlim || - !ndi->initialized) { - lck_rw_done(nd_if_rwlock); + if ((NULL == ndi) || !ndi->initialized) { error = EINVAL; break; } + lck_mtx_lock(&ndi->lock); - oflags = nd_ifinfo[i].flags; - bcopy(&cndi->ndi.flags, &nd_ifinfo[i].flags, sizeof (flags)); - flags = nd_ifinfo[i].flags; + oflags = ndi->flags; + bcopy(&cndi->ndi.flags, &(ndi->flags), sizeof (flags)); + flags = ndi->flags; lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); - if (oflags == flags) + if (oflags == flags) { break; + } error = nd6_setifinfo(ifp, oflags, flags); break; @@ -3052,7 +3050,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, * set the 2nd argument as the 1st one. 
*/ RT_UNLOCK(rt); - nd6_output(ifp, ifp, m, &sin6, rt, NULL); + nd6_output_list(ifp, ifp, m, &sin6, rt, NULL); RT_LOCK(rt); } } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { @@ -3159,16 +3157,17 @@ static void nd6_slowtimo(void *arg) { #pragma unused(arg) - int i; - struct nd_ifinfo *nd6if; + struct nd_ifinfo *nd6if = NULL; + struct ifnet *ifp = NULL; + + ifnet_head_lock_shared(); + for (ifp = ifnet_head.tqh_first; ifp; + ifp = ifp->if_link.tqe_next) { + nd6if = ND_IFINFO(ifp); + if ((NULL == nd6if) || (FALSE == nd6if->initialized)) { + continue; + } - lck_rw_lock_shared(nd_if_rwlock); - for (i = 1; i < if_index + 1; i++) { - if (!nd_ifinfo || i >= nd_ifinfo_indexlim) - break; - nd6if = &nd_ifinfo[i]; - if (!nd6if->initialized) - break; lck_mtx_lock(&nd6if->lock); if (nd6if->basereachable && /* already initialized */ (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) { @@ -3184,22 +3183,34 @@ nd6_slowtimo(void *arg) } lck_mtx_unlock(&nd6if->lock); } - lck_rw_done(nd_if_rwlock); + ifnet_head_done(); timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz); } -#define senderr(e) { error = (e); goto bad; } int nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv) { - struct mbuf *m = m0; + return nd6_output_list(ifp, origifp, m0, dst, hint0, adv); +} + +/* + * nd6_output_list() + * + * Assumption: route determination for first packet can be correctly applied to + * all packets in the chain. 
+ */ +#define senderr(e) { error = (e); goto bad; } +int +nd6_output_list(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, + struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv) +{ struct rtentry *rt = hint0, *hint = hint0; struct llinfo_nd6 *ln = NULL; int error = 0; uint64_t timenow; struct rtentry *rtrele = NULL; - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = NULL; if (rt != NULL) { RT_LOCK_SPIN(rt); @@ -3243,7 +3254,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, if (rt->rt_ifp != ifp) { /* XXX: loop care? */ RT_UNLOCK(rt); - error = nd6_output(ifp, origifp, m0, + error = nd6_output_list(ifp, origifp, m0, dst, rt, adv); rtfree(rt); return (error); @@ -3444,16 +3455,15 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, } if (!ln || !rt) { - if (rt != NULL) + if (rt != NULL) { RT_UNLOCK(rt); - lck_rw_lock_shared(nd_if_rwlock); + } ndi = ND_IFINFO(ifp); VERIFY(ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); if ((ifp->if_flags & IFF_POINTOPOINT) == 0 && !(ndi->flags & ND6_IFF_PERFORMNUD)) { lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); log(LOG_DEBUG, "nd6_output: can't allocate llinfo for %s " "(ln=0x%llx, rt=0x%llx)\n", @@ -3463,7 +3473,6 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, senderr(EIO); /* XXX: good error? 
*/ } lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); goto sendpkt; /* send anyway */ } @@ -3548,18 +3557,16 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, if (ln->ln_state == ND6_LLINFO_NOSTATE) ln->ln_state = ND6_LLINFO_INCOMPLETE; if (ln->ln_hold) - m_freem(ln->ln_hold); - ln->ln_hold = m; + m_freem_list(ln->ln_hold); + ln->ln_hold = m0; if (ln->ln_expire != 0 && ln->ln_asked < nd6_mmaxtries && ln->ln_expire <= timenow) { ln->ln_asked++; - lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifp); VERIFY(ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); ln_setexpire(ln, timenow + ndi->retrans / 1000); lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); RT_UNLOCK(rt); /* We still have a reference on rt (for ln) */ if (ip6_forwarding) @@ -3571,6 +3578,9 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, nd6_sched_timeout(NULL, NULL); lck_mtx_unlock(rnh_lock); } else { + if(ln->ln_state == ND6_LLINFO_INCOMPLETE) { + ln->ln_expire = timenow; + } RT_UNLOCK(rt); } /* @@ -3615,13 +3625,13 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, if (ifp->if_flags & IFF_LOOPBACK) { /* forwarding rules require the original scope_id */ - m->m_pkthdr.rcvif = origifp; - error = dlil_output(origifp, PF_INET6, m, (caddr_t)rt, + m0->m_pkthdr.rcvif = origifp; + error = dlil_output(origifp, PF_INET6, m0, (caddr_t)rt, SA(dst), 0, adv); goto release; } else { /* Do not allow loopback address to wind up on a wire */ - struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct ip6_hdr *ip6 = mtod(m0, struct ip6_hdr *); if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) || IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) { @@ -3639,25 +3649,34 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, RT_UNLOCK(rt); } - if (hint != NULL && nstat_collect) { - int scnt; + struct mbuf *mcur = m0; + uint32_t pktcnt = 0; - if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) && - (m->m_pkthdr.tso_segsz > 0)) - scnt = 
m->m_pkthdr.len / m->m_pkthdr.tso_segsz; - else - scnt = 1; + while (mcur) { + if (hint != NULL && nstat_collect) { + int scnt; - nstat_route_tx(hint, scnt, m->m_pkthdr.len, 0); - } + if ((mcur->m_pkthdr.csum_flags & CSUM_TSO_IPV6) && + (mcur->m_pkthdr.tso_segsz > 0)) + scnt = mcur->m_pkthdr.len / mcur->m_pkthdr.tso_segsz; + else + scnt = 1; - m->m_pkthdr.rcvif = NULL; - error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt, SA(dst), 0, adv); + nstat_route_tx(hint, scnt, mcur->m_pkthdr.len, 0); + } + pktcnt++; + + mcur->m_pkthdr.rcvif = NULL; + mcur = mcur->m_nextpkt; + } + if (pktcnt > ip6_maxchainsent) + ip6_maxchainsent = pktcnt; + error = dlil_output(ifp, PF_INET6, m0, (caddr_t)rt, SA(dst), 0, adv); goto release; bad: - if (m != NULL) - m_freem(m); + if (m0 != NULL) + m_freem_list(m0); release: /* Clean up "rt" unless it's already been done */ diff --git a/bsd/netinet6/nd6.h b/bsd/netinet6/nd6.h index 0b1c36bff..08b52d26a 100644 --- a/bsd/netinet6/nd6.h +++ b/bsd/netinet6/nd6.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -69,6 +69,7 @@ #include #include #include +#include struct llinfo_nd6 { /* @@ -135,6 +136,7 @@ struct llinfo_nd6 { struct nd_ifinfo { #else /* For binary compatibility, this structure must not change */ +/* NOTE: nd_ifinfo is defined in nd6_var.h */ struct nd_ifinfo_compat { #endif /* !BSD_KERNEL_PRIVATE */ u_int32_t linkmtu; /* LinkMTU */ @@ -152,30 +154,6 @@ struct nd_ifinfo_compat { u_int8_t randomid[8]; /* current random ID */ }; -#if defined(BSD_KERNEL_PRIVATE) -struct nd_ifinfo { - decl_lck_mtx_data(, lock); - boolean_t initialized; /* Flag to see the entry is initialized */ - u_int32_t linkmtu; /* LinkMTU */ - u_int32_t maxmtu; /* Upper bound of LinkMTU */ - u_int32_t basereachable; /* BaseReachableTime */ - u_int32_t reachable; /* Reachable Time */ - u_int32_t retrans; /* Retrans Timer */ - u_int32_t flags; /* Flags */ - int recalctm; /* BaseReacable re-calculation timer */ - u_int8_t chlim; /* CurHopLimit */ - u_int8_t _pad[3]; - /* the following 3 members are for privacy extension for addrconf */ - u_int8_t randomseed0[8]; /* upper 64 bits of SHA1 digest */ - u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */ - u_int8_t randomid[8]; /* current random ID */ - /* keep track of routers and prefixes on this link */ - int32_t nprefixes; - int32_t ndefrouters; - struct in6_cga_modifier local_cga_modifier; -}; -#endif /* BSD_KERNEL_PRIVATE */ - #define ND6_IFF_PERFORMNUD 0x1 #if defined(PRIVATE) @@ -470,14 +448,10 @@ struct in6_ndifreq_64 { #define ND6_MAX_LIFETIME 0x7fffffff #ifdef BSD_KERNEL_PRIVATE -/* - * Protects nd_ifinfo[] - */ -extern lck_rw_t *nd_if_rwlock; - -#define ND_IFINFO(ifp) \ - ((ifp)->if_index < nd_ifinfo_indexlim ? &nd_ifinfo[(ifp)->if_index] : \ - NULL) +#define ND_IFINFO(ifp) \ + ((ifp == NULL) ? NULL : \ + ((IN6_IFEXTRA(ifp) == NULL) ? 
NULL : \ + (&IN6_IFEXTRA(ifp)->nd_ifinfo))) /* * In a more readable form, we derive linkmtu based on: @@ -515,6 +489,10 @@ extern lck_rw_t *nd_if_rwlock; (((MIN_RANDOM_FACTOR * (x >> 10)) + (RandomULong() & \ ((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000) +/* prefix expiry times */ +#define ND6_PREFIX_EXPIRY_UNSPEC -1 +#define ND6_PREFIX_EXPIRY_NEVER 0 + TAILQ_HEAD(nd_drhead, nd_defrouter); struct nd_defrouter { decl_lck_mtx_data(, nddr_lock); @@ -689,10 +667,22 @@ struct nd_prefix_list { #endif /* BSD_KERNEL_PRIVATE */ #if defined(PRIVATE) +struct kev_nd6_ndfailure { + struct net_event_data link_data; +}; + +struct kev_nd6_ndalive { + struct net_event_data link_data; +}; + /* ND6 kernel event subclass value */ -#define KEV_ND6_SUBCLASS 7 +#define KEV_ND6_SUBCLASS 7 + /* ND6 kernel event action type */ -#define KEV_ND6_RA 1 +#define KEV_ND6_RA 1 +#define KEV_ND6_NDFAILURE 2 /* IPv6 neighbor cache entry expiry */ +#define KEV_ND6_NDALIVE 3 /* IPv6 neighbor reachable */ + /* ND6 RA L2 source address length */ #define ND6_ROUTER_LL_SIZE 64 @@ -738,11 +728,9 @@ extern int nd6_accept_6to4; extern int nd6_maxnudhint; extern int nd6_gctimer; extern struct llinfo_nd6 llinfo_nd6; -extern struct nd_ifinfo *nd_ifinfo; extern struct nd_drhead nd_defrouter; extern struct nd_prhead nd_prefix; extern int nd6_debug; -extern size_t nd_ifinfo_indexlim; extern int nd6_onlink_ns_rfc4861; extern int nd6_optimistic_dad; @@ -794,8 +782,8 @@ union nd_opts { extern int nd6_sched_timeout_want; extern void nd6_sched_timeout(struct timeval *, struct timeval *); extern void nd6_init(void); -extern void nd6_ifreset(struct ifnet *); -extern int nd6_ifattach(struct ifnet *); +extern void nd6_ifreset(struct ifnet *ifp); +extern void nd6_ifattach(struct ifnet *); extern int nd6_is_addr_neighbor(struct sockaddr_in6 *, struct ifnet *, int); extern void nd6_option_init(void *, int, union nd_opts *); extern struct nd_opt_hdr *nd6_option(union nd_opts *); @@ -811,6 +799,8 @@ extern void 
nd6_rtrequest(int, struct rtentry *, struct sockaddr *); extern int nd6_ioctl(u_long, caddr_t, struct ifnet *); extern void nd6_cache_lladdr(struct ifnet *, struct in6_addr *, char *, int, int, int); +extern int nd6_output_list(struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *, struct rtentry *, struct flowadv *); extern int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *, struct sockaddr_in6 *, struct rtentry *, struct flowadv *); extern int nd6_storelladdr(struct ifnet *, struct rtentry *, struct mbuf *, @@ -865,7 +855,7 @@ extern int nd6_prefix_onlink_scoped(struct nd_prefix *, unsigned int); extern int nd6_prefix_offlink(struct nd_prefix *); extern void pfxlist_onlink_check(void); extern struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *); -extern struct nd_prefix *nd6_prefix_lookup(struct nd_prefix *); +extern struct nd_prefix *nd6_prefix_lookup(struct nd_prefix *, int); extern int in6_init_prefix_ltimes(struct nd_prefix *ndpr); extern void rt6_flush(struct in6_addr *, struct ifnet *); extern int nd6_setdefaultiface(int); diff --git a/bsd/netinet6/nd6_nbr.c b/bsd/netinet6/nd6_nbr.c index 5de3a3b4f..64c4720e3 100644 --- a/bsd/netinet6/nd6_nbr.c +++ b/bsd/netinet6/nd6_nbr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -263,6 +263,7 @@ nd6_ns_input( struct sockaddr_dl proxydl; boolean_t advrouter; boolean_t is_dad_probe; + int oflgclr = 0; if ((ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) { nd6log((LOG_INFO, "nd6_ns_input: on ND6ALT interface!\n")); @@ -456,7 +457,7 @@ nd6_ns_input( * src addr how to process? 
* --- --- * multicast of course, invalid (rejected in ip6_input) - * unicast somebody is doing address resolution -> ignore + * unicast somebody is doing address resolution * unspec dup address detection * * The processing is defined in the "draft standard" RFC 4862 (and by @@ -470,12 +471,19 @@ nd6_ns_input( * duplicate address detection. * * If not, the packet is for addess resolution; - * silently ignore it. + * silently ignore it when not optimistic + * + * Per RFC 4429 the reply for an optimistic address must + * have the Override flag cleared */ - if (is_dad_probe) - nd6_dad_ns_input(m, ifa, lladdr, lladdrlen); + if (!is_dad_probe && (dadprogress & IN6_IFF_OPTIMISTIC) != 0) { + oflgclr = 1; + } else { + if (is_dad_probe) + nd6_dad_ns_input(m, ifa, lladdr, lladdrlen); - goto freeit; + goto freeit; + } } /* Are we an advertising router on this interface? */ @@ -507,7 +515,7 @@ nd6_ns_input( ND_NEIGHBOR_SOLICIT, 0); nd6_na_output(ifp, &saddr6, &taddr6, - ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | + ((anycast || proxy || !tlladdr || oflgclr) ? 0 : ND_NA_FLAG_OVERRIDE) | (advrouter ? ND_NA_FLAG_ROUTER : 0) | ND_NA_FLAG_SOLICITED, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL); freeit: @@ -850,6 +858,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) struct sockaddr_dl *sdl; union nd_opts ndopts; uint64_t timenow; + bool send_nc_alive_kev = false; if ((ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) { nd6log((LOG_INFO, "nd6_na_input: on ND6ALT interface!\n")); @@ -990,17 +999,16 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) sdl->sdl_alen = ifp->if_addrlen; bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen); if (is_solicited) { + send_nc_alive_kev = (rt->rt_flags & RTF_ROUTER) ? 
true : false; ln->ln_state = ND6_LLINFO_REACHABLE; if (ln->ln_expire != 0) { - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = NULL; - lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(rt->rt_ifp); VERIFY(ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); ln_setexpire(ln, timenow + ndi->reachable); lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); RT_UNLOCK(rt); lck_mtx_lock(rnh_lock); nd6_sched_timeout(NULL, NULL); @@ -1091,16 +1099,14 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) if (is_solicited) { ln->ln_state = ND6_LLINFO_REACHABLE; if (ln->ln_expire != 0) { - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = NULL; - lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifp); VERIFY(ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); ln_setexpire(ln, timenow + ndi->reachable); lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); RT_UNLOCK(rt); lck_mtx_lock(rnh_lock); nd6_sched_timeout(NULL, NULL); @@ -1152,6 +1158,28 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) } ln->ln_router = is_router; } + + if (send_nc_alive_kev && (ifp->if_addrlen == IF_LLREACH_MAXLEN)) { + struct kev_msg ev_msg; + struct kev_nd6_ndalive nd6_ndalive; + bzero(&ev_msg, sizeof(ev_msg)); + bzero(&nd6_ndalive, sizeof(nd6_ndalive)); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_ND6_SUBCLASS; + ev_msg.event_code = KEV_ND6_NDALIVE; + + nd6_ndalive.link_data.if_family = ifp->if_family; + nd6_ndalive.link_data.if_unit = ifp->if_unit; + strlcpy(nd6_ndalive.link_data.if_name, + ifp->if_name, + sizeof(nd6_ndalive.link_data.if_name)); + ev_msg.dv[0].data_ptr = &nd6_ndalive; + ev_msg.dv[0].data_length = + sizeof(nd6_ndalive); + kev_post_msg(&ev_msg); + } + RT_LOCK_ASSERT_HELD(rt); rt->rt_flags &= ~RTF_REJECT; @@ -1170,8 +1198,9 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * prevent a ln_hold lookup in nd6_output() * (wouldn't happen, though...) 
*/ - for (m_hold = ln->ln_hold; - m_hold; m_hold = m_hold_next) { + m_hold = ln->ln_hold; + ln->ln_hold = NULL; + for ( ; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_hold->m_nextpkt = NULL; /* @@ -1182,8 +1211,6 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) nd6_output(ifp, ifp, m_hold, &sin6, rt, NULL); RT_LOCK_SPIN(rt); } - ln->ln_hold = NULL; - } RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); @@ -1583,16 +1610,14 @@ nd6_dad_start( */ if (tick_delay == NULL) { u_int32_t retrans; - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = NULL; nd6_dad_ns_output(dp, ifa); - lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifa->ifa_ifp); VERIFY(ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); retrans = ndi->retrans * hz / 1000; lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, retrans); } else { int ntick; @@ -1704,7 +1729,7 @@ nd6_dad_timer(struct ifaddr *ifa) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct dadq *dp = NULL; - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = NULL; /* Sanity check */ if (ia == NULL) { @@ -1762,13 +1787,11 @@ nd6_dad_timer(struct ifaddr *ifa) * We have more NS to go. Send NS packet for DAD. 
*/ nd6_dad_ns_output(dp, ifa); - lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifa->ifa_ifp); VERIFY(ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); retrans = ndi->retrans * hz / 1000; lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, retrans); } else { /* @@ -1813,13 +1836,11 @@ nd6_dad_timer(struct ifaddr *ifa) ia->ia6_flags &= ~IN6_IFF_DADPROGRESS; IFA_UNLOCK(&ia->ia_ifa); - lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifa->ifa_ifp); VERIFY(ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); txunsolna = (ndi->flags & ND6_IFF_REPLICATED) != 0; lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); if (txunsolna) { nd6_unsol_na_output(ifa); @@ -1877,12 +1898,12 @@ nd6_dad_duplicated(struct ifaddr *ifa) IFA_UNLOCK(&ia->ia_ifa); if (disable) { + struct nd_ifinfo *ndi = ND_IFINFO(ifp); log(LOG_ERR, "%s: possible hardware address duplication " "detected, disabling IPv6 for interface.\n", if_name(ifp)); - lck_rw_lock_shared(nd_if_rwlock); - nd_ifinfo[ifp->if_index].flags |= ND6_IFF_IFDISABLED; - lck_rw_done(nd_if_rwlock); + VERIFY((NULL != ndi) && (TRUE == ndi->initialized)); + ndi->flags |= ND6_IFF_IFDISABLED; /* Make sure to set IFEF_IPV6_DISABLED too */ nd6_if_disable(ifp, TRUE); } @@ -2013,10 +2034,10 @@ static struct mbuf * nd6_dad_na_input(struct mbuf *m, struct ifnet *ifp, struct in6_addr *taddr, caddr_t lladdr, int lladdrlen) { - struct ifaddr *ifa; - struct in6_ifaddr *ia; - struct dadq *dp; - struct nd_ifinfo *ndi; + struct ifaddr *ifa = NULL; + struct in6_ifaddr *ia = NULL; + struct dadq *dp = NULL; + struct nd_ifinfo *ndi = NULL; boolean_t candisable, replicated; ifa = (struct ifaddr *) in6ifa_ifpwithaddr(ifp, taddr); @@ -2027,14 +2048,13 @@ nd6_dad_na_input(struct mbuf *m, struct ifnet *ifp, struct in6_addr *taddr, replicated = FALSE; /* Get the ND6_IFF_REPLICATED flag. 
*/ - lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifp); if (ndi != NULL && ndi->initialized) { lck_mtx_lock(&ndi->lock); replicated = !!(ndi->flags & ND6_IFF_REPLICATED); lck_mtx_unlock(&ndi->lock); } - lck_rw_done(nd_if_rwlock); + if (replicated) { nd6log((LOG_INFO, "%s: ignoring duplicate NA on " "replicated interface %s\n", __func__, if_name(ifp))); diff --git a/bsd/netinet6/nd6_rtr.c b/bsd/netinet6/nd6_rtr.c index ceb0f7d9c..6227ff003 100644 --- a/bsd/netinet6/nd6_rtr.c +++ b/bsd/netinet6/nd6_rtr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2014 Apple Inc. All rights reserved. + * Copyright (c) 2003-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -439,13 +439,8 @@ nd6_ra_input( if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) mcast = 1; - lck_rw_lock_shared(nd_if_rwlock); - if (ifp->if_index >= nd_ifinfo_indexlim) { - lck_rw_done(nd_if_rwlock); - goto freeit; - } - ndi = &nd_ifinfo[ifp->if_index]; - VERIFY(ndi->initialized); + ndi = ND_IFINFO(ifp); + VERIFY((NULL != ndi) && (TRUE == ndi->initialized)); lck_mtx_lock(&ndi->lock); bzero(&dr0, sizeof (dr0)); dr0.rtaddr = saddr6; @@ -465,11 +460,19 @@ nd6_ra_input( } if (nd_ra->nd_ra_retransmit) ndi->retrans = ntohl(nd_ra->nd_ra_retransmit); - if (nd_ra->nd_ra_curhoplimit) - ndi->chlim = nd_ra->nd_ra_curhoplimit; + if (nd_ra->nd_ra_curhoplimit) { + if (ndi->chlim < nd_ra->nd_ra_curhoplimit) { + ndi->chlim = nd_ra->nd_ra_curhoplimit; + } else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) { + nd6log((LOG_ERR, + "RA with a lower CurHopLimit sent from " + "%s on %s (current = %d, received = %d). 
" + "Ignored.\n", ip6_sprintf(&ip6->ip6_src), + if_name(ifp), ndi->chlim, + nd_ra->nd_ra_curhoplimit)); + } + } lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); - ndi = NULL; lck_mtx_lock(nd6_mutex); dr = defrtrlist_update(&dr0); lck_mtx_unlock(nd6_mutex); @@ -615,13 +618,6 @@ nd6_ra_input( goto skip; } - lck_rw_lock_shared(nd_if_rwlock); - if (ifp->if_index >= nd_ifinfo_indexlim) { - lck_rw_done(nd_if_rwlock); - goto freeit; - } - ndi = &nd_ifinfo[ifp->if_index]; - VERIFY(ndi->initialized); lck_mtx_lock(&ndi->lock); /* upper bound */ if (ndi->maxmtu) { @@ -630,7 +626,6 @@ nd6_ra_input( ndi->linkmtu = mtu; lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); if (change) /* in6_maxmtu may change */ in6_setmaxmtu(); } else { @@ -640,17 +635,14 @@ nd6_ra_input( mtu, ip6_sprintf(&ip6->ip6_src), ndi->maxmtu)); lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); } } else { lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); nd6log((LOG_INFO, "nd6_ra_input: mtu option " "mtu=%d sent from %s; maxmtu unknown, " "ignoring\n", mtu, ip6_sprintf(&ip6->ip6_src))); } - ndi = NULL; } skip: @@ -1022,6 +1014,7 @@ defrtrlist_del(struct nd_defrouter *dr) struct nd_defrouter *deldr = NULL; struct nd_prefix *pr; struct ifnet *ifp = dr->ifp; + struct nd_ifinfo *ndi = NULL; boolean_t resetmtu; lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); @@ -1077,19 +1070,15 @@ defrtrlist_del(struct nd_defrouter *dr) defrouter_select(ifp); resetmtu = FALSE; - lck_rw_lock_shared(nd_if_rwlock); - if (ifp->if_index < nd_ifinfo_indexlim) { - struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; - VERIFY(ndi->initialized); - lck_mtx_lock(&ndi->lock); - VERIFY(ndi->ndefrouters >= 0); - if (ndi->ndefrouters > 0 && --ndi->ndefrouters == 0) { - nd6_ifreset(ifp); - resetmtu = TRUE; - } - lck_mtx_unlock(&ndi->lock); + ndi = ND_IFINFO(ifp); + VERIFY((NULL != ndi) && (TRUE == ndi->initialized)); + lck_mtx_lock(&ndi->lock); + VERIFY(ndi->ndefrouters >= 0); + if (ndi->ndefrouters > 0 && 
--ndi->ndefrouters == 0) { + nd6_ifreset(ifp); + resetmtu = TRUE; } - lck_rw_done(nd_if_rwlock); + lck_mtx_unlock(&ndi->lock); if (resetmtu) nd6_setmtu(ifp); @@ -1792,17 +1781,12 @@ defrtrlist_update_common(struct nd_defrouter *new, boolean_t scoped) return (NULL); } - lck_rw_lock_shared(nd_if_rwlock); - if (ifp->if_index >= nd_ifinfo_indexlim) - goto freeit; - ndi = &nd_ifinfo[ifp->if_index]; - VERIFY(ndi->initialized); + ndi = ND_IFINFO(ifp); + VERIFY((NULL != ndi) && (TRUE == ndi->initialized)); lck_mtx_lock(&ndi->lock); if (ip6_maxifdefrouters >= 0 && ndi->ndefrouters >= ip6_maxifdefrouters) { lck_mtx_unlock(&ndi->lock); -freeit: - lck_rw_done(nd_if_rwlock); nddr_free(n); return (NULL); } @@ -1814,7 +1798,6 @@ defrtrlist_update_common(struct nd_defrouter *new, boolean_t scoped) ndi->ndefrouters++; VERIFY(ndi->ndefrouters != 0); lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); nd6log2((LOG_INFO, "%s: allocating defrouter %s\n", if_name(ifp), ip6_sprintf(&new->rtaddr))); @@ -1964,8 +1947,20 @@ pfxrtr_del(struct nd_pfxrouter *pfr, struct nd_prefix *pr) zfree(ndprtr_zone, pfr); } +/* + * The routine has been modified to atomically refresh expiry + * time for nd6 prefix as the part of lookup. + * rdar://20339655 explains the corner case where a system going + * in sleep gets rid of manual addresses configured in the system + * and then schedules the prefix for deletion. + * However before the prefix gets deleted, if system comes out + * from sleep and configures same address before prefix deletion + * , the later prefix deletion will remove the prefix route and + * the system will not be able to communicate with other IPv6 + * neighbor nodes in the same subnet. 
+ */ struct nd_prefix * -nd6_prefix_lookup(struct nd_prefix *pr) +nd6_prefix_lookup(struct nd_prefix *pr, int nd6_prefix_expiry) { struct nd_prefix *search; @@ -1976,6 +1971,9 @@ nd6_prefix_lookup(struct nd_prefix *pr) pr->ndpr_plen == search->ndpr_plen && in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr, &search->ndpr_prefix.sin6_addr, pr->ndpr_plen)) { + if (nd6_prefix_expiry != ND6_PREFIX_EXPIRY_UNSPEC) { + search->ndpr_expire = nd6_prefix_expiry; + } NDPR_ADDREF_LOCKED(search); NDPR_UNLOCK(search); break; @@ -2075,34 +2073,19 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr, int i, error; if (ip6_maxifprefixes >= 0) { - lck_rw_lock_shared(nd_if_rwlock); - if (ifp->if_index >= nd_ifinfo_indexlim) { - lck_rw_done(nd_if_rwlock); - return (EINVAL); - } - ndi = &nd_ifinfo[ifp->if_index]; - VERIFY(ndi->initialized); + ndi = ND_IFINFO(ifp); + VERIFY((NULL != ndi) && (TRUE == ndi->initialized)); lck_mtx_lock(&ndi->lock); if (ndi->nprefixes >= ip6_maxifprefixes / 2) { lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); purge_detached(ifp); - lck_rw_lock_shared(nd_if_rwlock); - /* - * Refresh pointer since nd_ifinfo[] may have grown; - * repeating the bounds check against nd_ifinfo_indexlim - * isn't necessary since the array never shrinks. - */ - ndi = &nd_ifinfo[ifp->if_index]; lck_mtx_lock(&ndi->lock); } if (ndi->nprefixes >= ip6_maxifprefixes) { lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); return (ENOMEM); } lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); } new = ndpr_alloc(M_WAITOK); @@ -2168,19 +2151,10 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr, pfxrtr_add(new, dr); } - lck_rw_lock_shared(nd_if_rwlock); - /* - * Refresh pointer since nd_ifinfo[] may have grown; - * repeating the bounds check against nd_ifinfo_indexlim - * isn't necessary since the array never shrinks. 
- */ - ndi = &nd_ifinfo[ifp->if_index]; - VERIFY(ndi->initialized); lck_mtx_lock(&ndi->lock); ndi->nprefixes++; VERIFY(ndi->nprefixes != 0); lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); lck_mtx_unlock(nd6_mutex); @@ -2196,6 +2170,7 @@ prelist_remove(struct nd_prefix *pr) struct nd_pfxrouter *pfr, *next; struct ifnet *ifp = pr->ndpr_ifp; int e; + struct nd_ifinfo *ndi = NULL; lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); NDPR_LOCK_ASSERT_HELD(pr); @@ -2257,16 +2232,12 @@ prelist_remove(struct nd_prefix *pr) pfxrtr_del(pfr, pr); } - lck_rw_lock_shared(nd_if_rwlock); - if (ifp->if_index < nd_ifinfo_indexlim) { - struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; - VERIFY(ndi->initialized); - lck_mtx_lock(&ndi->lock); - VERIFY(ndi->nprefixes > 0); - ndi->nprefixes--; - lck_mtx_unlock(&ndi->lock); - } - lck_rw_done(nd_if_rwlock); + ndi = ND_IFINFO(ifp); + VERIFY((NULL != ndi) && (TRUE == ndi->initialized)); + lck_mtx_lock(&ndi->lock); + VERIFY(ndi->nprefixes > 0); + ndi->nprefixes--; + lck_mtx_unlock(&ndi->lock); /* This must not be the last reference to the nd_prefix */ if (NDPR_REMREF_LOCKED(pr) == NULL) { @@ -2312,7 +2283,7 @@ prelist_update( #endif } - if ((pr = nd6_prefix_lookup(new)) != NULL) { + if ((pr = nd6_prefix_lookup(new, ND6_PREFIX_EXPIRY_UNSPEC)) != NULL) { /* * nd6_prefix_lookup() ensures that pr and new have the same * prefix on a same interface. 
@@ -3596,14 +3567,13 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped, */ if (!(pr->ndpr_stateflags & NDPRF_DEFUNCT) && (rt != NULL || error == EEXIST)) { - struct nd_ifinfo *ndi; + struct nd_ifinfo *ndi = NULL; VERIFY(pr->ndpr_prproxy_sols_cnt == 0); VERIFY(RB_EMPTY(&pr->ndpr_prproxy_sols)); - lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifp); - VERIFY(ndi != NULL && ndi->initialized); + VERIFY((NULL != ndi) && (TRUE == ndi->initialized)); lck_mtx_lock(&ndi->lock); pr->ndpr_rt = rt; /* keep reference from rtrequest */ @@ -3625,7 +3595,6 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped, } lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); } else if (rt != NULL && pr->ndpr_stateflags & NDPRF_DEFUNCT) rtfree(rt); @@ -3818,9 +3787,9 @@ nd6_prefix_offlink(struct nd_prefix *pr) static struct in6_ifaddr * in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp) { - struct in6_ifaddr *ia6; - struct ifnet *ifp; - struct nd_ifinfo *ndi; + struct in6_ifaddr *ia6 = NULL; + struct ifnet *ifp = NULL; + struct nd_ifinfo *ndi = NULL; struct in6_addr mask; struct in6_aliasreq ifra; int error, ifaupdate, iidlen, notcga; @@ -3867,15 +3836,6 @@ in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp) goto unlock1; } - lck_rw_lock_shared(nd_if_rwlock); - if (ifp->if_index >= nd_ifinfo_indexlim) { - error = EADDRNOTAVAIL; - nd6log((LOG_INFO, - "%s: invalid prefix length %d for %s, ignored\n", - __func__, pr->ndpr_plen, if_name(ifp))); - goto unlock2; - } - bzero(&ifra, sizeof (ifra)); strlcpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); ifra.ifra_addr.sin6_family = AF_INET6; @@ -3890,7 +3850,7 @@ in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp) ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2]; ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3]; - ndi = &nd_ifinfo[ifp->if_index]; + ndi = ND_IFINFO(ifp); VERIFY(ndi->initialized); lck_mtx_lock(&ndi->lock); @@ 
-3898,7 +3858,6 @@ in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp) (ndi->flags & ND6_IFF_INSECURE) != 0; lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); NDPR_UNLOCK(pr); if (notcga) { @@ -3990,9 +3949,6 @@ in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp) in6_post_msg(ifp, KEV_INET6_NEW_RTADV_ADDR, ia6, NULL); goto done; -unlock2: - lck_rw_done(nd_if_rwlock); - unlock1: NDPR_UNLOCK(pr); diff --git a/bsd/netinet6/nd6_var.h b/bsd/netinet6/nd6_var.h new file mode 100644 index 000000000..0743c0a94 --- /dev/null +++ b/bsd/netinet6/nd6_var.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _NETINET6_ND6_VAR_H_ +#define _NETINET6_ND6_VAR_H_ + +#ifdef BSD_KERNEL_PRIVATE +struct nd_ifinfo { + decl_lck_mtx_data(, lock); + boolean_t initialized; /* Flag to see the entry is initialized */ + u_int32_t linkmtu; /* LinkMTU */ + u_int32_t maxmtu; /* Upper bound of LinkMTU */ + u_int32_t basereachable; /* BaseReachableTime */ + u_int32_t reachable; /* Reachable Time */ + u_int32_t retrans; /* Retrans Timer */ + u_int32_t flags; /* Flags */ + int recalctm; /* BaseReacable re-calculation timer */ + u_int8_t chlim; /* CurHopLimit */ + u_int8_t _pad[3]; + /* the following 3 members are for privacy extension for addrconf */ + u_int8_t randomseed0[8]; /* upper 64 bits of SHA1 digest */ + u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */ + u_int8_t randomid[8]; /* current random ID */ + /* keep track of routers and prefixes on this link */ + int32_t nprefixes; + int32_t ndefrouters; + struct in6_cga_modifier local_cga_modifier; +}; +#endif /* BSD_KERNEL_PRIVATE */ +#endif /* _NETINET6_ND6_VAR_H_ */ diff --git a/bsd/netinet6/raw_ip6.c b/bsd/netinet6/raw_ip6.c index 3ac3106bd..3cee2fb21 100644 --- a/bsd/netinet6/raw_ip6.c +++ b/bsd/netinet6/raw_ip6.c @@ -195,7 +195,8 @@ rip6_input( struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); #if NECP - if (n && !necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0, &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL)) { + if (n && !necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0, + &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL, NULL)) { m_freem(n); /* do not inject data into pcb */ } else @@ -226,9 +227,10 @@ rip6_input( } last = in6p; } - + #if NECP - if (last && !necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0, &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL)) { + if (last && !necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0, + &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL, NULL)) { m_freem(m); ip6stat.ip6s_delivered--; /* do not inject data into pcb */ @@ -412,7 +414,7 @@ rip6_output( (htonl(in6p->inp_flowhash) & 
IPV6_FLOWLABEL_MASK); } - M_PREPEND(m, sizeof(*ip6), M_WAIT); + M_PREPEND(m, sizeof(*ip6), M_WAIT, 1); if (m == NULL) { error = ENOBUFS; goto bad; @@ -547,19 +549,21 @@ rip6_output( *p = 0; *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen); } - + #if NECP { necp_kernel_policy_id policy_id; - if (!necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0, &ip6->ip6_src, &ip6->ip6_dst, NULL, &policy_id)) { + u_int32_t route_rule_id; + if (!necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0, + &ip6->ip6_src, &ip6->ip6_dst, NULL, &policy_id, &route_rule_id)) { error = EHOSTUNREACH; goto bad; } - necp_mark_packet_from_socket(m, in6p, policy_id); + necp_mark_packet_from_socket(m, in6p, policy_id, route_rule_id); } #endif /* NECP */ - + #if IPSEC if (in6p->in6p_sp != NULL && ipsec_setsocket(m, so) != 0) { error = ENOBUFS; diff --git a/bsd/netinet6/scope6_var.h b/bsd/netinet6/scope6_var.h index 22270e584..58e9b34f4 100644 --- a/bsd/netinet6/scope6_var.h +++ b/bsd/netinet6/scope6_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2013 Apple Inc. All rights reserved. + * Copyright (c) 2009-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -59,12 +59,13 @@ #define _NETINET6_SCOPE6_VAR_H_ #include -#ifdef BSD_KERNEL_PRIVATE /* * 16 is correspondent to 4bit multicast scope field. * i.e. from node-local to global with some reserved/unassigned types. */ -#define SCOPE6_ID_MAX 16 +#define SCOPE6_ID_MAX 16 + +#ifdef BSD_KERNEL_PRIVATE struct scope6_id { u_int32_t s6id_list[SCOPE6_ID_MAX]; diff --git a/bsd/netinet6/udp6_output.c b/bsd/netinet6/udp6_output.c index 2d64d6b94..a8d0e2fb3 100644 --- a/bsd/netinet6/udp6_output.c +++ b/bsd/netinet6/udp6_output.c @@ -139,6 +139,7 @@ * UDP protocol inplementation. * Per RFC 768, August, 1980. 
*/ +extern int soreserveheadroom; int udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6, @@ -306,7 +307,7 @@ udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6, * Calculate data length and get a mbuf * for UDP and IP6 headers. */ - M_PREPEND(m, hlen + sizeof (struct udphdr), M_DONTWAIT); + M_PREPEND(m, hlen + sizeof (struct udphdr), M_DONTWAIT, 1); if (m == 0) { error = ENOBUFS; goto release; @@ -350,19 +351,20 @@ udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6, flags = IPV6_OUTARGS; udp6stat.udp6s_opackets++; - + #if NECP { necp_kernel_policy_id policy_id; - if (!necp_socket_is_allowed_to_send_recv_v6(in6p, in6p->in6p_lport, fport, laddr, faddr, NULL, &policy_id)) { + u_int32_t route_rule_id; + if (!necp_socket_is_allowed_to_send_recv_v6(in6p, in6p->in6p_lport, fport, laddr, faddr, NULL, &policy_id, &route_rule_id)) { error = EHOSTUNREACH; goto release; } - necp_mark_packet_from_socket(m, in6p, policy_id); + necp_mark_packet_from_socket(m, in6p, policy_id, route_rule_id); } #endif /* NECP */ - + #if IPSEC if (in6p->in6p_sp != NULL && ipsec_setsocket(m, so) != 0) { error = ENOBUFS; @@ -440,6 +442,17 @@ udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6, if ( --in6p->inp_sndinprog_cnt == 0) in6p->inp_flags &= ~(INP_FC_FEEDBACK); + if (ro.ro_rt != NULL) { + struct ifnet *outif = ro.ro_rt->rt_ifp; + + so->so_pktheadroom = P2ROUNDUP( + sizeof(struct udphdr) + + hlen + + ifnet_hdrlen(outif) + + ifnet_packetpreamblelen(outif), + sizeof(u_int32_t)); + } + /* Synchronize PCB cached route */ in6p_route_copyin(in6p, &ro); @@ -462,8 +475,16 @@ udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6, * with that of the route interface used by IP. 
*/ if (rt != NULL && - (outif = rt->rt_ifp) != in6p->in6p_last_outifp) + (outif = rt->rt_ifp) != in6p->in6p_last_outifp) { in6p->in6p_last_outifp = outif; + + so->so_pktheadroom = P2ROUNDUP( + sizeof(struct udphdr) + + hlen + + ifnet_hdrlen(outif) + + ifnet_packetpreamblelen(outif), + sizeof(u_int32_t)); + } } else { ROUTE_RELEASE(&in6p->in6p_route); } diff --git a/bsd/netinet6/udp6_usrreq.c b/bsd/netinet6/udp6_usrreq.c index 29d037b58..1e0f9eb37 100644 --- a/bsd/netinet6/udp6_usrreq.c +++ b/bsd/netinet6/udp6_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -132,12 +132,19 @@ #if IPSEC #include #include +#include +extern int ipsec_bypass; +extern int esp_udp_encap_port; #endif /* IPSEC */ #if NECP #include #endif /* NECP */ +#if FLOW_DIVERT +#include +#endif /* FLOW_DIVERT */ + /* * UDP protocol inplementation. * Per RFC 768, August, 1980. @@ -147,11 +154,11 @@ static int udp6_abort(struct socket *); static int udp6_attach(struct socket *, int, struct proc *); static int udp6_bind(struct socket *, struct sockaddr *, struct proc *); static int udp6_connectx(struct socket *, struct sockaddr_list **, - struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, - uint32_t, void *, uint32_t); + struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t, + sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *); static int udp6_detach(struct socket *); static int udp6_disconnect(struct socket *); -static int udp6_disconnectx(struct socket *, associd_t, connid_t); +static int udp6_disconnectx(struct socket *, sae_associd_t, sae_connid_t); static int udp6_send(struct socket *, int, struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *); static void udp6_append(struct inpcb *, struct ip6_hdr *, @@ -193,6 +200,7 @@ struct pr_usrreqs udp6_usrreqs = { .pru_sockaddr = 
in6_mapped_sockaddr, .pru_sosend = sosend, .pru_soreceive = soreceive, + .pru_soreceive_list = soreceive_list, }; /* @@ -411,7 +419,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) skipit = 0; if (!necp_socket_is_allowed_to_send_recv_v6(in6p, uh->uh_dport, uh->uh_sport, &ip6->ip6_dst, - &ip6->ip6_src, ifp, NULL)) { + &ip6->ip6_src, ifp, NULL, NULL)) { /* do not inject data to pcb */ skipit = 1; } @@ -477,6 +485,49 @@ udp6_input(struct mbuf **mp, int *offp, int proto) m_freem(m); return (IPPROTO_DONE); } + +#if IPSEC + /* + * UDP to port 4500 with a payload where the first four bytes are + * not zero is a UDP encapsulated IPSec packet. Packets where + * the payload is one byte and that byte is 0xFF are NAT keepalive + * packets. Decapsulate the ESP packet and carry on with IPSec input + * or discard the NAT keep-alive. + */ + if (ipsec_bypass == 0 && (esp_udp_encap_port & 0xFFFF) != 0 && + uh->uh_dport == ntohs((u_short)esp_udp_encap_port)) { + int payload_len = ulen - sizeof (struct udphdr) > 4 ? 4 : + ulen - sizeof (struct udphdr); + + if (m->m_len < off + sizeof (struct udphdr) + payload_len) { + if ((m = m_pullup(m, off + sizeof (struct udphdr) + + payload_len)) == NULL) { + udpstat.udps_hdrops++; + goto bad; + } + /* + * Expect 32-bit aligned data pointer on strict-align + * platforms. + */ + MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); + + ip6 = mtod(m, struct ip6_hdr *); + uh = (struct udphdr *)(void *)((caddr_t)ip6 + off); + } + /* Check for NAT keepalive packet */ + if (payload_len == 1 && *(u_int8_t*) + ((caddr_t)uh + sizeof (struct udphdr)) == 0xFF) { + goto bad; + } else if (payload_len == 4 && *(u_int32_t*)(void *) + ((caddr_t)uh + sizeof (struct udphdr)) != 0) { + /* UDP encapsulated IPSec packet to pass through NAT */ + /* preserve the udp header */ + *offp = off + sizeof (struct udphdr); + return (esp6_input(mp, offp, IPPROTO_UDP)); + } + } +#endif /* IPSEC */ + /* * Locate pcb for datagram. 
*/ @@ -516,7 +567,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) } #if NECP if (!necp_socket_is_allowed_to_send_recv_v6(in6p, uh->uh_dport, - uh->uh_sport, &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL)) { + uh->uh_sport, &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL, NULL)) { in_pcb_checkstate(in6p, WNT_RELEASE, 0); IF_UDP_STATINC(ifp, badipsec); goto bad; @@ -694,12 +745,8 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct proc *p) int error; inp = sotoinpcb(so); - if (inp == NULL -#if NECP - || (necp_socket_should_use_flow_divert(inp)) -#endif /* NECP */ - ) - return (inp == NULL ? EINVAL : EPROTOTYPE); + if (inp == NULL) + return (EINVAL); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; @@ -730,14 +777,17 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp; int error; +#if defined(NECP) && defined(FLOW_DIVERT) + int should_use_flow_divert = 0; +#endif /* defined(NECP) && defined(FLOW_DIVERT) */ inp = sotoinpcb(so); - if (inp == NULL -#if NECP - || (necp_socket_should_use_flow_divert(inp)) -#endif /* NECP */ - ) - return (inp == NULL ? 
EINVAL : EPROTOTYPE); + if (inp == NULL) + return (EINVAL); + +#if defined(NECP) && defined(FLOW_DIVERT) + should_use_flow_divert = necp_socket_should_use_flow_divert(inp); +#endif /* defined(NECP) && defined(FLOW_DIVERT) */ if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { struct sockaddr_in6 *sin6_p; @@ -749,6 +799,11 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) if (inp->inp_faddr.s_addr != INADDR_ANY) return (EISCONN); in6_sin6_2_sin(&sin, sin6_p); +#if defined(NECP) && defined(FLOW_DIVERT) + if (should_use_flow_divert) { + goto do_flow_divert; + } +#endif /* defined(NECP) && defined(FLOW_DIVERT) */ error = in_pcbconnect(inp, (struct sockaddr *)&sin, p, IFSCOPE_NONE, NULL); if (error == 0) { @@ -762,6 +817,23 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) return (EISCONN); + +#if defined(NECP) && defined(FLOW_DIVERT) +do_flow_divert: + if (should_use_flow_divert) { + uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp); + if (fd_ctl_unit > 0) { + error = flow_divert_pcb_init(so, fd_ctl_unit); + if (error == 0) { + error = flow_divert_connect_out(so, nam, p); + } + } else { + error = ENETDOWN; + } + return (error); + } +#endif /* defined(NECP) && defined(FLOW_DIVERT) */ + error = in6_pcbconnect(inp, nam, p); if (error == 0) { /* should be non mapped addr */ @@ -787,11 +859,11 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) static int udp6_connectx(struct socket *so, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid, uint32_t flags, void *arg, - uint32_t arglen) + sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written) { return (udp_connectx_common(so, AF_INET6, src_sl, dst_sl, - p, ifscope, aid, pcid, flags, arg, arglen)); + p, ifscope, aid, pcid, flags, arg, arglen, uio, 
bytes_written)); } static int @@ -841,10 +913,10 @@ udp6_disconnect(struct socket *so) } static int -udp6_disconnectx(struct socket *so, associd_t aid, connid_t cid) +udp6_disconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid) { #pragma unused(cid) - if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) return (EINVAL); return (udp6_disconnect(so)); @@ -856,20 +928,20 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, { struct inpcb *inp; int error = 0; +#if defined(NECP) && defined(FLOW_DIVERT) + int should_use_flow_divert = 0; +#endif /* defined(NECP) && defined(FLOW_DIVERT) */ inp = sotoinpcb(so); - if (inp == NULL -#if NECP - || (necp_socket_should_use_flow_divert(inp)) -#endif /* NECP */ - ) { - if (inp == NULL) - error = EINVAL; - else - error = EPROTOTYPE; + if (inp == NULL) { + error = EINVAL; goto bad; } +#if defined(NECP) && defined(FLOW_DIVERT) + should_use_flow_divert = necp_socket_should_use_flow_divert(inp); +#endif /* defined(NECP) && defined(FLOW_DIVERT) */ + if (addr != NULL) { if (addr->sa_len != sizeof (struct sockaddr_in6)) { error = EINVAL; @@ -897,6 +969,11 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, if (sin6 != NULL) in6_sin6_2_sin_in_sock(addr); +#if defined(NECP) && defined(FLOW_DIVERT) + if (should_use_flow_divert) { + goto do_flow_divert; + } +#endif /* defined(NECP) && defined(FLOW_DIVERT) */ pru = ip_protox[IPPROTO_UDP]->pr_usrreqs; error = ((*pru->pru_send)(so, flags, m, addr, control, p)); @@ -904,6 +981,15 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, return (error); } } + +#if defined(NECP) && defined(FLOW_DIVERT) +do_flow_divert: + if (should_use_flow_divert) { + /* Implicit connect */ + return (flow_divert_implicit_data_out(so, flags, m, addr, control, p)); + } +#endif /* defined(NECP) && defined(FLOW_DIVERT) */ + return (udp6_output(inp, m, addr, control, p)); bad: 
diff --git a/bsd/netkey/Makefile b/bsd/netkey/Makefile index 882e7b5e3..6152b0760 100644 --- a/bsd/netkey/Makefile +++ b/bsd/netkey/Makefile @@ -24,7 +24,7 @@ EXPORT_MI_LIST = ${DATAFILES} EXPORT_MI_DIR = netkey -INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES} diff --git a/bsd/netkey/key.c b/bsd/netkey/key.c index 91b6ba040..0e2e7df2e 100644 --- a/bsd/netkey/key.c +++ b/bsd/netkey/key.c @@ -234,6 +234,9 @@ static const int minsize[] = { sizeof(struct sadb_address), /* SADB_X_EXT_ADDR_RANGE_SRC_END */ sizeof(struct sadb_address), /* SADB_X_EXT_ADDR_RANGE_DST_START */ sizeof(struct sadb_address), /* SADB_X_EXT_ADDR_RANGE_DST_END */ + sizeof(struct sadb_address), /* SADB_EXT_MIGRATE_ADDRESS_SRC */ + sizeof(struct sadb_address), /* SADB_EXT_MIGRATE_ADDRESS_DST */ + sizeof(struct sadb_x_ipsecif), /* SADB_X_EXT_MIGRATE_IPSECIF */ }; static const int maxsize[] = { sizeof(struct sadb_msg), /* SADB_EXT_RESERVED */ @@ -263,6 +266,9 @@ static const int maxsize[] = { 0, /* SADB_X_EXT_ADDR_RANGE_SRC_END */ 0, /* SADB_X_EXT_ADDR_RANGE_DST_START */ 0, /* SADB_X_EXT_ADDR_RANGE_DST_END */ + 0, /* SADB_EXT_MIGRATE_ADDRESS_SRC */ + 0, /* SADB_EXT_MIGRATE_ADDRESS_DST */ + sizeof(struct sadb_x_ipsecif), /* SADB_X_EXT_MIGRATE_IPSECIF */ }; static int ipsec_esp_keymin = 256; @@ -565,6 +571,7 @@ static int key_validate_ext(const struct sadb_ext *, int); static int key_align(struct mbuf *, struct sadb_msghdr *); static struct mbuf *key_alloc_mbuf(int); static int key_getsastat (struct socket *, struct mbuf *, const struct sadb_msghdr *); +static int key_migrate (struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_setsaval2(struct secasvar *sav, u_int8_t satype, u_int8_t alg_auth, @@ -585,8 +592,8 @@ static int key_setsaval2(struct secasvar *sav, extern int ipsec_bypass; extern int esp_udp_encap_port; int 
ipsec_send_natt_keepalive(struct secasvar *sav); -bool ipsec_fill_offload_frame(ifnet_t ifp, struct secasvar *sav, struct ipsec_offload_frame *frame, size_t frame_data_offset); -u_int32_t key_fill_offload_frames_for_savs (ifnet_t ifp, struct ipsec_offload_frame *frames_array, u_int32_t frames_array_count, size_t frame_data_offset); +bool ipsec_fill_offload_frame(ifnet_t ifp, struct secasvar *sav, struct ifnet_keepalive_offload_frame *frame, size_t frame_data_offset); +u_int32_t key_fill_offload_frames_for_savs (ifnet_t ifp, struct ifnet_keepalive_offload_frame *frames_array, u_int32_t frames_array_count, size_t frame_data_offset); void key_init(struct protosw *, struct domain *); @@ -850,7 +857,7 @@ struct secasvar *key_alloc_outbound_sav_for_interface(ifnet_t interface, int fam LIST_FOREACH(sah, &sahtree, chain) { if (sah->ipsec_if == interface && - (family == AF_INET6 || sah->saidx.dst.ss_family == family) && /* IPv6 can go over IPv4 */ + (family == AF_INET6 || family == AF_INET) && sah->dir == IPSEC_DIR_OUTBOUND) { /* This SAH is linked to the IPSec interface, and the right family. We found it! 
*/ if (key_preferred_oldsa) { @@ -2251,13 +2258,13 @@ key_spdadd( /* Process interfaces */ if (ipsecifopts != NULL) { - if (ipsecifopts->sadb_x_ipsecif_internal_if) { + if (ipsecifopts->sadb_x_ipsecif_internal_if[0]) { ifnet_find_by_name(ipsecifopts->sadb_x_ipsecif_internal_if, &internal_if); } - if (ipsecifopts->sadb_x_ipsecif_outgoing_if) { + if (ipsecifopts->sadb_x_ipsecif_outgoing_if[0]) { outgoing_if = ipsecifopts->sadb_x_ipsecif_outgoing_if; } - if (ipsecifopts->sadb_x_ipsecif_ipsec_if) { + if (ipsecifopts->sadb_x_ipsecif_ipsec_if[0]) { ipsec_if = ipsecifopts->sadb_x_ipsecif_ipsec_if; } init_disabled = ipsecifopts->sadb_x_ipsecif_init_disabled; @@ -2596,7 +2603,7 @@ key_spddelete( /* Process interfaces */ if (ipsecifopts != NULL) { - if (ipsecifopts->sadb_x_ipsecif_internal_if) { + if (ipsecifopts->sadb_x_ipsecif_internal_if[0]) { ifnet_find_by_name(ipsecifopts->sadb_x_ipsecif_internal_if, &internal_if); } } @@ -3830,6 +3837,23 @@ key_newsav2(struct secashead *sah, return newsav; } +static int +key_migratesav(struct secasvar *sav, + struct secashead *newsah) +{ + if (sav == NULL || newsah == NULL || sav->state != SADB_SASTATE_MATURE) { + return EINVAL; + } + + /* remove from SA header */ + if (__LIST_CHAINED(sav)) + LIST_REMOVE(sav, chain); + + sav->sah = newsah; + LIST_INSERT_TAIL(&newsah->savtree[SADB_SASTATE_MATURE], sav, secasvar, chain); + return 0; +} + /* * free() SA variable entry. 
*/ @@ -4090,6 +4114,7 @@ key_setsaval( } sav->remote_ike_port = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_port; sav->natt_interval = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_interval; + sav->natt_offload_interval = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_offload_interval; } /* @@ -4856,7 +4881,7 @@ key_setdumpsa( if ((!m && !p) || (m && p)) goto fail; if (p && tres) { - M_PREPEND(tres, l, M_WAITOK); + M_PREPEND(tres, l, M_WAITOK, 1); if (!tres) goto fail; bcopy(p, mtod(tres, caddr_t), l); @@ -6364,14 +6389,14 @@ key_proto2satype( } static ifnet_t -key_get_ipsec_if_from_message (const struct sadb_msghdr *mhp) +key_get_ipsec_if_from_message (const struct sadb_msghdr *mhp, int message_type) { struct sadb_x_ipsecif *ipsecifopts = NULL; ifnet_t ipsec_if = NULL; - ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[SADB_X_EXT_IPSECIF]; + ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[message_type]; if (ipsecifopts != NULL) { - if (ipsecifopts->sadb_x_ipsecif_internal_if) { + if (ipsecifopts->sadb_x_ipsecif_ipsec_if[0]) { ifnet_find_by_name(ipsecifopts->sadb_x_ipsecif_ipsec_if, &ipsec_if); } } @@ -6380,14 +6405,14 @@ key_get_ipsec_if_from_message (const struct sadb_msghdr *mhp) } static u_int -key_get_outgoing_ifindex_from_message (const struct sadb_msghdr *mhp) +key_get_outgoing_ifindex_from_message (const struct sadb_msghdr *mhp, int message_type) { struct sadb_x_ipsecif *ipsecifopts = NULL; ifnet_t outgoing_if = NULL; - ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[SADB_X_EXT_IPSECIF]; + ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[message_type]; if (ipsecifopts != NULL) { - if (ipsecifopts->sadb_x_ipsecif_outgoing_if) { + if (ipsecifopts->sadb_x_ipsecif_outgoing_if[0]) { ifnet_find_by_name(ipsecifopts->sadb_x_ipsecif_outgoing_if, &outgoing_if); } } @@ -6454,7 +6479,7 @@ key_getspi( src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - 
ipsec_if = key_get_ipsec_if_from_message(mhp); + ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { @@ -6512,7 +6537,7 @@ key_getspi( /* get a SA index */ if ((newsah = key_getsah(&saidx)) == NULL) { /* create a new SA index: key_addspi is always used for inbound spi */ - if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp), IPSEC_DIR_INBOUND)) == NULL) { + if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_INBOUND)) == NULL) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, "key_getspi: No more memory.\n")); return key_senderror(so, m, ENOBUFS); @@ -6829,7 +6854,7 @@ key_update( sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - ipsec_if = key_get_ipsec_if_from_message(mhp); + ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF); /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, ipsec_if ? 
ipsec_if->if_index : 0, &saidx); @@ -6943,6 +6968,152 @@ key_update( } } +static int +key_migrate(struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) +{ + struct sadb_sa *sa0 = NULL; + struct sadb_address *src0 = NULL; + struct sadb_address *dst0 = NULL; + struct sadb_address *src1 = NULL; + struct sadb_address *dst1 = NULL; + ifnet_t ipsec_if0 = NULL; + ifnet_t ipsec_if1 = NULL; + struct secasindex saidx0; + struct secasindex saidx1; + struct secashead *sah = NULL; + struct secashead *newsah = NULL; + struct secasvar *sav = NULL; + u_int16_t proto; + + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); + + /* sanity check */ + if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) + panic("key_migrate: NULL pointer is passed.\n"); + + /* map satype to proto */ + if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { + ipseclog((LOG_DEBUG, "key_migrate: invalid satype is passed.\n")); + return key_senderror(so, m, EINVAL); + } + + if (mhp->ext[SADB_EXT_SA] == NULL || + mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || + mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || + mhp->ext[SADB_EXT_MIGRATE_ADDRESS_SRC] == NULL || + mhp->ext[SADB_EXT_MIGRATE_ADDRESS_DST] == NULL) { + ipseclog((LOG_DEBUG, "key_migrate: invalid message is passed.\n")); + return key_senderror(so, m, EINVAL); + } + + if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || + mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || + mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || + mhp->extlen[SADB_EXT_MIGRATE_ADDRESS_SRC] < sizeof(struct sadb_address) || + mhp->extlen[SADB_EXT_MIGRATE_ADDRESS_DST] < sizeof(struct sadb_address)) { + ipseclog((LOG_DEBUG, "key_migrate: invalid message is passed.\n")); + return key_senderror(so, m, EINVAL); + } + + lck_mtx_lock(sadb_mutex); + + sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; + src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); + dst0 = (struct sadb_address 
*)(mhp->ext[SADB_EXT_ADDRESS_DST]); + src1 = (struct sadb_address *)(mhp->ext[SADB_EXT_MIGRATE_ADDRESS_SRC]); + dst1 = (struct sadb_address *)(mhp->ext[SADB_EXT_MIGRATE_ADDRESS_DST]); + ipsec_if0 = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF); + ipsec_if1 = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_MIGRATE_IPSECIF); + + /* Find existing SAH and SAV */ + KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, ipsec_if0 ? ipsec_if0->if_index : 0, &saidx0); + + LIST_FOREACH(sah, &sahtree, chain) { + if (sah->state != SADB_SASTATE_MATURE) + continue; + if (key_cmpsaidx(&sah->saidx, &saidx0, CMP_HEAD) == 0) + continue; + + sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); + if (sav && sav->state == SADB_SASTATE_MATURE) + break; + } + if (sah == NULL) { + lck_mtx_unlock(sadb_mutex); + ipseclog((LOG_DEBUG, "key_migrate: no mature SAH found.\n")); + return key_senderror(so, m, ENOENT); + } + + if (sav == NULL) { + lck_mtx_unlock(sadb_mutex); + ipseclog((LOG_DEBUG, "key_migrate: no SA found.\n")); + return key_senderror(so, m, ENOENT); + } + + /* Find or create new SAH */ + KEY_SETSECASIDX(proto, sah->saidx.mode, sah->saidx.reqid, src1 + 1, dst1 + 1, ipsec_if1 ? 
ipsec_if1->if_index : 0, &saidx1); + + if ((newsah = key_getsah(&saidx1)) == NULL) { + if ((newsah = key_newsah(&saidx1, ipsec_if1, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_MIGRATE_IPSECIF), sah->dir)) == NULL) { + lck_mtx_unlock(sadb_mutex); + ipseclog((LOG_DEBUG, "key_migrate: No more memory.\n")); + return key_senderror(so, m, ENOBUFS); + } + } + + /* Migrate SAV in to new SAH */ + if (key_migratesav(sav, newsah) != 0) { + lck_mtx_unlock(sadb_mutex); + ipseclog((LOG_DEBUG, "key_migrate: Failed to migrate SA to new SAH.\n")); + return key_senderror(so, m, EINVAL); + } + + /* Reset NAT values */ + sav->flags = sa0->sadb_sa_flags; + sav->remote_ike_port = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_port; + sav->natt_interval = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_interval; + sav->natt_offload_interval = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_offload_interval; + sav->natt_last_activity = natt_now; + + /* + * Verify if SADB_X_EXT_NATT_MULTIPLEUSERS flag is set that + * SADB_X_EXT_NATT is set and SADB_X_EXT_NATT_KEEPALIVE is not + * set (we're not behind nat) - otherwise clear it. + */ + if ((sav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) != 0) + if ((sav->flags & SADB_X_EXT_NATT) == 0 || + (sav->flags & SADB_X_EXT_NATT_KEEPALIVE) != 0) + sav->flags &= ~SADB_X_EXT_NATT_MULTIPLEUSERS; + + lck_mtx_unlock(sadb_mutex); + { + struct mbuf *n; + struct sadb_msg *newmsg; + int mbufItems[] = {SADB_EXT_RESERVED, SADB_EXT_SA, + SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST, SADB_X_EXT_IPSECIF, + SADB_EXT_MIGRATE_ADDRESS_SRC, SADB_EXT_MIGRATE_ADDRESS_DST, SADB_X_EXT_MIGRATE_IPSECIF}; + + /* create new sadb_msg to reply. 
*/ + n = key_gather_mbuf(m, mhp, 1, sizeof(mbufItems)/sizeof(int), mbufItems); + if (!n) + return key_senderror(so, m, ENOBUFS); + + if (n->m_len < sizeof(struct sadb_msg)) { + n = m_pullup(n, sizeof(struct sadb_msg)); + if (n == NULL) + return key_senderror(so, m, ENOBUFS); + } + newmsg = mtod(n, struct sadb_msg *); + newmsg->sadb_msg_errno = 0; + newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + } +} + /* * search SAD with sequence for a SA which state is SADB_SASTATE_LARVAL. * only called by key_update(). @@ -7060,7 +7231,7 @@ key_add( sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - ipsec_if = key_get_ipsec_if_from_message(mhp); + ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF); /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, ipsec_if ? 
ipsec_if->if_index : 0, &saidx); @@ -7070,7 +7241,7 @@ key_add( /* get a SA header */ if ((newsah = key_getsah(&saidx)) == NULL) { /* create a new SA header: key_addspi is always used for outbound spi */ - if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp), IPSEC_DIR_OUTBOUND)) == NULL) { + if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_OUTBOUND)) == NULL) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, "key_add: No more memory.\n")); return key_senderror(so, m, ENOBUFS); @@ -7328,7 +7499,7 @@ key_delete( sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - ipsec_if = key_get_ipsec_if_from_message(mhp); + ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF); /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, ipsec_if ? ipsec_if->if_index : 0, &saidx); @@ -7403,7 +7574,7 @@ key_delete_all( src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - ipsec_if = key_get_ipsec_if_from_message(mhp); + ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF); /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, ipsec_if ? ipsec_if->if_index : 0, &saidx); @@ -7519,7 +7690,7 @@ key_get( sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - ipsec_if = key_get_ipsec_if_from_message(mhp); + ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF); /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, ipsec_if ? 
ipsec_if->if_index : 0, &saidx); @@ -7790,7 +7961,7 @@ key_getcomb_ah(void) m->m_next = NULL; } } else - M_PREPEND(m, l, M_WAITOK); + M_PREPEND(m, l, M_WAITOK, 1); if (!m) return NULL; @@ -7836,7 +8007,7 @@ key_getcomb_ipcomp(void) m->m_next = NULL; } } else - M_PREPEND(m, l, M_WAITOK); + M_PREPEND(m, l, M_WAITOK, 1); if (!m) return NULL; @@ -7882,7 +8053,7 @@ key_getprop( if (!m) return NULL; - M_PREPEND(m, l, M_WAITOK); + M_PREPEND(m, l, M_WAITOK, 1); if (!m) return NULL; @@ -8316,7 +8487,7 @@ key_acquire2( src0 = (const struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (const struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - ipsec_if = key_get_ipsec_if_from_message(mhp); + ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF); /* XXX boundary check against sa_len */ /* cast warnings */ @@ -9014,8 +9185,69 @@ static int (*key_typesw[])(struct socket *, struct mbuf *, key_getsastat, /* SADB_GETSASTAT */ key_spdenable, /* SADB_X_SPDENABLE */ key_spddisable, /* SADB_X_SPDDISABLE */ + key_migrate, /* SADB_MIGRATE */ }; +static void +bzero_mbuf(struct mbuf *m) +{ + struct mbuf *mptr = m; + struct sadb_msg *msg = NULL; + int offset = 0; + + if (!mptr) { + return; + } + + if (mptr->m_len >= sizeof(struct sadb_msg)) { + msg = mtod(mptr, struct sadb_msg *); + if (msg->sadb_msg_type != SADB_ADD && + msg->sadb_msg_type != SADB_UPDATE) { + return; + } + offset = sizeof(struct sadb_msg); + } + bzero(mptr->m_data+offset, mptr->m_len-offset); + mptr = mptr->m_next; + while (mptr != NULL) { + bzero(mptr->m_data, mptr->m_len); + mptr = mptr->m_next; + } +} + +static void +bzero_keys(struct sadb_msghdr *mh) +{ + int extlen = 0; + int offset = 0; + + if (!mh) { + return; + } + offset = sizeof(struct sadb_key); + + if (mh->ext[SADB_EXT_KEY_ENCRYPT]) { + struct sadb_key *key = (struct sadb_key*)mh->ext[SADB_EXT_KEY_ENCRYPT]; + extlen = key->sadb_key_bits >> 3; + + if (mh->extlen[SADB_EXT_KEY_ENCRYPT] >= offset + extlen) { + bzero((uint8_t 
*)mh->ext[SADB_EXT_KEY_ENCRYPT]+offset, extlen); + } else { + bzero(mh->ext[SADB_EXT_KEY_ENCRYPT], mh->extlen[SADB_EXT_KEY_ENCRYPT]); + } + } + if (mh->ext[SADB_EXT_KEY_AUTH]) { + struct sadb_key *key = (struct sadb_key*)mh->ext[SADB_EXT_KEY_AUTH]; + extlen = key->sadb_key_bits >> 3; + + if (mh->extlen[SADB_EXT_KEY_AUTH] >= offset + extlen) { + bzero((uint8_t *)mh->ext[SADB_EXT_KEY_AUTH]+offset, extlen); + } else { + bzero(mh->ext[SADB_EXT_KEY_AUTH], mh->extlen[SADB_EXT_KEY_AUTH]); + } + } +} + /* * parse sadb_msg buffer to process PFKEYv2, * and create a data to response if needed. @@ -9037,7 +9269,8 @@ key_parse( u_int orglen; int error; int target; - + Boolean keyAligned = FALSE; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ @@ -9101,12 +9334,14 @@ key_parse( } } if (!n) { + bzero_mbuf(m); m_freem(m); return ENOBUFS; } m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t)); n->m_pkthdr.len = n->m_len = m->m_pkthdr.len; n->m_next = NULL; + bzero_mbuf(m); m_freem(m); m = n; } @@ -9117,10 +9352,12 @@ key_parse( return error; if (m->m_next) { /*XXX*/ + bzero_mbuf(m); m_freem(m); return ENOBUFS; } + keyAligned = TRUE; msg = mh.msg; /* check SA type */ @@ -9277,9 +9514,20 @@ key_parse( goto senderror; } - return (*key_typesw[msg->sadb_msg_type])(so, m, &mh); + error = (*key_typesw[msg->sadb_msg_type])(so, m, &mh); + + // mh.ext points to the mbuf content. + // Zero out Encryption and Integrity keys if present. 
+ bzero_keys(&mh); + return error; + senderror: + if (keyAligned) { + bzero_keys(&mh); + } else { + bzero_mbuf(m); + } msg->sadb_msg_errno = error; return key_sendup_mbuf(so, m, target); } @@ -9367,6 +9615,9 @@ key_align( case SADB_X_EXT_ADDR_RANGE_SRC_END: case SADB_X_EXT_ADDR_RANGE_DST_START: case SADB_X_EXT_ADDR_RANGE_DST_END: + case SADB_EXT_MIGRATE_ADDRESS_SRC: + case SADB_EXT_MIGRATE_ADDRESS_DST: + case SADB_X_EXT_MIGRATE_IPSECIF: /* duplicate check */ /* * XXX Are there duplication payloads of either @@ -9376,6 +9627,7 @@ key_align( ipseclog((LOG_DEBUG, "key_align: duplicate ext_type %u " "is passed.\n", ext->sadb_ext_type)); + bzero_mbuf(m); m_freem(m); PFKEY_STAT_INCREMENT(pfkeystat.out_dupext); return EINVAL; @@ -9385,6 +9637,7 @@ key_align( ipseclog((LOG_DEBUG, "key_align: invalid ext_type %u is passed.\n", ext->sadb_ext_type)); + bzero_mbuf(m); m_freem(m); PFKEY_STAT_INCREMENT(pfkeystat.out_invexttype); return EINVAL; @@ -9393,6 +9646,7 @@ key_align( extlen = PFKEY_UNUNIT64(ext->sadb_ext_len); if (key_validate_ext(ext, extlen)) { + bzero_mbuf(m); m_freem(m); PFKEY_STAT_INCREMENT(pfkeystat.out_invlen); return EINVAL; @@ -9411,6 +9665,7 @@ key_align( } if (off != end) { + bzero_mbuf(m); m_freem(m); PFKEY_STAT_INCREMENT(pfkeystat.out_invlen); return EINVAL; @@ -9450,6 +9705,8 @@ key_validate_ext( case SADB_X_EXT_ADDR_RANGE_SRC_END: case SADB_X_EXT_ADDR_RANGE_DST_START: case SADB_X_EXT_ADDR_RANGE_DST_END: + case SADB_EXT_MIGRATE_ADDRESS_SRC: + case SADB_EXT_MIGRATE_ADDRESS_DST: baselen = PFKEY_ALIGN8(sizeof(struct sadb_address)); checktype = ADDR; break; @@ -10007,13 +10264,13 @@ key_delsp_for_ipsec_if (ifnet_t ipsec_if) __private_extern__ u_int32_t key_fill_offload_frames_for_savs (ifnet_t ifp, - struct ipsec_offload_frame *frames_array, - u_int32_t frames_array_count, - size_t frame_data_offset) + struct ifnet_keepalive_offload_frame *frames_array, + u_int32_t frames_array_count, + size_t frame_data_offset) { struct secashead *sah = NULL; struct secasvar 
*sav = NULL; - struct ipsec_offload_frame *frame = frames_array; + struct ifnet_keepalive_offload_frame *frame = frames_array; u_int32_t frame_index = 0; if (frame == NULL || frames_array_count == 0) { diff --git a/bsd/netkey/keydb.c b/bsd/netkey/keydb.c index 362ce530f..f1d5c830b 100644 --- a/bsd/netkey/keydb.c +++ b/bsd/netkey/keydb.c @@ -65,11 +65,8 @@ keydb_newsecpolicy() lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); - p = (struct secpolicy *)_MALLOC(sizeof(*p), M_SECA, M_WAITOK); - if (!p) - return p; - bzero(p, sizeof(*p)); - return p; + return (struct secpolicy *)_MALLOC(sizeof(*p), M_SECA, + M_WAITOK | M_ZERO); } void @@ -91,15 +88,15 @@ keydb_newsecashead() lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - p = (struct secashead *)_MALLOC(sizeof(*p), M_SECA, M_NOWAIT); + p = (struct secashead *)_MALLOC(sizeof(*p), M_SECA, M_NOWAIT | M_ZERO); if (!p) { lck_mtx_unlock(sadb_mutex); - p = (struct secashead *)_MALLOC(sizeof(*p), M_SECA, M_WAITOK); + p = (struct secashead *)_MALLOC(sizeof(*p), M_SECA, + M_WAITOK | M_ZERO); lck_mtx_lock(sadb_mutex); } if (!p) return p; - bzero(p, sizeof(*p)); for (i = 0; i < sizeof(p->savtree)/sizeof(p->savtree[0]); i++) LIST_INIT(&p->savtree[i]); return p; @@ -180,28 +177,28 @@ keydb_newsecreplay(wsize) lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - p = (struct secreplay *)_MALLOC(sizeof(*p), M_SECA, M_NOWAIT); + p = (struct secreplay *)_MALLOC(sizeof(*p), M_SECA, M_NOWAIT | M_ZERO); if (!p) { lck_mtx_unlock(sadb_mutex); - p = (struct secreplay *)_MALLOC(sizeof(*p), M_SECA, M_WAITOK); + p = (struct secreplay *)_MALLOC(sizeof(*p), M_SECA, + M_WAITOK | M_ZERO); lck_mtx_lock(sadb_mutex); } if (!p) return p; - bzero(p, sizeof(*p)); if (wsize != 0) { - p->bitmap = (caddr_t)_MALLOC(wsize, M_SECA, M_NOWAIT); + p->bitmap = (caddr_t)_MALLOC(wsize, M_SECA, M_NOWAIT | M_ZERO); if (!p->bitmap) { lck_mtx_unlock(sadb_mutex); - p->bitmap = (caddr_t)_MALLOC(wsize, M_SECA, M_WAITOK); + p->bitmap = (caddr_t)_MALLOC(wsize, M_SECA, + 
M_WAITOK | M_ZERO); lck_mtx_lock(sadb_mutex); if (!p->bitmap) { _FREE(p, M_SECA); return NULL; } } - bzero(p->bitmap, wsize); } p->wsize = wsize; return p; diff --git a/bsd/netkey/keydb.h b/bsd/netkey/keydb.h index c2e463073..715e5e6fb 100644 --- a/bsd/netkey/keydb.h +++ b/bsd/netkey/keydb.h @@ -113,6 +113,7 @@ struct secasvar { u_int16_t remote_ike_port; u_int16_t natt_encapsulated_src_port; /* network byte order */ u_int16_t natt_interval; /* Interval in seconds */ + u_int16_t natt_offload_interval; /* Hardware Offload Interval in seconds */ u_int8_t always_expire; /* Send expire/delete messages even if unused */ diff --git a/bsd/netkey/keysock.c b/bsd/netkey/keysock.c index 69c1e92c8..dd7e2da45 100644 --- a/bsd/netkey/keysock.c +++ b/bsd/netkey/keysock.c @@ -171,7 +171,7 @@ key_sendup0(rp, m, promisc) if (promisc) { struct sadb_msg *pmsg; - M_PREPEND(m, sizeof(struct sadb_msg), M_NOWAIT); + M_PREPEND(m, sizeof(struct sadb_msg), M_NOWAIT, 1); if (m && m->m_len < sizeof(struct sadb_msg)) m = m_pullup(m, sizeof(struct sadb_msg)); if (!m) { @@ -358,10 +358,10 @@ key_attach(struct socket *so, int proto, struct proc *p) if (sotorawcb(so) != 0) return EISCONN; /* XXX panic? */ - kp = (struct keycb *)_MALLOC(sizeof *kp, M_PCB, M_WAITOK); /* XXX */ + kp = (struct keycb *)_MALLOC(sizeof (*kp), M_PCB, + M_WAITOK | M_ZERO); /* XXX */ if (kp == 0) return ENOBUFS; - bzero(kp, sizeof *kp); so->so_pcb = (caddr_t)kp; kp->kp_promisc = kp->kp_registered = 0; diff --git a/bsd/nfs/nfs.h b/bsd/nfs/nfs.h index 370ddaa83..3bc6a641c 100644 --- a/bsd/nfs/nfs.h +++ b/bsd/nfs/nfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -106,7 +106,6 @@ extern int nfs_ticks; #define NFS_ASYNCTHREADMAXIDLE 60 /* Seconds before idle nfsiods are reaped */ #define NFS_DEFSTATFSRATELIMIT 10 /* Def. 
max # statfs RPCs per second */ #define NFS_REQUESTDELAY 10 /* ms interval to check request queue */ -#define NFSRV_DEADSOCKDELAY 5 /* Seconds before dead sockets are reaped */ #define NFSRV_MAXWGATHERDELAY 100 /* Max. write gather delay (msec) */ #ifndef NFSRV_WGATHERDELAY #define NFSRV_WGATHERDELAY 1 /* Default write gather delay (msec) */ @@ -186,6 +185,7 @@ extern int nfs_ticks; #define NFS_MATTR_REALM 24 /* Realm to authenticate with */ #define NFS_MATTR_PRINCIPAL 25 /* GSS principal to authenticate with */ #define NFS_MATTR_SVCPRINCIPAL 26 /* GSS principal to authenticate to, the server principal */ +#define NFS_MATTR_NFS_VERSION_RANGE 27 /* Packed version range to try */ /* NFS mount flags */ #define NFS_MFLAG_SOFT 0 /* soft mount (requests fail if unresponsive) */ @@ -207,6 +207,11 @@ extern int nfs_ticks; #define NFS_MFLAG_MNTUDP 16 /* MOUNT protocol should use UDP */ #define NFS_MFLAG_MNTQUICK 17 /* use short timeouts while mounting */ +/* Macros for packing and unpacking packed versions */ +#define PVER2MAJOR(M) ((uint32_t)(((M) >> 16) & 0xffff)) +#define PVER2MINOR(m) ((uint32_t)((m) & 0xffff)) +#define VER2PVER(M, m) ((uint32_t)((M) << 16) | ((m) & 0xffff)) + /* NFS advisory file locking modes */ #define NFS_LOCK_MODE_ENABLED 0 /* advisory file locking enabled */ #define NFS_LOCK_MODE_DISABLED 1 /* do not support advisory file locking */ @@ -931,8 +936,6 @@ extern lck_grp_t *nfs_request_grp; #define NFSNOLIST ((void *)0x0badcafe) /* sentinel value for nfs lists */ #define NFSREQNOLIST NFSNOLIST /* sentinel value for nfsreq lists */ -#define NFSIODCOMPLETING ((void *)0x10d) /* sentinel value for iod processing - async I/O w/callback being completed */ /* Flag values for r_flags */ #define R_TIMING 0x00000001 /* timing request (in mntp) */ @@ -952,6 +955,7 @@ extern lck_grp_t *nfs_request_grp; #define R_RESENDQ 0x00004000 /* async request currently on resendq */ #define R_SENDING 0x00008000 /* request currently being sent */ #define R_SOFT 0x00010000 /* 
request is soft - don't retry or reconnect */ +#define R_IOD 0x00020000 /* request is being managed by an IOD */ #define R_NOINTR 0x20000000 /* request should not be interupted by a signal */ #define R_RECOVER 0x40000000 /* a state recovery RPC - during NFSSTA_RECOVER */ @@ -970,7 +974,7 @@ extern int nfs_lockd_mounts, nfs_lockd_request_sent, nfs_single_des; extern int nfs_tprintf_initial_delay, nfs_tprintf_delay; extern int nfsiod_thread_count, nfsiod_thread_max, nfs_max_async_writes; extern int nfs_idmap_ctrl, nfs_callback_port; -extern int nfs_is_mobile, nfs_readlink_nocache; +extern int nfs_is_mobile, nfs_readlink_nocache, nfs_root_steals_ctx; extern uint32_t nfs_squishy_flags; extern uint32_t nfs_debug_ctl; @@ -1050,8 +1054,8 @@ extern struct nfsrv_sock *nfsrv_udpsock, *nfsrv_udp6sock; * nfsrv_sockwork - sockets being worked on which may have more work to do (ns_svcq) * nfsrv_sockwg - sockets with pending write gather input (ns_wgq) */ -extern TAILQ_HEAD(nfsrv_sockhead, nfsrv_sock) nfsrv_socklist, nfsrv_deadsocklist, - nfsrv_sockwg, nfsrv_sockwait, nfsrv_sockwork; +extern TAILQ_HEAD(nfsrv_sockhead, nfsrv_sock) nfsrv_socklist, nfsrv_sockwg, + nfsrv_sockwait, nfsrv_sockwork; /* lock groups for nfsrv_sock's */ extern lck_grp_t *nfsrv_slp_rwlock_group; @@ -1123,7 +1127,7 @@ extern in_port_t nfs4_cb_port, nfs4_cb_port6; extern thread_call_t nfs_request_timer_call; extern thread_call_t nfs_buf_timer_call; extern thread_call_t nfs4_callback_timer_call; -extern thread_call_t nfsrv_deadsock_timer_call; +extern thread_call_t nfsrv_idlesock_timer_call; #if CONFIG_FSE extern thread_call_t nfsrv_fmod_timer_call; #endif @@ -1392,13 +1396,14 @@ void nfsrv_cleancache(void); void nfsrv_cleanup(void); int nfsrv_credcheck(struct nfsrv_descript *, vfs_context_t, struct nfs_export *, struct nfs_export_options *); -void nfsrv_deadsock_timer(void *, void *); +void nfsrv_idlesock_timer(void *, void *); int nfsrv_dorec(struct nfsrv_sock *, struct nfsd *, struct nfsrv_descript **); int 
nfsrv_errmap(struct nfsrv_descript *, int); int nfsrv_export(struct user_nfs_export_args *, vfs_context_t); int nfsrv_fhmatch(struct nfs_filehandle *, struct nfs_filehandle *); int nfsrv_fhtovp(struct nfs_filehandle *, struct nfsrv_descript *, vnode_t *, struct nfs_export **, struct nfs_export_options **); +int nfsrv_check_exports_allow_address(mbuf_t); #if CONFIG_FSE void nfsrv_fmod_timer(void *, void *); #endif diff --git a/bsd/nfs/nfs4_subs.c b/bsd/nfs/nfs4_subs.c index 376ae3443..60a52d867 100644 --- a/bsd/nfs/nfs4_subs.c +++ b/bsd/nfs/nfs4_subs.c @@ -236,7 +236,7 @@ nfs4_setclientid(struct nfsmount *nmp) // SETCLIENTID numops = 1; nfsm_chain_build_alloc_init(error, &nmreq, 14 * NFSX_UNSIGNED + nmp->nm_longid->nci_idlen); - nfsm_chain_add_compound_header(error, &nmreq, "setclid", numops); + nfsm_chain_add_compound_header(error, &nmreq, "setclid", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_SETCLIENTID); /* nfs_client_id4 client; */ @@ -300,7 +300,7 @@ nfs4_setclientid(struct nfsmount *nmp) // SETCLIENTID_CONFIRM numops = 1; nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "setclid_conf", numops); + nfsm_chain_add_compound_header(error, &nmreq, "setclid_conf", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_SETCLIENTID_CONFIRM); nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); @@ -325,7 +325,7 @@ nfs4_setclientid(struct nfsmount *nmp) // PUTFH, GETATTR(FS) numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "setclid_attr", numops); + nfsm_chain_add_compound_header(error, &nmreq, "setclid_attr", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, nmp->nm_dnp->n_fhp, nmp->nm_dnp->n_fhsize); @@ -377,7 +377,7 @@ nfs4_renew(struct nfsmount *nmp, int rpcflag) // RENEW numops = 1; 
nfsm_chain_build_alloc_init(error, &nmreq, 8 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "renew", numops); + nfsm_chain_add_compound_header(error, &nmreq, "renew", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_RENEW); nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); @@ -533,7 +533,7 @@ nfs4_secinfo_rpc(struct nfsmount *nmp, struct nfsreq_secinfo_args *siap, kauth_c numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 4 * NFSX_UNSIGNED + NFSX_FH(nfsvers) + nfsm_rndup(namelen)); - nfsm_chain_add_compound_header(error, &nmreq, "secinfo", numops); + nfsm_chain_add_compound_header(error, &nmreq, "secinfo", nmp->nm_minor_vers, numops); numops--; if (fhp) { nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); @@ -665,7 +665,7 @@ nfs4_get_fs_locations( NFSREQ_SECINFO_SET(&si, NULL, fhp, fhsize, name, 0); numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 18 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "fs_locations", numops); + nfsm_chain_add_compound_header(error, &nmreq, "fs_locations", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, NFS_VER4, fhp, fhsize); diff --git a/bsd/nfs/nfs4_vnops.c b/bsd/nfs/nfs4_vnops.c index 2682d94be..a018cdaa7 100644 --- a/bsd/nfs/nfs4_vnops.c +++ b/bsd/nfs/nfs4_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006-2011 Apple Inc. All rights reserved. + * Copyright (c) 2006-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -102,7 +102,7 @@ nfs4_access_rpc(nfsnode_t np, u_int32_t *access, int rpcflags, vfs_context_t ctx // PUTFH, ACCESS, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 17 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "access", numops); + nfsm_chain_add_compound_header(error, &nmreq, "access", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); @@ -214,7 +214,7 @@ nfs4_getattr_rpc( // PUTFH, GETATTR numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "getattr", numops); + nfsm_chain_add_compound_header(error, &nmreq, "getattr", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, fhp, fhsize); @@ -271,7 +271,7 @@ nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx) // PUTFH, GETATTR, READLINK numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "readlink", numops); + nfsm_chain_add_compound_header(error, &nmreq, "readlink", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize); @@ -341,7 +341,7 @@ nfs4_read_rpc_async( // PUTFH, READ, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 22 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "read", numops); + nfsm_chain_add_compound_header(error, &nmreq, "read", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); @@ -453,7 +453,7 @@ nfs4_write_rpc_async( // PUTFH, WRITE, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED + len); - 
nfsm_chain_add_compound_header(error, &nmreq, "write", numops); + nfsm_chain_add_compound_header(error, &nmreq, "write", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); @@ -578,7 +578,7 @@ nfs4_remove_rpc( // PUTFH, REMOVE, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 17 * NFSX_UNSIGNED + namelen); - nfsm_chain_add_compound_header(error, &nmreq, "remove", numops); + nfsm_chain_add_compound_header(error, &nmreq, "remove", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize); @@ -654,7 +654,7 @@ nfs4_rename_rpc( // PUTFH(FROM), SAVEFH, PUTFH(TO), RENAME, GETATTR(TO), RESTOREFH, GETATTR(FROM) numops = 7; nfsm_chain_build_alloc_init(error, &nmreq, 30 * NFSX_UNSIGNED + fnamelen + tnamelen); - nfsm_chain_add_compound_header(error, &nmreq, "rename", numops); + nfsm_chain_add_compound_header(error, &nmreq, "rename", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, fdnp->n_fhp, fdnp->n_fhsize); @@ -854,7 +854,7 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx) // PUTFH, GETATTR, READDIR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, tag, numops); + nfsm_chain_add_compound_header(error, &nmreq, tag, nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize); @@ -1104,7 +1104,7 @@ nfs4_lookup_rpc_async( // PUTFH, GETATTR, LOOKUP(P), GETFH, GETATTR (FH) numops = 5; nfsm_chain_build_alloc_init(error, &nmreq, 20 * NFSX_UNSIGNED + namelen); - nfsm_chain_add_compound_header(error, &nmreq, "lookup", numops); + nfsm_chain_add_compound_header(error, &nmreq, "lookup", nmp->nm_minor_vers, 
numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize); @@ -1160,6 +1160,8 @@ nfs4_lookup_rpc_async_finish( struct nfsm_chain nmrep; nmp = NFSTONMP(dnp); + if (nmp == NULL) + return (ENXIO); nfsvers = nmp->nm_vers; if ((name[0] == '.') && (name[1] == '.') && (namelen == 2)) isdotdot = 1; @@ -1255,7 +1257,7 @@ nfs4_commit_rpc( // PUTFH, COMMIT, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 19 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "commit", numops); + nfsm_chain_add_compound_header(error, &nmreq, "commit", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); @@ -1325,7 +1327,7 @@ nfs4_pathconf_rpc( // PUTFH, GETATTR numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "pathconf", numops); + nfsm_chain_add_compound_header(error, &nmreq, "pathconf", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); @@ -1540,7 +1542,7 @@ nfs4_setattr_rpc( // PUTFH, SETATTR, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 40 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "setattr", numops); + nfsm_chain_add_compound_header(error, &nmreq, "setattr", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); @@ -2735,10 +2737,25 @@ nfs_vnop_mmap( * So grab another open count matching the accessMode passed in. * If we already had an mmap open, prefer read/write without deny mode. * This means we may have to drop the current mmap open first. + * + * N.B. 
We should have an open for the mmap, because, mmap was + * called on an open descriptor, or we've created an open for read + * from reading the first page for execve. However, if we piggy + * backed on an existing NFS_OPEN_SHARE_ACCESS_READ/NFS_OPEN_SHARE_DENY_NONE + * that open may have closed. */ - if (!nofp->nof_access) { - if (accessMode != NFS_OPEN_SHARE_ACCESS_READ) { + if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_READ)) { + if (nofp->nof_flags & NFS_OPEN_FILE_NEEDCLOSE) { + /* We shouldn't get here. We've already open the file for execve */ + NP(np, "nfs_vnop_mmap: File already needs close access: 0x%x, cred: %d thread: %lld", + nofp->nof_access, kauth_cred_getuid(nofp->nof_owner->noo_cred), thread_tid(vfs_context_thread(ctx))); + } + /* + * mmapings for execve are just for read. Get out with EPERM if the accessMode is not ACCESS_READ + * or the access would be denied. Other accesses should have an open descriptor for the mapping. + */ + if (accessMode != NFS_OPEN_SHARE_ACCESS_READ || (accessMode & nofp->nof_deny)) { /* not asking for just read access -> fail */ error = EPERM; goto out; @@ -2795,6 +2812,29 @@ nfs_vnop_mmap( denyMode = NFS_OPEN_SHARE_DENY_WRITE; else if (nofp->nof_r_drw) denyMode = NFS_OPEN_SHARE_DENY_BOTH; + } else if (nofp->nof_d_rw || nofp->nof_d_rw_dw || nofp->nof_d_rw_drw) { + /* + * This clause and the one below is to co-opt a read write access + * for a read only mmaping. We probably got here in that an + * existing rw open for an executable file already exists. 
+ */ + delegated = 1; + accessMode = NFS_OPEN_SHARE_ACCESS_BOTH; + if (nofp->nof_d_rw) + denyMode = NFS_OPEN_SHARE_DENY_NONE; + else if (nofp->nof_d_rw_dw) + denyMode = NFS_OPEN_SHARE_DENY_WRITE; + else if (nofp->nof_d_rw_drw) + denyMode = NFS_OPEN_SHARE_DENY_BOTH; + } else if (nofp->nof_rw || nofp->nof_rw_dw || nofp->nof_rw_drw) { + delegated = 0; + accessMode = NFS_OPEN_SHARE_ACCESS_BOTH; + if (nofp->nof_rw) + denyMode = NFS_OPEN_SHARE_DENY_NONE; + else if (nofp->nof_rw_dw) + denyMode = NFS_OPEN_SHARE_DENY_WRITE; + else if (nofp->nof_rw_drw) + denyMode = NFS_OPEN_SHARE_DENY_BOTH; } else { error = EPERM; } @@ -3164,7 +3204,7 @@ nfs_file_lock_destroy(struct nfs_file_lock *nflp) FREE(nflp, M_TEMP); } else { lck_mtx_lock(&nlop->nlo_lock); - bzero(nflp, sizeof(nflp)); + bzero(nflp, sizeof(*nflp)); lck_mtx_unlock(&nlop->nlo_lock); } nfs_lock_owner_rele(nlop); @@ -3264,7 +3304,7 @@ nfs4_setlock_rpc( // PUTFH, GETATTR, LOCK numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 33 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "lock", numops); + nfsm_chain_add_compound_header(error, &nmreq, "lock", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize); @@ -3360,7 +3400,7 @@ nfs4_unlock_rpc( // PUTFH, GETATTR, LOCKU numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "unlock", numops); + nfsm_chain_add_compound_header(error, &nmreq, "unlock", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize); @@ -3434,7 +3474,7 @@ nfs4_getlock_rpc( // PUTFH, GETATTR, LOCKT numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "locktest", numops); + nfsm_chain_add_compound_header(error, &nmreq, "locktest", nmp->nm_minor_vers, 
numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize); @@ -4582,7 +4622,7 @@ nfs4_open_confirm_rpc( // PUTFH, OPEN_CONFIRM, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "open_confirm", numops); + nfsm_chain_add_compound_header(error, &nmreq, "open_confirm", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen); @@ -4700,7 +4740,7 @@ nfs4_open_rpc_internal( // PUTFH, SAVEFH, OPEN(CREATE?), GETATTR(FH), RESTOREFH, GETATTR numops = 6; nfsm_chain_build_alloc_init(error, &nmreq, 53 * NFSX_UNSIGNED + cnp->cn_namelen); - nfsm_chain_add_compound_header(error, &nmreq, create ? "create" : "open", numops); + nfsm_chain_add_compound_header(error, &nmreq, create ? "create" : "open", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize); @@ -5040,6 +5080,7 @@ nfs4_claim_delegated_open_rpc( MALLOC(filename, char *, namelen+1, M_TEMP, M_WAITOK); if (!filename) { error = ENOMEM; + nfs_node_unlock(np); goto out; } snprintf(filename, namelen+1, "%s", name); @@ -5047,8 +5088,7 @@ nfs4_claim_delegated_open_rpc( nfs_node_unlock(np); if ((error = nfs_open_owner_set_busy(noop, NULL))) - return (error); - + goto out; NVATTR_INIT(&nvattr); delegation = NFS_OPEN_DELEGATE_NONE; dstateid = np->n_dstateid; @@ -5060,7 +5100,7 @@ nfs4_claim_delegated_open_rpc( // PUTFH, OPEN, GETATTR(FH) numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 48 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "open_claim_d", numops); + nfsm_chain_add_compound_header(error, &nmreq, "open_claim_d", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, 
VTONFS(dvp)->n_fhp, VTONFS(dvp)->n_fhsize); @@ -5266,7 +5306,7 @@ nfs4_open_reclaim_rpc( // PUTFH, OPEN, GETATTR(FH) numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 48 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "open_reclaim", numops); + nfsm_chain_add_compound_header(error, &nmreq, "open_reclaim", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); @@ -5449,7 +5489,7 @@ nfs4_open_downgrade_rpc( // PUTFH, OPEN_DOWNGRADE, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "open_downgrd", numops); + nfsm_chain_add_compound_header(error, &nmreq, "open_downgrd", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); @@ -5519,7 +5559,7 @@ nfs4_close_rpc( // PUTFH, CLOSE, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "close", numops); + nfsm_chain_add_compound_header(error, &nmreq, "close", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); @@ -5989,7 +6029,7 @@ nfs4_delegreturn_rpc(struct nfsmount *nmp, u_char *fhp, int fhlen, struct nfs_st // PUTFH, DELEGRETURN numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "delegreturn", numops); + nfsm_chain_add_compound_header(error, &nmreq, "delegreturn", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen); @@ -6065,42 +6105,76 @@ nfs_vnop_read( nfs_open_owner_rele(noop); return (error); } - if (!nofp->nof_access) { - /* we don't have the file open, so open it for read 
access */ - error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx)); - if (error) { + /* + * Since the read path is a hot path, if we already have + * read access, lets go and try and do the read, without + * busying the mount and open file node for this open owner. + * + * N.B. This is inherently racy w.r.t. an execve using + * an already open file, in that the read at the end of + * this routine will be racing with a potential close. + * The code below ultimately has the same problem. In practice + * this does not seem to be an issue. + */ + if (nofp->nof_access & NFS_OPEN_SHARE_ACCESS_READ) { + nfs_open_owner_rele(noop); + goto do_read; + } + error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx)); + if (error) { + nfs_open_owner_rele(noop); + return (error); + } + /* + * If we don't have a file already open with the access we need (read) then + * we need to open one. Otherwise we just co-opt an open. We might not already + * have access because we're trying to read the first page of the + * file for execve. 
+ */ + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) { + nfs_mount_state_in_use_end(nmp, 0); + nfs_open_owner_rele(noop); + return (error); + } + if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_READ)) { + /* we don't have the file open, so open it for read access if we're not denied */ + if (nofp->nof_flags & NFS_OPEN_FILE_NEEDCLOSE) { + NP(np, "nfs_vnop_read: File already needs close access: 0x%x, cred: %d thread: %lld", + nofp->nof_access, kauth_cred_getuid(nofp->nof_owner->noo_cred), thread_tid(vfs_context_thread(ctx))); + } + if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_READ) { + nfs_open_file_clear_busy(nofp); + nfs_mount_state_in_use_end(nmp, 0); nfs_open_owner_rele(noop); - return (error); + return (EPERM); } if (np->n_flag & NREVOKE) { error = EIO; + nfs_open_file_clear_busy(nofp); nfs_mount_state_in_use_end(nmp, 0); nfs_open_owner_rele(noop); return (error); } - error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); - if (error) - nofp = NULL; - if (!error) { - if (nmp->nm_vers < NFS_VER4) { - /* NFS v2/v3 opens are always allowed - so just add it. */ - nfs_open_file_add_open(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, 0); - } else { - error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx); - } + if (nmp->nm_vers < NFS_VER4) { + /* NFS v2/v3 opens are always allowed - so just add it. 
*/ + nfs_open_file_add_open(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, 0); + } else { + error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx); } if (!error) nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE; - if (nofp) - nfs_open_file_clear_busy(nofp); - if (nfs_mount_state_in_use_end(nmp, error)) { - nofp = NULL; - goto restart; - } + } + if (nofp) + nfs_open_file_clear_busy(nofp); + if (nfs_mount_state_in_use_end(nmp, error)) { + nofp = NULL; + goto restart; } nfs_open_owner_rele(noop); if (error) return (error); +do_read: return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, ap->a_context)); } @@ -6348,7 +6422,7 @@ nfs4_create_rpc( // PUTFH, SAVEFH, CREATE, GETATTR(FH), RESTOREFH, GETATTR numops = 6; nfsm_chain_build_alloc_init(error, &nmreq, 66 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, tag, numops); + nfsm_chain_add_compound_header(error, &nmreq, tag, nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize); @@ -6606,7 +6680,7 @@ nfs4_vnop_link( // PUTFH(SOURCE), SAVEFH, PUTFH(DIR), LINK, GETATTR(DIR), RESTOREFH, GETATTR numops = 7; nfsm_chain_build_alloc_init(error, &nmreq, 29 * NFSX_UNSIGNED + cnp->cn_namelen); - nfsm_chain_add_compound_header(error, &nmreq, "link", numops); + nfsm_chain_add_compound_header(error, &nmreq, "link", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); @@ -6811,7 +6885,7 @@ nfs4_named_attr_dir_get(nfsnode_t np, int fetch, vfs_context_t ctx) // PUTFH, OPENATTR, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 22 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "openattr", numops); + nfsm_chain_add_compound_header(error, &nmreq, "openattr", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); 
nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize); @@ -7141,7 +7215,7 @@ nfs4_named_attr_get( if (prefetch) numops += 4; // also sending: SAVEFH, RESTOREFH, NVERIFY, READ nfsm_chain_build_alloc_init(error, &nmreq, 64 * NFSX_UNSIGNED + cnp->cn_namelen); - nfsm_chain_add_compound_header(error, &nmreq, "getnamedattr", numops); + nfsm_chain_add_compound_header(error, &nmreq, "getnamedattr", nmp->nm_minor_vers, numops); if (hadattrdir) { numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); diff --git a/bsd/nfs/nfs_bio.c b/bsd/nfs/nfs_bio.c index c0715a2b0..926cc0ad7 100644 --- a/bsd/nfs/nfs_bio.c +++ b/bsd/nfs/nfs_bio.c @@ -3797,7 +3797,7 @@ nfs_asyncio_finish(struct nfsreq *req) } } - if (req->r_achain.tqe_next == NFSREQNOLIST || req->r_achain.tqe_next == NFSIODCOMPLETING) + if (req->r_achain.tqe_next == NFSREQNOLIST) TAILQ_INSERT_TAIL(&nmp->nm_iodq, req, r_achain); /* If this mount doesn't already have an nfsiod working on it... */ @@ -3835,11 +3835,17 @@ nfs_asyncio_resend(struct nfsreq *req) if (nfs_mount_gone(nmp)) return; + nfs_gss_clnt_rpcdone(req); lck_mtx_lock(&nmp->nm_lock); if (!(req->r_flags & R_RESENDQ)) { TAILQ_INSERT_TAIL(&nmp->nm_resendq, req, r_rchain); req->r_flags |= R_RESENDQ; + /* + * We take a reference on this request so that it can't be + * destroyed while a resend is queued or in progress. + */ + nfs_request_ref(req, 1); } nfs_mount_sock_thread_wake(nmp); lck_mtx_unlock(&nmp->nm_lock); diff --git a/bsd/nfs/nfs_gss.c b/bsd/nfs/nfs_gss.c index 9f98a9a50..3414db347 100644 --- a/bsd/nfs/nfs_gss.c +++ b/bsd/nfs/nfs_gss.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Apple Inc. All rights reserved. + * Copyright (c) 2007-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -109,8 +109,11 @@ #include #include #include - #include "nfs_gss_crypto.h" +#include +#include + +#define ASSERT(EX) assert(EX) #define NFS_GSS_MACH_MAX_RETRIES 3 @@ -192,8 +195,9 @@ static int nfs_gss_clnt_ctx_init_retry(struct nfsreq *, struct nfs_gss_clnt_ctx static int nfs_gss_clnt_ctx_callserver(struct nfsreq *, struct nfs_gss_clnt_ctx *); static uint8_t *nfs_gss_clnt_svcname(struct nfsmount *, gssd_nametype *, uint32_t *); static int nfs_gss_clnt_gssd_upcall(struct nfsreq *, struct nfs_gss_clnt_ctx *); -void nfs_gss_clnt_ctx_neg_cache_enter(struct nfs_gss_clnt_ctx *, struct nfsmount *); +void nfs_gss_clnt_ctx_neg_cache_reap(struct nfsmount *); static void nfs_gss_clnt_ctx_clean(struct nfs_gss_clnt_ctx *); +static int nfs_gss_clnt_ctx_copy(struct nfs_gss_clnt_ctx *, struct nfs_gss_clnt_ctx **, gss_key_info *); static void nfs_gss_clnt_ctx_destroy(struct nfs_gss_clnt_ctx *); static void nfs_gss_clnt_log_error(struct nfsreq *, struct nfs_gss_clnt_ctx *, uint32_t, uint32_t); #endif /* NFSCLIENT */ @@ -276,6 +280,16 @@ nfs_gss_init(void) #define kauth_cred_getasid(cred) ((cred)->cr_audit.as_aia_p->ai_asid) #define kauth_cred_getauid(cred) ((cred)->cr_audit.as_aia_p->ai_auid) +#define SAFE_CAST_INTTYPE( type, intval ) \ + ( (type)(intval)/(sizeof(type) < sizeof(intval) ? 
0 : 1) ) + +uid_t +nfs_cred_getasid2uid(kauth_cred_t cred) +{ + uid_t result = SAFE_CAST_INTTYPE(uid_t, kauth_cred_getasid(cred)); + return (result); +} + /* * Debugging */ @@ -285,7 +299,7 @@ nfs_gss_clnt_ctx_dump(struct nfsmount *nmp) struct nfs_gss_clnt_ctx *cp; lck_mtx_lock(&nmp->nm_lock); - NFS_GSS_DBG("Enter"); + NFS_GSS_DBG("Enter\n"); TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) { lck_mtx_lock(cp->gss_clnt_mtx); printf("context %d/%d: refcnt = %d, flags = %x\n", @@ -294,19 +308,47 @@ nfs_gss_clnt_ctx_dump(struct nfsmount *nmp) cp->gss_clnt_refcnt, cp->gss_clnt_flags); lck_mtx_unlock(cp->gss_clnt_mtx); } - - TAILQ_FOREACH(cp, &nmp->nm_gssnccl, gss_clnt_entries) { - lck_mtx_lock(cp->gss_clnt_mtx); - printf("context %d/%d: refcnt = %d, flags = %x\n", - kauth_cred_getasid(cp->gss_clnt_cred), - kauth_cred_getauid(cp->gss_clnt_cred), - cp->gss_clnt_refcnt, cp->gss_clnt_flags); - lck_mtx_unlock(cp->gss_clnt_mtx); - } - NFS_GSS_DBG("Exit"); + NFS_GSS_DBG("Exit\n"); lck_mtx_unlock(&nmp->nm_lock); } +static char * +nfs_gss_clnt_ctx_name(struct nfsmount *nmp, struct nfs_gss_clnt_ctx *cp, char *buf, int len) +{ + char *np; + int nlen; + const char *server = ""; + + if (nmp && nmp->nm_mountp) + server = vfs_statfs(nmp->nm_mountp)->f_mntfromname; + + if (cp == NULL) { + snprintf(buf, len, "[%s] NULL context", server); + return (buf); + } + + if (cp->gss_clnt_principal && !cp->gss_clnt_display) { + np = (char *)cp->gss_clnt_principal; + nlen = cp->gss_clnt_prinlen; + } else { + np = cp->gss_clnt_display; + nlen = np ? strlen(cp->gss_clnt_display) : 0; + } + if (nlen) + snprintf(buf, len, "[%s] %.*s %d/%d %s", server, nlen, np, + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getuid(cp->gss_clnt_cred), + cp->gss_clnt_principal ? 
"" : "[from default cred] "); + else + snprintf(buf, len, "[%s] using default %d/%d ", server, + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getuid(cp->gss_clnt_cred)); + return (buf); +} + +#define NFS_CTXBUFSZ 80 +#define NFS_GSS_CTX(req, cp) nfs_gss_clnt_ctx_name((req)->r_nmp, cp ? cp : (req)->r_gss_ctx, CTXBUF, sizeof(CTXBUF)) + #define NFS_GSS_CLNT_CTX_DUMP(nmp) \ do { \ if (NFS_GSS_ISDBG && (NFS_DEBUG_FLAGS & 0x2)) \ @@ -321,23 +363,79 @@ nfs_gss_clnt_ctx_cred_match(kauth_cred_t cred1, kauth_cred_t cred2) return (0); } - +/* + * Busy the mount for each principal set on the mount + * so that the automounter will not unmount the file + * system underneath us. With out this, if an unmount + * occurs the principal that is set for an audit session + * will be lost and we may end up with a different identity. + * + * Note setting principals on the mount is a bad idea. This + * really should be handle by KIM (Kerberos Identity Management) + * so that defaults can be set by service identities. + */ + +static void +nfs_gss_clnt_mnt_ref(struct nfsmount *nmp) +{ + int error; + vnode_t rvp; + + if (nmp == NULL || + !(vfs_flags(nmp->nm_mountp) & MNT_AUTOMOUNTED)) + return; + + error = VFS_ROOT(nmp->nm_mountp, &rvp, NULL); + if (!error) { + vnode_ref(rvp); + vnode_put(rvp); + } +} + +/* + * Unbusy the mout. 
See above comment, + */ + +static void +nfs_gss_clnt_mnt_rele(struct nfsmount *nmp) +{ + int error; + vnode_t rvp; + + if (nmp == NULL || + !(vfs_flags(nmp->nm_mountp) & MNT_AUTOMOUNTED)) + return; + + error = VFS_ROOT(nmp->nm_mountp, &rvp, NULL); + if (!error) { + vnode_rele(rvp); + vnode_put(rvp); + } +} + +int nfs_root_steals_ctx = 1; + static int -nfs_gss_clnt_ctx_find(struct nfsreq *req) +nfs_gss_clnt_ctx_find_principal(struct nfsreq *req, uint8_t *principal, uint32_t plen, uint32_t nt) { struct nfsmount *nmp = req->r_nmp; struct nfs_gss_clnt_ctx *cp; + struct nfsreq treq; int error = 0; struct timeval now; - + gss_key_info *ki; + char CTXBUF[NFS_CTXBUFSZ]; + + bzero(&treq, sizeof (struct nfsreq)); + treq.r_nmp = nmp; + microuptime(&now); lck_mtx_lock(&nmp->nm_lock); TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) { lck_mtx_lock(cp->gss_clnt_mtx); if (cp->gss_clnt_flags & GSS_CTX_DESTROY) { - NFS_GSS_DBG("Found destroyed context %d/%d. refcnt = %d continuing\n", - kauth_cred_getasid(cp->gss_clnt_cred), - kauth_cred_getauid(cp->gss_clnt_cred), + NFS_GSS_DBG("Found destroyed context %s refcnt = %d continuing\n", + NFS_GSS_CTX(req, cp), cp->gss_clnt_refcnt); lck_mtx_unlock(cp->gss_clnt_mtx); continue; @@ -347,27 +445,88 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req) TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries); TAILQ_INSERT_HEAD(&nmp->nm_gsscl, cp, gss_clnt_entries); } + if (principal) { + /* + * If we have a principal, but it does not match the current cred + * mark it for removal + */ + if (cp->gss_clnt_prinlen != plen || cp->gss_clnt_prinnt != nt || + bcmp(cp->gss_clnt_principal, principal, plen) != 0) { + cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY); + cp->gss_clnt_refcnt++; + lck_mtx_unlock(cp->gss_clnt_mtx); + NFS_GSS_DBG("Marking %s for deletion because %s does not match\n", + NFS_GSS_CTX(req, cp), principal); + NFS_GSS_DBG("len = (%d,%d), nt = (%d,%d)\n", cp->gss_clnt_prinlen, plen, + cp->gss_clnt_prinnt, nt); + treq.r_gss_ctx = cp; + 
cp = NULL; + break; + } + } if (cp->gss_clnt_flags & GSS_CTX_INVAL) { - /* - * We haven't been moved to the neg cache list - * but we're about to be, finding an entry on - * the negative cache list will result in an - * NFSERR_EAUTH for GSS_NEG_CACHE_TO so we just - * return that now. - */ - lck_mtx_unlock(cp->gss_clnt_mtx); - lck_mtx_unlock(&nmp->nm_lock); - return (NFSERR_EAUTH); + /* + * If we're still being used and we're not expired + * just return and don't bother gssd again. Note if + * gss_clnt_nctime is zero it is about to be set to now. + */ + if (cp->gss_clnt_nctime + GSS_NEG_CACHE_TO >= now.tv_sec || cp->gss_clnt_nctime == 0) { + NFS_GSS_DBG("Context %s (refcnt = %d) not expired returning EAUTH nctime = %ld now = %ld\n", + NFS_GSS_CTX(req, cp), cp->gss_clnt_refcnt, cp->gss_clnt_nctime, now.tv_sec); + lck_mtx_unlock(cp->gss_clnt_mtx); + lck_mtx_unlock(&nmp->nm_lock); + return (NFSERR_EAUTH); + } + if (cp->gss_clnt_refcnt) { + struct nfs_gss_clnt_ctx *ncp; + /* + * If this context has references, we can't use it so we mark if for + * destruction and create a new context based on this one in the + * same manner as renewing one. 
+ */ + cp->gss_clnt_flags |= GSS_CTX_DESTROY; + NFS_GSS_DBG("Context %s has expired but we still have %d references\n", + NFS_GSS_CTX(req, cp), cp->gss_clnt_refcnt); + error = nfs_gss_clnt_ctx_copy(cp, &ncp, NULL); + lck_mtx_unlock(cp->gss_clnt_mtx); + if (error) { + lck_mtx_unlock(&nmp->nm_lock); + return (error); + } + cp = ncp; + break; + } else { + /* cp->gss_clnt_kinfo should be NULL here */ + if (cp->gss_clnt_kinfo) { + FREE(cp->gss_clnt_kinfo, M_TEMP); + cp->gss_clnt_kinfo = NULL; + } + if (cp->gss_clnt_nctime) + nmp->nm_ncentries--; + lck_mtx_unlock(cp->gss_clnt_mtx); + TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries); + break; + } } + /* Found a valid context to return */ + cp->gss_clnt_refcnt++; + req->r_gss_ctx = cp; lck_mtx_unlock(cp->gss_clnt_mtx); lck_mtx_unlock(&nmp->nm_lock); - nfs_gss_clnt_ctx_ref(req, cp); return (0); } lck_mtx_unlock(cp->gss_clnt_mtx); } - if (kauth_cred_getuid(req->r_cred) == 0) { + MALLOC(ki, gss_key_info *, sizeof (gss_key_info), M_TEMP, M_WAITOK|M_ZERO); + if (ki == NULL) { + lck_mtx_unlock(&nmp->nm_lock); + return (ENOMEM); + } + + if (cp) { + cp->gss_clnt_kinfo = ki; + } else if (nfs_root_steals_ctx && principal == NULL && kauth_cred_getuid(req->r_cred) == 0) { /* * If superuser is trying to get access, then co-opt * the first valid context in the list. 
@@ -379,66 +538,17 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req) if (!(cp->gss_clnt_flags & (GSS_CTX_INVAL|GSS_CTX_DESTROY))) { nfs_gss_clnt_ctx_ref(req, cp); lck_mtx_unlock(&nmp->nm_lock); - NFS_GSS_DBG("Root stole context %d/%d\n", - kauth_cred_getasid(cp->gss_clnt_cred), kauth_cred_getauid(cp->gss_clnt_cred)); + NFS_GSS_DBG("Root stole context %s\n", NFS_GSS_CTX(req, NULL)); return (0); } } } - /* - * Check negative context cache - * If found and the cache has not expired - * return NFSERR_EAUTH, else remove - * from the cache and try to create a new context - */ - TAILQ_FOREACH(cp, &nmp->nm_gssnccl, gss_clnt_entries) { - lck_mtx_lock(cp->gss_clnt_mtx); - if (cp->gss_clnt_flags & GSS_CTX_DESTROY) { - NFS_GSS_DBG("Found destroyed context %d/%d. refcnt = %d continuing\n", - kauth_cred_getasid(cp->gss_clnt_cred), - kauth_cred_getauid(cp->gss_clnt_cred), cp->gss_clnt_refcnt); - lck_mtx_unlock(cp->gss_clnt_mtx); - continue; - } - if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, req->r_cred)) { - /* - * If we're still being used and invalid or we're not expired - * just return and don't bother gssd again. 
- */ - if (cp->gss_clnt_nctime + GSS_NEG_CACHE_TO >= now.tv_sec) { - NFS_GSS_DBG("Context %d/%d (refcnt = %d) not expired returning EAUTH nctime = %ld now = %ld\n", - kauth_cred_getasid(cp->gss_clnt_cred), - kauth_cred_getauid(cp->gss_clnt_cred), - cp->gss_clnt_refcnt, cp->gss_clnt_nctime, now.tv_sec); - lck_mtx_unlock(cp->gss_clnt_mtx); - lck_mtx_unlock(&nmp->nm_lock); - return (NFSERR_EAUTH); - } - if (cp->gss_clnt_refcnt && (cp->gss_clnt_flags & GSS_CTX_INVAL)) { - NFS_GSS_DBG("Context %d/%d has expired but we still have %d references\n", - kauth_cred_getasid(cp->gss_clnt_cred), - kauth_cred_getauid(cp->gss_clnt_cred), - cp->gss_clnt_refcnt); - lck_mtx_unlock(cp->gss_clnt_mtx); - lck_mtx_unlock(&nmp->nm_lock); - return (NFSERR_EAUTH); - } - TAILQ_REMOVE(&nmp->nm_gssnccl, cp, gss_clnt_entries); - lck_mtx_unlock(cp->gss_clnt_mtx); - nmp->nm_ncentries--; - break; - } - lck_mtx_unlock(cp->gss_clnt_mtx); - } - - - NFS_GSS_DBG("Context %d/%d %sfound in Neg Cache @ %ld\n", - kauth_cred_getasid(req->r_cred), - kauth_cred_getauid(req->r_cred), - cp == NULL ? "not " : "", + NFS_GSS_DBG("Context %s%sfound in Neg Cache @ %ld\n", + NFS_GSS_CTX(req, cp), + cp == NULL ? " not " : "", cp == NULL ? 
0L : cp->gss_clnt_nctime); - + /* * Not found - create a new context */ @@ -449,26 +559,61 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req) lck_mtx_unlock(&nmp->nm_lock); return (ENOMEM); } + cp->gss_clnt_kinfo = ki; cp->gss_clnt_cred = req->r_cred; kauth_cred_ref(cp->gss_clnt_cred); cp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL); cp->gss_clnt_ptime = now.tv_sec - GSS_PRINT_DELAY; + if (principal) { + MALLOC(cp->gss_clnt_principal, uint8_t *, plen+1, M_TEMP, M_WAITOK|M_ZERO); + memcpy(cp->gss_clnt_principal, principal, plen); + cp->gss_clnt_prinlen = plen; + cp->gss_clnt_prinnt = nt; + cp->gss_clnt_flags |= GSS_CTX_STICKY; + nfs_gss_clnt_mnt_ref(nmp); + } } else { nfs_gss_clnt_ctx_clean(cp); + if (principal) { + /* + * If we have a principal and we found a matching audit + * session, then to get here, the principal had to match. + * In walking the context list if it has a principal + * or the principal is not set then we mark the context + * for destruction and set cp to NULL and we fall to the + * if clause above. If the context still has references + * again we copy the context which will preserve the principal + * and we end up here with the correct principal set. + * If we don't have references the the principal must have + * match and we will fall through here. 
+ */ + cp->gss_clnt_flags |= GSS_CTX_STICKY; + } } - + cp->gss_clnt_thread = current_thread(); nfs_gss_clnt_ctx_ref(req, cp); TAILQ_INSERT_HEAD(&nmp->nm_gsscl, cp, gss_clnt_entries); lck_mtx_unlock(&nmp->nm_lock); error = nfs_gss_clnt_ctx_init_retry(req, cp); // Initialize new context - if (error) + if (error) { + NFS_GSS_DBG("nfs_gss_clnt_ctx_init_retry returned %d for %s\n", error, NFS_GSS_CTX(req, cp)); nfs_gss_clnt_ctx_unref(req); + } + + /* Remove any old matching contex that had a different principal */ + nfs_gss_clnt_ctx_unref(&treq); return (error); } +static int +nfs_gss_clnt_ctx_find(struct nfsreq *req) +{ + return (nfs_gss_clnt_ctx_find_principal(req, NULL, 0, 0)); +} + /* * Inserts an RPCSEC_GSS credential into an RPC header. * After the credential is inserted, the code continues @@ -489,12 +634,13 @@ nfs_gss_clnt_cred_put(struct nfsreq *req, struct nfsm_chain *nmc, mbuf_t args) u_char tokbuf[KRB5_SZ_TOKMAX(MAX_DIGEST)]; u_char cksum[MAX_DIGEST]; gss_key_info *ki; - + slpflag = (PZERO-1); if (req->r_nmp) { slpflag |= (NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? PCATCH : 0; recordmark = (req->r_nmp->nm_sotype == SOCK_STREAM); } + retry: if (req->r_gss_ctx == NULL) { /* @@ -526,7 +672,7 @@ nfs_gss_clnt_cred_put(struct nfsreq *req, struct nfsm_chain *nmc, mbuf_t args) } lck_mtx_unlock(cp->gss_clnt_mtx); - ki = &cp->gss_clnt_kinfo; + ki = cp->gss_clnt_kinfo; if (cp->gss_clnt_flags & GSS_CTX_COMPLETE) { /* * Get a sequence number for this request. @@ -721,7 +867,7 @@ nfs_gss_clnt_verf_get( struct gss_seq *gsp; uint32_t reslen, start, cksumlen, toklen; int error = 0; - gss_key_info *ki = &cp->gss_clnt_kinfo; + gss_key_info *ki = cp->gss_clnt_kinfo; reslen = cksumlen = 0; *accepted_statusp = 0; @@ -939,7 +1085,7 @@ nfs_gss_clnt_verf_get( * The location and length of the args is marked by two fields * in the request structure: r_gss_argoff and r_gss_arglen, * which are stashed when the NFS request is built. 
- */ + */ int nfs_gss_clnt_args_restore(struct nfsreq *req) { @@ -947,7 +1093,7 @@ nfs_gss_clnt_args_restore(struct nfsreq *req) struct nfsm_chain mchain, *nmc = &mchain; int len, error = 0; - if (cp == NULL) + if (cp == NULL) return (NFSERR_EAUTH); if ((cp->gss_clnt_flags & GSS_CTX_COMPLETE) == 0) @@ -986,7 +1132,7 @@ nfs_gss_clnt_args_restore(struct nfsreq *req) */ len = req->r_gss_arglen; len += len % 8 > 0 ? 4 : 8; // add DES padding length - nfs_gss_encrypt_chain(&cp->gss_clnt_kinfo, nmc, + nfs_gss_encrypt_chain(cp->gss_clnt_kinfo, nmc, req->r_gss_argoff, len, DES_DECRYPT); nfsm_chain_adv(error, nmc, req->r_gss_arglen); if (error) @@ -1017,10 +1163,9 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) int server_complete = 0; u_char cksum1[MAX_DIGEST], cksum2[MAX_DIGEST]; int error = 0; - gss_key_info *ki = &cp->gss_clnt_kinfo; + gss_key_info *ki = cp->gss_clnt_kinfo; /* Initialize a new client context */ - if (cp->gss_clnt_svcname == NULL) { cp->gss_clnt_svcname = nfs_gss_clnt_svcname(nmp, &cp->gss_clnt_svcnt, &cp->gss_clnt_svcnamlen); @@ -1131,12 +1276,12 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) if (cp->gss_clnt_seqbits == NULL) error = NFSERR_EAUTH; nfsmout: - /* + /* * If the error is ENEEDAUTH we're not done, so no need * to wake up other threads again. This thread will retry in * the find or renew routines. 
*/ - if (error == ENEEDAUTH) + if (error == ENEEDAUTH) return (error); /* @@ -1407,6 +1552,7 @@ nfs_gss_clnt_get_upcall_port(kauth_cred_t credp) kr = mach_gss_lookup(gssd_host_port, uid, asid, &uc_port); if (kr != KERN_SUCCESS) printf("nfs_gss_clnt_get_upcall_port: mach_gssd_lookup failed: status %x (%d)\n", kr, kr); + host_release_special_port(gssd_host_port); return (uc_port); } @@ -1628,13 +1774,13 @@ nfs_gss_clnt_gssd_upcall(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) goto out; } error = nfs_gss_mach_vmcopyout((vm_map_copy_t) okey, skeylen, - cp->gss_clnt_kinfo.skey); + cp->gss_clnt_kinfo->skey); if (error) { vm_map_copy_discard((vm_map_copy_t) otoken); goto out; } - error = gss_key_init(&cp->gss_clnt_kinfo, skeylen); + error = gss_key_init(cp->gss_clnt_kinfo, skeylen); if (error) goto out; } @@ -1752,9 +1898,11 @@ nfs_gss_clnt_ctx_unref(struct nfsreq *req) { struct nfsmount *nmp = req->r_nmp; struct nfs_gss_clnt_ctx *cp = req->r_gss_ctx; - int neg_cache = 0; int on_neg_cache = 0; + int neg_cache = 0; int destroy = 0; + struct timeval now; + char CTXBUF[NFS_CTXBUFSZ]; if (cp == NULL) return; @@ -1765,84 +1913,94 @@ nfs_gss_clnt_ctx_unref(struct nfsreq *req) if (--cp->gss_clnt_refcnt < 0) panic("Over release of gss context!\n"); - if (cp->gss_clnt_refcnt == 0 && (cp->gss_clnt_flags & GSS_CTX_DESTROY)) { - destroy = 1; - if (cp->gss_clnt_flags & GSS_CTX_NC) - on_neg_cache = 1; - } else if ((cp->gss_clnt_flags & (GSS_CTX_INVAL | GSS_CTX_NC)) == GSS_CTX_INVAL) { + if (cp->gss_clnt_refcnt == 0) { + if ((cp->gss_clnt_flags & GSS_CTX_INVAL) && + cp->gss_clnt_kinfo) { + FREE(cp->gss_clnt_kinfo, M_TEMP); + cp->gss_clnt_kinfo = NULL; + } + if (cp->gss_clnt_flags & GSS_CTX_DESTROY) { + destroy = 1; + if (cp->gss_clnt_flags & GSS_CTX_STICKY) + nfs_gss_clnt_mnt_rele(nmp); + if (cp->gss_clnt_nctime) + on_neg_cache = 1; + } + } + if (!destroy && cp->gss_clnt_nctime == 0 && + (cp->gss_clnt_flags & GSS_CTX_INVAL)) { + microuptime(&now); + cp->gss_clnt_nctime = now.tv_sec; 
neg_cache = 1; } lck_mtx_unlock(cp->gss_clnt_mtx); if (destroy) { + NFS_GSS_DBG("Destroying context %s\n", NFS_GSS_CTX(req, cp)); if (nmp) { lck_mtx_lock(&nmp->nm_lock); if (cp->gss_clnt_entries.tqe_next != NFSNOLIST) { - if (on_neg_cache) - TAILQ_REMOVE(&nmp->nm_gssnccl, cp, gss_clnt_entries); - else - TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries); + TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries); + } + if (on_neg_cache) { + nmp->nm_ncentries--; } lck_mtx_unlock(&nmp->nm_lock); } nfs_gss_clnt_ctx_destroy(cp); - } else if (neg_cache) - nfs_gss_clnt_ctx_neg_cache_enter(cp, nmp); + } else if (neg_cache) { + NFS_GSS_DBG("Entering context %s into negative cache\n", NFS_GSS_CTX(req, cp)); + if (nmp) { + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_ncentries++; + nfs_gss_clnt_ctx_neg_cache_reap(nmp); + lck_mtx_unlock(&nmp->nm_lock); + } + } NFS_GSS_CLNT_CTX_DUMP(nmp); } /* - * Enter the gss context associated with req on to the neg context + * Try and reap any old negative cache entries. * cache queue. 
*/ void -nfs_gss_clnt_ctx_neg_cache_enter(struct nfs_gss_clnt_ctx *cp, struct nfsmount *nmp) +nfs_gss_clnt_ctx_neg_cache_reap(struct nfsmount *nmp) { - struct nfs_gss_clnt_ctx *nccp, *tcp; + struct nfs_gss_clnt_ctx *cp, *tcp; struct timeval now; int reaped = 0; - if (nmp == NULL) - return; - - microuptime(&now); - lck_mtx_lock(&nmp->nm_lock); - - lck_mtx_lock(cp->gss_clnt_mtx); - if (cp->gss_clnt_entries.tqe_next != NFSNOLIST) - TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries); - - cp->gss_clnt_flags |= GSS_CTX_NC; - cp->gss_clnt_nctime = now.tv_sec; - lck_mtx_unlock(cp->gss_clnt_mtx); - - TAILQ_INSERT_TAIL(&nmp->nm_gssnccl, cp, gss_clnt_entries); - nmp->nm_ncentries++; - NFS_GSS_DBG("Reaping contexts ncentries = %d\n", nmp->nm_ncentries); /* Try and reap old, unreferenced, expired contexts */ - TAILQ_FOREACH_SAFE(nccp, &nmp->nm_gssnccl, gss_clnt_entries, tcp) { + + TAILQ_FOREACH_SAFE(cp, &nmp->nm_gsscl, gss_clnt_entries, tcp) { int destroy = 0; + /* Don't reap STICKY contexts */ + if ((cp->gss_clnt_flags & GSS_CTX_STICKY) || + !(cp->gss_clnt_flags & GSS_CTX_INVAL)) + continue; /* Keep up to GSS_MAX_NEG_CACHE_ENTRIES */ if (nmp->nm_ncentries <= GSS_MAX_NEG_CACHE_ENTRIES) break; - /* Contexts to young */ - if (nccp->gss_clnt_nctime + GSS_NEG_CACHE_TO >= now.tv_sec) - break; + /* Contexts too young */ + if (cp->gss_clnt_nctime + GSS_NEG_CACHE_TO >= now.tv_sec) + continue; /* Not referenced, remove it. 
*/ - lck_mtx_lock(nccp->gss_clnt_mtx); - if (nccp->gss_clnt_refcnt == 0) { - TAILQ_REMOVE(&nmp->nm_gssnccl, nccp, gss_clnt_entries); - reaped++; + lck_mtx_lock(cp->gss_clnt_mtx); + if (cp->gss_clnt_refcnt == 0) { + cp->gss_clnt_flags |= GSS_CTX_DESTROY; destroy = 1; } - lck_mtx_unlock(nccp->gss_clnt_mtx); - if (destroy) - nfs_gss_clnt_ctx_destroy(nccp); - nmp->nm_ncentries--; + lck_mtx_unlock(cp->gss_clnt_mtx); + if (destroy) { + TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries); + nmp->nm_ncentries++; + reaped++; + nfs_gss_clnt_ctx_destroy(cp); + } } NFS_GSS_DBG("Reaped %d contexts ncentries = %d\n", reaped, nmp->nm_ncentries); - lck_mtx_unlock(&nmp->nm_lock); } /* @@ -1851,26 +2009,116 @@ nfs_gss_clnt_ctx_neg_cache_enter(struct nfs_gss_clnt_ctx *cp, struct nfsmount *n static void nfs_gss_clnt_ctx_clean(struct nfs_gss_clnt_ctx *cp) { + /* Preserve gss_clnt_mtx */ + assert(cp->gss_clnt_thread == NULL); /* Will be set to this thread */ + /* gss_clnt_entries we should not be on any list at this point */ cp->gss_clnt_flags = 0; + /* gss_clnt_refcnt should be zero */ + assert(cp->gss_clnt_refcnt == 0); + /* + * We are who we are preserve: + * gss_clnt_cred + * gss_clnt_principal + * gss_clnt_prinlen + * gss_clnt_prinnt + * gss_clnt_desplay + */ + /* gss_clnt_proc will be set in nfs_gss_clnt_ctx_init */ + cp->gss_clnt_seqnum = 0; + /* Preserve gss_clnt_service, we're not changing flavors */ if (cp->gss_clnt_handle) { FREE(cp->gss_clnt_handle, M_TEMP); cp->gss_clnt_handle = NULL; } + cp->gss_clnt_handle_len = 0; + cp->gss_clnt_nctime = 0; + cp->gss_clnt_seqwin = 0; if (cp->gss_clnt_seqbits) { FREE(cp->gss_clnt_seqbits, M_TEMP); cp->gss_clnt_seqbits = NULL; } - if (cp->gss_clnt_token) { - FREE(cp->gss_clnt_token, M_TEMP); - cp->gss_clnt_token = NULL; + /* Preserve gss_clnt_mport. 
Still talking to the same gssd */ + if (cp->gss_clnt_verf) { + FREE(cp->gss_clnt_verf, M_TEMP); + cp->gss_clnt_verf = NULL; } + /* Service name might change on failover, so reset it */ if (cp->gss_clnt_svcname) { FREE(cp->gss_clnt_svcname, M_TEMP); cp->gss_clnt_svcname = NULL; + cp->gss_clnt_svcnt = 0; } - cp->gss_clnt_flags = 0; - cp->gss_clnt_seqwin = 0; - cp->gss_clnt_seqnum = 0; + cp->gss_clnt_svcnamlen = 0; + cp->gss_clnt_cred_handle = 0; + cp->gss_clnt_context = 0; + if (cp->gss_clnt_token) { + FREE(cp->gss_clnt_token, M_TEMP); + cp->gss_clnt_token = NULL; + } + cp->gss_clnt_tokenlen = 0; + if (cp->gss_clnt_kinfo) + bzero(cp->gss_clnt_kinfo, sizeof(gss_key_info)); + /* + * Preserve: + * gss_clnt_gssd_flags + * gss_clnt_major + * gss_clnt_minor + * gss_clnt_ptime + */ +} + +/* + * Copy a source context to a new context. This is used to create a new context + * with the identity of the old context for renewal. The old context is invalid + * at this point but may have reference still to it, so it is not safe to use that + * context. 
+ */ +static int +nfs_gss_clnt_ctx_copy(struct nfs_gss_clnt_ctx *scp, struct nfs_gss_clnt_ctx **dcpp, gss_key_info *ki) +{ + struct nfs_gss_clnt_ctx *dcp; + + *dcpp = (struct nfs_gss_clnt_ctx *)NULL; + MALLOC(dcp, struct nfs_gss_clnt_ctx *, sizeof (struct nfs_gss_clnt_ctx), M_TEMP, M_WAITOK); + if (dcp == NULL) + return (ENOMEM); + bzero(dcp, sizeof (struct nfs_gss_clnt_ctx)); + if (ki == NULL) { + MALLOC(dcp->gss_clnt_kinfo, gss_key_info *, sizeof (gss_key_info), M_TEMP, M_WAITOK); + if (dcp->gss_clnt_kinfo == NULL) { + FREE(dcp, M_TEMP); + return (ENOMEM); + } + } else { + dcp->gss_clnt_kinfo = ki; + } + bzero(dcp->gss_clnt_kinfo, sizeof (gss_key_info)); + dcp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL); + dcp->gss_clnt_cred = scp->gss_clnt_cred; + kauth_cred_ref(dcp->gss_clnt_cred); + dcp->gss_clnt_prinlen = scp->gss_clnt_prinlen; + dcp->gss_clnt_prinnt = scp->gss_clnt_prinnt; + if (scp->gss_clnt_principal) { + MALLOC(dcp->gss_clnt_principal, uint8_t *, dcp->gss_clnt_prinlen, M_TEMP, M_WAITOK | M_ZERO); + if (dcp->gss_clnt_principal == NULL) { + FREE(dcp->gss_clnt_kinfo, M_TEMP); + FREE(dcp, M_TEMP); + return (ENOMEM); + } + bcopy(scp->gss_clnt_principal, dcp->gss_clnt_principal, dcp->gss_clnt_prinlen); + } + /* Note we don't preserve the display name, that will be set by a successful up call */ + dcp->gss_clnt_service = scp->gss_clnt_service; + dcp->gss_clnt_mport = host_copy_special_port(scp->gss_clnt_mport); + /* gss_clnt_kinfo allocated above */ + dcp->gss_clnt_gssd_flags = scp->gss_clnt_gssd_flags; + dcp->gss_clnt_major = scp->gss_clnt_major; + dcp->gss_clnt_minor = scp->gss_clnt_minor; + dcp->gss_clnt_ptime = scp->gss_clnt_ptime; + + *dcpp = dcp; + + return (0); } /* @@ -1885,7 +2133,7 @@ nfs_gss_clnt_ctx_destroy(struct nfs_gss_clnt_ctx *cp) host_release_special_port(cp->gss_clnt_mport); cp->gss_clnt_mport = IPC_PORT_NULL; - + if (cp->gss_clnt_mtx) { lck_mtx_destroy(cp->gss_clnt_mtx, nfs_gss_clnt_grp); cp->gss_clnt_mtx = (lck_mtx_t 
*)NULL; @@ -1902,9 +2150,13 @@ nfs_gss_clnt_ctx_destroy(struct nfs_gss_clnt_ctx *cp) FREE(cp->gss_clnt_display, M_TEMP); cp->gss_clnt_display = NULL; } - + if (cp->gss_clnt_kinfo) { + FREE(cp->gss_clnt_kinfo, M_TEMP); + cp->gss_clnt_kinfo = NULL; + } + nfs_gss_clnt_ctx_clean(cp); - + FREE(cp, M_TEMP); } @@ -1917,15 +2169,18 @@ int nfs_gss_clnt_ctx_renew(struct nfsreq *req) { struct nfs_gss_clnt_ctx *cp = req->r_gss_ctx; - struct nfsmount *nmp = req->r_nmp; - struct nfs_gss_clnt_ctx tmp; struct nfs_gss_clnt_ctx *ncp; - + struct nfsmount *nmp; int error = 0; + char CTXBUF[NFS_CTXBUFSZ]; if (cp == NULL) return (0); + if (req->r_nmp == NULL) + return (ENXIO); + nmp = req->r_nmp; + lck_mtx_lock(cp->gss_clnt_mtx); if (cp->gss_clnt_flags & GSS_CTX_INVAL) { lck_mtx_unlock(cp->gss_clnt_mtx); @@ -1933,59 +2188,34 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) return (0); // already being renewed } - bzero(&tmp, sizeof(tmp)); - tmp.gss_clnt_cred = cp->gss_clnt_cred; - kauth_cred_ref(tmp.gss_clnt_cred); - tmp.gss_clnt_mport = host_copy_special_port(cp->gss_clnt_mport); - tmp.gss_clnt_principal = cp->gss_clnt_principal; - cp->gss_clnt_principal = NULL; - tmp.gss_clnt_prinlen = cp->gss_clnt_prinlen; - tmp.gss_clnt_prinnt = cp->gss_clnt_prinnt; - tmp.gss_clnt_major = cp->gss_clnt_major; - tmp.gss_clnt_minor = cp->gss_clnt_minor; - tmp.gss_clnt_ptime = cp->gss_clnt_ptime; - - NFS_GSS_DBG("Renewing context %d/%d\n", - kauth_cred_getasid(tmp.gss_clnt_cred), - kauth_cred_getauid(tmp.gss_clnt_cred)); cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY); - /* - * If there's a thread waiting - * in the old context, wake it up. 
- */ if (cp->gss_clnt_flags & (GSS_NEEDCTX | GSS_NEEDSEQ)) { cp->gss_clnt_flags &= ~GSS_NEEDSEQ; wakeup(cp); } lck_mtx_unlock(cp->gss_clnt_mtx); + error = nfs_gss_clnt_ctx_copy(cp, &ncp, NULL); + NFS_GSS_DBG("Renewing context %s\n", NFS_GSS_CTX(req, ncp)); + nfs_gss_clnt_ctx_unref(req); + if (error) + return (error); + + lck_mtx_lock(&nmp->nm_lock); /* - * Create a new context + * Note we don't bother taking the new context mutex as we're + * not findable at the moment. */ - MALLOC(ncp, struct nfs_gss_clnt_ctx *, sizeof(*ncp), - M_TEMP, M_WAITOK|M_ZERO); - if (ncp == NULL) { - error = ENOMEM; - goto out; - } - - *ncp = tmp; - ncp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL); ncp->gss_clnt_thread = current_thread(); - - lck_mtx_lock(&nmp->nm_lock); - TAILQ_INSERT_TAIL(&nmp->nm_gsscl, ncp, gss_clnt_entries); - lck_mtx_unlock(&nmp->nm_lock); - - /* Adjust reference counts to new and old context */ - nfs_gss_clnt_ctx_unref(req); nfs_gss_clnt_ctx_ref(req, ncp); + TAILQ_INSERT_HEAD(&nmp->nm_gsscl, ncp, gss_clnt_entries); + lck_mtx_unlock(&nmp->nm_lock); - error = nfs_gss_clnt_ctx_init_retry(req, ncp); -out: + error = nfs_gss_clnt_ctx_init_retry(req, ncp); // Initialize new context if (error) nfs_gss_clnt_ctx_unref(req); + return (error); } @@ -2006,26 +2236,27 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp) if (!nmp) return; - for (;;) { - lck_mtx_lock(&nmp->nm_lock); - cp = TAILQ_FIRST(&nmp->nm_gsscl); - if (cp == NULL) { - lck_mtx_unlock(&nmp->nm_lock); - goto remove_neg_cache; - } - + + lck_mtx_lock(&nmp->nm_lock); + while((cp = TAILQ_FIRST(&nmp->nm_gsscl))) { + TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries); + cp->gss_clnt_entries.tqe_next = NFSNOLIST; lck_mtx_lock(cp->gss_clnt_mtx); + if (cp->gss_clnt_flags & GSS_CTX_DESTROY) { + lck_mtx_unlock(cp->gss_clnt_mtx); + continue; + } cp->gss_clnt_refcnt++; lck_mtx_unlock(cp->gss_clnt_mtx); req.r_gss_ctx = cp; lck_mtx_unlock(&nmp->nm_lock); - /* * Tell the server to destroy its context. 
* But don't bother if it's a forced unmount. */ - if (!nfs_mount_gone(nmp)) { + if (!nfs_mount_gone(nmp) && + (cp->gss_clnt_flags & (GSS_CTX_INVAL | GSS_CTX_DESTROY | GSS_CTX_COMPLETE)) == GSS_CTX_COMPLETE) { cp->gss_clnt_proc = RPCSEC_GSS_DESTROY; error = 0; @@ -2049,37 +2280,13 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp) cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY); lck_mtx_unlock(cp->gss_clnt_mtx); nfs_gss_clnt_ctx_unref(&req); - } - - /* Now all the remaining contexts should be on the negative cache list */ -remove_neg_cache: - for (;;) { lck_mtx_lock(&nmp->nm_lock); - cp = TAILQ_FIRST(&nmp->nm_gssnccl); - if (cp == NULL) { - lck_mtx_unlock(&nmp->nm_lock); - return; - } - req.r_gss_ctx = cp; - TAILQ_REMOVE(&nmp->nm_gssnccl, cp, gss_clnt_entries); - cp->gss_clnt_entries.tqe_next = NFSNOLIST; - - lck_mtx_lock(cp->gss_clnt_mtx); - if (cp->gss_clnt_refcnt) - NFS_GSS_DBG("Context %d/%d found with %d references\n", - kauth_cred_getasid(cp->gss_clnt_cred), - kauth_cred_getauid(cp->gss_clnt_cred), - cp->gss_clnt_refcnt); - cp->gss_clnt_refcnt++; - cp->gss_clnt_flags |= GSS_CTX_DESTROY; - lck_mtx_unlock(cp->gss_clnt_mtx); - lck_mtx_unlock(&nmp->nm_lock); - - nfs_gss_clnt_ctx_unref(&req); } - NFS_GSS_CLNT_CTX_DUMP(nmp); + lck_mtx_unlock(&nmp->nm_lock); + assert(TAILQ_EMPTY(&nmp->nm_gsscl)); } + /* * Removes a mounts context for a credential */ @@ -2124,43 +2331,115 @@ nfs_gss_clnt_ctx_remove(struct nfsmount *nmp, kauth_cred_t cred) lck_mtx_unlock(cp->gss_clnt_mtx); } - TAILQ_FOREACH(cp, &nmp->nm_gssnccl, gss_clnt_entries) { + lck_mtx_unlock(&nmp->nm_lock); + + NFS_GSS_DBG("Returning ENOENT\n"); + return (ENOENT); +} + +/* + * Sets a mounts principal for a session associated with cred. 
+ */ +int +nfs_gss_clnt_ctx_set_principal(struct nfsmount *nmp, vfs_context_t ctx, + uint8_t *principal, uint32_t princlen, uint32_t nametype) + +{ + struct nfsreq req; + int error; + + NFS_GSS_DBG("Enter:\n"); + + bzero(&req, sizeof(struct nfsreq)); + req.r_nmp = nmp; + req.r_gss_ctx = NULL; + req.r_auth = nmp->nm_auth; + req.r_thread = vfs_context_thread(ctx); + req.r_cred = vfs_context_ucred(ctx); + + error = nfs_gss_clnt_ctx_find_principal(&req, principal, princlen, nametype); + NFS_GSS_DBG("nfs_gss_clnt_ctx_find_principal returned %d\n", error); + /* + * We don't care about auth errors. Those would indicate that the context is in the + * neagative cache and if and when the user has credentials for the principal + * we should be good to go in that we will select those credentials for this principal. + */ + if (error == EACCES || error == EAUTH || error == ENEEDAUTH) + error = 0; + + /* We're done with this request */ + nfs_gss_clnt_ctx_unref(&req); + + return (error); +} + +/* + * Gets a mounts principal from a session associated with cred + */ +int +nfs_gss_clnt_ctx_get_principal(struct nfsmount *nmp, vfs_context_t ctx, + struct user_nfs_gss_principal *p) +{ + struct nfsreq req; + int error = 0; + struct nfs_gss_clnt_ctx *cp; + kauth_cred_t cred = vfs_context_ucred(ctx); + const char *princ; + char CTXBUF[NFS_CTXBUFSZ]; + + req.r_nmp = nmp; + lck_mtx_lock(&nmp->nm_lock); + TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) { lck_mtx_lock(cp->gss_clnt_mtx); + if (cp->gss_clnt_flags & GSS_CTX_DESTROY) { + NFS_GSS_DBG("Found destroyed context %s refcnt = %d continuing\n", + NFS_GSS_CTX(&req, cp), + cp->gss_clnt_refcnt); + lck_mtx_unlock(cp->gss_clnt_mtx); + continue; + } if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, cred)) { - if (cp->gss_clnt_flags & GSS_CTX_DESTROY) { - NFS_GSS_DBG("Found destroyed context %d/%d refcnt = %d continuing\n", - kauth_cred_getasid(cp->gss_clnt_cred), - kauth_cred_getuid(cp->gss_clnt_cred), - cp->gss_clnt_refcnt); - 
lck_mtx_unlock(cp->gss_clnt_mtx); - continue; - } cp->gss_clnt_refcnt++; - cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY); lck_mtx_unlock(cp->gss_clnt_mtx); - req.r_gss_ctx = cp; - lck_mtx_unlock(&nmp->nm_lock); - /* - * Drop the reference to remove it if its - * refcount is zero. - */ - NFS_GSS_DBG("Removed context from neg cache %d/%d refcnt = %d\n", - kauth_cred_getasid(cp->gss_clnt_cred), - kauth_cred_getuid(cp->gss_clnt_cred), - cp->gss_clnt_refcnt); - nfs_gss_clnt_ctx_unref(&req); - return (0); + goto out; } lck_mtx_unlock(cp->gss_clnt_mtx); } - lck_mtx_unlock(&nmp->nm_lock); - - NFS_GSS_DBG("Returning ENOENT\n"); - return (ENOENT); -} +out: + if (cp == NULL) { + lck_mtx_unlock(&nmp->nm_lock); + p->princlen = 0; + p->principal = USER_ADDR_NULL; + p->nametype = GSSD_STRING_NAME; + p->flags |= NFS_IOC_NO_CRED_FLAG; + NFS_GSS_DBG("No context found for session %d by uid %d\n", + kauth_cred_getasid(cred), kauth_cred_getuid(cred)); + return (0); + } + princ = cp->gss_clnt_principal ? (char *)cp->gss_clnt_principal : cp->gss_clnt_display; + p->princlen = cp->gss_clnt_principal ? cp->gss_clnt_prinlen : + (cp->gss_clnt_display ? 
strlen(cp->gss_clnt_display) : 0); + p->nametype = cp->gss_clnt_prinnt; + if (princ) { + char *pp; + + MALLOC(pp, char *, p->princlen, M_TEMP, M_WAITOK); + if (pp) { + bcopy(princ, pp, p->princlen); + p->principal = CAST_USER_ADDR_T(pp); + } + else + error = ENOMEM; + } + lck_mtx_unlock(&nmp->nm_lock); + req.r_gss_ctx = cp; + NFS_GSS_DBG("Found context %s\n", NFS_GSS_CTX(&req, NULL)); + nfs_gss_clnt_ctx_unref(&req); + return (error); +} #endif /* NFSCLIENT */ /************* @@ -3243,7 +3522,7 @@ nfs_gss_mach_alloc_buffer(u_char *buf, uint32_t buflen, vm_map_copy_t *addr) tbuflen = vm_map_round_page(buflen, vm_map_page_mask(ipc_kernel_map)); - kr = vm_allocate(ipc_kernel_map, &kmem_buf, tbuflen, VM_FLAGS_ANYWHERE); + kr = vm_allocate(ipc_kernel_map, &kmem_buf, tbuflen, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_FILE)); if (kr != 0) { printf("nfs_gss_mach_alloc_buffer: vm_allocate failed\n"); return; @@ -3254,7 +3533,7 @@ nfs_gss_mach_alloc_buffer(u_char *buf, uint32_t buflen, vm_map_copy_t *addr) vm_map_page_mask(ipc_kernel_map)), vm_map_round_page(kmem_buf + tbuflen, vm_map_page_mask(ipc_kernel_map)), - VM_PROT_READ|VM_PROT_WRITE, FALSE); + VM_PROT_READ|VM_PROT_WRITE|VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_FILE), FALSE); if (kr != 0) { printf("nfs_gss_mach_alloc_buffer: vm_map_wire failed\n"); return; diff --git a/bsd/nfs/nfs_gss.h b/bsd/nfs/nfs_gss.h index 1588eba02..e590eb1bf 100644 --- a/bsd/nfs/nfs_gss.h +++ b/bsd/nfs/nfs_gss.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Apple Inc. All rights reserved. + * Copyright (c) 2007-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -154,7 +154,7 @@ struct nfs_gss_clnt_ctx { gssd_ctx gss_clnt_context; // Opaque context handle from gssd uint8_t *gss_clnt_token; // GSS token exchanged via gssd & server uint32_t gss_clnt_tokenlen; // Length of token - gss_key_info gss_clnt_kinfo; // GSS key info + gss_key_info *gss_clnt_kinfo; // GSS key info uint32_t gss_clnt_gssd_flags; // Special flag bits to gssd uint32_t gss_clnt_major; // GSS major result from gssd or server uint32_t gss_clnt_minor; // GSS minor result from gssd or server @@ -166,11 +166,10 @@ struct nfs_gss_clnt_ctx { */ #define GSS_CTX_COMPLETE 0x00000001 // Context is complete #define GSS_CTX_INVAL 0x00000002 // Context is invalid -#define GSS_CTX_INCOMPLETE 0x00000004 // Context needs to be inited +#define GSS_CTX_STICKY 0x00000004 // Context has been set by user #define GSS_NEEDSEQ 0x00000008 // Need a sequence number #define GSS_NEEDCTX 0x00000010 // Need the context -#define GSS_CTX_NC 0x00000020 // Context is in negative cache -#define GSS_CTX_DESTROY 0x00000040 // Context is being destroyed, don't cache +#define GSS_CTX_DESTROY 0x00000020 // Context is being destroyed, don't cache /* * The server's RPCSEC_GSS context information @@ -217,9 +216,15 @@ LIST_HEAD(nfs_gss_svc_ctx_hashhead, nfs_gss_svc_ctx); #define GSS_TIMER_PERIOD 300 // seconds #define MSECS_PER_SEC 1000 +#define auth_is_kerberized(auth) \ + (auth == RPCAUTH_KRB5 || \ + auth == RPCAUTH_KRB5I || \ + auth == RPCAUTH_KRB5P) + __BEGIN_DECLS void nfs_gss_init(void); +uid_t nfs_cred_getasid2uid(kauth_cred_t); int nfs_gss_clnt_cred_put(struct nfsreq *, struct nfsm_chain *, mbuf_t); int nfs_gss_clnt_verf_get(struct nfsreq *, struct nfsm_chain *, uint32_t, uint32_t, uint32_t *); @@ -229,7 +234,9 @@ int nfs_gss_clnt_ctx_renew(struct nfsreq *); void nfs_gss_clnt_ctx_ref(struct nfsreq *, struct nfs_gss_clnt_ctx *); void nfs_gss_clnt_ctx_unref(struct nfsreq *); void nfs_gss_clnt_ctx_unmount(struct nfsmount *); -int 
nfs_gss_clnt_ctx_remove(struct nfsmount *, kauth_cred_t cred); +int nfs_gss_clnt_ctx_remove(struct nfsmount *, kauth_cred_t); +int nfs_gss_clnt_ctx_set_principal(struct nfsmount *, vfs_context_t, uint8_t *, uint32_t, uint32_t); +int nfs_gss_clnt_ctx_get_principal(struct nfsmount *, vfs_context_t, struct user_nfs_gss_principal *); int nfs_gss_svc_cred_get(struct nfsrv_descript *, struct nfsm_chain *); int nfs_gss_svc_verf_put(struct nfsrv_descript *, struct nfsm_chain *); int nfs_gss_svc_ctx_init(struct nfsrv_descript *, struct nfsrv_sock *, mbuf_t *); diff --git a/bsd/nfs/nfs_ioctl.h b/bsd/nfs/nfs_ioctl.h index 31e27f56d..9b2cbb5ff 100644 --- a/bsd/nfs/nfs_ioctl.h +++ b/bsd/nfs/nfs_ioctl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Apple Inc. All rights reserved. + * Copyright (c) 2012,2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -39,5 +39,40 @@ */ #define NFS_IOC_DESTROY_CRED _IO('n', 1) #define NFS_FSCTL_DESTROY_CRED IOCBASECMD(NFS_IOC_DESTROY_CRED) +/* + * fsclt (vnop_ioctl) to set the callers credentials associated with the vnode's mount + */ + + +struct nfs_gss_principal +{ + uint32_t princlen; /* length of data */ + uint32_t nametype; /* nametype of data */ +#ifdef KERNEL + user32_addr_t principal; /* principal data in userspace */ +#else + uint8_t *principal; +#endif + uint32_t flags; /* Return flags */ +}; + +#ifdef KERNEL +/* LP64 version of nfs_gss_principal */ +struct user_nfs_gss_principal +{ + uint32_t princlen; /* length of data */ + uint32_t nametype; /* nametype of data */ + user_addr_t principal; /* principal data in userspace */ + uint32_t flags; /* Returned flags */ +}; +#endif + +/* If no credential was found returned NFS_IOC_NO_CRED_FLAG in the flags field. 
*/ +#define NFS_IOC_NO_CRED_FLAG 1 /* No credential was found */ + +#define NFS_IOC_SET_CRED _IOW('n', 2, struct nfs_gss_principal) +#define NFS_FSCTL_SET_CRED IOCBASECMD(NFS_IOC_SET_CRED) +#define NFS_IOC_GET_CRED _IOWR('n', 3, struct nfs_gss_principal) +#define NFS_FSCTL_GET_CRED IOCBASECMD(NFS_IOC_GET_CRED) #endif diff --git a/bsd/nfs/nfs_serv.c b/bsd/nfs/nfs_serv.c index 8cc717b8e..7b9df74b4 100644 --- a/bsd/nfs/nfs_serv.c +++ b/bsd/nfs/nfs_serv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -113,7 +113,7 @@ struct nfsd_head nfsd_head, nfsd_queue; lck_grp_t *nfsrv_slp_rwlock_group; lck_grp_t *nfsrv_slp_mutex_group; -struct nfsrv_sockhead nfsrv_socklist, nfsrv_deadsocklist, nfsrv_sockwg, +struct nfsrv_sockhead nfsrv_socklist, nfsrv_sockwg, nfsrv_sockwait, nfsrv_sockwork; struct nfsrv_sock *nfsrv_udpsock = NULL; struct nfsrv_sock *nfsrv_udp6sock = NULL; @@ -140,7 +140,7 @@ int nfsrv_fsevents_enabled = 1; #if CONFIG_FSE thread_call_t nfsrv_fmod_timer_call; #endif -thread_call_t nfsrv_deadsock_timer_call; +thread_call_t nfsrv_idlesock_timer_call; thread_call_t nfsrv_wg_timer_call; int nfsrv_wg_timer_on; @@ -223,14 +223,13 @@ nfsrv_init(void) #if CONFIG_FSE nfsrv_fmod_timer_call = thread_call_allocate(nfsrv_fmod_timer, NULL); #endif - nfsrv_deadsock_timer_call = thread_call_allocate(nfsrv_deadsock_timer, NULL); + nfsrv_idlesock_timer_call = thread_call_allocate(nfsrv_idlesock_timer, NULL); nfsrv_wg_timer_call = thread_call_allocate(nfsrv_wg_timer, NULL); /* Init server data structures */ TAILQ_INIT(&nfsrv_socklist); TAILQ_INIT(&nfsrv_sockwait); TAILQ_INIT(&nfsrv_sockwork); - TAILQ_INIT(&nfsrv_deadsocklist); TAILQ_INIT(&nfsrv_sockwg); TAILQ_INIT(&nfsd_head); TAILQ_INIT(&nfsd_queue); diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c index ab4cdbe8f..191a8fa29 100644 --- a/bsd/nfs/nfs_socket.c +++ b/bsd/nfs/nfs_socket.c 
@@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -389,14 +389,23 @@ nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag) nso->nso_version = RPCBVERS3; } } else if (nso->nso_protocol == NFS_PROG) { - if ((minvers > NFS_VER4) || (maxvers < NFS_VER2)) - error = EPROGMISMATCH; - else if ((NFS_VER3 >= minvers) && (NFS_VER3 <= maxvers)) - nso->nso_version = NFS_VER3; - else if ((NFS_VER2 >= minvers) && (NFS_VER2 <= maxvers)) - nso->nso_version = NFS_VER2; - else if ((NFS_VER4 >= minvers) && (NFS_VER4 <= maxvers)) - nso->nso_version = NFS_VER4; + int vers; + + /* + * N.B. Both portmapper and rpcbind V3 are happy to return + * addresses for other versions than the one you ask (getport or + * getaddr) and thus we may have fallen to this code path. So if + * we get a version that we support, use highest supported + * version. This assumes that the server supports all versions + * between minvers and maxvers. Note for IPv6 we will try and + * use rpcbind V4 which has getversaddr and we should not get + * here if that was successful. + */ + for (vers = nso->nso_nfs_max_vers; vers >= (int)nso->nso_nfs_min_vers; vers--) { + if (vers >= (int)minvers && vers <= (int)maxvers) + break; + } + nso->nso_version = (vers < (int)nso->nso_nfs_min_vers) ? 
0 : vers; } if (!error && nso->nso_version) accepted_status = RPC_SUCCESS; @@ -456,7 +465,7 @@ nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag) */ int nfs_socket_create( - __unused struct nfsmount *nmp, + struct nfsmount *nmp, struct sockaddr *sa, int sotype, in_port_t port, @@ -506,6 +515,8 @@ nfs_socket_create( ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port); nso->nso_protocol = protocol; nso->nso_version = vers; + nso->nso_nfs_min_vers = PVER2MAJOR(nmp->nm_min_vers); + nso->nso_nfs_max_vers = PVER2MAJOR(nmp->nm_max_vers); error = sock_socket(sa->sa_family, nso->nso_sotype, 0, NULL, NULL, &nso->nso_so); @@ -844,7 +855,7 @@ nfs_connect_search_ping(struct nfsmount *nmp, struct nfs_socket *nso, struct tim if (nso->nso_protocol == PMAPPROG) vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4; else if (nso->nso_protocol == NFS_PROG) - vers = NFS_VER3; + vers = PVER2MAJOR(nmp->nm_max_vers); } lck_mtx_unlock(&nso->nso_lock); error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS, @@ -883,7 +894,7 @@ nfs_connect_search_ping(struct nfsmount *nmp, struct nfs_socket *nso, struct tim * Set the nfs socket protocol and version if needed. */ void -nfs_connect_search_socket_found(struct nfsmount *nmp __unused, struct nfs_socket_search *nss, struct nfs_socket *nso) +nfs_connect_search_socket_found(struct nfsmount *nmp, struct nfs_socket_search *nss, struct nfs_socket *nso) { NFS_SOCK_DBG("nfs connect %s socket %p verified\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); @@ -892,7 +903,7 @@ nfs_connect_search_socket_found(struct nfsmount *nmp __unused, struct nfs_socket if (nso->nso_protocol == PMAPPROG) nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? 
PMAPVERS : RPCBVERS4; if (nso->nso_protocol == NFS_PROG) - nso->nso_version = NFS_VER3; + nso->nso_version = PVER2MAJOR(nmp->nm_max_vers); } TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link); nss->nss_sockcnt--; @@ -1046,6 +1057,7 @@ nfs_connect_search_loop(struct nfsmount *nmp, struct nfs_socket_search *nss) * A mount's initial connection may require negotiating some parameters such * as socket type and NFS version. */ + int nfs_connect(struct nfsmount *nmp, int verbose, int timeo) { @@ -1056,6 +1068,7 @@ nfs_connect(struct nfsmount *nmp, int verbose, int timeo) sock_upcall upcall; struct timeval now, start; int error, savederror, nfsvers; + int tryv4 = 1; uint8_t sotype = nmp->nm_sotype ? nmp->nm_sotype : SOCK_STREAM; fhandle_t *fh = NULL; char *path = NULL; @@ -1107,10 +1120,17 @@ nfs_connect(struct nfsmount *nmp, int verbose, int timeo) if (!nmp->nm_vers) { /* No NFS version specified... */ if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) { - /* ...connect to portmapper first if we (may) need any ports. */ - nss.nss_port = PMAPPORT; - nss.nss_protocol = PMAPPROG; - nss.nss_version = 0; + if (PVER2MAJOR(nmp->nm_max_vers) >= NFS_VER4 && tryv4) { + nss.nss_port = NFS_PORT; + nss.nss_protocol = NFS_PROG; + nss.nss_version = 4; + nss.nss_flags |= NSS_FALLBACK2PMAP; + } else { + /* ...connect to portmapper first if we (may) need any ports. */ + nss.nss_port = PMAPPORT; + nss.nss_protocol = PMAPPROG; + nss.nss_version = 0; + } } else { /* ...connect to NFS port first. */ nss.nss_port = nmp->nm_nfsport; @@ -1118,10 +1138,23 @@ nfs_connect(struct nfsmount *nmp, int verbose, int timeo) nss.nss_version = 0; } } else if (nmp->nm_vers >= NFS_VER4) { - /* For NFSv4, we use the given (or default) port. */ - nss.nss_port = nmp->nm_nfsport ? nmp->nm_nfsport : NFS_PORT; - nss.nss_protocol = NFS_PROG; - nss.nss_version = 4; + if (tryv4) { + /* For NFSv4, we use the given (or default) port. */ + nss.nss_port = nmp->nm_nfsport ? 
nmp->nm_nfsport : NFS_PORT; + nss.nss_protocol = NFS_PROG; + nss.nss_version = 4; + /* + * set NSS_FALLBACK2PMAP here to pick up any non standard port + * if no port is specified on the mount; + * Note nm_vers is set so we will only try NFS_VER4. + */ + if (!nmp->nm_nfsport) + nss.nss_flags |= NSS_FALLBACK2PMAP; + } else { + nss.nss_port = PMAPPORT; + nss.nss_protocol = PMAPPROG; + nss.nss_version = 0; + } } else { /* For NFSv3/v2... */ if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) { @@ -1176,6 +1209,13 @@ nfs_connect(struct nfsmount *nmp, int verbose, int timeo) if (error || !nss.nss_sock) { /* search failed */ nfs_socket_search_cleanup(&nss); + if (nss.nss_flags & NSS_FALLBACK2PMAP) { + tryv4 = 0; + NFS_SOCK_DBG("nfs connect %s TCP failed for V4 %d %d, trying PORTMAP\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error); + goto tryagain; + } + if (!error && (nss.nss_sotype == SOCK_STREAM) && !nmp->nm_sotype && (nmp->nm_vers < NFS_VER4)) { /* Try using UDP */ sotype = SOCK_DGRAM; @@ -1222,30 +1262,21 @@ nfs_connect(struct nfsmount *nmp, int verbose, int timeo) /* Set up socket address and port for NFS socket. */ bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); - /* If NFS version not set, try NFSv3 then NFSv2. */ - nfsvers = nmp->nm_vers ? nmp->nm_vers : NFS_VER3; - + /* If NFS version not set, try nm_max_vers down to nm_min_vers */ + nfsvers = nmp->nm_vers ? nmp->nm_vers : PVER2MAJOR(nmp->nm_max_vers); if (!(port = nmp->nm_nfsport)) { if (ss.ss_family == AF_INET) ((struct sockaddr_in*)&ss)->sin_port = htons(0); else if (ss.ss_family == AF_INET6) ((struct sockaddr_in6*)&ss)->sin6_port = htons(0); - error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, - nso->nso_so, NFS_PROG, nfsvers, - (nso->nso_sotype == SOCK_DGRAM) ? 
IPPROTO_UDP : IPPROTO_TCP, timeo); - if (!error) { - if (ss.ss_family == AF_INET) - port = ntohs(((struct sockaddr_in*)&ss)->sin_port); - else if (ss.ss_family == AF_INET6) - port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); - if (!port) - error = EPROGUNAVAIL; - } - if (error && !nmp->nm_vers) { - nfsvers = NFS_VER2; + for (; nfsvers >= (int)PVER2MAJOR(nmp->nm_min_vers); nfsvers--) { + if (nmp->nm_vers && nmp->nm_vers != nfsvers) + continue; /* Wrong version */ + if (nfsvers == NFS_VER4 && nso->nso_sotype == SOCK_DGRAM) + continue; /* NFSv4 does not do UDP */ error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, - nso->nso_so, NFS_PROG, nfsvers, - (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo); + nso->nso_so, NFS_PROG, nfsvers, + (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo); if (!error) { if (ss.ss_family == AF_INET) port = ntohs(((struct sockaddr_in*)&ss)->sin_port); @@ -1253,8 +1284,14 @@ nfs_connect(struct nfsmount *nmp, int verbose, int timeo) port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); if (!port) error = EPROGUNAVAIL; + if (port == NFS_PORT && nfsvers == NFS_VER4 && tryv4 == 0) + continue; /* We already tried this */ } + if (!error) + break; } + if (nfsvers < (int)PVER2MAJOR(nmp->nm_min_vers) && error == 0) + error = EPROGUNAVAIL; if (error) { nfs_socket_search_update_error(&nss, error); nfs_socket_destroy(nso); @@ -1262,6 +1299,7 @@ nfs_connect(struct nfsmount *nmp, int verbose, int timeo) } } /* Create NFS protocol socket and add it to the list of sockets. */ + /* N.B. 
If nfsvers is NFS_VER4 at this point then we're on a non standard port */ error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nso->nso_sotype, port, NFS_PROG, nfsvers, NMFLAG(nmp, RESVPORT), &nsonfs); if (error) { @@ -1680,7 +1718,7 @@ nfs_reconnect(struct nfsmount *nmp) rq->r_flags |= R_MUSTRESEND; rq->r_rtt = -1; wakeup(rq); - if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) + if ((rq->r_flags & (R_IOD|R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) nfs_asyncio_resend(rq); } lck_mtx_unlock(&rq->r_mtx); @@ -1751,7 +1789,7 @@ nfs_need_reconnect(struct nfsmount *nmp) rq->r_flags |= R_MUSTRESEND; rq->r_rtt = -1; wakeup(rq); - if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) + if ((rq->r_flags & (R_IOD|R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) nfs_asyncio_resend(rq); } lck_mtx_unlock(&rq->r_mtx); @@ -1846,6 +1884,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) req->r_rchain.tqe_next = NFSREQNOLIST; lck_mtx_unlock(&nmp->nm_lock); lck_mtx_lock(&req->r_mtx); + /* Note that we have a reference on the request that was taken nfs_asyncio_resend */ if (req->r_error || req->r_nmrep.nmc_mhead) { dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT); req->r_flags &= ~R_RESENDQ; @@ -1853,6 +1892,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) lck_mtx_unlock(&req->r_mtx); if (dofinish) nfs_asyncio_finish(req); + nfs_request_rele(req); lck_mtx_lock(&nmp->nm_lock); continue; } @@ -1886,6 +1926,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) lck_mtx_unlock(&req->r_mtx); if (dofinish) nfs_asyncio_finish(req); + nfs_request_rele(req); lck_mtx_lock(&nmp->nm_lock); error = 0; continue; @@ -1903,6 +1944,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) req->r_flags &= ~R_RESENDQ; wakeup(req); lck_mtx_unlock(&req->r_mtx); + nfs_request_rele(req); lck_mtx_lock(&nmp->nm_lock); continue; } @@ -1915,6 +1957,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) 
lck_mtx_unlock(&req->r_mtx); if (dofinish) nfs_asyncio_finish(req); + nfs_request_rele(req); lck_mtx_lock(&nmp->nm_lock); } if (nfs_mount_check_dead_timeout(nmp)) { @@ -3214,6 +3257,9 @@ nfs_send(struct nfsreq *req, int wait) nfs_sndunlock(req); + if (nfs_is_dead(error, nmp)) + error = EIO; + /* * Don't log some errors: * EPIPE errors may be common with servers that drop idle connections. @@ -3227,9 +3273,6 @@ nfs_send(struct nfsreq *req, int wait) !req->r_nmp ? "" : vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname); - if (nfs_is_dead(error, nmp)) - error = EIO; - /* prefer request termination error over other errors */ error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0); if (error2) @@ -3678,26 +3721,24 @@ nfs_request_create( void nfs_request_destroy(struct nfsreq *req) { - struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp; + struct nfsmount *nmp; struct gss_seq *gsp, *ngsp; int clearjbtimeo = 0; - struct timespec ts = { 1, 0 }; if (!req || !(req->r_flags & R_INITTED)) return; + nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp; req->r_flags &= ~R_INITTED; if (req->r_lflags & RL_QUEUED) nfs_reqdequeue(req); - if (req->r_achain.tqe_next != NFSREQNOLIST && - req->r_achain.tqe_next != NFSIODCOMPLETING) { + if (req->r_achain.tqe_next != NFSREQNOLIST) { /* * Still on an async I/O queue? * %%% But which one, we may be on a local iod. 
*/ lck_mtx_lock(nfsiod_mutex); - if (nmp && req->r_achain.tqe_next != NFSREQNOLIST && - req->r_achain.tqe_next != NFSIODCOMPLETING) { + if (nmp && req->r_achain.tqe_next != NFSREQNOLIST) { TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain); req->r_achain.tqe_next = NFSREQNOLIST; } @@ -3719,6 +3760,8 @@ nfs_request_destroy(struct nfsreq *req) wakeup(req2); } } + assert((req->r_flags & R_RESENDQ) == 0); + /* XXX should we just remove this conditional, we should have a reference if we're resending */ if (req->r_rchain.tqe_next != NFSREQNOLIST) { TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain); req->r_rchain.tqe_next = NFSREQNOLIST; @@ -3736,9 +3779,6 @@ nfs_request_destroy(struct nfsreq *req) } lck_mtx_unlock(&nmp->nm_lock); } - /* Wait for the mount_sock_thread to finish with the resend */ - while (req->r_flags & R_RESENDQ) - msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts); lck_mtx_unlock(&req->r_mtx); if (clearjbtimeo) @@ -4480,6 +4520,8 @@ nfs_request_async( req->r_flags |= R_SENDING; lck_mtx_unlock(&req->r_mtx); error = nfs_send(req, 1); + /* Remove the R_RESENDQ reference */ + nfs_request_rele(req); lck_mtx_lock(&req->r_mtx); if (error) break; @@ -4537,6 +4579,9 @@ nfs_request_async_finish( req->r_rchain.tqe_next = NFSREQNOLIST; if (req->r_flags & R_RESENDQ) req->r_flags &= ~R_RESENDQ; + /* Remove the R_RESENDQ reference */ + assert(req->r_refs > 0); + req->r_refs--; lck_mtx_unlock(&nmp->nm_lock); break; } @@ -4554,11 +4599,16 @@ nfs_request_async_finish( } while (!error && (req->r_flags & R_RESTART)) { - if (asyncio && req->r_resendtime) { /* send later */ + if (asyncio) { + assert(req->r_achain.tqe_next == NFSREQNOLIST); lck_mtx_lock(&req->r_mtx); - nfs_asyncio_resend(req); + req->r_flags &= ~R_IOD; + if (req->r_resendtime) { /* send later */ + nfs_asyncio_resend(req); + lck_mtx_unlock(&req->r_mtx); + return (EINPROGRESS); + } lck_mtx_unlock(&req->r_mtx); - return (EINPROGRESS); } req->r_error = 0; req->r_flags &= ~R_RESTART; @@ -4912,7 +4962,7 @@ 
nfs_request_timer(__unused void *param0, __unused void *param1) req->r_flags |= R_MUSTRESEND; req->r_rtt = -1; wakeup(req); - if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) + if ((req->r_flags & (R_IOD|R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) nfs_asyncio_resend(req); lck_mtx_unlock(&req->r_mtx); } diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c index 3872e7ff0..8496093a9 100644 --- a/bsd/nfs/nfs_subs.c +++ b/bsd/nfs/nfs_subs.c @@ -2527,6 +2527,12 @@ nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) if (error) return (error); + if (nxna.nxna_addr.ss_len > sizeof(struct sockaddr_storage) || + nxna.nxna_mask.ss_len > sizeof(struct sockaddr_storage) || + nxna.nxna_addr.ss_family > AF_MAX || + nxna.nxna_mask.ss_family > AF_MAX) + return (EINVAL); + if (nxna.nxna_flags & (NX_MAPROOT|NX_MAPALL)) { struct posix_cred temp_pcred; bzero(&temp_pcred, sizeof(temp_pcred)); @@ -3221,6 +3227,38 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) return (error); } +/* + * Check if there is a least one export that will allow this address. + * + * Return 0, if there is an export that will allow this address, + * else return EACCES + */ +int +nfsrv_check_exports_allow_address(mbuf_t nam) +{ + struct nfs_exportfs *nxfs; + struct nfs_export *nx; + struct nfs_export_options *nxo; + + if (nam == NULL) + return (EACCES); + + lck_rw_lock_shared(&nfsrv_export_rwlock); + LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { + LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { + /* A little optimizing by checking for the default first */ + if (nx->nx_flags & NX_DEFAULTEXPORT) + nxo = &nx->nx_defopt; + if (nxo || (nxo = nfsrv_export_lookup(nx, nam))) + goto found; + } + } +found: + lck_rw_done(&nfsrv_export_rwlock); + + return (nxo ? 
0 : EACCES); +} + struct nfs_export_options * nfsrv_export_lookup(struct nfs_export *nx, mbuf_t nam) { diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c index 5fa063cea..12daa5588 100644 --- a/bsd/nfs/nfs_syscalls.c +++ b/bsd/nfs/nfs_syscalls.c @@ -131,7 +131,10 @@ extern int nfsrv_wg_delay; extern int nfsrv_wg_delay_v3; static int nfsrv_require_resv_port = 0; -static int nfsrv_deadsock_timer_on = 0; +static time_t nfsrv_idlesock_timer_on = 0; +static int nfsrv_sock_tcp_cnt = 0; +#define NFSD_MIN_IDLE_TIMEOUT 30 +static int nfsrv_sock_idle_timeout = 3600; /* One hour */ int nfssvc_export(user_addr_t argp); int nfssvc_nfsd(void); @@ -170,7 +173,7 @@ SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LO SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, ""); SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, ""); SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, ""); - +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, ""); #endif /* NFSCLIENT */ #if NFSSERVER @@ -189,11 +192,13 @@ SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOC #endif SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, ""); SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, ""); #ifdef NFS_UC_Q_DEBUG SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, 
&nfsrv_uc_use_proxy, 0, ""); SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, ""); SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, ""); -SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)&nfsrv_uc_queue_count, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, ""); #endif #endif /* NFSSERVER */ @@ -421,14 +426,19 @@ nfsiod_continue(int error) /* grab the current contents of the queue */ TAILQ_INIT(&iodq); TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain); + /* Mark each iod request as being managed by an iod */ + TAILQ_FOREACH(req, &iodq, r_achain) { + lck_mtx_lock(&req->r_mtx); + assert(!(req->r_flags & R_IOD)); + req->r_flags |= R_IOD; + lck_mtx_unlock(&req->r_mtx); + } lck_mtx_unlock(nfsiod_mutex); /* process the queue */ TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) { TAILQ_REMOVE(&iodq, req, r_achain); - lck_mtx_lock(nfsiod_mutex); - req->r_achain.tqe_next = NFSIODCOMPLETING; - lck_mtx_unlock(nfsiod_mutex); + req->r_achain.tqe_next = NFSREQNOLIST; req->r_callback.rcb_func(req); } @@ -831,8 +841,12 @@ nfssvc_addsock(socket_t so, mbuf_t mynam) } /* Set protocol options and reserve some space (for UDP). 
*/ - if (sotype == SOCK_STREAM) + if (sotype == SOCK_STREAM) { + error = nfsrv_check_exports_allow_address(mynam); + if (error) + return (error); sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); + } if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */ @@ -898,6 +912,58 @@ nfssvc_addsock(socket_t so, mbuf_t mynam) /* add the socket to the list */ first = TAILQ_EMPTY(&nfsrv_socklist); TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain); + if (soprotocol == IPPROTO_TCP) { + nfsrv_sock_tcp_cnt++; + if (nfsrv_sock_idle_timeout < 0) + nfsrv_sock_idle_timeout = 0; + if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) + nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT; + /* + * Possibly start or stop the idle timer. We only start the idle timer when + * we have more than 2 * nfsd_thread_max connections. If the idle timer is + * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or + * the number of connections. 
+ */ + if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) { + if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) { + if (nfsrv_idlesock_timer_on) { + thread_call_cancel(nfsrv_idlesock_timer_call); + nfsrv_idlesock_timer_on = 0; + } + } else { + struct nfsrv_sock *old_slp; + struct timeval now; + time_t time_to_wait = nfsrv_sock_idle_timeout; + /* + * Get the oldest tcp socket and calculate the + * earliest time for the next idle timer to fire + * based on the possibly updated nfsrv_sock_idle_timeout + */ + TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) { + if (old_slp->ns_sotype == SOCK_STREAM) { + microuptime(&now); + time_to_wait -= now.tv_sec - old_slp->ns_timestamp; + if (time_to_wait < 1) + time_to_wait = 1; + break; + } + } + /* + * If we have a timer scheduled, but if its going to fire too late, + * turn it off. + */ + if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) { + thread_call_cancel(nfsrv_idlesock_timer_call); + nfsrv_idlesock_timer_on = 0; + } + /* Schedule the idle thread if it isn't already */ + if (!nfsrv_idlesock_timer_on) { + nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000); + nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait; + } + } + } + } sock_retain(so); /* grab a retain count on the socket */ slp->ns_so = so; @@ -909,7 +975,7 @@ nfssvc_addsock(socket_t so, mbuf_t mynam) /* mark that the socket is not in the nfsrv_sockwg list */ slp->ns_wgq.tqe_next = SLPNOLIST; - + slp->ns_flag = SLP_VALID | SLP_NEEDQ; nfsrv_wakenfsd(slp); @@ -1057,6 +1123,11 @@ nfssvc_nfsd(void) if (!nfsd->nfsd_slp && slp) { /* we found a socket to work on, grab a reference */ slp->ns_sref++; + microuptime(&now); + slp->ns_timestamp = now.tv_sec; + /* We keep the socket list in least recently used order for reaping idle sockets */ + TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); + TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain); nfsd->nfsd_slp = slp; opcnt = 0; /* and put it at the 
back of the work queue */ @@ -1376,15 +1447,7 @@ nfsrv_zapsock(struct nfsrv_sock *slp) if (so == NULL) return; - /* - * Attempt to deter future up-calls, but leave the - * up-call info in place to avoid a race with the - * networking code. - */ - socket_lock(so, 1); - so->so_rcv.sb_flags &= ~SB_UPCALL; - socket_unlock(so, 1); - + sock_setupcall(so, NULL, NULL); sock_shutdown(so, SHUT_RDWR); /* @@ -1416,9 +1479,6 @@ nfsrv_slpfree(struct nfsrv_sock *slp) slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL; slp->ns_reccnt = 0; - if (slp->ns_ua) - FREE(slp->ns_ua, M_NFSSVC); - for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { nnwp = nwp->nd_tq.le_next; LIST_REMOVE(nwp, nd_tq); @@ -1444,12 +1504,9 @@ nfsrv_slpfree(struct nfsrv_sock *slp) * Derefence a server socket structure. If it has no more references and * is no longer valid, you can throw it away. */ -void -nfsrv_slpderef(struct nfsrv_sock *slp) +static void +nfsrv_slpderef_locked(struct nfsrv_sock *slp) { - struct timeval now; - - lck_mtx_lock(nfsd_mutex); lck_rw_lock_exclusive(&slp->ns_rwlock); slp->ns_sref--; @@ -1463,7 +1520,6 @@ nfsrv_slpderef(struct nfsrv_sock *slp) slp->ns_flag &= ~SLP_QUEUED; } lck_rw_done(&slp->ns_rwlock); - lck_mtx_unlock(nfsd_mutex); return; } @@ -1476,66 +1532,88 @@ nfsrv_slpderef(struct nfsrv_sock *slp) TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); slp->ns_flag &= ~SLP_QUEUED; } + lck_rw_done(&slp->ns_rwlock); - /* - * Queue the socket up for deletion - * and start the timer to delete it - * after it has been in limbo for - * a while. 
- */ - microuptime(&now); - slp->ns_timestamp = now.tv_sec; TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); - TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain); - if (!nfsrv_deadsock_timer_on) { - nfsrv_deadsock_timer_on = 1; - nfs_interval_timer_start(nfsrv_deadsock_timer_call, - NFSRV_DEADSOCKDELAY * 1000); - } + if (slp->ns_sotype == SOCK_STREAM) + nfsrv_sock_tcp_cnt--; - lck_rw_done(&slp->ns_rwlock); /* now remove from the write gather socket list */ if (slp->ns_wgq.tqe_next != SLPNOLIST) { TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq); slp->ns_wgq.tqe_next = SLPNOLIST; } + nfsrv_slpfree(slp); +} + +void +nfsrv_slpderef(struct nfsrv_sock *slp) +{ + lck_mtx_lock(nfsd_mutex); + nfsrv_slpderef_locked(slp); lck_mtx_unlock(nfsd_mutex); } /* - * Check periodically for dead sockets pending delete. - * If a socket has been dead for more than NFSRV_DEADSOCKDELAY - * seconds then we assume it's safe to free. + * Check periodically for idle sockest if needed and + * zap them. */ void -nfsrv_deadsock_timer(__unused void *param0, __unused void *param1) +nfsrv_idlesock_timer(__unused void *param0, __unused void *param1) { - struct nfsrv_sock *slp; + struct nfsrv_sock *slp, *tslp; struct timeval now; - time_t time_to_wait; + time_t time_to_wait = nfsrv_sock_idle_timeout; microuptime(&now); lck_mtx_lock(nfsd_mutex); - while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) { - if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec) - break; - TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain); - nfsrv_slpfree(slp); - } - if (TAILQ_EMPTY(&nfsrv_deadsocklist)) { - nfsrv_deadsock_timer_on = 0; + /* Turn off the timer if we're suppose to and get out */ + if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) + nfsrv_sock_idle_timeout = 0; + if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) { + nfsrv_idlesock_timer_on = 0; lck_mtx_unlock(nfsd_mutex); return; } - time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec; - if (time_to_wait < 1) - 
time_to_wait = 1; - lck_mtx_unlock(nfsd_mutex); + TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) { + lck_rw_lock_exclusive(&slp->ns_rwlock); + /* Skip udp and referenced sockets */ + if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) { + lck_rw_done(&slp->ns_rwlock); + continue; + } + /* + * If this is the first non-referenced socket that hasn't idle out, + * use its time stamp to calculate the earlist time in the future + * to start the next invocation of the timer. Since the nfsrv_socklist + * is sorted oldest access to newest. Once we find the first one, + * we're done and break out of the loop. + */ + if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) || + nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) { + time_to_wait -= now.tv_sec - slp->ns_timestamp; + if (time_to_wait < 1) + time_to_wait = 1; + lck_rw_done(&slp->ns_rwlock); + break; + } + /* + * Bump the ref count. nfsrv_slpderef below will destroy + * the socket, since nfsrv_zapsock has closed it. + */ + slp->ns_sref++; + nfsrv_zapsock(slp); + lck_rw_done(&slp->ns_rwlock); + nfsrv_slpderef_locked(slp); + } - nfs_interval_timer_start(nfsrv_deadsock_timer_call, - time_to_wait * 1000); + /* Start ourself back up */ + nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000); + /* Remember when the next timer will fire for nfssvc_addsock. 
*/ + nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait; + lck_mtx_unlock(nfsd_mutex); } /* @@ -1554,33 +1632,14 @@ nfsrv_cleanup(void) microuptime(&now); for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) { nslp = TAILQ_NEXT(slp, ns_chain); - if (slp->ns_flag & SLP_VALID) { - lck_rw_lock_exclusive(&slp->ns_rwlock); + lck_rw_lock_exclusive(&slp->ns_rwlock); + slp->ns_sref++; + if (slp->ns_flag & SLP_VALID) nfsrv_zapsock(slp); - lck_rw_done(&slp->ns_rwlock); - } - if (slp->ns_flag & SLP_QUEUED) { - if (slp->ns_flag & SLP_WAITQ) - TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); - else - TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); - slp->ns_flag &= ~SLP_QUEUED; - } - if (slp->ns_wgq.tqe_next != SLPNOLIST) { - TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq); - slp->ns_wgq.tqe_next = SLPNOLIST; - } - /* queue the socket up for deletion */ - slp->ns_timestamp = now.tv_sec; - TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); - TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain); - if (!nfsrv_deadsock_timer_on) { - nfsrv_deadsock_timer_on = 1; - nfs_interval_timer_start(nfsrv_deadsock_timer_call, - NFSRV_DEADSOCKDELAY * 1000); - } + lck_rw_done(&slp->ns_rwlock); + nfsrv_slpderef_locked(slp); } - +# #if CONFIG_FSE /* * Flush pending file write fsevents diff --git a/bsd/nfs/nfs_upcall.c b/bsd/nfs/nfs_upcall.c index 7d6f85f53..bc71aa0a9 100644 --- a/bsd/nfs/nfs_upcall.c +++ b/bsd/nfs/nfs_upcall.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 Apple Inc. All rights reserved. + * Copyright (c) 2011-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -164,6 +164,8 @@ nfsrv_uc_dequeue(struct nfsrv_sock *slp) OSDecrementAtomic(&nfsrv_uc_queue_count); #endif } + FREE(slp->ns_ua, M_TEMP); + slp->ns_ua = NULL; lck_mtx_unlock(myqueue->ucq_lock); } @@ -315,9 +317,9 @@ nfsrv_uc_proxy(socket_t so, void *arg, int waitflag) lck_mtx_lock(myqueue->ucq_lock); DPRINT("nfsrv_uc_proxy called for %p (%p)\n", uap, uap->nua_slp); DPRINT("\tUp-call queued on %d for wakeup of %p\n", qi, myqueue); - if (uap->nua_flags & NFS_UC_QUEUED) { + if (uap == NULL || uap->nua_flags & NFS_UC_QUEUED) { lck_mtx_unlock(myqueue->ucq_lock); - return; /* Already queued */ + return; /* Already queued or freed */ } uap->nua_so = so; @@ -366,7 +368,7 @@ nfsrv_uc_addsock(struct nfsrv_sock *slp, int start) * generate up-calls. */ if (nfsrv_uc_thread_count) { - MALLOC(arg, struct nfsrv_uc_arg *, sizeof (struct nfsrv_uc_arg), M_NFSSVC, M_WAITOK | M_ZERO); + MALLOC(arg, struct nfsrv_uc_arg *, sizeof (struct nfsrv_uc_arg), M_TEMP, M_WAITOK | M_ZERO); if (arg == NULL) goto direct; diff --git a/bsd/nfs/nfs_vfsops.c b/bsd/nfs/nfs_vfsops.c index 49d487f53..90e20e774 100644 --- a/bsd/nfs/nfs_vfsops.c +++ b/bsd/nfs/nfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -455,7 +455,7 @@ nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx) // PUTFH + GETATTR numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "statfs", numops); + nfsm_chain_add_compound_header(error, &nmreq, "statfs", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); @@ -1986,7 +1986,7 @@ nfs4_mount_update_path_with_symlink(struct nfsmount *nmp, struct nfs_fs_path *nf // PUTFH, READLINK numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 12 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "readlink", numops); + nfsm_chain_add_compound_header(error, &nmreq, "readlink", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, NFS_VER4, fhp->fh_data, fhp->fh_len); @@ -2180,7 +2180,7 @@ nfs4_mount( NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, NULL, 0); numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 9 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "mount", numops); + nfsm_chain_add_compound_header(error, &nmreq, "mount", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTROOTFH); numops--; @@ -2240,7 +2240,7 @@ nfs4_mount( NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, isdotdot ? NULL : fspath.np_components[comp], 0); numops = 4; nfsm_chain_build_alloc_init(error, &nmreq, 18 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "mount", numops); + nfsm_chain_add_compound_header(error, &nmreq, "mount", nmp->nm_minor_vers, numops); numops--; if (dirfh.fh_len) { nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); @@ -2403,7 +2403,7 @@ nfs4_mount( /* get attrs for mount point root */ numops = NMFLAG(nmp, NONAMEDATTR) ? 
2 : 3; // PUTFH + GETATTR + OPENATTR nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "mount", numops); + nfsm_chain_add_compound_header(error, &nmreq, "mount", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, NFS_VER4, dirfh.fh_data, dirfh.fh_len); @@ -2631,6 +2631,21 @@ nfs_mount_connect(struct nfsmount *nmp) return (error); } +/* Table of maximum minor version for a given version */ +uint32_t maxminorverstab[] = { + 0, /* Version 0 (does not exist) */ + 0, /* Version 1 (does not exist) */ + 0, /* Version 2 */ + 0, /* Version 3 */ + 0, /* Version 4 */ +}; + +#define NFS_MAX_SUPPORTED_VERSION ((long)(sizeof (maxminorverstab) / sizeof (uint32_t) - 1)) +#define NFS_MAX_SUPPORTED_MINOR_VERSION(v) ((long)(maxminorverstab[(v)])) + +#define DEFAULT_NFS_MIN_VERS VER2PVER(2, 0) +#define DEFAULT_NFS_MAX_VERS VER2PVER(3, 0) + /* * Common code to mount an NFS file system. 
*/ @@ -2646,7 +2661,7 @@ mountnfs( int error = 0; struct vfsstatfs *sbp; struct xdrbuf xb; - uint32_t i, val, vers = 0, minorvers, maxio, iosize, len; + uint32_t i, val, maxio, iosize, len; uint32_t *mattrs; uint32_t *mflags_mask; uint32_t *mflags; @@ -2675,7 +2690,6 @@ mountnfs( TAILQ_INIT(&nmp->nm_resendq); TAILQ_INIT(&nmp->nm_iodq); TAILQ_INIT(&nmp->nm_gsscl); - TAILQ_INIT(&nmp->nm_gssnccl); LIST_INIT(&nmp->nm_monlist); vfs_setfsprivate(mp, nmp); vfs_getnewfsid(mp); @@ -2689,6 +2703,8 @@ mountnfs( /* set up defaults */ nmp->nm_ref = 0; nmp->nm_vers = 0; + nmp->nm_min_vers = DEFAULT_NFS_MIN_VERS; + nmp->nm_max_vers = DEFAULT_NFS_MAX_VERS; nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; nmp->nm_sotype = 0; @@ -2753,37 +2769,36 @@ mountnfs( } } if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION)) { - xb_get_32(error, &xb, vers); + /* Can't specify a single version and a range */ + if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION_RANGE)) + error = EINVAL; + xb_get_32(error, &xb, nmp->nm_vers); + if (nmp->nm_vers > NFS_MAX_SUPPORTED_VERSION || + nmp->nm_vers < NFS_VER2) + error = EINVAL; if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION)) - xb_get_32(error, &xb, minorvers); + xb_get_32(error, &xb, nmp->nm_minor_vers); else - minorvers = 0; - nfsmerr_if(error); - switch (vers) { - case 2: - nmp->nm_vers = NFS_VER2; - break; - case 3: - nmp->nm_vers = NFS_VER3; - break; - case 4: - switch (minorvers) { - case 0: - nmp->nm_vers = NFS_VER4; - break; - default: - error = EINVAL; - } - break; - default: + nmp->nm_minor_vers = maxminorverstab[nmp->nm_vers]; + if (nmp->nm_minor_vers > maxminorverstab[nmp->nm_vers]) error = EINVAL; - } - } + nmp->nm_max_vers = nmp->nm_min_vers = + VER2PVER(nmp->nm_vers, nmp->nm_minor_vers); + } if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION)) { - /* should have also gotten NFS version (and already gotten minorvers) */ + /* should have also gotten NFS version (and already gotten minor version) */ if 
(!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION)) error = EINVAL; } + if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION_RANGE)) { + xb_get_32(error, &xb, nmp->nm_min_vers); + xb_get_32(error, &xb, nmp->nm_max_vers); + if ((nmp->nm_min_vers > nmp->nm_max_vers) || + (PVER2MAJOR(nmp->nm_max_vers) > NFS_MAX_SUPPORTED_VERSION) || + (PVER2MINOR(nmp->nm_min_vers) > maxminorverstab[PVER2MAJOR(nmp->nm_min_vers)]) || + (PVER2MINOR(nmp->nm_max_vers) > maxminorverstab[PVER2MAJOR(nmp->nm_max_vers)])) + error = EINVAL; + } if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE)) xb_get_32(error, &xb, nmp->nm_rsize); if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE)) @@ -3461,6 +3476,10 @@ nfs_mirror_mount_domount(vnode_t dvp, vnode_t vp, vfs_context_t ctx) xb_copy_32(error, &xb, &xbnew, val); if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION)) xb_copy_32(error, &xb, &xbnew, val); + if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION_RANGE)) { + xb_copy_32(error, &xb, &xbnew, val); + xb_copy_32(error, &xb, &xbnew, val); + } if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE)) xb_copy_32(error, &xb, &xbnew, val); if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE)) @@ -4286,7 +4305,7 @@ void nfs_mount_zombie(struct nfsmount *nmp, int nm_state_flags) { struct nfsreq *req, *treq; - struct nfs_reqqhead iodq; + struct nfs_reqqhead iodq, resendq; struct timespec ts = { 1, 0 }; struct nfs_open_owner *noop, *nextnoop; nfsnode_t np; @@ -4366,39 +4385,83 @@ nfs_mount_zombie(struct nfsmount *nmp, int nm_state_flags) } /* - * Loop through outstanding request list and remove dangling - * references to defunct nfsmount struct + * Be sure all requests for this mount are completed + * and removed from the resend queue. 
+ */ + TAILQ_INIT(&resendq); + lck_mtx_lock(nfs_request_mutex); + TAILQ_FOREACH(req, &nfs_reqq, r_chain) { + if (req->r_nmp == nmp) { + lck_mtx_lock(&req->r_mtx); + if (!req->r_error && req->r_nmrep.nmc_mhead == NULL) + req->r_error = EIO; + if (req->r_flags & R_RESENDQ) { + lck_mtx_lock(&nmp->nm_lock); + req->r_flags &= ~R_RESENDQ; + if (req->r_rchain.tqe_next != NFSREQNOLIST) { + TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain); + /* + * Queue up the request so that we can unreference them + * with out holding nfs_request_mutex + */ + TAILQ_INSERT_TAIL(&resendq, req, r_rchain); + } + lck_mtx_unlock(&nmp->nm_lock); + } + wakeup(req); + lck_mtx_unlock(&req->r_mtx); + } + } + lck_mtx_unlock(nfs_request_mutex); + + /* Since we've drop the request mutex we can now safely unreference the request */ + TAILQ_FOREACH_SAFE(req, &resendq, r_rchain, treq) { + TAILQ_REMOVE(&resendq, req, r_rchain); + nfs_request_rele(req); + } + + /* + * Now handle and outstanding async requests. We need to walk the + * request queue again this time with the nfsiod_mutex held. No + * other iods can grab our requests until we've put them on our own + * local iod queue for processing. */ TAILQ_INIT(&iodq); lck_mtx_lock(nfs_request_mutex); + lck_mtx_lock(nfsiod_mutex); TAILQ_FOREACH(req, &nfs_reqq, r_chain) { if (req->r_nmp == nmp) { - if (req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT)) { - /* async I/O RPC needs to be finished */ - lck_mtx_lock(nfsiod_mutex); + lck_mtx_lock(&req->r_mtx); + if (req->r_callback.rcb_func + && !(req->r_flags & R_WAITSENT) && !(req->r_flags & R_IOD)) { + /* + * Since R_IOD is not set then we need to handle it. If + * we're not on a list add it to our iod queue. Otherwise + * we must already be on nm_iodq which is added to our + * local queue below. + * %%% We should really keep a back pointer to our iod queue + * that we're on. 
+ */ + req->r_flags |= R_IOD; if (req->r_achain.tqe_next == NFSREQNOLIST) { TAILQ_INSERT_TAIL(&iodq, req, r_achain); } - lck_mtx_unlock(nfsiod_mutex); } - wakeup(req); + lck_mtx_unlock(&req->r_mtx); } } - lck_mtx_unlock(nfs_request_mutex); /* finish any async I/O RPCs queued up */ - lck_mtx_lock(nfsiod_mutex); if (nmp->nm_iodlink.tqe_next != NFSNOLIST) TAILQ_REMOVE(&nfsiodmounts, nmp, nm_iodlink); TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain); lck_mtx_unlock(nfsiod_mutex); + lck_mtx_unlock(nfs_request_mutex); + TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) { TAILQ_REMOVE(&iodq, req, r_achain); - lck_mtx_lock(nfsiod_mutex); - req->r_achain.tqe_next = NFSIODCOMPLETING; - lck_mtx_unlock(nfsiod_mutex); + req->r_achain.tqe_next = NFSREQNOLIST; lck_mtx_lock(&req->r_mtx); - req->r_error = ENXIO; docallback = !(req->r_flags & R_WAITSENT); lck_mtx_unlock(&req->r_mtx); if (docallback) @@ -4697,7 +4760,7 @@ nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struc // PUTFH + GETATTR numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "quota", numops); + nfsm_chain_add_compound_header(error, &nmreq, "quota", nmp->nm_minor_vers, numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); @@ -5069,7 +5132,7 @@ nfs_mountinfo_assemble(struct nfsmount *nmp, struct xdrbuf *xb) xb_add_bitmap(error, &xbinfo, mflags, NFS_MFLAG_BITMAP_LEN); xb_add_32(error, &xbinfo, nmp->nm_vers); /* NFS_VERSION */ if (nmp->nm_vers >= NFS_VER4) - xb_add_32(error, &xbinfo, 0); /* NFS_MINOR_VERSION */ + xb_add_32(error, &xbinfo, nmp->nm_minor_vers); /* NFS_MINOR_VERSION */ xb_add_32(error, &xbinfo, nmp->nm_rsize); /* READ_SIZE */ xb_add_32(error, &xbinfo, nmp->nm_wsize); /* WRITE_SIZE */ xb_add_32(error, &xbinfo, nmp->nm_readdirsize); /* READDIR_SIZE */ diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c index 4f155940f..ae1906aed 
100644 --- a/bsd/nfs/nfs_vnops.c +++ b/bsd/nfs/nfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -636,7 +636,10 @@ nfs_vnop_access( * Does our cached result allow us to give a definite yes to * this request? */ - uid = kauth_cred_getuid(vfs_context_ucred(ctx)); + if (auth_is_kerberized(np->n_auth) || auth_is_kerberized(nmp->nm_auth)) + uid = nfs_cred_getasid2uid(vfs_context_ucred(ctx)); + else + uid = kauth_cred_getuid(vfs_context_ucred(ctx)); slot = nfs_node_access_slot(np, uid, 0); dorpc = 1; if (access == 0) { @@ -6065,6 +6068,8 @@ nfs3_lookup_rpc_async_finish( struct nfsm_chain nmrep; nmp = NFSTONMP(dnp); + if (nmp == NULL) + return (ENXIO); nfsvers = nmp->nm_vers; nfsm_chain_null(&nmrep); @@ -6906,6 +6911,8 @@ nfs_vnop_ioctl( vfs_context_t ctx = ap->a_context; vnode_t vp = ap->a_vp; struct nfsmount *mp = VTONMP(vp); + struct user_nfs_gss_principal gprinc; + uint32_t len; int error = ENOTTY; if (mp == NULL) @@ -6919,8 +6926,78 @@ nfs_vnop_ioctl( error = nfs_flush(VTONFS(vp), MNT_WAIT, vfs_context_thread(ctx), 0); break; case NFS_FSCTL_DESTROY_CRED: + if (!auth_is_kerberized(mp->nm_auth)) + return (ENOTSUP); error = nfs_gss_clnt_ctx_remove(mp, vfs_context_ucred(ctx)); break; + case NFS_FSCTL_SET_CRED: + if (!auth_is_kerberized(mp->nm_auth)) + return (ENOTSUP); + NFS_DBG(NFS_FAC_GSS, 7, "Enter NFS_FSCTL_SET_CRED (proc %d) data = %p\n", vfs_context_is64bit(ctx), (void *)ap->a_data); + if (vfs_context_is64bit(ctx)) { + gprinc = *(struct user_nfs_gss_principal *)ap->a_data; + } else { + struct nfs_gss_principal *tp; + tp = (struct nfs_gss_principal *)ap->a_data; + gprinc.princlen = tp->princlen; + gprinc.nametype = tp->nametype; + gprinc.principal = CAST_USER_ADDR_T(tp->principal); + } + if (gprinc.princlen > MAXPATHLEN) + return (EINVAL); + NFS_DBG(NFS_FAC_GSS, 7, "Received principal length %d name type = %d\n", 
gprinc.princlen, gprinc.nametype); + uint8_t *p; + MALLOC(p, uint8_t *, gprinc.princlen+1, M_TEMP, M_WAITOK|M_ZERO); + if (p == NULL) + return (ENOMEM); + error = copyin(gprinc.principal, p, gprinc.princlen); + if (error) { + NFS_DBG(NFS_FAC_GSS, 7, "NFS_FSCTL_SET_CRED could not copy in princiapl data of len %d: %d\n", + gprinc.princlen, error); + FREE(p, M_TEMP); + return (error); + } + NFS_DBG(NFS_FAC_GSS, 7, "Seting credential to principal %s\n", p); + error = nfs_gss_clnt_ctx_set_principal(mp, ctx, p, gprinc.princlen, gprinc.nametype); + NFS_DBG(NFS_FAC_GSS, 7, "Seting credential to principal %s returned %d\n", p, error); + FREE(p, M_TEMP); + break; + case NFS_FSCTL_GET_CRED: + if (!auth_is_kerberized(mp->nm_auth)) + return (ENOTSUP); + error = nfs_gss_clnt_ctx_get_principal(mp, ctx, &gprinc); + if (error) + break; + if (vfs_context_is64bit(ctx)) { + struct user_nfs_gss_principal *upp = (struct user_nfs_gss_principal *)ap->a_data; + len = upp->princlen; + if (gprinc.princlen < len) + len = gprinc.princlen; + upp->princlen = gprinc.princlen; + upp->nametype = gprinc.nametype; + upp->flags = gprinc.flags; + if (gprinc.principal) + error = copyout((void *)gprinc.principal, upp->principal, len); + else + upp->principal = USER_ADDR_NULL; + } else { + struct nfs_gss_principal *u32pp = (struct nfs_gss_principal *)ap->a_data; + len = u32pp->princlen; + if (gprinc.princlen < len) + len = gprinc.princlen; + u32pp->princlen = gprinc.princlen; + u32pp->nametype = gprinc.nametype; + u32pp->flags = gprinc.flags; + if (gprinc.principal) + error = copyout((void *)gprinc.principal, u32pp->principal, len); + else + u32pp->principal = (user32_addr_t)0; + } + if (error) { + NFS_DBG(NFS_FAC_GSS, 7, "NFS_FSCTL_GET_CRED could not copy out princiapl data of len %d: %d\n", + gprinc.princlen, error); + } + FREE(gprinc.principal, M_TEMP); } return (error); diff --git a/bsd/nfs/nfsm_subs.h b/bsd/nfs/nfsm_subs.h index 434d4f57a..69d0f7865 100644 --- a/bsd/nfs/nfsm_subs.h +++ 
b/bsd/nfs/nfsm_subs.h @@ -433,7 +433,7 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *); /* add NFSv4 COMPOUND header */ #define NFS4_TAG_LENGTH 12 -#define nfsm_chain_add_compound_header(E, NMC, TAG, NUMOPS) \ +#define nfsm_chain_add_compound_header(E, NMC, TAG, MINOR, NUMOPS) \ do { \ if ((TAG) && strlen(TAG)) { \ /* put tags into a fixed-length space-padded field */ \ @@ -444,7 +444,7 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *); } else { \ nfsm_chain_add_32((E), (NMC), 0); \ } \ - nfsm_chain_add_32((E), (NMC), 0); /*minorversion*/ \ + nfsm_chain_add_32((E), (NMC), (MINOR)); /*minorversion*/ \ nfsm_chain_add_32((E), (NMC), (NUMOPS)); \ } while (0) diff --git a/bsd/nfs/nfsmount.h b/bsd/nfs/nfsmount.h index 4d28a9774..7721e6336 100644 --- a/bsd/nfs/nfsmount.h +++ b/bsd/nfs/nfsmount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -172,6 +172,8 @@ struct nfs_socket { uint32_t nso_protocol; /* RPC protocol */ uint32_t nso_version; /* RPC protocol version */ uint32_t nso_pingxid; /* RPC XID of NULL ping request */ + uint32_t nso_nfs_min_vers; /* minimum nfs version for connecting sockets */ + uint32_t nso_nfs_max_vers; /* maximum nfs version for connecting sockets */ int nso_error; /* saved error/status */ struct nfs_rpc_record_state nso_rrs; /* RPC record parsing state (TCP) */ }; @@ -206,6 +208,7 @@ struct nfs_socket_search { /* nss_flags */ #define NSS_VERBOSE 0x00000001 /* OK to log info about socket search */ #define NSS_WARNED 0x00000002 /* logged warning about socket search taking a while */ +#define NSS_FALLBACK2PMAP 0x00000004 /* Try V4 on NFS_PORT first, if that fails fall back to portmapper */ /* * function table for calling version-specific NFS functions @@ -261,6 +264,9 @@ struct nfsmount { int nm_ref; /* Reference count on this mount */ int nm_state; /* Internal state flags */ int 
nm_vers; /* NFS version */ + uint32_t nm_minor_vers; /* minor version of above */ + uint32_t nm_min_vers; /* minimum packed version to try */ + uint32_t nm_max_vers; /* maximum packed version to try */ struct nfs_funcs *nm_funcs; /* version-specific functions */ kauth_cred_t nm_mcred; /* credential used for the mount */ mount_t nm_mountp; /* VFS structure for this filesystem */ @@ -268,7 +274,6 @@ struct nfsmount { struct nfs_fs_locations nm_locations; /* file system locations */ uint32_t nm_numgrps; /* Max. size of groupslist */ TAILQ_HEAD(, nfs_gss_clnt_ctx) nm_gsscl; /* GSS user contexts */ - TAILQ_HEAD(, nfs_gss_clnt_ctx) nm_gssnccl; /* GSS neg cache contexts */ uint32_t nm_ncentries; /* GSS expired negative cache entries */ int nm_timeo; /* Init timer for NFSMNT_DUMBTIMR */ int nm_retry; /* Max retries */ diff --git a/bsd/pgo/profile_runtime.c b/bsd/pgo/profile_runtime.c new file mode 100644 index 000000000..ac308b681 --- /dev/null +++ b/bsd/pgo/profile_runtime.c @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include + + +/* + * This tells compiler_rt not to include userspace-specific stuff writing + * profile data to a file. + */ +int __llvm_profile_runtime = 0; + + +#ifdef PROFILE + +/* These __llvm functions are defined in InstrProfiling.h in compiler_rt. That + * is a internal header, so we need to re-prototype them here. */ + +uint64_t __llvm_profile_get_size_for_buffer(void); +int __llvm_profile_write_buffer(char *Buffer); +uint64_t __llvm_profile_get_size_for_buffer_internal(const char *DataBegin, + const char *DataEnd, + const char *CountersBegin, + const char *CountersEnd , + const char *NamesBegin, + const char *NamesEnd); +int __llvm_profile_write_buffer_internal(char *Buffer, + const char *DataBegin, + const char *DataEnd, + const char *CountersBegin, + const char *CountersEnd , + const char *NamesBegin, + const char *NamesEnd); + +extern char __pgo_hib_DataStart __asm("section$start$__HIB$__llvm_prf_data"); +extern char __pgo_hib_DataEnd __asm("section$end$__HIB$__llvm_prf_data"); +extern char __pgo_hib_NamesStart __asm("section$start$__HIB$__llvm_prf_names"); +extern char __pgo_hib_NamesEnd __asm("section$end$__HIB$__llvm_prf_names"); +extern char __pgo_hib_CountersStart __asm("section$start$__HIB$__llvm_prf_cnts"); +extern char __pgo_hib_CountersEnd __asm("section$end$__HIB$__llvm_prf_cnts"); + + +static uint64_t get_size_for_buffer(int flags) +{ + if (flags & PGO_HIB) { + 
return __llvm_profile_get_size_for_buffer_internal( + &__pgo_hib_DataStart, &__pgo_hib_DataEnd, + &__pgo_hib_CountersStart, &__pgo_hib_CountersEnd, + &__pgo_hib_NamesStart, &__pgo_hib_NamesEnd); + } else { + return __llvm_profile_get_size_for_buffer(); + } +} + + +static int write_buffer(int flags, char *buffer) +{ + if (flags & PGO_HIB) { + return __llvm_profile_write_buffer_internal( + buffer, + &__pgo_hib_DataStart, &__pgo_hib_DataEnd, + &__pgo_hib_CountersStart, &__pgo_hib_CountersEnd, + &__pgo_hib_NamesStart, &__pgo_hib_NamesEnd); + } else { + return __llvm_profile_write_buffer(buffer); + } +} + + +#endif + + + +/* + * returns: + * EPERM unless you are root + * EINVAL for invalid args. + * ENOSYS for not implemented + * ERANGE for integer overflow + * ENOENT if kext not found + * ENOTSUP kext does not support PGO + * EIO llvm returned an error. shouldn't ever happen. + */ + +int grab_pgo_data(struct proc *p, + struct grab_pgo_data_args *uap, + register_t *retval) +{ + char *buffer = NULL; + int err = 0; + + (void) p; + + if (!kauth_cred_issuser(kauth_cred_get())) { + err = EPERM; + goto out; + } + +#if CONFIG_MACF + err = mac_system_check_info(kauth_cred_get(), "kern.profiling_data"); + if (err) { + goto out; + } +#endif + + if ( uap->flags & ~PGO_ALL_FLAGS || + uap->size < 0 || + (uap->size > 0 && uap->buffer == 0)) + { + err = EINVAL; + goto out; + } + + *retval = 0; + + if (uap->uuid) { + uuid_t uuid; + err = copyin(uap->uuid, &uuid, sizeof(uuid)); + if (err) { + goto out; + } + + if (uap->buffer == 0 && uap->size == 0) { + uint64_t size64; + + if (uap->flags & PGO_WAIT_FOR_UNLOAD) { + err = EINVAL; + goto out; + } + + err = OSKextGrabPgoData(uuid, &size64, NULL, 0, 0, !!(uap->flags & PGO_METADATA)); + if (err) { + goto out; + } + + ssize_t size = size64; + if ( ((uint64_t) size) != size64 || + size < 0 ) + { + err = ERANGE; + goto out; + } + + *retval = size; + err = 0; + goto out; + + } else if (!uap->buffer || uap->size <= 0) { + + err = EINVAL; + goto 
out; + + } else { + + MALLOC(buffer, char *, uap->size, M_TEMP, M_WAITOK); + if (!buffer) { + err = ENOMEM; + goto out; + } + + uint64_t size64; + + err = OSKextGrabPgoData(uuid, &size64, buffer, uap->size, + !!(uap->flags & PGO_WAIT_FOR_UNLOAD), + !!(uap->flags & PGO_METADATA)); + if (err) { + goto out; + } + + ssize_t size = size64; + if ( ((uint64_t) size) != size64 || + size < 0 ) + { + err = ERANGE; + goto out; + } + + err = copyout(buffer, uap->buffer, size); + if (err) { + goto out; + } + + *retval = size; + goto out; + } + } + + +#ifdef PROFILE + + uint64_t size64 = get_size_for_buffer(uap->flags); + ssize_t size = size64; + + if (uap->flags & (PGO_WAIT_FOR_UNLOAD | PGO_METADATA)) { + err = EINVAL; + goto out; + } + + if ( ((uint64_t) size) != size64 || + size < 0 ) + { + err = ERANGE; + goto out; + } + + + if (uap->buffer == 0 && uap->size == 0) { + *retval = size; + err = 0; + goto out; + } else if (uap->size < size) { + err = EINVAL; + goto out; + } else { + MALLOC(buffer, char *, size, M_TEMP, M_WAITOK); + if (!buffer) { + err = ENOMEM; + goto out; + } + + err = write_buffer(uap->flags, buffer); + if (err) + { + err = EIO; + goto out; + } + + err = copyout(buffer, uap->buffer, size); + if (err) { + goto out; + } + + *retval = size; + goto out; + } + +#else + + *retval = -1; + err = ENOSYS; + goto out; + +#endif + +out: + if (buffer) { + FREE(buffer, M_TEMP); + } + if (err) { + *retval = -1; + } + return err; +} diff --git a/bsd/security/audit/audit.c b/bsd/security/audit/audit.c index 8fffb654b..c5f18ebe4 100644 --- a/bsd/security/audit/audit.c +++ b/bsd/security/audit/audit.c @@ -76,7 +76,6 @@ #include #include #include -#include #include #include diff --git a/bsd/security/audit/audit_arg.c b/bsd/security/audit/audit_arg.c index 4b16e76b6..207337909 100644 --- a/bsd/security/audit/audit_arg.c +++ b/bsd/security/audit/audit_arg.c @@ -75,7 +75,6 @@ #include #include #include -#include #include #if CONFIG_MACF diff --git a/bsd/security/audit/audit_bsm.c 
b/bsd/security/audit/audit_bsm.c index d63c131ea..7ca2771d4 100644 --- a/bsd/security/audit/audit_bsm.c +++ b/bsd/security/audit/audit_bsm.c @@ -1911,8 +1911,6 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) case AUE_MAC_GET_PROC: case AUE_MAC_SET_PROC: - case AUE_MAC_GET_LCTX: - case AUE_MAC_SET_LCTX: PROCESS_MAC_TOKENS; break; #endif diff --git a/bsd/security/audit/audit_bsm_fcntl.c b/bsd/security/audit/audit_bsm_fcntl.c index c741986bf..e62a0b9e3 100644 --- a/bsd/security/audit/audit_bsm_fcntl.c +++ b/bsd/security/audit/audit_bsm_fcntl.c @@ -217,6 +217,9 @@ static const bsm_fcntl_cmd_t bsm_fcntl_cmdtab[] = { #ifdef F_MARKDEPENDENCY { BSM_F_MARKDEPENDENCY, F_MARKDEPENDENCY }, #endif +#ifdef F_BARRIERFSYNC + { BSM_F_BARRIERFSYNC, F_BARRIERFSYNC }, +#endif #ifdef FCNTL_FS_SPECIFIC_BASE { BSM_F_FS_SPECIFIC_0, FCNTL_FS_SPECIFIC_BASE}, diff --git a/bsd/security/audit/audit_mac.c b/bsd/security/audit/audit_mac.c index f1f065561..f80c948ba 100644 --- a/bsd/security/audit/audit_mac.c +++ b/bsd/security/audit/audit_mac.c @@ -57,7 +57,6 @@ #include #include #include -#include #include #if CONFIG_AUDIT diff --git a/bsd/security/audit/audit_syscalls.c b/bsd/security/audit/audit_syscalls.c index 67c4bab7a..2a46a579d 100644 --- a/bsd/security/audit/audit_syscalls.c +++ b/bsd/security/audit/audit_syscalls.c @@ -73,7 +73,6 @@ #include #include #include -#include #include #if CONFIG_MACF diff --git a/bsd/security/audit/audit_worker.c b/bsd/security/audit/audit_worker.c index 9a7a99281..aa44fa446 100644 --- a/bsd/security/audit/audit_worker.c +++ b/bsd/security/audit/audit_worker.c @@ -70,7 +70,6 @@ #include #include #include -#include #include diff --git a/bsd/sys/Makefile b/bsd/sys/Makefile index 30a5166b8..4b4072516 100644 --- a/bsd/sys/Makefile +++ b/bsd/sys/Makefile @@ -22,8 +22,8 @@ DATAFILES = \ dir.h dirent.h disk.h dkstat.h dtrace.h dtrace_glue.h dtrace_impl.h \ errno.h ev.h event.h fasttrap.h fasttrap_isa.h fcntl.h file.h filedesc.h filio.h gmon.h \ 
ioccom.h ioctl.h \ - ioctl_compat.h ipc.h kernel.h kernel_types.h kern_event.h loadable_fs.h lock.h lockf.h \ - kauth.h kdebug.h kern_control.h lctx.h lockstat.h malloc.h \ + ioctl_compat.h ipc.h kernel.h kernel_types.h kern_event.h lctx.h loadable_fs.h lock.h lockf.h \ + kauth.h kdebug.h kern_control.h lockstat.h malloc.h \ mbuf.h mman.h mount.h msg.h msgbuf.h netport.h param.h paths.h pipe.h poll.h \ proc.h proc_info.h ptrace.h queue.h quota.h random.h reboot.h resource.h resourcevar.h \ sbuf.h posix_sem.h posix_shm.h sdt.h \ @@ -39,30 +39,58 @@ DATAFILES = \ # Installs header file for Apple internal use in user level - # $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders PRIVATE_DATAFILES = \ + attr.h \ + cdefs.h \ coalition.h \ codesign.h \ content_protection.h \ csr.h \ decmpfs.h \ + disk.h \ disklabel.h \ + domain.h \ + event.h \ + fcntl.h \ fileport.h \ fsctl.h \ + fsevents.h \ fsgetpath.h \ fslog.h \ guarded.h \ imgsrc.h \ ipcs.h \ kas_info.h \ + kdebug.h \ + kern_control.h \ + kern_event.h \ kern_memorystatus.h \ kern_overrides.h \ + mbuf.h \ + mman.h \ + priv.h \ + proc.h \ + proc_info.h \ + proc_uuid_policy.h \ + process_policy.h \ + resource.h \ sfi.h \ shm_internal.h \ + socket.h \ + socketvar.h \ + sockio.h \ + spawn.h \ spawn_internal.h \ + stackshot.h \ + sys_domain.h \ tree.h \ + unpcb.h \ ux_exception.h \ + work_interval.h \ process_policy.h \ proc_uuid_policy.h \ - priv.h + priv.h \ + pgo.h \ + memory_maintenance.h # Installs header file for kernel extensions - # $(DSTROOT)/System/Library/Frameworks/Kernel.framework/Headers @@ -73,7 +101,7 @@ KERNELFILES = \ dir.h dirent.h disk.h disklabel.h dkstat.h \ errno.h ev.h event.h fcntl.h file.h filio.h \ ioccom.h ioctl.h ipc.h \ - ioctl_compat.h kernel.h kernel_types.h kern_event.h lctx.h lock.h lockf.h \ + ioctl_compat.h kernel.h kernel_types.h kern_event.h lock.h lockf.h \ kauth.h kdebug.h md5.h kern_control.h imgact.h malloc.h namei.h \ mman.h mbuf.h mount.h netport.h param.h 
paths.h \ proc.h queue.h random.h resource.h \ @@ -106,6 +134,7 @@ PRIVATE_KERNELFILES = \ fslog.h \ kasl.h \ kern_memorystatus.h \ + kpi_private.h \ mach_swapon.h \ msgbuf.h \ eventvar.h \ @@ -115,16 +144,19 @@ PRIVATE_KERNELFILES = \ sem_internal.h \ shm_internal.h \ signalvar.h \ + stackshot.h \ tty.h ttychars.h \ ttydefaults.h ttydev.h \ user.h \ vfs_context.h \ vmmeter.h \ spawn_internal.h \ - priv.h + priv.h \ + pgo.h \ + memory_maintenance.h -# /System/Library/Frameworks/System.framework/Headers and /usr/include +# /usr/include INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_GEN_LIST = syscall.h _posix_availability.h _symbol_aliasing.h @@ -133,14 +165,14 @@ INSTALL_MI_DIR = sys EXPORT_MI_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} linker_set.h bsdtask_info.h pthread_internal.h filedesc.h pipe.h resourcevar.h semaphore.h \ vnode_internal.h proc_internal.h file_internal.h mount_internal.h \ - uio_internal.h tree.h munge.h kern_tests.h + uio_internal.h tree.h munge.h EXPORT_MI_GEN_LIST = syscall.h sysproto.h kdebugevents.h EXPORT_MI_DIR = sys # /System/Library/Frameworks/System.framework/PrivateHeaders -INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} # /System/Library/Frameworks/Kernel.framework/PrivateHeaders INSTALL_KF_MI_LCL_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} @@ -169,8 +201,8 @@ sysproto.h: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS) $(OBJROOT)/cscop $(_v)$(MAKESYSCALLS) $< proto > /dev/null kdebugevents.h: $(SRCROOT)/bsd/kern/trace.codes $(MAKEKDEBUGEVENTS) $(OBJROOT)/cscope.genhdrs - @echo "Generating bsd/kern/$@ from $<"; - @echo "$(OBJPATH)/bsd/kern/$@" > $(OBJROOT)/cscope.genhdrs/$@.path + @echo "Generating bsd/sys/$@ from $<"; + @echo "$(OBJPATH)/bsd/sys/$@" > $(OBJROOT)/cscope.genhdrs/$@.path $(_v)$(MAKEKDEBUGEVENTS) $< > "$(OBJPATH)/bsd/sys/$@" MAKE_POSIX_AVAILABILITY = $(SRCROOT)/bsd/sys/make_posix_availability.sh diff --git a/bsd/sys/_types/Makefile b/bsd/sys/_types/Makefile index 
318b9dce8..1cc149aa8 100644 --- a/bsd/sys/_types/Makefile +++ b/bsd/sys/_types/Makefile @@ -75,6 +75,7 @@ DATAFILES = \ _timespec.h \ _timeval.h \ _timeval32.h \ + _timeval64.h \ _ucontext.h \ _ucontext64.h \ _uid_t.h \ @@ -126,7 +127,7 @@ EXPORT_MI_GEN_LIST = EXPORT_MI_DIR = sys/_types # /System/Library/Frameworks/System.framework/PrivateHeaders -INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} # /System/Library/Frameworks/Kernel.framework/PrivateHeaders INSTALL_KF_MI_LCL_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} diff --git a/bsd/sys/_types/_timeval64.h b/bsd/sys/_types/_timeval64.h new file mode 100644 index 000000000..c14f8338f --- /dev/null +++ b/bsd/sys/_types/_timeval64.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _STRUCT_TIMEVAL64 +#define _STRUCT_TIMEVAL64 +struct timeval64 +{ + __int64_t tv_sec; /* seconds */ + __int64_t tv_usec; /* and microseconds */ +}; +#endif /* _STRUCT_TIMEVAL64 */ diff --git a/bsd/sys/attr.h b/bsd/sys/attr.h index 8dc62498d..ebfeb6091 100644 --- a/bsd/sys/attr.h +++ b/bsd/sys/attr.h @@ -373,6 +373,27 @@ typedef struct vol_attributes_attr { #define ATTR_CMN_RETURNED_ATTRS 0x80000000 #define ATTR_CMN_VALIDMASK 0xFFFFFFFF +/* + * The settable ATTR_CMN_* attributes include the following: + * ATTR_CMN_SCRIPT + * ATTR_CMN_CRTIME + * ATTR_CMN_MODTIME + * ATTR_CMN_CHGTIME + * + * ATTR_CMN_ACCTIME + * ATTR_CMN_BKUPTIME + * ATTR_CMN_FNDRINFO + * ATTR_CMN_OWNERID + * + * ATTR_CMN_GRPID + * ATTR_CMN_ACCESSMASK + * ATTR_CMN_FLAGS + * + * ATTR_CMN_EXTENDED_SECURITY + * ATTR_CMN_UUID + * + * ATTR_CMN_GRPUUID + */ #define ATTR_CMN_SETMASK 0x01C7FF00 #define ATTR_CMN_VOLSETMASK 0x00006700 @@ -399,6 +420,12 @@ typedef struct vol_attributes_attr { #define ATTR_VOL_INFO 0x80000000 #define ATTR_VOL_VALIDMASK 0xC007FFFF + +/* + * The list of settable ATTR_VOL_* attributes includes the following: + * ATTR_VOL_NAME + * ATTR_VOL_INFO + */ #define ATTR_VOL_SETMASK 0x80002000 @@ -426,6 +453,10 @@ typedef struct vol_attributes_attr { #define ATTR_FILE_RSRCALLOCSIZE 0x00002000 #define ATTR_FILE_VALIDMASK 0x000037FF +/* + * Settable ATTR_FILE_* attributes include: + * ATTR_FILE_DEVTYPE + */ #define ATTR_FILE_SETMASK 0x00000020 #define ATTR_FORK_TOTALSIZE 0x00000001 diff --git a/bsd/sys/bsdtask_info.h b/bsd/sys/bsdtask_info.h index 0bebc2c7f..1f5fb1cc7 100644 --- a/bsd/sys/bsdtask_info.h +++ b/bsd/sys/bsdtask_info.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2005, 2015 Apple Computer, Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,6 +30,8 @@ #ifndef _SYS_BSDTASK_INFO_H #define _SYS_BSDTASK_INFO_H +#include + struct proc_taskinfo_internal { uint64_t pti_virtual_size; /* virtual memory size (bytes) */ uint64_t pti_resident_size; /* resident memory size (bytes) */ diff --git a/bsd/sys/buf.h b/bsd/sys/buf.h index 6db9a6937..3763a223a 100644 --- a/bsd/sys/buf.h +++ b/bsd/sys/buf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -1060,12 +1060,20 @@ bufattr_t bufattr_dup (bufattr_t bap); void bufattr_free(bufattr_t bap); /*! - @function bufattr_cpaddr - @abstract Get the address of cp_entry on a buffer. - @param bap Buffer Attribute whose cp_entry to get. - @return int. + @function bufattr_cpx + @abstract Returns a pointer to a cpx_t structure. + @param bap Buffer Attribute whose cpx_t structure you wish to get. + @return Returns a cpx_t structure, or NULL if not valid */ -void *bufattr_cpaddr(bufattr_t); +struct cpx *bufattr_cpx(bufattr_t); + +/*! + @function bufattr_setcpx + @abstract Set the cp_ctx on a buffer attribute. + @param bap Buffer Attribute that you wish to change + @return void + */ +void bufattr_setcpx(bufattr_t, struct cpx *cpx); /*! @function bufattr_cpoff @@ -1075,15 +1083,6 @@ void *bufattr_cpaddr(bufattr_t); */ uint64_t bufattr_cpoff(bufattr_t); - -/*! - @function bufattr_setcpaddr - @abstract Set the address of cp_entry on a buffer attribute. - @param bap Buffer Attribute whose cp entry value has to be set - @return void. - */ -void bufattr_setcpaddr(bufattr_t, void *); - /*! 
@function bufattr_setcpoff @abstract Set the file offset for a content protected I/O on diff --git a/bsd/sys/buf_internal.h b/bsd/sys/buf_internal.h index e0674869f..6ff3284bc 100644 --- a/bsd/sys/buf_internal.h +++ b/bsd/sys/buf_internal.h @@ -79,6 +79,10 @@ #include #include +#if CONFIG_PROTECT +#include +#endif + #define NOLIST ((struct buf *)0x87654321) /* @@ -86,8 +90,8 @@ */ struct bufattr { #if CONFIG_PROTECT - struct cprotect *ba_cpentry; /* address of cp_entry */ - uint64_t ba_cp_file_off; /* rounded file offset. See buf_setcpoff() for more comments */ + struct cpx *ba_cpx; + uint64_t ba_cp_file_off; #endif uint64_t ba_flags; /* flags. Some are only in-use on embedded devices */ }; @@ -333,7 +337,6 @@ errno_t buf_make_private(buf_t bp); #endif #ifdef CONFIG_PROTECT -void buf_setcpaddr(buf_t, struct cprotect *); void buf_setcpoff (buf_t, uint64_t); #endif diff --git a/bsd/sys/cdefs.h b/bsd/sys/cdefs.h index 7c729026f..a8d95eb3a 100644 --- a/bsd/sys/cdefs.h +++ b/bsd/sys/cdefs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -81,6 +81,26 @@ #warning "Unsupported compiler detected" #endif +/* + * Compatibility with compilers and environments that don't support compiler + * feature checking function-like macros. + */ +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif +#ifndef __has_include +#define __has_include(x) 0 +#endif +#ifndef __has_feature +#define __has_feature(x) 0 +#endif +#ifndef __has_attribute +#define __has_attribute(x) 0 +#endif +#ifndef __has_extension +#define __has_extension(x) 0 +#endif + /* * The __CONCAT macro is used to concatenate parts of symbol names, e.g. * with "#define OLD(foo) __CONCAT(old,foo)", OLD(foo) produces oldfoo. 
@@ -155,26 +175,17 @@ */ #define __deprecated __attribute__((deprecated)) -#ifdef __has_extension - #if __has_extension(attribute_deprecated_with_message) - #define __deprecated_msg(_msg) __attribute__((deprecated(_msg))) - #else - #define __deprecated_msg(_msg) __attribute__((deprecated)) - #endif -#elif defined(__GNUC__) && ((__GNUC__ >= 5) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5))) - #define __deprecated_msg(_msg) __attribute__((deprecated(_msg))) +#if __has_extension(attribute_deprecated_with_message) || \ + (defined(__GNUC__) && ((__GNUC__ >= 5) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))) + #define __deprecated_msg(_msg) __attribute__((deprecated(_msg))) #else - #define __deprecated_msg(_msg) __attribute__((deprecated)) + #define __deprecated_msg(_msg) __attribute__((deprecated)) #endif -#ifdef __has_extension - #if __has_extension(enumerator_attributes) - #define __deprecated_enum_msg(_msg) __deprecated_msg(_msg) - #else - #define __deprecated_enum_msg(_msg) - #endif +#if __has_extension(enumerator_attributes) + #define __deprecated_enum_msg(_msg) __deprecated_msg(_msg) #else - #define __deprecated_enum_msg(_msg) + #define __deprecated_enum_msg(_msg) #endif /* __unavailable causes the compiler to error out when encountering @@ -198,6 +209,22 @@ #define __restrict restrict #endif +/* Compatibility with compilers and environments that don't support the + * nullability feature. + */ + +#if !__has_feature(nullability) +#ifndef __nullable +#define __nullable +#endif +#ifndef __nonnull +#define __nonnull +#endif +#ifndef __null_unspecified +#define __null_unspecified +#endif +#endif + /* Declaring inline functions within headers is error-prone due to differences * across various versions of the C language and extensions. __header_inline * can be used to declare inline functions within system headers. 
In cases @@ -369,6 +396,30 @@ #define __DARWIN_ONLY_UNIX_CONFORMANCE 1 #define __DARWIN_ONLY_VERS_1050 1 #endif /* PLATFORM_iPhoneSimulator */ +#ifdef PLATFORM_tvOS +/* Platform: tvOS */ +#define __DARWIN_ONLY_64_BIT_INO_T 1 +#define __DARWIN_ONLY_UNIX_CONFORMANCE 1 +#define __DARWIN_ONLY_VERS_1050 1 +#endif /* PLATFORM_tvOS */ +#ifdef PLATFORM_AppleTVOS +/* Platform: AppleTVOS */ +#define __DARWIN_ONLY_64_BIT_INO_T 1 +#define __DARWIN_ONLY_UNIX_CONFORMANCE 1 +#define __DARWIN_ONLY_VERS_1050 1 +#endif /* PLATFORM_AppleTVOS */ +#ifdef PLATFORM_tvSimulator +/* Platform: tvSimulator */ +#define __DARWIN_ONLY_64_BIT_INO_T 1 +#define __DARWIN_ONLY_UNIX_CONFORMANCE 1 +#define __DARWIN_ONLY_VERS_1050 1 +#endif /* PLATFORM_tvSimulator */ +#ifdef PLATFORM_AppleTVSimulator +/* Platform: AppleTVSimulator */ +#define __DARWIN_ONLY_64_BIT_INO_T 1 +#define __DARWIN_ONLY_UNIX_CONFORMANCE 1 +#define __DARWIN_ONLY_VERS_1050 1 +#endif /* PLATFORM_AppleTVSimulator */ #ifdef PLATFORM_iPhoneOSNano /* Platform: iPhoneOSNano */ #define __DARWIN_ONLY_64_BIT_INO_T 1 @@ -381,6 +432,18 @@ #define __DARWIN_ONLY_UNIX_CONFORMANCE 1 #define __DARWIN_ONLY_VERS_1050 1 #endif /* PLATFORM_iPhoneNanoSimulator */ +#ifdef PLATFORM_WatchOS +/* Platform: WatchOS */ +#define __DARWIN_ONLY_64_BIT_INO_T 1 +#define __DARWIN_ONLY_UNIX_CONFORMANCE 1 +#define __DARWIN_ONLY_VERS_1050 1 +#endif /* PLATFORM_WatchOS */ +#ifdef PLATFORM_WatchSimulator +/* Platform: WatchSimulator */ +#define __DARWIN_ONLY_64_BIT_INO_T 1 +#define __DARWIN_ONLY_UNIX_CONFORMANCE 1 +#define __DARWIN_ONLY_VERS_1050 1 +#endif /* PLATFORM_WatchSimulator */ #ifdef PLATFORM_MacOSX /* Platform: MacOSX */ #define __DARWIN_ONLY_64_BIT_INO_T 0 @@ -561,7 +624,7 @@ #elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) #define __DARWIN_ALIAS_STARTING(_mac, _iphone, x) __DARWIN_ALIAS_STARTING_MAC_##_mac(x) #else -#define __DARWIN_ALIAS_STARTING(_mac, _iphone, x) +#define __DARWIN_ALIAS_STARTING(_mac, _iphone, x) x #endif #endif /* KERNEL */ 
@@ -736,4 +799,14 @@ #error Unsupported architecture #endif +#ifdef XNU_KERNEL_PRIVATE +/* + * Selectively ignore cast alignment warnings + */ +#define __IGNORE_WCASTALIGN(x) _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wcast-align\"") \ + x; \ + _Pragma("clang diagnostic pop") +#endif + #endif /* !_CDEFS_H_ */ diff --git a/bsd/sys/coalition.h b/bsd/sys/coalition.h index 73eca26a8..62b205eb7 100644 --- a/bsd/sys/coalition.h +++ b/bsd/sys/coalition.h @@ -29,11 +29,12 @@ #ifndef _SYS_COALITION_H_ #define _SYS_COALITION_H_ -#include -#include #include +#include #include +#include + __BEGIN_DECLS #ifndef KERNEL @@ -44,42 +45,175 @@ int coalition_create(uint64_t *cid_out, uint32_t flags); int coalition_terminate(uint64_t cid, uint32_t flags); int coalition_reap(uint64_t cid, uint32_t flags); -/* This struct is also defined in osfmk/kern/coalition.h. Keep in sync. */ -struct coalition_resource_usage { - uint64_t tasks_started; - uint64_t tasks_exited; - uint64_t time_nonempty; - uint64_t cpu_time; - uint64_t interrupt_wakeups; - uint64_t platform_idle_wakeups; - uint64_t bytesread; - uint64_t byteswritten; - uint64_t gpu_time; -}; - /* Wrappers around __coalition_info syscall (with proper struct types) */ int coalition_info_resource_usage(uint64_t cid, struct coalition_resource_usage *cru, size_t sz); -#endif /* KERNEL */ +#else /* KERNEL */ -/* Flags shared by userspace and xnu */ +#if CONFIG_COALITIONS +/* in-kernel BSD interfaces */ -#define COALITION_CREATE_FLAG_PRIVILEGED ((uint32_t)0x1) +/* + * coalition_id: + * Get the unique 64-bit identifier associated with the given coalition + */ +uint64_t coalition_id(coalition_t coal); -#define COALITION_CREATE_FLAG_MASK ((uint32_t)0x1) -#ifdef PRIVATE -/* Flavors shared by only xnu + Libsyscall */ +/* + * coalitions_get_list: + * Get a list of coalitions as procinfo_coalinfo structures + * + * This interface is primarily to support libproc. 
+ * + * Parameters: + * type : The COALITION_TYPE of the coalitions to investigate. + * Valid types can be found in <mach/coalition.h> + * coal_list : Pointer to an array of procinfo_coalinfo structures + * that will be filled with information about each + * coalition whose type matches 'type' + * NOTE: This can be NULL to perform a simple query of + * the total number of coalitions. + * list_sz : The size (in number of structures) of 'coal_list' + * + * Returns: 0 if no coalitions matching 'type' are found + * Otherwise: the number of coalitions whose type matches + * the 'type' parameter (all coalitions if type == -1) + */ +extern int coalitions_get_list(int type, struct procinfo_coalinfo *coal_list, int list_sz); -/* Syscall flavors */ -#define COALITION_OP_CREATE 1 -#define COALITION_OP_TERMINATE 2 -#define COALITION_OP_REAP 3 -/* coalition_info flavors */ -#define COALITION_INFO_RESOURCE_USAGE 1 +/* + * coalition_is_leader: + * Determine if a task is a coalition leader. + * + * Parameters: + * task : The task to investigate + * coal_type : The COALITION_TYPE of the coalition to investigate. + * Valid types can be found in <mach/coalition.h> + * coal : If 'task' is a valid task, and is a member of a coalition + * of type 'coal_type', then 'coal' will be filled in with + * the corresponding coalition_t object. + * NOTE: This will be filled in whether or not the 'task' is + * a leader in the coalition. However, if 'task' is + * not a member of a coalition of type 'coal_type' then + * 'coal' will be filled in with COALITION_NULL. + * NOTE: This can be NULL + * + * Returns: TRUE if 'task' is a coalition leader, FALSE otherwise.
+ */ +extern boolean_t coalition_is_leader(task_t task, int coal_type, coalition_t *coal); -#endif /* PRIVATE */ +/* + * coalition_get_task_count: + * Sum up the number of tasks in the given coalition + * + * Parameters: + * coal : The coalition to investigate + * + * Returns: The number of tasks in the coalition + */ +extern int coalition_get_task_count(coalition_t coal); + +/* + * coalition_get_page_count: + * Sum up the page count for each task in the coalition specified by 'coal' + * + * Parameters: + * coal : The coalition to investigate + * ntasks : If non-NULL, this will be filled in with the number of + * tasks in the coalition. + * + * Returns: The sum of all pages used by all members of the coalition + */ +extern uint64_t coalition_get_page_count(coalition_t coal, int *ntasks); + +/* + * coalition_get_pid_list: + * Gather a list of constituent PIDs of tasks within a coalition playing a + * given role. + * + * Parameters: + * coal : The coalition to investigate + * rolemask : The set of coalition task roles used to filter the list + * of PIDs returned in 'pid_list'. Roles can be combined + * using the COALITION_ROLEMASK_* tokens found in + * <mach/coalition.h>. Each PID returned is guaranteed to + * be tagged with one of the task roles specified by this + * mask. + * sort_order : The order in which the returned PIDs should be sorted + * by default this is in descending page count. + * pid_list : Pointer to an array of PIDs that will be filled with + * members of the coalition tagged with a role in 'rolemask' + * list_sz : The size (in number of PIDs) of 'pid_list' + * + * Note: + * This function will return the list of PIDs in a sorted order. By default + * the PIDs will be sorted by task page count in descending order. In the + * future it may be possible for user space to specify a level of importance + * for each coalition member.
If there is a user space specified importance, + * then the list of PIDs returned will be sorted in _ascending_ importance, + * i.e., pid_list[0] will be the least important task (or the largest consumer + * of memory). The desired sort order can be specified using the + * COALITION_SORT_* definitions in osfmk/mach/coalition.h + * + * It is also possible to return an unsorted list of PIDs using the special + * sort type 'COALITION_SORT_NOSORT' + * + * Returns: < 0 on ERROR + * 0 if 'coal' contains no tasks whose role is 'taskrole' + * (or if the coalition is being deallocated) + * Otherwise: the number of PIDs in the coalition whose role is + * 'taskrole'. NOTE: This may be larger or smaller than + * the 'pid_list' array. + * + */ +extern int coalition_get_pid_list(coalition_t coal, uint32_t rolemask, + int sort_order, int *pid_list, int list_sz); + +#else /* !CONFIG_COALITIONS */ +static inline uint64_t coalition_id(__unused coalition_t coal) +{ + return 0; +} + +static inline int coalitions_get_list(__unused int type, + __unused struct procinfo_coalinfo *coal_list, + __unused int list_sz) +{ + return 0; +} + +static inline boolean_t coalition_is_leader(__unused task_t task, + __unused int coal_type, + coalition_t *coal) +{ + if (coal != NULL) { *coal = COALITION_NULL; } /* 'coal' is documented as optional */ + return FALSE; +} + +static inline int coalition_get_task_count(__unused coalition_t coal) +{ + return 0; +} + +static inline uint64_t coalition_get_page_count(__unused coalition_t coal, + __unused int *ntasks) +{ + return 0; +} + +static inline int coalition_get_pid_list(__unused coalition_t coal, + __unused uint32_t rolemask, + __unused int sort_order, + __unused int *pid_list, + __unused int list_sz) +{ + return 0; +} +#endif + +#endif /* KERNEL */ + +__END_DECLS diff --git a/bsd/sys/codedir_internal.h b/bsd/sys/codedir_internal.h index e0070c5ca..ffda24326 100644 --- a/bsd/sys/codedir_internal.h +++ b/bsd/sys/codedir_internal.h @@ -41,7 +41,7 @@ const CS_CodeDirectory *findCodeDirectory( const CS_SuperBlob *embedded,
- char *lower_bound, - char *upper_bound); + const char *lower_bound, + const char *upper_bound); #endif diff --git a/bsd/sys/codesign.h b/bsd/sys/codesign.h index 1a23c3d0a..1d8ee6516 100644 --- a/bsd/sys/codesign.h +++ b/bsd/sys/codesign.h @@ -52,6 +52,8 @@ #define CS_KILLED 0x1000000 /* was killed by kernel for invalidity */ #define CS_DYLD_PLATFORM 0x2000000 /* dyld used to load this is a platform binary */ +#define CS_PLATFORM_BINARY 0x4000000 /* this is a platform binary */ +#define CS_PLATFORM_PATH 0x8000000 /* platform binary by the fact of path (osx only) */ #define CS_ENTITLEMENT_FLAGS (CS_GET_TASK_ALLOW | CS_INSTALLER) @@ -74,18 +76,6 @@ #define CS_OPS_BLOB 10 /* get codesign blob */ #define CS_OPS_IDENTITY 11 /* get codesign identity */ -/* SigPUP */ -#define CS_OPS_SIGPUP_INSTALL 20 -#define CS_OPS_SIGPUP_DROP 21 -#define CS_OPS_SIGPUP_VALIDATE 22 - -struct sigpup_install_table { - uint64_t data; - uint64_t length; - uint64_t path; -}; - - /* * Magic numbers used by Code Signing */ @@ -114,11 +104,20 @@ enum { CSTYPE_INDEX_REQUIREMENTS = 0x00000002, /* compat with amfi */ CSTYPE_INDEX_ENTITLEMENTS = 0x00000005, /* compat with amfi */ - CS_HASHTYPE_SHA1 = 1 + CS_HASHTYPE_SHA1 = 1, + CS_HASHTYPE_SHA256 = 2, + CS_HASHTYPE_SHA256_TRUNCATED = 3, + + CS_SHA1_LEN = 20, + CS_SHA256_TRUNCATED_LEN = 20, + + CS_CDHASH_LEN = 20, + CS_HASH_MAX_SIZE = 32, /* max size of the hash we'll support */ }; #define KERNEL_HAVE_CS_CODEDIRECTORY 1 +#define KERNEL_CS_CODEDIRECTORY_HAVE_PLATFORM 1 /* * C form of a CodeDirectory. 
@@ -135,7 +134,7 @@ typedef struct __CodeDirectory { uint32_t codeLimit; /* limit to main image signature range */ uint8_t hashSize; /* size of each hash in bytes */ uint8_t hashType; /* type of hash (cdHashType* constants) */ - uint8_t spare1; /* unused (must be zero) */ + uint8_t platform; /* platform identifier; zero if not platform binary */ uint8_t pageSize; /* log2(page size in bytes); 0 => infinite */ uint32_t spare2; /* unused (must be zero) */ /* Version 0x20100 */ @@ -162,6 +161,7 @@ typedef struct __SC_SuperBlob { /* followed by Blobs in no particular order as indicated by offsets in index */ } CS_SuperBlob; +#define KERNEL_HAVE_CS_GENERICBLOB 1 typedef struct __SC_GenericBlob { uint32_t magic; /* magic number */ uint32_t length; /* total length of blob */ @@ -196,43 +196,50 @@ struct vnode; struct cs_blob; struct fileglob; -struct cscsr_functions { - int csr_version; -#define CSCSR_VERSION 1 - int (*csr_validate_header)(const uint8_t *, size_t); - const void* (*csr_find_file_codedirectory)(struct vnode *, const uint8_t *, size_t, size_t *); -}; - __BEGIN_DECLS int cs_enforcement(struct proc *); int cs_require_lv(struct proc *); uint32_t cs_entitlement_flags(struct proc *p); int cs_entitlements_blob_get(struct proc *, void **, size_t *); +int cs_restricted(struct proc *); uint8_t * cs_get_cdhash(struct proc *); -void cs_register_cscsr(struct cscsr_functions *); -const CS_GenericBlob * - cs_find_blob(struct cs_blob *, uint32_t, uint32_t); +struct cs_blob * csproc_get_blob(struct proc *); +struct cs_blob * csvnode_get_blob(struct vnode *, off_t); +void csvnode_print_debug(struct vnode *); + +const char * csblob_get_teamid(struct cs_blob *); +const char * csblob_get_identity(struct cs_blob *); +const uint8_t * csblob_get_cdhash(struct cs_blob *); +int csblob_get_platform_binary(struct cs_blob *); +unsigned int csblob_get_flags(struct cs_blob *blob); +int csblob_get_entitlements(struct cs_blob *, void **, size_t *); +const CS_GenericBlob * + 
csblob_find_blob(struct cs_blob *, uint32_t, uint32_t); +const CS_GenericBlob * + csblob_find_blob_bytes(const uint8_t *, size_t, uint32_t, uint32_t); + +/* + * Mostly convenience functions below + */ -const char * csblob_get_teamid(struct cs_blob *); const char * csproc_get_teamid(struct proc *); const char * csvnode_get_teamid(struct vnode *, off_t); int csproc_get_platform_binary(struct proc *); const char * csfg_get_teamid(struct fileglob *); int csfg_get_path(struct fileglob *, char *, int *); int csfg_get_platform_binary(struct fileglob *); +uint8_t * csfg_get_cdhash(struct fileglob *, uint64_t, size_t *); -__END_DECLS +extern int cs_debug; #ifdef XNU_KERNEL_PRIVATE void cs_init(void); int cs_allow_invalid(struct proc *); int cs_invalid_page(addr64_t); -int sigpup_install(user_addr_t); -int sigpup_drop(void); +int csproc_get_platform_path(struct proc *); -extern int cs_debug; extern int cs_validation; #if !SECURE_KERNEL extern int cs_enforcement_panic; @@ -240,6 +247,11 @@ extern int cs_enforcement_panic; #endif /* XNU_KERNEL_PRIVATE */ + +__END_DECLS + + + #endif /* KERNEL */ #endif /* _SYS_CODESIGN_H_ */ diff --git a/bsd/sys/content_protection.h b/bsd/sys/content_protection.h index d35884f1b..20eae8b31 100644 --- a/bsd/sys/content_protection.h +++ b/bsd/sys/content_protection.h @@ -49,6 +49,12 @@ #define PROTECTION_CLASS_E 5 #define PROTECTION_CLASS_F 6 +/* + * This forces open_dprotected_np to behave as though the file were created with + * the traditional open(2) semantics. 
+ */ +#define PROTECTION_CLASS_DEFAULT (-1) + #endif /* PRIVATE */ #endif /* _SYS_CONTENT_PROTECTION_H_ */ diff --git a/bsd/sys/cprotect.h b/bsd/sys/cprotect.h index 67cdd1e57..642858921 100644 --- a/bsd/sys/cprotect.h +++ b/bsd/sys/cprotect.h @@ -29,18 +29,16 @@ #ifndef _SYS_CPROTECT_H_ #define _SYS_CPROTECT_H_ -#ifdef __cplusplus -extern "C" { -#endif - #if KERNEL_PRIVATE #include -#include -#include +#include +#include +#include #include +#include -#include +__BEGIN_DECLS #define CP_CODE(code) FSDBG_CODE(DBG_CONTENT_PROT, code) /* @@ -67,16 +65,7 @@ enum { #endif - - -#define CP_IV_KEYSIZE 20 /* 16x8 = 128, but SHA1 pushes 20 bytes so keep space for that */ -#define CP_MAX_KEYSIZE 32 /* 8x4 = 32, 32x8 = 256 */ -#define CP_MAX_CACHEBUFLEN 64 /* Maximum size of cp cache buffer/array */ - #define CP_MAX_WRAPPEDKEYSIZE 128 /* The size of the largest allowed key */ -#define CP_INITIAL_WRAPPEDKEYSIZE 40 -#define CP_V2_WRAPPEDKEYSIZE 40 /* Size of the wrapped key in a v2 EA */ -#define CP_V4_RESERVEDBYTES 20 /* Number of reserved bytes in EA still present */ /* lock events from AppleKeyStore */ #define CP_LOCKED_STATE 0 /* Device is locked */ @@ -84,48 +73,9 @@ enum { #define CP_MAX_STATE 1 /* uint8_t ; maximum # of states is 255 */ -#define CP_LOCKED_KEYCHAIN 0 -#define CP_UNLOCKED_KEYCHAIN 1 - -/* For struct cprotect: cp_flags */ -#define CP_NEEDS_KEYS 0x01 /* File needs persistent keys */ -#define CP_KEY_FLUSHED 0x02 /* File's unwrapped key has been purged from memory */ -#define CP_NO_XATTR 0x04 /* Key info has not been saved as EA to the FS */ -#define CP_OFF_IV_ENABLED 0x08 /* Only go down relative IV route if this flag is set */ -#define CP_RELOCATION_INFLIGHT 0x10 /* File with offset IVs is in the process of being relocated. 
*/ -#define CP_SEP_WRAPPEDKEY 0x20 /* Wrapped key delivered from keybag */ - - - -/* Content Protection VNOP Operation flags */ -#define CP_READ_ACCESS 0x1 -#define CP_WRITE_ACCESS 0x2 - -/* - * Check for this version when deciding to enable features - * For iOS 4, CP_CURRENT_MAJOR_VERS = 2.0 - * For iOS 5, CP_CURRENT_MAJOR_VERS = 4.0 - */ -#define CONTENT_PROTECTION_XATTR_NAME "com.apple.system.cprotect" -#define CP_NEW_MAJOR_VERS 4 -#define CP_PREV_MAJOR_VERS 2 -#define CP_MINOR_VERS 0 - -/* the class occupies the lowest 5 bits, so there are 32 values (0-31) */ -#define CP_EFFECTIVE_CLASSMASK 0x0000001f - -/* macros for quick access/typing to mask out the classmask */ -#define CP_CLASS(x) ((uint32_t)(CP_EFFECTIVE_CLASSMASK & (x))) - -#define CP_CRYPTO_G1 0x00000020 - typedef struct cprotect *cprotect_t; typedef struct cp_wrap_func *cp_wrap_func_t; -typedef struct cp_xattr *cp_xattr_t; - -typedef struct cnode * cnode_ptr_t; -//forward declare the struct. -struct hfsmount; +typedef struct cpx *cpx_t; /* Structures passed between HFS and AKS kext */ typedef struct { @@ -146,11 +96,14 @@ typedef struct { typedef cp_wrapped_key_s* cp_wrapped_key_t; +typedef uint16_t cp_key_revision_t; + typedef struct { - ino64_t inode; - uint32_t volume; - pid_t pid; - uid_t uid; + ino64_t inode; + uint32_t volume; + pid_t pid; + uid_t uid; + cp_key_revision_t key_revision; } cp_cred_s; typedef cp_cred_s* cp_cred_t; @@ -170,41 +123,22 @@ typedef int backup_key_t(cp_cred_t access, const cp_wrapped_key_t wrapped_key_in */ #define CP_RAW_KEY_WRAPPEDKEY 0x00000001 - -/* - * Flags for Key Generation Behavior - * - * These are passed to cp_generate_keys() and cp_new() in the - * flags arguments - */ -#define CP_KEYWRAP_DIFFCLASS 0x00000001 /* wrapping with a different class bag is OK */ - - /* - * Runtime-only structure containing the content protection status - * for the given file. 
This is contained within the cnode - * This is passed down to IOStorageFamily via the bufattr struct - * - ****************************************************** - * Some Key calculation information for offset based IV - ****************************************************** - * Kf = original 256 bit per file key - * Kiv = SHA1(Kf), use full Kf, but truncate Kiv to 128 bits - * Kiv can be cached in the cprotect, so it only has to be calculated once for the file init - * - * IVb = Encrypt(Kiv, offset) - * + * Function prototypes for kexts to interface with our internal cprotect + * fields; cpx provides opacity and allows us to modify behavior internally + * without requiring kext changes. */ -struct cprotect { - uint32_t cp_flags; - uint32_t cp_pclass; /* persistent class stored on-disk */ - aes_encrypt_ctx cp_cache_iv_ctx; - uint32_t cp_cache_key_len; - uint8_t cp_cache_key[CP_MAX_CACHEBUFLEN]; - uint32_t cp_persistent_key_len; - void* cp_backing_cnode; - uint8_t cp_persistent_key[]; -}; +cpx_t cpx_alloc(size_t key_size); +void cpx_free(cpx_t); +__attribute__((const)) size_t cpx_size(size_t key_size); +__attribute__((pure)) bool cpx_is_sep_wrapped_key(const struct cpx *); +void cpx_set_is_sep_wrapped_key(struct cpx *, bool); +__attribute__((pure)) bool cpx_use_offset_for_iv(const struct cpx *); +void cpx_set_use_offset_for_iv(struct cpx *, bool); +__attribute__((pure)) uint16_t cpx_key_len(const struct cpx *); +void cpx_set_key_len(struct cpx *, uint16_t key_len); +__attribute__((pure)) void *cpx_key(const struct cpx *); +aes_encrypt_ctx *cpx_iv_aes_ctx(struct cpx *); /* Structure to store pointers for AKS functions */ struct cp_wrap_func { @@ -215,100 +149,32 @@ struct cp_wrap_func { backup_key_t *backup_key; }; -/* - * On-disk structure written as the per-file EA payload - * All on-disk multi-byte fields for the CP XATTR must be stored - * little-endian on-disk. This means they must be endian swapped to - * L.E on getxattr() and converted to LE on setxattr(). 
- * - * This structure is a fixed length and is tightly packed. - * 56 bytes total. - */ -struct cp_xattr_v2 { - u_int16_t xattr_major_version; - u_int16_t xattr_minor_version; - u_int32_t flags; - u_int32_t persistent_class; - u_int32_t key_size; - uint8_t persistent_key[CP_V2_WRAPPEDKEYSIZE]; -} __attribute__((aligned(2), packed)); - - -/* - * V4 Content Protection EA On-Disk Layout. - * - * This structure must be tightly packed, but the *size can vary* - * depending on the length of the key. At MOST, the key length will be - * CP_MAX_WRAPPEDKEYSIZE, but the length is defined by the key_size field. - * - * Either way, the packing must be applied to ensure that the key data is - * retrievable in the right location relative to the start of the struct. - * - * Fully packed, this structure can range from : - * MIN: 36 bytes (no key -- used with directories) - * MAX: 164 bytes (with 128 byte key) - * - * During runtime we always allocate with the full 128 byte key, but only - * use as much of the key buffer as needed. It must be tightly packed, though. - */ - -struct cp_xattr_v4 { - u_int16_t xattr_major_version; - u_int16_t xattr_minor_version; - u_int32_t flags; - u_int32_t persistent_class; - u_int32_t key_size; - /* CP V4 Reserved Bytes == 20 */ - u_int8_t reserved[CP_V4_RESERVEDBYTES]; - /* All above fields are fixed regardless of key length (36 bytes) */ - /* Max Wrapped Size == 128 */ - uint8_t persistent_key[CP_MAX_WRAPPEDKEYSIZE]; -} __attribute__((aligned(2), packed)); +int cp_key_store_action(int); +int cp_register_wraps(cp_wrap_func_t); +#ifdef BSD_KERNEL_PRIVATE /* - * The Root Directory's EA (fileid 1) is special; it defines information about - * what capabilities the filesystem is using. - * - * The data is still stored little endian. - * - * Note that this structure is tightly packed: 28 bytes total. + * Declarations that are not exported from the kernel but are used by + * VFS to call into the implementation (i.e. HFS) should be here. 
*/ - struct cp_root_xattr { - u_int16_t major_version; - u_int16_t minor_version; - u_int64_t flags; - u_int8_t reserved[16]; -} __attribute__((aligned(2), packed)); +/* Content Protection VNOP Operation flags */ +#define CP_READ_ACCESS 0x1 +#define CP_WRITE_ACCESS 0x2 /* * Functions to check the status of a CP and to query * the containing filesystem to see if it is supported. */ -int cp_vnode_getclass(vnode_t, int *); -int cp_vnode_setclass(vnode_t, uint32_t); -int cp_vnode_transcode(vnode_t vp, void *key, unsigned *len); +struct vnode; +struct hfsmount; -int cp_key_store_action(int); -int cp_register_wraps(cp_wrap_func_t); +int cp_vnode_getclass(struct vnode *, int *); +int cp_vnode_setclass(struct vnode *, uint32_t); +int cp_vnode_transcode(struct vnode * vp, void *key, unsigned *len); -int cp_entry_init(cnode_ptr_t, struct mount *); -int cp_entry_gentempkeys(struct cprotect **entry_ptr, struct hfsmount *hfsmp); -int cp_needs_tempkeys (struct hfsmount *hfsmp, int* needs); -void cp_entry_destroy(struct cprotect *entry_ptr); -void cp_replace_entry (struct cnode *cp, struct cprotect *newentry); -cnode_ptr_t cp_get_protected_cnode(vnode_t); -int cp_handle_vnop(vnode_t, int, int); -int cp_fs_protected (mount_t); -int cp_getrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *outxattr); -int cp_setrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *newxattr); -int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, uint32_t fileid, int options); -int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, - uint32_t flags, struct cprotect **newentry); -int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppliedclass, - mode_t cmode, struct cprotect **tmpentry); -int cp_handle_relocate (cnode_ptr_t cp, struct hfsmount *hfsmp); +int cp_handle_vnop(struct vnode *, int, int); int cp_handle_open(struct vnode *vp, int mode); int cp_get_root_major_vers (struct vnode *vp, uint32_t *level); int 
cp_get_default_level (struct vnode *vp, uint32_t *level); @@ -317,11 +183,9 @@ int cp_set_trimmed(struct hfsmount *hfsmp); int cp_set_rewrapped(struct hfsmount *hfsmp); int cp_flop_generation (struct hfsmount *hfsmp); +#endif /* BSD_KERNEL_PRIVATE */ -#endif /* KERNEL_PRIVATE */ - -#ifdef __cplusplus -}; -#endif +__END_DECLS +#endif /* KERNEL_PRIVATE */ #endif /* !_SYS_CPROTECT_H_ */ diff --git a/bsd/sys/csr.h b/bsd/sys/csr.h index f418eef68..b2f59f1c3 100644 --- a/bsd/sys/csr.h +++ b/bsd/sys/csr.h @@ -47,6 +47,7 @@ typedef uint32_t csr_op_t; #define CSR_ALLOW_DESTRUCTIVE_DTRACE (1 << 5) /* name deprecated */ #define CSR_ALLOW_UNRESTRICTED_DTRACE (1 << 5) #define CSR_ALLOW_UNRESTRICTED_NVRAM (1 << 6) +#define CSR_ALLOW_DEVICE_CONFIGURATION (1 << 7) #define CSR_VALID_FLAGS (CSR_ALLOW_UNTRUSTED_KEXTS | \ CSR_ALLOW_UNRESTRICTED_FS | \ @@ -54,15 +55,25 @@ typedef uint32_t csr_op_t; CSR_ALLOW_KERNEL_DEBUGGER | \ CSR_ALLOW_APPLE_INTERNAL | \ CSR_ALLOW_UNRESTRICTED_DTRACE | \ - CSR_ALLOW_UNRESTRICTED_NVRAM) + CSR_ALLOW_UNRESTRICTED_NVRAM | \ + CSR_ALLOW_DEVICE_CONFIGURATION) + + +/* CSR capabilities that a booter can give to the system */ +#define CSR_CAPABILITY_UNLIMITED (1 << 0) +#define CSR_CAPABILITY_CONFIG (1 << 1) +#define CSR_CAPABILITY_APPLE_INTERNAL (1 << 2) + +#define CSR_VALID_CAPABILITIES (CSR_CAPABILITY_UNLIMITED | CSR_CAPABILITY_CONFIG | CSR_CAPABILITY_APPLE_INTERNAL) #ifdef PRIVATE /* Private system call interface between Libsyscall and xnu */ /* Syscall flavors */ -#define CSR_OP_CHECK 0 -#define CSR_OP_GET_ACTIVE_CONFIG 1 -#define CSR_OP_GET_PENDING_CONFIG 2 +enum csr_syscalls { + CSR_SYSCALL_CHECK, + CSR_SYSCALL_GET_ACTIVE_CONFIG, +}; #endif /* PRIVATE */ @@ -79,7 +90,6 @@ void csr_set_allow_all(int value); /* Syscalls */ int csr_check(csr_config_t mask); int csr_get_active_config(csr_config_t *config); -int csr_get_pending_config(csr_config_t *config); __END_DECLS diff --git a/bsd/sys/disk.h b/bsd/sys/disk.h index 48c1ab9cc..6a4718016 100644 --- 
a/bsd/sys/disk.h +++ b/bsd/sys/disk.h @@ -42,7 +42,7 @@ * ioctl description * ------------------------------------- --------------------------------------- * DKIOCEJECT eject media - * DKIOCSYNCHRONIZECACHE flush media + * DKIOCSYNCHRONIZE flush media * * DKIOCFORMAT format media * DKIOCGETFORMATCAPACITIES get media's formattable capacities @@ -75,9 +75,12 @@ * DKIOCGETCOMMANDPOOLSIZE get device's queue depth */ +#define DK_FEATURE_BARRIER 0x00000002 #define DK_FEATURE_PRIORITY 0x00000004 #define DK_FEATURE_UNMAP 0x00000010 +#define DK_SYNCHRONIZE_OPTION_BARRIER 0x00000002 + typedef struct { uint64_t offset; @@ -109,6 +112,16 @@ typedef struct #endif /* !__LP64__ */ } dk_format_capacities_t; +typedef struct +{ + uint64_t offset; + uint64_t length; + + uint32_t options; + + uint8_t reserved0160[4]; /* reserved, clear to zero */ +} dk_synchronize_t; + typedef struct { dk_extent_t * extents; @@ -122,6 +135,21 @@ typedef struct } dk_unmap_t; +typedef struct +{ + uint64_t flags; + uint64_t hotfile_size; /* in bytes */ + uint64_t hibernate_minsize; + uint64_t swapfile_pinning; + + uint64_t padding[4]; +} dk_corestorage_info_t; + +#define DK_CORESTORAGE_PIN_YOUR_METADATA 0x00000001 +#define DK_CORESTORAGE_ENABLE_HOTFILES 0x00000002 +#define DK_CORESTORAGE_PIN_YOUR_SWAPFILE 0x00000004 + + #ifdef KERNEL #ifdef PRIVATE @@ -132,7 +160,7 @@ typedef struct #endif /* KERNEL */ #define DKIOCEJECT _IO('d', 21) -#define DKIOCSYNCHRONIZECACHE _IO('d', 22) +#define DKIOCSYNCHRONIZE _IOW('d', 22, dk_synchronize_t) #define DKIOCFORMAT _IOW('d', 26, dk_format_capacity_t) #define DKIOCGETFORMATCAPACITIES _IOWR('d', 26, dk_format_capacities_t) @@ -146,7 +174,7 @@ typedef struct #define DKIOCREQUESTIDLE _IO('d', 30) #define DKIOCUNMAP _IOW('d', 31, dk_unmap_t) -#define _DKIOCCORESTORAGE _IO('d', 32) +#define DKIOCCORESTORAGE _IOR('d', 32, dk_corestorage_info_t) #define DKIOCGETMAXBLOCKCOUNTREAD _IOR('d', 64, uint64_t) #define DKIOCGETMAXBLOCKCOUNTWRITE _IOR('d', 65, uint64_t) @@ -165,6 
+193,8 @@ typedef struct #define DKIOCGETPHYSICALBLOCKSIZE _IOR('d', 77, uint32_t) #define DKIOCGETCOMMANDPOOLSIZE _IOR('d', 78, uint32_t) +#define DKIOCSYNCHRONIZECACHE _IO('d', 22) + #ifdef KERNEL #define DK_FEATURE_FORCE_UNIT_ACCESS 0x00000001 @@ -202,7 +232,6 @@ typedef struct #endif /* !__LP64__ */ } dk_set_tier_t; -#define DKIOCGETBLOCKCOUNT32 _IOR('d', 25, uint32_t) #define DKIOCSETBLOCKSIZE _IOW('d', 24, uint32_t) #define DKIOCGETBSDUNIT _IOR('d', 27, uint32_t) #define DKIOCISSOLIDSTATE _IOR('d', 79, uint32_t) @@ -234,8 +263,16 @@ typedef struct _dk_cs_pin { dk_extent_t cp_extent; int64_t cp_flags; } _dk_cs_pin_t; -#define _DKIOCCSPINFORHIBERNATION (1 << 0) -#define _DKIOCCSPINDISCARDBLACKLIST (1 << 1) +/* The following are modifiers to _DKIOCCSPINEXTENT/cp_flags operation */ +#define _DKIOCCSPINTOFASTMEDIA (0) /* Pin extent to the fast (SSD) media */ +#define _DKIOCCSPINFORHIBERNATION (1 << 0) /* Pin of hibernation file, content not preserved */ +#define _DKIOCCSPINDISCARDBLACKLIST (1 << 1) /* Hibernation complete/error, stop blacklisting */ +#define _DKIOCCSPINTOSLOWMEDIA (1 << 2) /* Pin extent to the slow (HDD) media */ +#define _DKIOCCSTEMPORARYPIN (1 << 3) /* Relocate, but do not pin, to indicated media */ +#define _DKIOCCSHIBERNATEIMGSIZE (1 << 4) /* Anticipate/Max size of the upcoming hibernate */ +#define _DKIOCCSPINFORSWAPFILE (1 << 5) /* Pin of swap file, content not preserved */ + +#define _DKIOCCSSETLVNAME _IOW('d', 198, char[256]) #define _DKIOCCSPINEXTENT _IOW('d', 199, _dk_cs_pin_t) #define _DKIOCCSUNPINEXTENT _IOW('d', 200, _dk_cs_pin_t) #define _DKIOCGETMIGRATIONUNITBYTESIZE _IOR('d', 201, uint32_t) @@ -252,8 +289,7 @@ typedef struct _dk_cs_unmap { } _dk_cs_unmap_t; #define _DKIOCCSMAP _IOWR('d', 202, _dk_cs_map_t) -#define _DKIOCCSSETFSVNODE _IOW('d', 203, vnode_t) -#define _DKIOCCSGETFREEBYTES _IOR('d', 204, uint64_t) +// No longer used: _DKIOCCSSETFSVNODE (203) & _DKIOCCSGETFREEBYTES (204) #define _DKIOCCSUNMAP _IOWR('d', 205, 
_dk_cs_unmap_t) #endif /* PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/sys/domain.h b/bsd/sys/domain.h index 3fe847cdf..fa77f963b 100644 --- a/bsd/sys/domain.h +++ b/bsd/sys/domain.h @@ -175,11 +175,11 @@ extern void net_drain_domains(void); extern void domain_proto_mtx_lock_assert_held(void); extern void domain_proto_mtx_lock_assert_notheld(void); struct domain_guard; -typedef struct domain_guard *domain_guard_t; +typedef const struct domain_guard *domain_guard_t; extern domain_guard_t domain_guard_deploy(void); extern void domain_guard_release(domain_guard_t); struct domain_unguard; -typedef struct domain_unguard *domain_unguard_t; +typedef const struct domain_unguard *domain_unguard_t; extern domain_unguard_t domain_unguard_deploy(void); extern void domain_unguard_release(domain_unguard_t); extern struct domain_old *pffinddomain_old(int); diff --git a/bsd/sys/dtrace.h b/bsd/sys/dtrace.h index 3e39fca6a..debf2f767 100644 --- a/bsd/sys/dtrace.h +++ b/bsd/sys/dtrace.h @@ -20,7 +20,8 @@ */ /* - * Portions copyright (c) 2011, Joyent, Inc. All rights reserved. + * Portions copyright (c) 2013, Joyent, Inc. All rights reserved. + * Portions Copyright (c) 2013 by Delphix. All rights reserved. */ /* @@ -340,6 +341,7 @@ typedef enum dtrace_probespec { #define DIF_VAR_PTHREAD_SELF 0x0200 /* Apple specific PTHREAD_SELF (Not currently supported!) 
*/ #define DIF_VAR_DISPATCHQADDR 0x0201 /* Apple specific dispatch queue addr */ #define DIF_VAR_MACHTIMESTAMP 0x0202 /* mach_absolute_timestamp() */ +#define DIF_VAR_CPU 0x0203 /* cpu number */ #endif /* __APPLE __ */ #define DIF_SUBR_RAND 0 @@ -388,13 +390,14 @@ typedef enum dtrace_probespec { #define DIF_SUBR_INET_NTOA6 43 #define DIF_SUBR_TOUPPER 44 #define DIF_SUBR_TOLOWER 45 +#define DIF_SUBR_VM_KERNEL_ADDRPERM 46 #if !defined(__APPLE__) -#define DIF_SUBR_MAX 45 /* max subroutine value */ +#define DIF_SUBR_MAX 46 /* max subroutine value */ #else -#define DIF_SUBR_COREPROFILE 46 +#define DIF_SUBR_COREPROFILE 47 -#define DIF_SUBR_MAX 46 /* max subroutine value */ +#define DIF_SUBR_MAX 47 /* max subroutine value */ #endif /* __APPLE__ */ typedef uint32_t dif_instr_t; @@ -457,6 +460,7 @@ typedef struct dtrace_diftype { #define DIF_TYPE_STRING 1 /* type is a D string */ #define DIF_TF_BYREF 0x1 /* type is passed by reference */ +#define DIF_TF_BYUREF 0x2 /* user type is passed by reference */ /* * A DTrace Intermediate Format variable record is used to describe each of the @@ -1447,7 +1451,10 @@ typedef struct dtrace_module_uuids_list { #define DTRACE_MODULE_UUIDS_LIST_SIZE(count) (sizeof(dtrace_module_uuids_list_t) + ((count - 1) * sizeof(UUID))) typedef struct dtrace_procdesc { - char p_comm[MAXCOMLEN+1]; + /* Must be specified by user-space */ + char p_name[128]; + /* Set or modified by the Kernel */ + int p_name_length; pid_t p_pid; } dtrace_procdesc_t; diff --git a/bsd/sys/dtrace_impl.h b/bsd/sys/dtrace_impl.h index cbb14c0ab..02065c15a 100644 --- a/bsd/sys/dtrace_impl.h +++ b/bsd/sys/dtrace_impl.h @@ -1357,7 +1357,10 @@ extern void dtrace_copystr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); /* * DTrace restriction checks */ +extern void dtrace_restriction_policy_load(void); extern boolean_t dtrace_is_restricted(void); +extern boolean_t dtrace_is_running_apple_internal(void); +extern boolean_t dtrace_fbt_probes_restricted(void); extern boolean_t 
dtrace_can_attach_to_proc(proc_t); /* diff --git a/bsd/sys/event.h b/bsd/sys/event.h index 44cef5438..00635c1b4 100644 --- a/bsd/sys/event.h +++ b/bsd/sys/event.h @@ -112,6 +112,16 @@ struct user32_kevent { user32_addr_t udata; /* opaque user data identifier */ }; +struct kevent_internal_s { + uint64_t ident; /* identifier for this event */ + int16_t filter; /* filter for event */ + uint16_t flags; /* general flags */ + uint32_t fflags; /* filter-specific flags */ + int64_t data; /* filter-specific data */ + uint64_t udata; /* opaque user data identifier */ + uint64_t ext[2]; /* filter-specific extensions */ +}; + #endif #pragma pack() @@ -126,6 +136,20 @@ struct kevent64_s { uint64_t ext[2]; /* filter-specific extensions */ }; +#ifdef PRIVATE +struct kevent_qos_s { + uint64_t ident; /* identifier for this event */ + int16_t filter; /* filter for event */ + uint16_t flags; /* general flags */ + int32_t qos; /* quality of service */ + uint64_t udata; /* opaque user data identifier */ + uint32_t fflags; /* filter-specific flags */ + uint32_t xflags; /* extra filter-specific flags */ + int64_t data; /* filter-specific data */ + uint64_t ext[4]; /* filter-specific extensions */ +}; +#endif /* PRIVATE */ + #define EV_SET(kevp, a, b, c, d, e, f) do { \ struct kevent *__kevp__ = (kevp); \ __kevp__->ident = (a); \ @@ -148,34 +172,73 @@ struct kevent64_s { __kevp__->ext[1] = (h); \ } while(0) + +/* kevent system call flags */ +#define KEVENT_FLAG_NONE 0x00 /* no flag value */ +#define KEVENT_FLAG_IMMEDIATE 0x01 /* immediate timeout */ +#define KEVENT_FLAG_ERROR_EVENTS 0x02 /* output events only include change errors */ + +#ifdef PRIVATE + +#define EV_SET_QOS 0 +/* + * Rather than provide an EV_SET_QOS macro for kevent_qos_t structure + * initialization, we encourage use of named field initialization support + * instead. 
+ */ + +#define KEVENT_FLAG_STACK_EVENTS 0x04 /* output events treated as stack (grows down) */ +#define KEVENT_FLAG_STACK_DATA 0x08 /* output data allocated as stack (grows down) */ +#define KEVENT_FLAG_WORKQ 0x20 /* interact with the default workq kq */ + +#ifdef XNU_KERNEL_PRIVATE + +#define KEVENT_FLAG_LEGACY32 0x40 /* event data in legacy 32-bit format */ +#define KEVENT_FLAG_LEGACY64 0x80 /* event data in legacy 64-bit format */ + +#define KEVENT_FLAG_USER (KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS | \ + KEVENT_FLAG_STACK_EVENTS | KEVENT_FLAG_STACK_DATA | \ + KEVENT_FLAG_WORKQ) +#endif /* XNU_KERNEL_PRIVATE */ +#endif /* PRIVATE */ + /* actions */ -#define EV_ADD 0x0001 /* add event to kq (implies enable) */ -#define EV_DELETE 0x0002 /* delete event from kq */ -#define EV_ENABLE 0x0004 /* enable event */ -#define EV_DISABLE 0x0008 /* disable event (not reported) */ -#define EV_RECEIPT 0x0040 /* force EV_ERROR on success, data == 0 */ +#define EV_ADD 0x0001 /* add event to kq (implies enable) */ +#define EV_DELETE 0x0002 /* delete event from kq */ +#define EV_ENABLE 0x0004 /* enable event */ +#define EV_DISABLE 0x0008 /* disable event (not reported) */ /* flags */ -#define EV_ONESHOT 0x0010 /* only report one occurrence */ -#define EV_CLEAR 0x0020 /* clear event state after reporting */ -#define EV_DISPATCH 0x0080 /* disable event after reporting */ +#define EV_ONESHOT 0x0010 /* only report one occurrence */ +#define EV_CLEAR 0x0020 /* clear event state after reporting */ +#define EV_RECEIPT 0x0040 /* force EV_ERROR on success, data == 0 */ +#define EV_DISPATCH 0x0080 /* disable event after reporting */ + +#define EV_UDATA_SPECIFIC 0x0100 /* unique kevent per udata value */ + /* ... in combination with EV_DELETE */ + /* will defer delete until udata-specific */ + /* event enabled. 
EINPROGRESS will be */ + /* returned to indicate the deferral */ -#define EV_SYSFLAGS 0xF000 /* reserved by system */ -#define EV_FLAG0 0x1000 /* filter-specific flag */ -#define EV_FLAG1 0x2000 /* filter-specific flag */ +#define EV_DISPATCH2 (EV_DISPATCH | EV_UDATA_SPECIFIC) + +#define EV_SYSFLAGS 0xF000 /* reserved by system */ +#define EV_FLAG0 0x1000 /* filter-specific flag */ +#define EV_FLAG1 0x2000 /* filter-specific flag */ /* returned values */ -#define EV_EOF 0x8000 /* EOF detected */ -#define EV_ERROR 0x4000 /* error, data contains errno */ +#define EV_EOF 0x8000 /* EOF detected */ +#define EV_ERROR 0x4000 /* error, data contains errno */ /* * Filter specific flags for EVFILT_READ * * The default behavior for EVFILT_READ is to make the "read" determination - * relative to the current file descriptor read pointer. The EV_POLL - * flag indicates the determination should be made via poll(2) semantics - * (which always returns true for regular files - regardless of the amount - * of unread data in the file). + * relative to the current file descriptor read pointer. + * + * The EV_POLL flag indicates the determination should be made via poll(2) + * semantics. These semantics dictate always returning true for regular files, + * regardless of the amount of unread data in the file. * * On input, EV_OOBAND specifies that filter should actively return in the * presence of OOB on the descriptor. It implies that filter will return @@ -192,7 +255,7 @@ struct kevent64_s { * number of bytes before the current OOB marker, else data count is the number * of bytes beyond OOB marker. */ -#define EV_POLL EV_FLAG0 +#define EV_POLL EV_FLAG0 #define EV_OOBAND EV_FLAG1 /* @@ -225,6 +288,7 @@ struct kevent64_s { * realtive to the current file descriptor read pointer. 
*/ #define NOTE_LOWAT 0x00000001 /* low water mark */ + /* * data/hint fflags for EVFILT_VNODE, shared with userspace */ @@ -356,6 +420,12 @@ typedef enum vm_pressure_level { #define NOTE_DISCONNECTED 0x00001000 /* socket is disconnected */ #define NOTE_CONNINFO_UPDATED 0x00002000 /* connection info was updated */ +#define EVFILT_SOCK_LEVEL_TRIGGER_MASK \ + (NOTE_READCLOSED | NOTE_WRITECLOSED | NOTE_SUSPEND | NOTE_RESUME | NOTE_CONNECTED | NOTE_DISCONNECTED) + +#define EVFILT_SOCK_ALL_MASK \ + (NOTE_CONNRESET | NOTE_READCLOSED | NOTE_WRITECLOSED | NOTE_TIMEOUT | NOTE_NOSRCADDR | NOTE_IFDENIED | NOTE_SUSPEND | NOTE_RESUME | NOTE_KEEPALIVE | NOTE_ADAPTIVE_WTIMO | NOTE_ADAPTIVE_RTIMO | NOTE_CONNECTED | NOTE_DISCONNECTED | NOTE_CONNINFO_UPDATED) + #endif /* PRIVATE */ /* @@ -373,6 +443,19 @@ typedef enum vm_pressure_level { * receive the message and the requested (or default) message trailers. In addition, * the fflags field contains the return code normally returned by mach_msg(). * + * If MACH_RCV_MSG is specified, and the ext[1] field specifies a zero length, the + * system call argument specifying an output area (kevent_qos) will be consulted. If + * the system call specified an output data area, the user-space address + * of the received message is carved from that provided output data area (if enough + * space remains there). The address and length of each received message is + * returned in the ext[0] and ext[1] fields (respectively) of the corresponding kevent. + * + * If MACH_RCV_VOUCHER_CONTENT is specified, the contents of the message voucher is + * extracted (as specified in the xflags field) and stored in ext[2] up to ext[3] + * length. If the input length is zero, and the system call provided a data area, + * the space for the voucher content is carved from the provided space and its + * address and length is returned in ext[2] and ext[3] respectively. 
+ * * If no message receipt options were provided in the fflags field on setup, no * message is received by this call. Instead, on output, the data field simply * contains the name of the actual port detected with a message waiting. @@ -410,9 +493,10 @@ TAILQ_HEAD(kqtailq, knote); /* a list of "queued" events */ struct knote { int kn_inuse; /* inuse count */ - struct kqtailq *kn_tq; /* pointer to tail queue */ + int kn_hookid; TAILQ_ENTRY(knote) kn_tqe; /* linkage for tail queue */ - struct kqueue *kn_kq; /* which kqueue we are on */ + struct kqtailq *kn_tq; /* pointer to tail queue */ + struct kqueue *kn_kq; /* which kqueue we are on */ SLIST_ENTRY(knote) kn_link; /* linkage for search list */ SLIST_ENTRY(knote) kn_selnext; /* klist element chain */ union { @@ -423,10 +507,12 @@ struct knote { struct filterops *kn_fop; int kn_status; /* status bits */ int kn_sfflags; /* saved filter flags */ - struct kevent64_s kn_kevent; - void *kn_hook; - int kn_hookid; + union { + void *kn_hook; + uint64_t kn_hook_data; + }; int64_t kn_sdata; /* saved data field */ + struct kevent_internal_s kn_kevent; #define KN_ACTIVE 0x01 /* event has been triggered */ #define KN_QUEUED 0x02 /* event is on queue */ @@ -435,13 +521,17 @@ struct knote { #define KN_USEWAIT 0x10 /* wait for knote use */ #define KN_ATTACHING 0x20 /* event is pending attach */ #define KN_STAYQUEUED 0x40 /* force event to stay on queue */ +#define KN_DEFERDROP 0x80 /* defer drop until re-enabled */ +#define KN_TOUCH 0x100 /* Always call f_touch callback */ #define kn_id kn_kevent.ident #define kn_filter kn_kevent.filter #define kn_flags kn_kevent.flags +#define kn_qos kn_kevent.qos +#define kn_udata kn_kevent.udata #define kn_fflags kn_kevent.fflags +#define kn_xflags kn_kevent.xflags #define kn_data kn_kevent.data -#define kn_udata kn_kevent.udata #define kn_ext kn_kevent.ext #define kn_fp kn_ptr.p_fp }; @@ -456,13 +546,13 @@ struct filterops { void (*f_detach)(struct knote *kn); int (*f_event)(struct knote *kn, 
long hint); /* Optional f_touch operation, called only if !f_isfd && non-NULL */ - void (*f_touch)(struct knote *kn, struct kevent64_s *kev, long type); + void (*f_touch)(struct knote *kn, struct kevent_internal_s *kev, long type); /* Optional f_peek operation, called only if KN_STAYQUEUED is set */ unsigned (*f_peek)(struct knote *kn); }; struct proc; -struct wait_queue; +struct waitq; SLIST_HEAD(klist, knote); extern void knote_init(void); @@ -476,27 +566,45 @@ extern void klist_init(struct klist *list); extern void knote(struct klist *list, long hint); extern int knote_attach(struct klist *list, struct knote *kn); extern int knote_detach(struct klist *list, struct knote *kn); -extern int knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t wql); -extern int knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t *wqlp); +extern int knote_link_waitq(struct knote *kn, struct waitq *wq, uint64_t *reserved_link); +extern int knote_unlink_waitq(struct knote *kn, struct waitq *wq); extern void knote_fdclose(struct proc *p, int fd); extern void knote_markstayqueued(struct knote *kn); extern void knote_clearstayqueued(struct knote *kn); + +extern int kevent_qos_internal(struct proc *p, int fd, + user_addr_t changelist, int nchanges, + user_addr_t eventlist, int nevents, + user_addr_t data_out, user_size_t *data_available, + unsigned int flags, int32_t *retval); #endif /* !KERNEL_PRIVATE */ #else /* KERNEL */ +#include struct timespec; __BEGIN_DECLS int kqueue(void); -int kevent(int kq, const struct kevent *changelist, int nchanges, - struct kevent *eventlist, int nevents, - const struct timespec *timeout); -int kevent64(int kq, const struct kevent64_s *changelist, - int nchanges, struct kevent64_s *eventlist, - int nevents, unsigned int flags, - const struct timespec *timeout); +int kevent(int kq, + const struct kevent *changelist, int nchanges, + struct kevent *eventlist, int nevents, + const struct timespec 
*timeout); +int kevent64(int kq, + const struct kevent64_s *changelist, int nchanges, + struct kevent64_s *eventlist, int nevents, + unsigned int flags, + const struct timespec *timeout); + +#ifdef PRIVATE +int kevent_qos(int kq, + const struct kevent_qos_s *changelist, int nchanges, + struct kevent_qos_s *eventlist, int nevents, + void *data_out, size_t *data_available, + unsigned int flags); +#endif /* PRIVATE */ + __END_DECLS diff --git a/bsd/sys/eventvar.h b/bsd/sys/eventvar.h index 29adde75d..6ce00103a 100644 --- a/bsd/sys/eventvar.h +++ b/bsd/sys/eventvar.h @@ -59,12 +59,13 @@ #include #include #include +#include #define KQ_NEVENTS 16 /* minimize copy{in,out} calls */ #define KQEXTENT 256 /* linear growth by this amount */ struct kqueue { - wait_queue_set_t kq_wqs; /* private wait queue set */ + struct waitq_set *kq_wqs; /* private waitq set */ decl_lck_spin_data( ,kq_lock) /* kqueue lock */ int kq_state; int kq_count; /* number of queued events */ @@ -79,15 +80,17 @@ struct kqueue { #define KQ_PROCWAIT 0x04 #define KQ_KEV32 0x08 #define KQ_KEV64 0x10 +#define KQ_KEV_QOS 0x20 +#define KQ_WORKQ 0x40 }; extern struct kqueue *kqueue_alloc(struct proc *); extern void kqueue_dealloc(struct kqueue *); -typedef int (*kevent_callback_t)(struct kqueue *, struct kevent64_s *, void *); +typedef int (*kevent_callback_t)(struct kqueue *, struct kevent_internal_s *, void *); typedef void (*kqueue_continue_t)(struct kqueue *, void *, int); -extern int kevent_register(struct kqueue *, struct kevent64_s *, struct proc *); +extern int kevent_register(struct kqueue *, struct kevent_internal_s *, struct proc *); extern int kqueue_scan(struct kqueue *, kevent_callback_t, kqueue_continue_t, void *, struct timeval *, struct proc *); extern int kqueue_stat(struct kqueue *, void *, int, proc_t); diff --git a/bsd/sys/fcntl.h b/bsd/sys/fcntl.h index e8dcd03fd..08ad4e546 100644 --- a/bsd/sys/fcntl.h +++ b/bsd/sys/fcntl.h @@ -190,9 +190,14 @@ #define O_CLOFORK 0x8000000 /* implicitly set 
FD_CLOFORK */ #endif +#ifdef KERNEL +#define FUNENCRYPTED 0x10000000 +#endif + /* Data Protection Flags */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) #define O_DP_GETRAWENCRYPTED 0x0001 +#define O_DP_GETRAWUNENCRYPTED 0x0002 #endif @@ -227,8 +232,8 @@ */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define CPF_OVERWRITE 1 -#define CPF_IGNORE_MODE 2 +#define CPF_OVERWRITE 0x0001 +#define CPF_IGNORE_MODE 0x0002 #define CPF_MASK (CPF_OVERWRITE|CPF_IGNORE_MODE) #endif @@ -338,6 +343,26 @@ #define F_ADDFILESIGS_FOR_DYLD_SIM 83 /* Add signature from same file, only if it is signed by Apple (used by dyld for simulator) */ +#ifdef PRIVATE +#define F_RECYCLE 84 /* Recycle vnode; debug/development builds only */ +#endif + +#define F_BARRIERFSYNC 85 /* fsync + issue barrier to drive */ + +#ifdef PRIVATE +#define F_OFD_SETLK 90 /* Acquire or release open file description lock */ +#define F_OFD_SETLKW 91 /* (as F_OFD_SETLK but blocking if conflicting lock) */ +#define F_OFD_GETLK 92 /* Examine OFD lock */ + +#define F_OFD_SETLKWTIMEOUT 93 /* (as F_OFD_SETLKW but return if timeout) */ +#define F_OFD_GETLKPID 94 /* get record locking information */ + +#define F_SETCONFINED 95 /* "confine" OFD to process */ +#define F_GETCONFINED 96 /* is-fd-confined? 
*/ +#endif + +#define F_ADDFILESIGS_RETURN 97 /* Add signature from same file, return end offset in structure on success */ + // FS-specific fcntl()'s numbers begin at 0x00010000 and go up #define FCNTL_FS_SPECIFIC_BASE 0x00010000 @@ -365,6 +390,7 @@ #define F_PROV 0x080 /* Non-coalesced provisional lock */ #define F_WAKE1_SAFE 0x100 /* its safe to only wake one waiter */ #define F_ABORT 0x200 /* lock attempt aborted (force umount) */ +#define F_OFD_LOCK 0x400 /* Use "OFD" semantics for lock */ #endif #if PRIVATE diff --git a/bsd/sys/file_internal.h b/bsd/sys/file_internal.h index aaf248554..172aa3d04 100644 --- a/bsd/sys/file_internal.h +++ b/bsd/sys/file_internal.h @@ -92,7 +92,7 @@ struct fileproc { unsigned int f_flags; int32_t f_iocount; struct fileglob * f_fglob; - void * f_waddr; + void *f_wset; }; #define FILEPROC_NULL (struct fileproc *)0 @@ -157,6 +157,8 @@ typedef enum { #define FG_NOSIGPIPE 0x40 /* don't deliver SIGPIPE with EPIPE return */ #define FG_OFF_LOCKED 0x80 /* Used as a mutex for offset changes (for vnodes) */ #define FG_OFF_LOCKWANT 0x100 /* Somebody's wating for the lock */ +#define FG_CONFINED 0x200 /* fileglob confined to process, immutably */ +#define FG_HAS_OFDLOCK 0x400 /* Has or has had an OFD lock */ struct fileglob { LIST_ENTRY(fileglob) f_msglist;/* list of active files */ @@ -243,14 +245,16 @@ int open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, int32_t *retval); int kqueue_body(struct proc *p, fp_allocfn_t, void *cra, int32_t *retval); void fg_insertuipc(struct fileglob * fg); +boolean_t fg_insertuipc_mark(struct fileglob * fg); void fg_removeuipc(struct fileglob * fg); +boolean_t fg_removeuipc_mark(struct fileglob * fg); void unp_gc_wait(void); void procfdtbl_reservefd(struct proc * p, int fd); void procfdtbl_markclosefd(struct proc * p, int fd); void procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp); void procfdtbl_waitfd(struct proc * p, int fd); void procfdtbl_clearfd(struct proc * p, int fd); 
-boolean_t filetype_issendable(file_type_t type); +boolean_t file_issendable(struct proc * p, struct fileproc *fp); extern int fdgetf_noref(proc_t, int, struct fileproc **); extern struct fileproc *fileproc_alloc_init(void *crargs); extern void fileproc_free(struct fileproc *fp); diff --git a/bsd/sys/filedesc.h b/bsd/sys/filedesc.h index b5b80a6bc..41dc190b5 100644 --- a/bsd/sys/filedesc.h +++ b/bsd/sys/filedesc.h @@ -145,6 +145,16 @@ extern int fdavail(proc_t p, int n); (&(p)->p_fd->fd_ofiles[(fd)]) #define fdflags(p, fd) \ (&(p)->p_fd->fd_ofileflags[(fd)]) + +/* + * Accessor macros for fd flags + */ +#define FDFLAGS_GET(p, fd) (*fdflags(p, fd) & (UF_EXCLOSE|UF_FORKCLOSE)) +#define FDFLAGS_SET(p, fd, bits) \ + (*fdflags(p, fd) |= ((bits) & (UF_EXCLOSE|UF_FORKCLOSE))) +#define FDFLAGS_CLR(p, fd, bits) \ + (*fdflags(p, fd) &= ~((bits) & (UF_EXCLOSE|UF_FORKCLOSE))) + extern int falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx); #ifdef __APPLE_API_PRIVATE diff --git a/bsd/sys/fsevents.h b/bsd/sys/fsevents.h index f1208ffc3..16fb22425 100644 --- a/bsd/sys/fsevents.h +++ b/bsd/sys/fsevents.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -100,42 +100,23 @@ #define FSE_TRUNCATED_PATH (1 << 28) // the path for this item had to be truncated // ioctl's on /dev/fsevents -#if __LP64__ typedef struct fsevent_clone_args { int8_t *event_list; int32_t num_events; int32_t event_queue_depth; int32_t *fd; } fsevent_clone_args; -#else -typedef struct fsevent_clone_args { - int8_t *event_list; - int32_t pad1; - int32_t num_events; - int32_t event_queue_depth; - int32_t *fd; - int32_t pad2; -} fsevent_clone_args; -#endif #define FSEVENTS_CLONE _IOW('s', 1, fsevent_clone_args) // ioctl's on the cloned fd -#if __LP64__ #pragma pack(push, 4) typedef struct fsevent_dev_filter_args { uint32_t num_devices; dev_t *devices; } fsevent_dev_filter_args; #pragma pack(pop) -#else -typedef struct fsevent_dev_filter_args { - uint32_t num_devices; - dev_t *devices; - int32_t pad1; -} fsevent_dev_filter_args; -#endif #define FSEVENTS_DEVICE_FILTER _IOW('s', 100, fsevent_dev_filter_args) #define FSEVENTS_WANT_COMPACT_EVENTS _IO('s', 101) diff --git a/bsd/sys/guarded.h b/bsd/sys/guarded.h index dc55c7a59..6fa6a7752 100644 --- a/bsd/sys/guarded.h +++ b/bsd/sys/guarded.h @@ -54,9 +54,9 @@ extern int guarded_kqueue_np(const guardid_t *guard, u_int guardflags); extern int guarded_close_np(int fd, const guardid_t *guard); extern int change_fdguard_np(int fd, const guardid_t *guard, u_int guardflags, const guardid_t *nguard, u_int nguardflags, int *fdflagsp); -extern user_ssize_t guarded_write_np(int fd, const guardid_t *guard, user_addr_t cbuf, user_size_t nbyte); -extern user_ssize_t guarded_pwrite_np(int fd, const guardid_t *guard, user_addr_t buf, user_size_t nbyte, off_t offset); -extern user_ssize_t guarded_writev_np(int fd, const guardid_t *guard, struct iovec *iovp, u_int iovcnt); +extern ssize_t guarded_write_np(int fd, const guardid_t *guard, const void *buf, size_t nbyte); +extern ssize_t guarded_pwrite_np(int fd, const guardid_t *guard, const void *buf, size_t nbyte, off_t 
offset); +extern ssize_t guarded_writev_np(int fd, const guardid_t *guard, const struct iovec *iovp, int iovcnt); #endif /* KERNEL */ /* @@ -70,7 +70,8 @@ extern user_ssize_t guarded_writev_np(int fd, const guardid_t *guard, struct iov * File descriptor guard flavors. */ -/* Forbid close(2), and the implicit close() that a dup2(2) may do. +/* + * Forbid close(2), and the implicit close() that a dup2(2) may do. * Forces close-on-fork to be set immutably too. */ #define GUARD_CLOSE (1u << 0) diff --git a/bsd/sys/imgact.h b/bsd/sys/imgact.h index 945da513d..03db89c61 100644 --- a/bsd/sys/imgact.h +++ b/bsd/sys/imgact.h @@ -104,7 +104,6 @@ struct image_params { int ip_interp_sugid_fd; /* fd for sugid script */ /* Next two fields are for support of architecture translation... */ - char *ip_p_comm; /* optional alt p->p_comm */ struct vfs_context *ip_vfs_context; /* VFS context */ struct nameidata *ip_ndp; /* current nameidata */ thread_t ip_new_thread; /* thread for spawn/vfork */ @@ -118,6 +117,7 @@ struct image_params { void *ip_px_sfa; void *ip_px_spa; void *ip_px_smpx; /* MAC-specific spawn attrs. */ + void *ip_reserved; }; /* diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h index af75e23a1..48dfac84a 100644 --- a/bsd/sys/kdebug.h +++ b/bsd/sys/kdebug.h @@ -2,7 +2,7 @@ * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,11 +22,11 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* Copyright (c) 1997 Apple Computer, Inc. All rights reserved. +/* Copyright (c) 1997 Apple Computer, Inc. All rights reserved. * * kdebug.h - kernel_debug definitions * @@ -53,6 +53,63 @@ __BEGIN_DECLS #include #endif +/* + * Kdebug is a facility for tracing events occurring on a system. + * + * All events are tagged with a debugid, consisting of the following: + * + * +----------------+----------------+----------------------------+----+ + * | Class (8) | Subclass (8) | Code (14) |Func| + * | | | |(2) | + * +----------------+----------------+----------------------------+----+ + * \______________________________________________________________/ + * Eventid + * \___________________________________________________________________/ + * Debugid + * + * The eventid is a hierarchical ID, indicating which components an event is + * referring to. The debugid includes an eventid and two function qualifier + * bits, to determine the structural significance of an event (whether it + * starts or ends a series of grouped events). 
+ */ + +#define KDBG_CLASS_MASK (0xff000000) +#define KDBG_CLASS_OFFSET (24) +#define KDBG_CLASS_MAX (0xff) + +#define KDBG_SUBCLASS_MASK (0x00ff0000) +#define KDBG_SUBCLASS_OFFSET (16) +#define KDBG_SUBCLASS_MAX (0xff) + +/* class and subclass mask */ +#define KDBG_CSC_MASK (0xffff0000) +#define KDBG_CSC_OFFSET (KDBG_SUBCLASS_OFFSET) + +#define KDBG_CODE_MASK (0x0000fffc) +#define KDBG_CODE_OFFSET (2) +#define KDBG_CODE_MAX (0x3fff) + +#define KDBG_EVENTID_MASK (0xfffffffc) + +/* Generate an eventid corresponding to Class, SubClass, and Code. */ +#define KDBG_EVENTID(Class, SubClass, Code) \ + ((((Class) & 0xff) << KDBG_CLASS_OFFSET) | \ + (((SubClass) & 0xff) << KDBG_SUBCLASS_OFFSET) | \ + (((Code) & 0x3fff) << KDBG_CODE_OFFSET)) +/* Deprecated macro using old naming convention. */ +#define KDBG_CODE(Class, SubClass, Code) \ + KDBG_EVENTID(Class, SubClass, Code) + +/* Extract pieces of the debug code. */ +#define KDBG_EXTRACT_CLASS(Debugid) \ + ((uint8_t)(((Debugid) & KDBG_CLASS_MASK) >> KDBG_CLASS_OFFSET)) +#define KDBG_EXTRACT_SUBCLASS(Debugid) \ + ((uint8_t)(((Debugid) & KDBG_SUBCLASS_MASK) >> KDBG_SUBCLASS_OFFSET)) +#define KDBG_EXTRACT_CSC(Debugid) \ + ((uint16_t)(((Debugid) & KDBG_CSC_MASK) >> KDBG_CSC_OFFSET)) +#define KDBG_EXTRACT_CODE(Debugid) \ + ((uint16_t)(((Debugid) & KDBG_CODE_MASK) >> KDBG_CODE_OFFSET)) + #ifdef KERNEL_PRIVATE typedef enum @@ -73,22 +130,22 @@ struct kd_callback { typedef struct kd_callback kd_callback_t; /* - * Registers an IOP for participation in tracing. - * - * The registered callback function will be called with the - * supplied context as the first argument, followed by a - * kd_callback_type and an associated void* argument. - * - * The return value is a nonzero coreid that shall be used in - * kernel_debug_enter() to refer to your IOP. If the allocation - * failed, then 0 will be returned. - * - * - * Caveats: - * Note that not all callback calls will indicate a change in - * state (e.g. 
disabling trace twice would send two disable - * notifications). - * + * Registers an IOP for participation in tracing. + * + * The registered callback function will be called with the + * supplied context as the first argument, followed by a + * kd_callback_type and an associated void* argument. + * + * The return value is a nonzero coreid that shall be used in + * kernel_debug_enter() to refer to your IOP. If the allocation + * failed, then 0 will be returned. + * + * + * Caveats: + * Note that not all callback calls will indicate a change in + * state (e.g. disabling trace twice would send two disable + * notifications). + * */ extern int kernel_debug_register_callback(kd_callback_t callback); @@ -105,24 +162,14 @@ extern void kernel_debug_enter( #endif /* KERNEL_PRIVATE */ -/* The debug code consists of the following -* -* ---------------------------------------------------------------------- -*| | | |Func | -*| Class (8) | SubClass (8) | Code (14) |Qual(2)| -* ---------------------------------------------------------------------- -* The class specifies the higher level -*/ - /* The Function qualifiers */ #define DBG_FUNC_START 1 #define DBG_FUNC_END 2 #define DBG_FUNC_NONE 0 - /* The Kernel Debug Classes */ #define DBG_MACH 1 -#define DBG_NETWORK 2 +#define DBG_NETWORK 2 #define DBG_FSYSTEM 3 #define DBG_BSD 4 #define DBG_IOKIT 5 @@ -144,6 +191,8 @@ extern void kernel_debug_enter( #define DBG_XPC 41 #define DBG_ATM 42 #define DBG_ARIADNE 43 +#define DBG_DAEMON 44 +#define DBG_ENERGYTRACE 45 #define DBG_MIG 255 @@ -167,10 +216,85 @@ extern void kernel_debug_enter( * On error, -1 will be returned and errno will indicate the error. 
*/ #ifndef KERNEL -extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) __OSX_AVAILABLE_STARTING(__MAC_10_10_2, __IPHONE_NA); +extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) __OSX_AVAILABLE_STARTING(__MAC_10_10_2, __IPHONE_8_2); +#endif + +/*! + * @function kdebug_trace_string + * + * @discussion + * This function emits strings to kdebug trace along with an ID and allows + * for previously-traced strings to be overwritten and invalidated. + * + * To start tracing a string and generate an ID to use to refer to it: + * + * string_id = kdebug_trace_string(debugid, 0, "string"); + * + * To replace a string previously traced: + * + * string_id = kdebug_trace_string(debugid, string_id, "new string"); + * + * To invalidate a string ID: + * + * string_id = kdebug_trace_string(debugid, string_id, NULL); + * + * To check for errors: + * + * if ((int64_t)string_id == -1) { perror("string error"); } + * + * @param debugid + * The `debugid` to check if it's enabled before tracing and include as + * an argument in the event containing the string. + * + * Some classes or subclasses are reserved for specific uses and are not + * allowed to be used with this function. No function qualifiers are + * allowed on `debugid`. + * + * @param str_id + * When 0, a new ID will be generated and returned if tracing is + * enabled. + * + * Otherwise `str_id` must contain an ID that was previously generated + * with this function. Clients should pass NULL in `str` if `str_id` + * is no longer in use. Otherwise, the string previously mapped to + * `str_id` will be overwritten with the contents of `str`. + * + * @param str + * A NUL-terminated 'C' string containing the characters that should be + * traced alongside `str_id`. + * + * If necessary, the string will be truncated at an + * implementation-defined length. 
The string must not be the empty + * string, but can be NULL if a valid `str_id` is provided. + * + * @return + * 0 if tracing is disabled or `debugid` is being filtered out of trace. + * It can also return (int64_t)-1 if an error occurred. Otherwise, + * it returns the ID to use to refer to the string in future + * kdebug_trace(2) calls. + * + * The errors that can occur are: + * + * EINVAL + * There are function qualifiers on `debugid`, `str` is empty, or + * `str_id` was not generated by this function. + * EPERM + * The `debugid`'s class or subclass is reserved for internal use. + * EFAULT + * `str` is an invalid address or NULL when `str_id` is 0. + */ +#ifndef KERNEL +extern uint64_t kdebug_trace_string(uint32_t debugid, uint64_t str_id, + const char *str) +__OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_9_0); #endif #endif /* PRIVATE */ +#ifdef XNU_KERNEL_PRIVATE +/* Used in early boot to log strings spanning only a single tracepoint. */ +extern void kernel_debug_string_simple(const char *message); +#endif /* XNU_KERNEL_PRIVATE */ + /* **** The Kernel Debug Sub Classes for Mach (DBG_MACH) **** */ #define DBG_MACH_EXCP_KTRAP_x86 0x02 /* Kernel Traps on x86 */ #define DBG_MACH_EXCP_DFLT 0x03 /* Data Translation Fault */ @@ -192,14 +316,15 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define DBG_MACH_MSGID_INVALID 0x50 /* Messages - invalid */ #define DBG_MACH_LOCKS 0x60 /* new lock APIs */ #define DBG_MACH_PMAP 0x70 /* pmap */ -/* #define unused 0x80 */ +#define DBG_MACH_CLOCK 0x80 /* clock */ #define DBG_MACH_MP 0x90 /* MP related */ #define DBG_MACH_VM_PRESSURE 0xA0 /* Memory Pressure Events */ #define DBG_MACH_STACKSHOT 0xA1 /* Stackshot/Microstackshot subsystem */ #define DBG_MACH_SFI 0xA2 /* Selective Forced Idle (SFI) */ #define DBG_MACH_ENERGY_PERF 0xA3 /* Energy/performance resource stats */ +#define DBG_MACH_SYSDIAGNOSE 0xA4 /* sysdiagnose keychord */ -/* Codes for 
Scheduler (DBG_MACH_SCHED) */ #define MACH_SCHED 0x0 /* Scheduler */ #define MACH_STACK_ATTACH 0x1 /* stack_attach() */ #define MACH_STACK_HANDOFF 0x2 /* stack_handoff() */ @@ -212,8 +337,8 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define MACH_IDLE 0x9 /* processor idling */ #define MACH_STACK_DEPTH 0xa /* stack depth at switch */ #define MACH_MOVED 0xb /* did not use original scheduling decision */ -#define MACH_FAIRSHARE_ENTER 0xc /* move to fairshare band */ -#define MACH_FAIRSHARE_EXIT 0xd /* exit fairshare band */ +/* unused 0xc */ +/* unused 0xd */ #define MACH_FAILSAFE 0xe /* tripped fixed-pri/RT failsafe */ #define MACH_BLOCK 0xf /* thread block */ #define MACH_WAIT 0x10 /* thread wait assertion */ @@ -223,10 +348,9 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar */ #define MACH_REDISPATCH 0x16 /* "next thread" thread redispatched */ #define MACH_REMOTE_AST 0x17 /* AST signal issued to remote processor */ - #define MACH_SCHED_CHOOSE_PROCESSOR 0x18 /* Result of choose_processor */ #define MACH_DEEP_IDLE 0x19 /* deep idle on master processor */ -#define MACH_SCHED_DECAY_PRIORITY 0x1a /* timeshare thread priority decayed/restored */ +/* unused 0x1a was MACH_SCHED_DECAY_PRIORITY */ #define MACH_CPU_THROTTLE_DISABLE 0x1b /* Global CPU Throttle Disable */ #define MACH_RW_PROMOTE 0x1c /* promoted due to RW lock promotion */ #define MACH_RW_DEMOTE 0x1d /* promotion due to RW lock undone */ @@ -236,6 +360,14 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define MACH_MULTIQ_DEQUEUE 0x22 /* Result of multiq dequeue */ #define MACH_SCHED_THREAD_SWITCH 0x23 /* attempt direct context switch to hinted thread */ #define MACH_SCHED_SMT_BALANCE 0x24 /* SMT load balancing ASTs */ +#define MACH_REMOTE_DEFERRED_AST 0x25 /* Deferred AST started against remote processor */ +#define MACH_REMOTE_CANCEL_AST 0x26 /* Canceled deferred AST for remote processor */ 
+#define MACH_SCHED_CHANGE_PRIORITY 0x27 /* thread sched priority changed */ +#define MACH_SCHED_UPDATE_REC_CORES 0x28 /* Change to recommended processor bitmask */ +#define MACH_STACK_WAIT 0x29 /* Thread could not be switched-to because of kernel stack shortage */ +#define MACH_THREAD_BIND 0x2a /* Thread was bound (or unbound) to a processor */ +#define MACH_WAITQ_PROMOTE 0x2b /* Thread promoted by waitq boost */ +#define MACH_WAITQ_DEMOTE 0x2c /* Thread demoted from waitq boost */ /* Variants for MACH_MULTIQ_DEQUEUE */ #define MACH_MULTIQ_BOUND 1 @@ -248,7 +380,7 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define DBG_COW_FAULT 3 #define DBG_CACHE_HIT_FAULT 4 #define DBG_NZF_PAGE_FAULT 5 -#define DBG_GUARD_FAULT 6 +#define DBG_GUARD_FAULT 6 #define DBG_PAGEINV_FAULT 7 #define DBG_PAGEIND_FAULT 8 #define DBG_COMPRESSOR_FAULT 9 @@ -266,7 +398,7 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define MACH_IPC_VOUCHER_CREATE_ATTR_DATA 0x8 /* Attr data for newly created voucher */ #define MACH_IPC_VOUCHER_DESTROY 0x9 /* Voucher removed from global voucher hashtable */ -/* Codes for pmap (DBG_MACH_PMAP) */ +/* Codes for pmap (DBG_MACH_PMAP) */ #define PMAP__CREATE 0x0 #define PMAP__DESTROY 0x1 #define PMAP__PROTECT 0x2 @@ -283,11 +415,19 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define PMAP__FLUSH_KERN_TLBS 0xd #define PMAP__FLUSH_DELAYED_TLBS 0xe #define PMAP__FLUSH_TLBS_TO 0xf +#define PMAP__FLUSH_EPT 0x10 + +/* Codes for clock (DBG_MACH_CLOCK) */ +#define MACH_EPOCH_CHANGE 0x0 /* wake epoch change */ + /* Codes for Stackshot/Microstackshot (DBG_MACH_STACKSHOT) */ #define MICROSTACKSHOT_RECORD 0x0 #define MICROSTACKSHOT_GATHER 0x1 +/* Codes for sysdiagnose */ +#define SYSDIAGNOSE_NOTIFY_USER 0x0 + /* Codes for Selective Forced Idle (DBG_MACH_SFI) */ #define SFI_SET_WINDOW 0x0 #define SFI_CANCEL_WINDOW 0x1 @@ -330,14 +470,14 @@ extern int 
kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar /* **** The Kernel Debug Sub Classes for IOKIT (DBG_IOKIT) **** */ #define DBG_IOINTC 0 /* Interrupt controller */ -#define DBG_IOWORKLOOP 1 /* Work from work loop */ +#define DBG_IOWORKLOOP 1 /* Work from work loop */ #define DBG_IOINTES 2 /* Interrupt event source */ #define DBG_IOCLKES 3 /* Clock event source */ #define DBG_IOCMDQ 4 /* Command queue latencies */ #define DBG_IOMCURS 5 /* Memory Cursor */ #define DBG_IOMDESC 6 /* Memory Descriptors */ #define DBG_IOPOWER 7 /* Power Managerment */ -#define DBG_IOSERVICE 8 /* Matching etc. */ +#define DBG_IOSERVICE 8 /* Matching etc. */ /* **** 9-32 reserved for internal IOKit usage **** */ @@ -372,28 +512,28 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define DBG_DRVSTORAGE 1 /* Storage layers */ #define DBG_DRVNETWORK 2 /* Network layers */ #define DBG_DRVKEYBOARD 3 /* Keyboard */ -#define DBG_DRVHID 4 /* HID Devices */ +#define DBG_DRVHID 4 /* HID Devices */ #define DBG_DRVAUDIO 5 /* Audio */ #define DBG_DRVSERIAL 7 /* Serial */ -#define DBG_DRVSAM 8 /* SCSI Architecture Model layers */ -#define DBG_DRVPARALLELATA 9 /* Parallel ATA */ +#define DBG_DRVSAM 8 /* SCSI Architecture Model layers */ +#define DBG_DRVPARALLELATA 9 /* Parallel ATA */ #define DBG_DRVPARALLELSCSI 10 /* Parallel SCSI */ -#define DBG_DRVSATA 11 /* Serial ATA */ -#define DBG_DRVSAS 12 /* SAS */ +#define DBG_DRVSATA 11 /* Serial ATA */ +#define DBG_DRVSAS 12 /* SAS */ #define DBG_DRVFIBRECHANNEL 13 /* FiberChannel */ -#define DBG_DRVUSB 14 /* USB */ +#define DBG_DRVUSB 14 /* USB */ #define DBG_DRVBLUETOOTH 15 /* Bluetooth */ #define DBG_DRVFIREWIRE 16 /* FireWire */ #define DBG_DRVINFINIBAND 17 /* Infiniband */ -#define DBG_DRVGRAPHICS 18 /* Graphics */ +#define DBG_DRVGRAPHICS 18 /* Graphics */ #define DBG_DRVSD 19 /* Secure Digital */ #define DBG_DRVNAND 20 /* NAND drivers and layers */ #define DBG_SSD 21 /* SSD */ #define DBG_DRVSPI 
22 /* SPI */ /* Backwards compatibility */ -#define DBG_DRVPOINTING DBG_DRVHID /* OBSOLETE: Use DBG_DRVHID instead */ -#define DBG_DRVDISK DBG_DRVSTORAGE /* OBSOLETE: Use DBG_DRVSTORAGE instead */ +#define DBG_DRVPOINTING DBG_DRVHID /* OBSOLETE: Use DBG_DRVHID instead */ +#define DBG_DRVDISK DBG_DRVSTORAGE /* OBSOLETE: Use DBG_DRVSTORAGE instead */ /* **** The Kernel Debug Sub Classes for the DLIL Layer (DBG_DLIL) **** */ #define DBG_DLIL_STATIC 1 /* Static DLIL code */ @@ -414,7 +554,7 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define DBG_EXFAT 0xE /* ExFAT-specific events; see the exfat project */ #define DBG_MSDOS 0xF /* FAT-specific events; see the msdosfs project */ #define DBG_ACFS 0x10 /* Xsan-specific events; see the XsanFS project */ -#define DBG_THROTTLE 0x11 /* I/O Throttling events */ +#define DBG_THROTTLE 0x11 /* I/O Throttling events */ #define DBG_CONTENT_PROT 0xCF /* Content Protection Events: see bsd/sys/cprotect.h */ /* @@ -424,8 +564,10 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define DBG_HFS_UPDATE_MODTIME 0x02 #define DBG_HFS_UPDATE_CHGTIME 0x04 #define DBG_HFS_UPDATE_MODIFIED 0x08 -#define DBG_HFS_UPDATE_FORCE 0x10 +#define DBG_HFS_UPDATE_FORCE 0x10 #define DBG_HFS_UPDATE_DATEADDED 0x20 +#define DBG_HFS_UPDATE_MINOR 0x40 +#define DBG_HFS_UPDATE_SKIPPED 0x80 /* The Kernel Debug Sub Classes for BSD */ #define DBG_BSD_PROC 0x01 /* process/signals related */ @@ -446,7 +588,7 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define BSD_MEMSTAT_JETSAM_HIWAT 3 /* highwater jetsam */ #define BSD_MEMSTAT_FREEZE 4 /* freeze process */ #define BSD_MEMSTAT_LATENCY_COALESCE 5 /* delay imposed to coalesce jetsam reports */ -#define BSD_MEMSTAT_UPDATE 6 /* priority update */ +#define BSD_MEMSTAT_UPDATE 6 /* priority update */ #define BSD_MEMSTAT_IDLE_DEMOTE 7 /* idle demotion fired */ #define BSD_MEMSTAT_CLEAR_ERRORS 8 /* reset termination 
error state */ #define BSD_MEMSTAT_DIRTY_TRACK 9 /* track the process state */ @@ -466,6 +608,7 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define TRACE_DATA_NEWTHREAD (TRACEDBG_CODE(DBG_TRACE_DATA, 1)) #define TRACE_DATA_EXEC (TRACEDBG_CODE(DBG_TRACE_DATA, 2)) #define TRACE_DATA_THREAD_TERMINATE (TRACEDBG_CODE(DBG_TRACE_DATA, 3)) +#define TRACE_STRING_GLOBAL (TRACEDBG_CODE(DBG_TRACE_STRING, 0)) #define TRACE_STRING_NEWTHREAD (TRACEDBG_CODE(DBG_TRACE_STRING, 1)) #define TRACE_STRING_EXEC (TRACEDBG_CODE(DBG_TRACE_STRING, 2)) #define TRACE_PANIC (TRACEDBG_CODE(DBG_TRACE_INFO, 0)) @@ -504,11 +647,13 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar /* Kernel Debug Sub Classes for Applications (DBG_APPS) */ #define DBG_APP_LOGINWINDOW 0x03 #define DBG_APP_AUDIO 0x04 +#define DBG_APP_SIGPOST 0x0A +#define DBG_APP_APPKIT 0x0C #define DBG_APP_SAMBA 0x80 /* Kernel Debug codes for Throttling (DBG_THROTTLE) */ #define OPEN_THROTTLE_WINDOW 0x1 -#define PROCESS_THROTTLED 0x2 +#define PROCESS_THROTTLED 0x2 #define IO_THROTTLE_DISABLE 0x3 @@ -555,16 +700,16 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define BANK_TASK_INFO 0x11 /* Trace points related to bank task struct */ /* Subclasses for MACH ATM Voucher Attribute Manager (ATM) */ -#define ATM_SUBAID_INFO 0x10 -#define ATM_GETVALUE_INFO 0x20 -#define ATM_UNREGISTER_INFO 0x30 +#define ATM_SUBAID_INFO 0x10 +#define ATM_GETVALUE_INFO 0x20 +#define ATM_UNREGISTER_INFO 0x30 /* Codes for BANK_ACCOUNT_INFO */ #define BANK_SETTLE_CPU_TIME 0x1 /* Bank ledger(chit) rolled up to tasks. 
*/ /* Codes for ATM_SUBAID_INFO */ #define ATM_MIN_CALLED 0x1 -#define ATM_MIN_LINK_LIST 0x2 +#define ATM_LINK_LIST_TRIM 0x2 /* Codes for ATM_GETVALUE_INFO */ #define ATM_VALUE_REPLACED 0x1 @@ -574,11 +719,13 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define ATM_VALUE_UNREGISTERED 0x1 #define ATM_VALUE_DIFF_MAILBOX 0x2 -/**********************************************************************/ +/* Kernel Debug Sub Classes for daemons (DBG_DAEMON) */ +#define DBG_DAEMON_COREDUET 0x1 -#define KDBG_CODE(Class, SubClass, code) (((Class & 0xff) << 24) | ((SubClass & 0xff) << 16) | ((code & 0x3fff) << 2)) +/**********************************************************************/ -#define KDBG_MIGCODE(msgid) ((DBG_MIG << 24) | (((msgid) & 0x3fffff) << 2)) +#define KDBG_MIGCODE(msgid) ((DBG_MIG << KDBG_CLASS_OFFSET) | \ + (((msgid) & 0x3fffff) << KDBG_CODE_OFFSET)) #define MACHDBG_CODE(SubClass, code) KDBG_CODE(DBG_MACH, SubClass, code) #define NETDBG_CODE(SubClass, code) KDBG_CODE(DBG_NETWORK, SubClass, code) @@ -594,6 +741,7 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define QTDBG_CODE(SubClass,code) KDBG_CODE(DBG_QT, SubClass, code) #define APPSDBG_CODE(SubClass,code) KDBG_CODE(DBG_APPS, SubClass, code) #define ARIADNEDBG_CODE(SubClass, code) KDBG_CODE(DBG_ARIADNE, SubClass, code) +#define DAEMONDBG_CODE(SubClass, code) KDBG_CODE(DBG_DAEMON, SubClass, code) #define CPUPM_CODE(code) IOKDBG_CODE(DBG_IOCPUPM, code) #define KMEM_ALLOC_CODE MACHDBG_CODE(DBG_MACH_LEAKS, 0) @@ -612,23 +760,26 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar #define BANK_CODE(SubClass, code) KDBG_CODE(DBG_BANK, (SubClass), (code)) #define ATM_CODE(SubClass, code) KDBG_CODE(DBG_ATM, (SubClass), (code)) +/* Kernel Debug Macros for specific daemons */ +#define COREDUETDBG_CODE(code) DAEMONDBG_CODE(DBG_DAEMON_COREDUET, code) + /* Usage: -* kernel_debug((KDBG_CODE(DBG_NETWORK, 
DNET_PROTOCOL, 51) | DBG_FUNC_START), -* offset, 0, 0, 0,0) -* -* For ex, -* +* kernel_debug((KDBG_CODE(DBG_NETWORK, DNET_PROTOCOL, 51) | DBG_FUNC_START), +* offset, 0, 0, 0,0) +* +* For ex, +* * #include -* +* * #define DBG_NETIPINIT NETDBG_CODE(DBG_NETIP,1) -* -* +* +* * void * ip_init() * { * register struct protosw *pr; * register int i; -* +* * KERNEL_DEBUG(DBG_NETIPINIT | DBG_FUNC_START, 0,0,0,0,0) * -------- * KERNEL_DEBUG(DBG_NETIPINIT, 0,0,0,0,0) @@ -649,7 +800,7 @@ extern unsigned int kdebug_enable; /* * Infer the supported kernel debug event level from config option. * Use (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) as a guard to protect - * unaudited debug code. + * unaudited debug code. */ #define KDEBUG_LEVEL_NONE 0 #define KDEBUG_LEVEL_IST 1 @@ -657,13 +808,18 @@ extern unsigned int kdebug_enable; #define KDEBUG_LEVEL_FULL 3 #if NO_KDEBUG -#define KDEBUG_LEVEL KDEBUG_LEVEL_NONE +#define KDEBUG_LEVEL KDEBUG_LEVEL_NONE #elif IST_KDEBUG #define KDEBUG_LEVEL KDEBUG_LEVEL_IST + // currently configured for the iOS release kernel #elif KDEBUG #define KDEBUG_LEVEL KDEBUG_LEVEL_FULL #else #define KDEBUG_LEVEL KDEBUG_LEVEL_STANDARD +/* Currently, all other kernel configurations (development, etc) + build with KDEBUG_LEVEL_STANDARD. As a result, KERNEL_DEBUG_CONSTANT*() + are on by default but KERNEL_DEBUG*() are not. +*/ #endif #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) @@ -714,7 +870,21 @@ do { \ #define KERNEL_DEBUG_EARLY(x,a,b,c,d) do { } while(0) #endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) */ -/* +#ifdef KERNEL_PRIVATE + +// Abbreviated version of above +#define KDBG(x, ...) KDBG_(x, ## __VA_ARGS__, 5, 4, 3, 2, 1, 0) +#define KDBG_(x, a, b, c, d, e, n, ...) 
KDBG##n(x, a, b, c, d, e) +#define KDBG0(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, 0, 0, 0, 0, 0) +#define KDBG1(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, a, 0, 0, 0, 0) +#define KDBG2(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, a, b, 0, 0, 0) +#define KDBG3(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, a, b, c, 0, 0) +#define KDBG4(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, a, b, c, d, 0) +#define KDBG5(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, a, b, c, d, e) + +#endif // KERNEL_PRIVATE + +/* * Specify KDEBUG_PPT to indicate that the event belongs to the * limited PPT set. */ @@ -723,27 +893,83 @@ do { \ #define KDEBUG_PPT (KDEBUG_ENABLE_PPT) /* - * KERNEL_DEBUG_CONSTANT_IST events provide an audited subset of - * tracepoints for userland system tracing tools. + KERNEL_DEBUG_CONSTANT_IST events provide an audited subset of + tracepoints for userland system tracing tools. This tracing level was + created by 8857227 to protect fairplayd and other PT_DENY_ATTACH + processes. It has two effects: only KERNEL_DEBUG_CONSTANT_IST() traces + are emitted and any PT_DENY_ATTACH processes will only emit basic + traces as defined by the kernel_debug_filter() routine. 
*/ #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) #ifdef XNU_KERNEL_PRIVATE -#define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e) \ +#define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e) \ do { \ - if (__improbable(kdebug_enable & type)) \ + if (__improbable(kdebug_enable & type)) \ kernel_debug(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c, \ (uintptr_t)d,(uintptr_t)e); \ } while(0) #else /* XNU_KERNEL_PRIVATE */ -#define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e) \ +#define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e) \ do { \ - if (kdebug_enable & type) \ + if (kdebug_enable & type) \ kernel_debug(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c, \ (uintptr_t)d,(uintptr_t)e); \ } while(0) #endif /* XNU_KERNEL_PRIVATE */ + +// whether to bother calculating EnergyTracing inputs +// could change in future to see if DBG_ENERGYTRACE is active +#define ENTR_SHOULDTRACE kdebug_enable +// encode logical EnergyTracing into 32/64 KDebug trace +#define ENTR_KDTRACE(component, opcode, lifespan, id, quality, value) \ +do { \ + uint32_t kdcode__; \ + uintptr_t highval__, lowval__, mask__ = 0xffffffff; \ + kdcode__ = KDBG_CODE(DBG_ENERGYTRACE,component,opcode)|(lifespan); \ + highval__ = ((value) >> 32) & mask__; \ + lowval__ = (value) & mask__; \ + ENTR_KDTRACEFUNC(kdcode__, id, quality, highval__, lowval__); \ +} while(0) + +/* + Trace the association of two existing activations. + + An association is traced as a modification to the parent activation. + In order to fit the sub-activation's component, activation code, and + activation ID into a kdebug tracepoint, the arguments that would hold + the value are left separate, and one stores the component and opcode + of the sub-activation, while the other stores the pointer-sized + activation ID. 
+ + arg2 arg3 arg4 + +-----------------+ +~+----+----+--------+ +----------+ + |kEnTrModAssociate| | | | | | | | + +-----------------+ +~+----+----+--------+ +----------+ + 8-bits unused sub-activation ID + 8-bit sub-component + 16-bit sub-opcode + +*/ +#define kEnTrModAssociate (1 << 28) +#define ENTR_KDASSOCIATE(par_comp, par_opcode, par_act_id, \ + sub_comp, sub_opcode, sub_act_id) \ +do { \ + unsigned sub_compcode = ((unsigned)sub_comp << 16) | sub_opcode; \ + ENTR_KDTRACEFUNC(KDBG_CODE(DBG_ENERGYTRACE,par_comp,par_opcode), \ + par_act_id, kEnTrModAssociate, sub_compcode, \ + sub_act_id); \ +} while(0) + #else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */ + #define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e) do { } while(0) +#define ENTR_SHOULDTRACE FALSE +#define ENTR_KDTRACE(component, opcode, lifespan, id, quality, value) \ + do {} while (0) +#define ENTR_KDASSOCIATE(par_comp, par_opcode, par_act_id, \ + sub_comp, sub_opcode, sub_act_id) \ + do {} while (0) + #endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */ #if NO_KDEBUG @@ -773,8 +999,19 @@ extern void kernel_debug_early( uintptr_t arg3, uintptr_t arg4); -extern void kernel_debug_string( - const char *message); +#ifdef KERNEL_PRIVATE +/* + * kernel_debug_string provides the same functionality as the + * kdebug_trace_string syscall as a KPI. str_id is an in/out + * parameter that, if it's pointing to a string ID of 0, will + * receive a generated ID. If it provides a value in str_id, + * then that will be used, instead. + * + * Returns an errno indicating the type of failure. 
+ */ +extern int +kernel_debug_string(uint32_t debugid, uint64_t *str_id, const char *str); +#endif #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_FULL) #ifdef XNU_KERNEL_PRIVATE @@ -808,14 +1045,66 @@ do { \ (uintptr_t)c, (uintptr_t)d, (uintptr_t)e); \ } while(0) #endif /* XNU_KERNEL_PRIVATE */ + #else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_FULL) */ + #define KERNEL_DEBUG(x,a,b,c,d,e) do {} while (0) #define KERNEL_DEBUG1(x,a,b,c,d,e) do {} while (0) #define __kdebug_only __unused #endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_FULL) */ + +// for EnergyTracing user space & clients +#define kEnTrCompKernel 2 + +/* + EnergyTracing opcodes + + Activations use DBG_FUNC_START/END. + Events are DBG_FUNC_NONE. + */ + +/* Socket reads and writes are uniquely identified by the (sanitized) + pointer to the socket struct in question. To associate this address + with the user space file descriptor, we have a socket activation with + the FD as its identifier and the socket struct pointer as its value. +*/ +#define kEnTrActKernSocket 1 +#define kEnTrActKernSockRead 2 +#define kEnTrActKernSockWrite 3 + +#define kEnTrActKernPoll 10 +#define kEnTrActKernSelect 11 +#define kEnTrActKernKQWait 12 + +// events +#define kEnTrEvUnblocked 256 + +// EnergyTracing flags (the low-order 16 bits of 'quality') +#define kEnTrFlagNonBlocking 1 << 0 +#define kEnTrFlagNoWork 1 << 1 + +// and now the internal mechanism #ifdef KERNEL_PRIVATE + +// 20452597 requests that the trace macros not take an argument it throws away +#define KERNEL_DBG_IST_SANE(x, a, b, c, d) \ + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, x, a, b, c, d, \ + 0 /*__unused in kernel_debug()*/) +#define ENTR_KDTRACEFUNC KERNEL_DBG_IST_SANE + +// value is int64_t, quality is uint32_t +#define KERNEL_ENERGYTRACE(opcode, lifespan, id, quality, value) \ + ENTR_KDTRACE(kEnTrCompKernel, opcode, lifespan, id, \ + quality, value) +#define KERNEL_ENTR_ASSOCIATE(par_opcode, par_act_id, sub_opcode, sub_act_id) \ + ENTR_KDASSOCIATE(kEnTrCompKernel, par_opcode, par_act_id, 
\ + kEnTrCompKernel, sub_opcode, sub_act_id) + +// end EnergyTracing + + #include #define NUMPARMS 23 @@ -837,8 +1126,8 @@ void enable_wrap(uint32_t old_slowcheck, boolean_t lostevents); void release_storage_unit(int cpu, uint32_t storage_unit); int allocate_storage_unit(int cpu); -#define KDBG_CLASS_ENCODE(Class, SubClass) (((Class & 0xff) << 24) | ((SubClass & 0xff) << 16)) -#define KDBG_CLASS_DECODE(Debugid) (Debugid & 0xFFFF0000) +#define KDBG_CLASS_ENCODE(Class, SubClass) KDBG_EVENTID(Class, SubClass, 0) +#define KDBG_CLASS_DECODE(Debugid) (Debugid & KDBG_CSC_MASK) #endif /* KERNEL_PRIVATE */ @@ -875,7 +1164,7 @@ typedef struct { static inline void kdbg_set_cpu(kd_buf *kp, int cpu) { - kp->timestamp = (kp->timestamp & KDBG_TIMESTAMP_MASK) | + kp->timestamp = (kp->timestamp & KDBG_TIMESTAMP_MASK) | (((uint64_t) cpu) << KDBG_CPU_SHIFT); } static inline int @@ -896,7 +1185,7 @@ kdbg_get_timestamp(kd_buf *kp) static inline void kdbg_set_timestamp_and_cpu(kd_buf *kp, uint64_t thetime, int cpu) { - kp->timestamp = (thetime & KDBG_TIMESTAMP_MASK) | + kp->timestamp = (thetime & KDBG_TIMESTAMP_MASK) | (((uint64_t) cpu) << KDBG_CPU_SHIFT); } #else @@ -950,7 +1239,7 @@ typedef struct { unsigned int value2; unsigned int value3; unsigned int value4; - + } kd_regtype; typedef struct @@ -1041,8 +1330,79 @@ typedef struct { uint32_t TOD_usecs; } RAW_header; +// Version 3 header +// The header chunk has the tag 0x00001000 which also serves as a magic word +// that identifies the file as a version 3 trace file. The header payload is +// a set of fixed fields followed by a variable number of sub-chunks: +/* + ____________________________________________________________________________ + | Offset | Size | Field | + ---------------------------------------------------------------------------- + | 0 | 4 | Tag (0x00001000) | + | 4 | 4 | Sub-tag. Represents the version of the header. | + | 8 | 8 | Length of header payload (40+8x) | + | 16 | 8 | Time base info. 
Two 32-bit numbers, numer/denom, | + | | | for converting timestamps to nanoseconds. | + | 24 | 8 | Timestamp of trace start. | + | 32 | 8 | Wall time seconds since Unix epoch. | + | | | As returned by gettimeofday(). | + | 40 | 4 | Wall time microseconds. As returned by gettimeofday(). | + | 44 | 4 | Local time zone offset in minutes. ( " ) | + | 48 | 4 | Type of daylight savings time correction to apply. ( " ) | + | 52 | 4 | Flags. 1 = 64-bit. Remaining bits should be written | + | | | as 0 and ignored when reading. | + | 56 | 8x | Variable number of sub-chunks. None are required. | + | | | Ignore unknown chunks. | + ---------------------------------------------------------------------------- +*/ +// NOTE: The header sub-chunks are considered part of the header chunk, +// so they must be included in the header chunk’s length field. +// The CPU map is an optional sub-chunk of the header chunk. It provides +// information about the CPUs that are referenced from the trace events. +typedef struct { + uint32_t tag; + uint32_t sub_tag; + uint64_t length; + uint32_t timebase_numer; + uint32_t timebase_denom; + uint64_t timestamp; + uint64_t walltime_secs; + uint32_t walltime_usecs; + uint32_t timezone_minuteswest; + uint32_t timezone_dst; + uint32_t flags; +} kd_header_v3; + +typedef struct { + uint32_t tag; + uint32_t sub_tag; + uint64_t length; +} kd_chunk_header_v3; + #define RAW_VERSION0 0x55aa0000 #define RAW_VERSION1 0x55aa0101 +#define RAW_VERSION2 0x55aa0200 /* Only used by kperf and Instruments */ +#define RAW_VERSION3 0x00001000 + +#define V3_CONFIG 0x00001b00 +#define V3_CPU_MAP 0x00001c00 +#define V3_THREAD_MAP 0x00001d00 +#define V3_RAW_EVENTS 0x00001e00 +#define V3_NULL_CHUNK 0x00002000 + +// The current version of all kernel managed chunks is 1. The +// V3_CURRENT_CHUNK_VERSION is added to ease the simple case +// when most/all the kernel managed chunks have the same version. 
+ +#define V3_CURRENT_CHUNK_VERSION 1 +#define V3_HEADER_VERSION V3_CURRENT_CHUNK_VERSION +#define V3_CPUMAP_VERSION V3_CURRENT_CHUNK_VERSION +#define V3_THRMAP_VERSION V3_CURRENT_CHUNK_VERSION +#define V3_EVENT_DATA_VERSION V3_CURRENT_CHUNK_VERSION + +// Apis to support writing v3 chunks in the kernel +int kdbg_write_v3_chunk_header_to_buffer(void *buffer, uint32_t tag, uint32_t sub_tag, uint64_t length); +int kdbg_write_v3_chunk_to_fd(uint32_t tag, uint32_t sub_tag, uint64_t length, void *payload, uint64_t payload_size, int fd); #define KDBG_CLASSTYPE 0x10000 #define KDBG_SUBCLSTYPE 0x20000 @@ -1053,7 +1413,7 @@ typedef struct { #define KDBG_RANGECHECK 0x100000 #define KDBG_VALCHECK 0x200000 /* Check up to 4 individual values */ -#define KDBG_TYPEFILTER_CHECK ((uint32_t) 0x400000) /* Check class and subclass against a bitmap */ +#define KDBG_TYPEFILTER_CHECK ((uint32_t) 0x400000) /* Check class and subclass against a bitmap */ #define KDBG_BUFINIT 0x80000000 @@ -1064,9 +1424,11 @@ typedef struct { #define VFS_LOOKUP (FSDBG_CODE(DBG_FSRW,36)) #define VFS_LOOKUP_DONE (FSDBG_CODE(DBG_FSRW,39)) +#ifdef XNU_KERNEL_PRIVATE #if (DEVELOPMENT || DEBUG) #define KDEBUG_MOJO_TRACE 1 #endif +#endif #endif /* __APPLE_API_PRIVATE */ #endif /* PRIVATE */ diff --git a/bsd/sys/kern_control.h b/bsd/sys/kern_control.h index 6ac130dd7..3d87bce89 100644 --- a/bsd/sys/kern_control.h +++ b/bsd/sys/kern_control.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004, 2012-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2004, 2012-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -187,7 +187,7 @@ struct kctlstat { u_int64_t kcs_send_list_fail __attribute__((aligned(8))); u_int64_t kcs_enqueue_fail __attribute__((aligned(8))); u_int64_t kcs_enqueue_fullsock __attribute__((aligned(8))); - + u_int64_t kcs_bad_kctlref __attribute__((aligned(8))); }; #endif /* PRIVATE */ @@ -560,8 +560,20 @@ errno_t ctl_enqueuembuf_list(kern_ctl_ref kctlref, u_int32_t unit, mbuf_t m_list, u_int32_t flags, mbuf_t *m_remain); +/*! + @function ctl_getenqueuepacketcount + @discussion Retrieve the number of packets in the socket + receive buffer. + @param kctlref The control reference of the kernel control. + @param unit The unit number of the kernel control instance. + @param pcnt The address where to return the current count. + @result 0 - Success; the packet count is returned to caller. + EINVAL - Invalid parameters. + */ +errno_t +ctl_getenqueuepacketcount(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *pcnt); -#endif +#endif /* PRIVATE */ /*! 
@function ctl_getenqueuespace @@ -601,43 +613,11 @@ ctl_getenqueuereadable(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *differen * internal structure maintained for each register controller */ struct ctl_cb; +struct kctl; struct socket; +struct socket_info; -struct kctl { - TAILQ_ENTRY(kctl) next; /* controller chain */ - - /* controller information provided when registering */ - char name[MAX_KCTL_NAME]; /* unique nke identifier, provided by DTS */ - u_int32_t id; - u_int32_t reg_unit; - - /* misc communication information */ - u_int32_t flags; /* support flags */ - u_int32_t recvbufsize; /* request more than the default buffer size */ - u_int32_t sendbufsize; /* request more than the default buffer size */ - - /* Dispatch functions */ - ctl_connect_func connect; /* Make contact */ - ctl_disconnect_func disconnect; /* Break contact */ - ctl_send_func send; /* Send data to nke */ - ctl_send_list_func send_list; /* Send list of packets */ - ctl_setopt_func setopt; /* set kctl configuration */ - ctl_getopt_func getopt; /* get kctl configuration */ - ctl_rcvd_func rcvd; /* Notify nke when client reads data */ - - TAILQ_HEAD(, ctl_cb) kcb_head; - u_int32_t lastunit; -}; - -struct ctl_cb { - TAILQ_ENTRY(ctl_cb) next; /* controller chain */ - lck_mtx_t *mtx; - struct socket *so; /* controlling socket */ - struct kctl *kctl; /* back pointer to controller */ - void *userdata; - u_int32_t unit; - u_int32_t usecount; -}; +void kctl_fill_socketinfo(struct socket *, struct socket_info *); u_int32_t ctl_id_by_name(const char *name); errno_t ctl_name_by_id(u_int32_t id, char *out_name, size_t maxsize); diff --git a/bsd/sys/kern_memorystatus.h b/bsd/sys/kern_memorystatus.h index 4e8d01c3e..c74003291 100644 --- a/bsd/sys/kern_memorystatus.h +++ b/bsd/sys/kern_memorystatus.h @@ -184,17 +184,21 @@ int memorystatus_control(uint32_t command, int32_t pid, uint32_t flags, void *bu #define MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES 2 #define MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT 3 
#define MEMORYSTATUS_CMD_GET_PRESSURE_STATUS 4 -#define MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK 5 -#define MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT 6 - -/* Group Commands */ -#define MEMORYSTATUS_CMD_GRP_SET_PROPERTIES 7 +#define MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK 5 /* Set active memory limit = inactive memory limit, both non-fatal */ +#define MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT 6 /* Set active memory limit = inactive memory limit, both fatal */ +#define MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES 7 /* Set memory limits plus attributes independently */ +#define MEMORYSTATUS_CMD_GET_MEMLIMIT_PROPERTIES 8 /* Get memory limits plus attributes */ +#define MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE 9 /* Set the task's status as a privileged listener w.r.t memory notifications */ +#define MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_DISABLE 10 /* Reset the task's status as a privileged listener w.r.t memory notifications */ +/* Commands that act on a group of processes */ +#define MEMORYSTATUS_CMD_GRP_SET_PROPERTIES 100 #if PRIVATE /* Test commands */ /* Trigger forced jetsam */ -#define MEMORYSTATUS_CMD_TEST_JETSAM 1000 +#define MEMORYSTATUS_CMD_TEST_JETSAM 1000 +#define MEMORYSTATUS_CMD_TEST_JETSAM_SORT 1001 /* Panic on jetsam options */ typedef struct memorystatus_jetsam_panic_options { @@ -202,17 +206,100 @@ typedef struct memorystatus_jetsam_panic_options { uint32_t mask; } memorystatus_jetsam_panic_options_t; -#define MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS 1001 +#define MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS 1002 + +/* Select priority band sort order */ +#define JETSAM_SORT_NOSORT 0 +#define JETSAM_SORT_DEFAULT 1 + #endif /* PRIVATE */ +/* + * For use with memorystatus_control: + * MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT + * + * A jetsam snapshot is initialized when a non-idle + * jetsam event occurs. The data is held in the + * buffer until it is reaped. This is the default + * behavior. 
+ * + * Flags change the default behavior: + * Demand mode - this is an on_demand snapshot, + * meaning data is populated upon request. + * + * Boot mode - this is a snapshot of + * memstats collected before loading the + * init program. Once collected, these + * stats do not change. In this mode, + * the snapshot entry_count is always 0. + * + * Snapshots are inherently racy between request + * for buffer size and actual data compilation. +*/ + +/* Flags */ +#define MEMORYSTATUS_SNAPSHOT_ON_DEMAND 0x1 /* A populated snapshot buffer is returned on demand */ +#define MEMORYSTATUS_SNAPSHOT_AT_BOOT 0x2 /* Returns a snapshot with memstats collected at boot */ + + +/* + * For use with memorystatus_control: + * MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES + */ typedef struct memorystatus_priority_properties { int32_t priority; uint64_t user_data; } memorystatus_priority_properties_t; +/* + * For use with memorystatus_control: + * MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES + * MEMORYSTATUS_CMD_GET_MEMLIMIT_PROPERTIES + */ +typedef struct memorystatus_memlimit_properties { + int32_t memlimit_active; /* jetsam memory limit (in MB) when process is active */ + uint32_t memlimit_active_attr; + int32_t memlimit_inactive; /* jetsam memory limit (in MB) when process is inactive */ + uint32_t memlimit_inactive_attr; +} memorystatus_memlimit_properties_t; + +#define MEMORYSTATUS_MEMLIMIT_ATTR_FATAL 0x1 /* if set, exceeding the memlimit is fatal */ + + #ifdef XNU_KERNEL_PRIVATE -/* p_memstat_state flags */ +/* + * A process will be killed immediately if it crosses a memory limit marked as fatal. + * Fatal limit types are the + * - default system-wide task limit + * - per-task custom memory limit + * + * A process with a non-fatal memory limit can exceed that limit, but becomes an early + * candidate for jetsam when the device is under memory pressure. 
+ * Non-fatal limit types are the + * - high-water-mark limit + * + * P_MEMSTAT_MEMLIMIT_BACKGROUND is translated in posix_spawn as + * the fatal system_wide task limit when active + * non-fatal inactive limit based on limit provided. + * This is necessary for backward compatibility until the + * flag can be considered obsolete. + * + * Processes that opt into dirty tracking are evaluated + * based on clean vs dirty state. + * dirty ==> active + * clean ==> inactive + * + * Processes that do not opt into dirty tracking are + * evaluated based on priority level. + * Foreground or above ==> active + * Below Foreground ==> inactive + */ + +/* + * p_memstat_state flag holds + * - in kernel process state and memlimit state + */ #define P_MEMSTAT_SUSPENDED 0x00000001 #define P_MEMSTAT_FROZEN 0x00000002 @@ -227,12 +314,21 @@ typedef struct memorystatus_priority_properties { #define P_MEMSTAT_PRIOR_THAW 0x00000400 #define P_MEMSTAT_MEMLIMIT_BACKGROUND 0x00000800 /* Task has a memory limit for when it's in the background. Used for a process' "high water mark".*/ #define P_MEMSTAT_INTERNAL 0x00001000 -#define P_MEMSTAT_FATAL_MEMLIMIT 0x00002000 /* cross this limit and the process is killed. Types: system-wide default task memory limit and per-task custom memory limit. 
*/ +#define P_MEMSTAT_FATAL_MEMLIMIT 0x00002000 /* current fatal state of the process's memlimit */ +#define P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL 0x00004000 /* if set, exceeding limit is fatal when the process is active */ +#define P_MEMSTAT_MEMLIMIT_ACTIVE_EXC_TRIGGERED 0x00008000 /* if set, suppresses high-water-mark EXC_RESOURCE, allows one hit per active limit */ +#define P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL 0x00010000 /* if set, exceeding limit is fatal when the process is inactive */ +#define P_MEMSTAT_MEMLIMIT_INACTIVE_EXC_TRIGGERED 0x00020000 /* if set, suppresses high-water-mark EXC_RESOURCE, allows one hit per inactive limit */ extern void memorystatus_init(void) __attribute__((section("__TEXT, initcode"))); +extern void memorystatus_init_at_boot_snapshot(void); + extern int memorystatus_add(proc_t p, boolean_t locked); -extern int memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background, boolean_t is_fatal_limit); +extern int memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, + boolean_t update_memlimit, int32_t memlimit_active, boolean_t memlimit_active_is_fatal, + int32_t memlimit_inactive, boolean_t memlimit_inactive_is_fatal, boolean_t memlimit_background); + extern int memorystatus_remove(proc_t p, boolean_t locked); extern int memorystatus_dirty_track(proc_t p, uint32_t pcontrol); @@ -257,6 +353,8 @@ void memorystatus_knote_unregister(struct knote *kn); #if CONFIG_JETSAM +int memorystatus_get_pressure_status_kdp(void); + typedef enum memorystatus_policy { kPolicyDefault = 0x0, kPolicyMoreFree = 0x1, @@ -274,6 +372,7 @@ boolean_t memorystatus_kill_on_FC_thrashing(boolean_t async); boolean_t memorystatus_kill_on_vnode_limit(void); void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb); +void proc_memstat_terminated(proc_t p, boolean_t set); void jetsam_on_ledger_cpulimit_exceeded(void); void 
memorystatus_pages_update(unsigned int pages_avail); @@ -292,8 +391,7 @@ boolean_t memorystatus_idle_exit_from_VM(void); #define FREEZE_SUSPENDED_THRESHOLD_LOW 2 #define FREEZE_SUSPENDED_THRESHOLD_DEFAULT 4 -#define FREEZE_DAILY_MB_MAX 1024 -#define FREEZE_DAILY_PAGEOUTS_MAX (FREEZE_DAILY_MB_MAX * (1024 * 1024 / PAGE_SIZE)) +#define FREEZE_DAILY_MB_MAX_DEFAULT 1024 typedef struct throttle_interval_t { uint32_t mins; @@ -308,7 +406,7 @@ extern boolean_t memorystatus_freeze_enabled; extern int memorystatus_freeze_wakeup; extern void memorystatus_freeze_init(void) __attribute__((section("__TEXT, initcode"))); - +extern int memorystatus_freeze_process_sync(proc_t p); #endif /* CONFIG_FREEZE */ #if VM_PRESSURE_EVENTS @@ -316,6 +414,8 @@ extern void memorystatus_freeze_init(void) __attribute__((section("__TEXT, initc extern kern_return_t memorystatus_update_vm_pressure(boolean_t); #if CONFIG_MEMORYSTATUS +/* Flags */ +extern int memorystatus_low_mem_privileged_listener(uint32_t op_flags); extern int memorystatus_send_pressure_note(int pid); extern boolean_t memorystatus_is_foreground_locked(proc_t p); extern boolean_t memorystatus_bg_pressure_eligible(proc_t p); diff --git a/bsd/sys/kern_tests.h b/bsd/sys/kern_tests.h deleted file mode 100644 index df71d9e86..000000000 --- a/bsd/sys/kern_tests.h +++ /dev/null @@ -1,5 +0,0 @@ -#ifndef _KERN_TESTS_H -#define _KERN_TESTS_H - - -#endif /* !defined(_KERN_TESTS_H) */ diff --git a/bsd/sys/kpi_mbuf.h b/bsd/sys/kpi_mbuf.h index e4ac6702c..50cda45db 100644 --- a/bsd/sys/kpi_mbuf.h +++ b/bsd/sys/kpi_mbuf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2014 Apple Inc. All rights reserved. + * Copyright (c) 2008-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -1628,6 +1628,22 @@ extern errno_t mbuf_pkthdr_aux_flags(mbuf_t mbuf, */ extern errno_t mbuf_get_driver_scratch(mbuf_t m, u_int8_t **area, size_t *area_ln); + +/* + @function mbuf_get_unsent_data_bytes + @discussion Returns the amount of data that is waiting to be sent + on this interface. This is a private SPI used by cellular + interface as an indication of future activity on that + interface. + @param m The mbuf containing the packet header + @param unsent_data A pointer to an integer where the value of + unsent data will be set. + @result 0 upon success otherwise the errno error. If the mbuf + packet header does not have valid data bytes, the error + code will be EINVAL + */ +extern errno_t mbuf_get_unsent_data_bytes(const mbuf_t m, + u_int32_t *unsent_data); #endif /* KERNEL_PRIVATE */ #ifdef XNU_KERNEL_PRIVATE diff --git a/bsd/netinet/in_dhcp.h b/bsd/sys/kpi_private.h similarity index 69% rename from bsd/netinet/in_dhcp.h rename to bsd/sys/kpi_private.h index 3a898af38..bbd67a2f2 100644 --- a/bsd/netinet/in_dhcp.h +++ b/bsd/sys/kpi_private.h @@ -1,9 +1,5 @@ -#ifndef _NETINET_IN_DHCP_H -#define _NETINET_IN_DHCP_H -#include - /* - * Copyright (c) 2007 Apple Inc. All rights reserved. + * Copyright (c) 2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,20 +26,27 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#ifndef _SYS_KPI_PRIVATE_H +#define _SYS_KPI_PRIVATE_H + /* - * in_dhcp.h - * - definitions for in_dhcp.c + * Assorted odds and ends for exported private KPI (internal use only) */ -extern int -inet_aifaddr(struct socket * so, const char * name, - const struct in_addr * addr, - const struct in_addr * mask, - const struct in_addr * broadcast); +#ifdef KERNEL +#include + +__BEGIN_DECLS + +#ifdef KERNEL_PRIVATE + +/* kernel-exported qsort */ +void kx_qsort (void* array, size_t nm, size_t member_size, int (*)(const void * , const void *)); + +#endif /* KERNEL_PRIVATE */ + +__END_DECLS -extern int -dhcp(struct ifnet * ifp, struct in_addr * iaddr_p, int max_try, - struct in_addr * netmask_p, struct in_addr * router_p, - struct proc * procp); -#endif /* _NETINET_IN_DHCP_H */ +#endif /* KERNEL */ +#endif /* !_SYS_KPI_PRIVATE_H */ diff --git a/bsd/sys/kpi_socket.h b/bsd/sys/kpi_socket.h index 8a8f186b3..ff2475e98 100644 --- a/bsd/sys/kpi_socket.h +++ b/bsd/sys/kpi_socket.h @@ -248,12 +248,12 @@ extern errno_t sock_settclassopt(socket_t so, const void* optval, size_t optlen) extern errno_t sock_gettclassopt(socket_t so, void* optval, size_t* optlen); #ifdef XNU_KERNEL_PRIVATE -extern void socket_set_traffic_mgt_flags_locked(socket_t so, u_int32_t flags); -extern void socket_clear_traffic_mgt_flags_locked(socket_t so, u_int32_t flags); +extern void socket_set_traffic_mgt_flags_locked(socket_t so, u_int8_t flags); +extern void socket_clear_traffic_mgt_flags_locked(socket_t so, u_int8_t flags); #endif /* XNU_KERNEL_PRIVATE */ #ifdef BSD_KERNEL_PRIVATE -extern void socket_set_traffic_mgt_flags(socket_t so, u_int32_t flags); -extern void socket_clear_traffic_mgt_flags(socket_t so, u_int32_t flags); +extern void socket_set_traffic_mgt_flags(socket_t so, u_int8_t flags); +extern void socket_clear_traffic_mgt_flags(socket_t so, u_int8_t flags); extern errno_t 
socket_defunct(struct proc *, socket_t so, int); extern errno_t sock_receive_internal(socket_t, struct msghdr *, mbuf_t *, int, size_t *); diff --git a/bsd/sys/kpi_socketfilter.h b/bsd/sys/kpi_socketfilter.h index c52153117..14fef1c2a 100644 --- a/bsd/sys/kpi_socketfilter.h +++ b/bsd/sys/kpi_socketfilter.h @@ -564,6 +564,7 @@ struct sflt_filter { desciption of domain, type, and protocol. @param filter A structure describing the filter. @param domain The protocol domain these filters will be attached to. + Only PF_INET & PF_INET6 domains are supported. @param type The socket type these filters will be attached to. @param protocol The protocol these filters will be attached to. @result 0 on success otherwise the errno error. diff --git a/bsd/sys/lctx.h b/bsd/sys/lctx.h index fa374a0e8..554176c4a 100644 --- a/bsd/sys/lctx.h +++ b/bsd/sys/lctx.h @@ -1,23 +1,22 @@ /* - * $Id: lctx.h,v 1.1.6.2 2006/03/03 23:20:46 msteil Exp $ + * TODO: remove this file */ - #ifndef _SYS_LCTX_H_ #define _SYS_LCTX_H_ -#include - #ifndef KERNEL static __inline pid_t getlcid(pid_t pid) { - return (syscall(SYS_getlcid, pid)); + errno = ENOSYS; + return -1; } static __inline int setlcid(pid_t pid, pid_t lcid) { - return (syscall(SYS_setlcid, pid, lcid)); + errno = ENOSYS; + return -1; } #endif diff --git a/bsd/sys/loadable_fs.h b/bsd/sys/loadable_fs.h index 123e742fb..a5e736101 100644 --- a/bsd/sys/loadable_fs.h +++ b/bsd/sys/loadable_fs.h @@ -59,10 +59,6 @@ #define FS_DIR_LOCATION "/System/Library/Filesystems" #define FS_DIR_SUFFIX ".fs" #define FS_UTIL_SUFFIX ".util" -#define FS_OPEN_SUFFIX ".openfs.tiff" -#define FS_CLOSED_SUFFIX ".fs.tiff" -#define FS_NAME_SUFFIX ".name" -#define FS_LABEL_SUFFIX ".label" /* * .util program commands - all sent in the form "-p" or "-m" ... as argv[1]. 
@@ -79,9 +75,6 @@ #define FSUC_REPAIR 'r' /* repair ('fsck') FS */ /* example usage: foo.util -r fd0 removable */ -#define FSUC_INITIALIZE 'i' /* initialize FS */ - /* example usage: foo.util -i fd0 removable */ - #define FSUC_UNMOUNT 'u' /* unmount FS */ /* example usage: foo.util -u fd0 /bar */ @@ -113,11 +106,4 @@ #define DEVICE_REMOVABLE "removable" #define DEVICE_FIXED "fixed" -/* - * Additional parameters to the mount command - used by WSM when they - * appear in the /etc/mtab file. - */ -#define MNTOPT_FS "filesystem=" /* e.g. "filesystem=DOS" */ -#define MNTOPT_REMOVABLE "removable" - #endif /* _SYS_LOADABLE_FS_ */ diff --git a/bsd/sys/lockf.h b/bsd/sys/lockf.h index c12abebf0..e20bf8733 100644 --- a/bsd/sys/lockf.h +++ b/bsd/sys/lockf.h @@ -102,7 +102,8 @@ struct lockf { TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */ #if IMPORTANCE_INHERITANCE int lf_boosted; /* Is the owner of the lock boosted */ -#endif /* IMPORTANCE_INHERITANCE */ +#endif + struct proc *lf_owner; /* The proc that did the SETLK, if known */ }; #pragma pack() diff --git a/bsd/sys/malloc.h b/bsd/sys/malloc.h index ef423f531..713210e1d 100644 --- a/bsd/sys/malloc.h +++ b/bsd/sys/malloc.h @@ -197,8 +197,8 @@ #define M_MACTEMP 104 /* MAC framework */ #define M_SBUF 105 /* string buffers */ #define M_EXTATTR 106 /* extended attribute */ -#define M_LCTX 107 /* process login context */ -/* M_TRAFFIC_MGT 108 */ +#define M_SELECT 107 /* per-thread select memory */ +/* M_TRAFFIC_MGT 108 */ #if HFS_COMPRESSION #define M_DECMPFS_CNODE 109 /* decmpfs cnode structures */ #endif /* HFS_COMPRESSION */ @@ -216,8 +216,9 @@ #define M_NECP_IP_POLICY 121 /* NECP IP-level policies */ #define M_FD_VN_DATA 122 /* Per fd vnode data */ #define M_FD_DIRBUF 123 /* Directory entries' buffer */ +#define M_NETAGENT 124 /* Network Agents */ -#define M_LAST 124 /* Must be last type + 1 */ +#define M_LAST 125 /* Must be last type + 1 */ #else /* BSD_KERNEL_PRIVATE */ @@ -259,6 +260,67 @@ extern struct 
kmemstats kmemstats[]; * The malloc/free primatives used * by the BSD kernel code. */ +#if XNU_KERNEL_PRIVATE + +#include + +#define MALLOC(space, cast, size, type, flags) \ + ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))); \ + (space) = (cast)__MALLOC(size, type, flags, &site); }) +#define REALLOC(space, cast, addr, size, type, flags) \ + ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))); \ + (space) = (cast)__REALLOC(addr, size, type, flags, &site); }) + +#define _MALLOC(size, type, flags) \ + ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))); \ + __MALLOC(size, type, flags, &site); }) +#define _REALLOC(addr, size, type, flags) \ + ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))); \ + __REALLOC(addr, size, type, flags, &site); }) + +#define _MALLOC_ZONE(size, type, flags) \ + ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))); \ + __MALLOC_ZONE(size, type, flags, &site); }) + +#define FREE(addr, type) \ + _FREE((void *)addr, type) + +#define MALLOC_ZONE(space, cast, size, type, flags) \ + (space) = (cast)_MALLOC_ZONE(size, type, flags) + +#define FREE_ZONE(addr, size, type) \ + _FREE_ZONE((void *)addr, size, type) + +extern void *__MALLOC( + size_t size, + int type, + int flags, + vm_allocation_site_t *site); + +extern void _FREE( + void *addr, + int type); + +extern void *__REALLOC( + void *addr, + size_t size, + int type, + int flags, + vm_allocation_site_t *site); + +extern void *__MALLOC_ZONE( + size_t size, + int type, + int flags, + vm_allocation_site_t *site); + +extern void _FREE_ZONE( + void *elem, + size_t size, + int type); + +#else /* XNU_KERNEL_PRIVATE */ + #define MALLOC(space, cast, size, type, flags) \ (space) = (cast)_MALLOC(size, type, flags) @@ -299,6 +361,9 @@ extern void _FREE_ZONE( size_t size, int type); + +#endif /* !XNU_KERNEL_PRIVATE */ + #endif /* KERNEL */ #endif /* _SYS_MALLOC_H_ */ diff 
--git a/bsd/sys/mbuf.h b/bsd/sys/mbuf.h index 5a7913ea8..b5b7ee802 100644 --- a/bsd/sys/mbuf.h +++ b/bsd/sys/mbuf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2014 Apple Inc. All rights reserved. + * Copyright (c) 1999-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -103,18 +103,27 @@ #define _MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ #define _MHLEN (_MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ -#define NMBPBGSHIFT (MBIGCLSHIFT - MSIZESHIFT) -#define NMBPBG (1 << NMBPBGSHIFT) /* # of mbufs per big cl */ +#define NMBPGSHIFT (PAGE_SHIFT - MSIZESHIFT) +#define NMBPG (1 << NMBPGSHIFT) /* # of mbufs per page */ -#define NCLPBGSHIFT (MBIGCLSHIFT - MCLSHIFT) -#define NCLPBG (1 << NCLPBGSHIFT) /* # of cl per big cl */ +#define NCLPGSHIFT (PAGE_SHIFT - MCLSHIFT) +#define NCLPG (1 << NCLPGSHIFT) /* # of cl per page */ -#define NMBPCLSHIFT (NMBPBGSHIFT - NCLPBGSHIFT) +#define NBCLPGSHIFT (PAGE_SHIFT - MBIGCLSHIFT) +#define NBCLPG (1 << NBCLPGSHIFT) /* # of big cl per page */ + +#define NMBPCLSHIFT (MCLSHIFT - MSIZESHIFT) #define NMBPCL (1 << NMBPCLSHIFT) /* # of mbufs per cl */ -#define NCLPJCLSHIFT ((M16KCLSHIFT - MBIGCLSHIFT) + NCLPBGSHIFT) +#define NCLPJCLSHIFT (M16KCLSHIFT - MCLSHIFT) #define NCLPJCL (1 << NCLPJCLSHIFT) /* # of cl per jumbo cl */ +#define NCLPBGSHIFT (MBIGCLSHIFT - MCLSHIFT) +#define NCLPBG (1 << NCLPBGSHIFT) /* # of cl per big cl */ + +#define NMBPBGSHIFT (MBIGCLSHIFT - MSIZESHIFT) +#define NMBPBG (1 << NMBPBGSHIFT) /* # of mbufs per big cl */ + /* * Macros for type conversion * mtod(m,t) - convert mbuf pointer to data pointer of correct type @@ -273,8 +282,9 @@ struct proto_mtag { * NECP specific mbuf tag. 
*/ struct necp_mtag { - uint32_t necp_policy_id; - uint32_t necp_last_interface_index; + u_int32_t necp_policy_id; + u_int32_t necp_last_interface_index; + u_int32_t necp_route_rule_id; }; /* @@ -350,11 +360,13 @@ struct pkthdr { #define dst_ifindex _pkt_iaif.dst #define dst_iff _pkt_iaif.dst_flags u_int64_t pkt_ifainfo; /* data field used by ifainfo */ + u_int32_t pkt_unsent_databytes; /* unsent data */ }; #if MEASURE_BW u_int64_t pkt_bwseq; /* sequence # */ #endif /* MEASURE_BW */ u_int64_t pkt_enqueue_ts; /* enqueue time */ + /* * Tags (external and built-in) */ @@ -436,6 +448,10 @@ struct pkthdr { #define PKTF_FORWARDED 0x10000 /* pkt was forwarded from another i/f */ #define PKTF_PRIV_GUARDED 0x20000 /* pkt_mpriv area guard enabled */ #define PKTF_KEEPALIVE 0x40000 /* pkt is kernel-generated keepalive */ +#define PKTF_SO_REALTIME 0x80000 /* data is realtime traffic */ +#define PKTF_VALID_UNSENT_DATA 0x100000 /* unsent data is valid */ +#define PKTF_TCP_REXMT 0x200000 /* packet is TCP retransmission */ + /* flags related to flow control/advisory and identification */ #define PKTF_FLOW_MASK \ (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK) @@ -721,7 +737,8 @@ do { \ * If how is M_DONTWAIT and allocation fails, the original mbuf chain * is freed and m is set to NULL. 
*/ -#define M_PREPEND(m, plen, how) ((m) = m_prepend_2((m), (plen), (how))) +#define M_PREPEND(m, plen, how, align) \ + ((m) = m_prepend_2((m), (plen), (how), (align))) /* change mbuf to new type */ #define MCHTYPE(m, t) m_mchtype(m, t) @@ -1114,7 +1131,7 @@ extern struct mbuf *m_getpacket(void); extern struct mbuf *m_getpackets(int, int, int); extern struct mbuf *m_mclget(struct mbuf *, int); extern void *m_mtod(struct mbuf *); -extern struct mbuf *m_prepend_2(struct mbuf *, int, int); +extern struct mbuf *m_prepend_2(struct mbuf *, int, int, int); extern struct mbuf *m_pullup(struct mbuf *, int); extern struct mbuf *m_split(struct mbuf *, int, int); extern void m_mclfree(caddr_t p); @@ -1180,6 +1197,9 @@ extern void m_mclfree(caddr_t p); #define MBUF_TC2SCVAL(_tc) ((_tc) << 7) #define IS_MBUF_SC_BACKGROUND(_sc) (((_sc) == MBUF_SC_BK_SYS) || \ ((_sc) == MBUF_SC_BK)) +#define IS_MBUF_SC_REALTIME(_sc) ((_sc) >= MBUF_SC_AV && (_sc) <= MBUF_SC_VO) +#define IS_MBUF_SC_BESTEFFORT(_sc) ((_sc) == MBUF_SC_BE || \ + (_sc) == MBUF_SC_RD || (_sc) == MBUF_SC_OAM) #define SCIDX_BK_SYS MBUF_SCIDX(MBUF_SC_BK_SYS) #define SCIDX_BK MBUF_SCIDX(MBUF_SC_BK) @@ -1221,8 +1241,8 @@ extern void m_mclfree(caddr_t p); c == SCVAL_RV || c == SCVAL_VI || c == SCVAL_VO || \ c == SCVAL_CTL) -extern union mbigcluster *mbutl; /* start VA of mbuf pool */ -extern union mbigcluster *embutl; /* end VA of mbuf pool */ +extern unsigned char *mbutl; /* start VA of mbuf pool */ +extern unsigned char *embutl; /* end VA of mbuf pool */ extern unsigned int nmbclusters; /* number of mapped clusters */ extern int njcl; /* # of jumbo clusters */ extern int njclbytes; /* size of a jumbo cluster */ diff --git a/bsd/sys/memory_maintenance.h b/bsd/sys/memory_maintenance.h new file mode 100644 index 000000000..1de00c6eb --- /dev/null +++ b/bsd/sys/memory_maintenance.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_MEMORY_MAINTENANCE_H_ +#define _SYS_MEMORY_MAINTENANCE_H_ + +/* + * File: sys/memory_maintenance.h + * Author: Samuel Gosselin [sgosselin@apple.com] + * + * Header file for Memory Maintenance support. + */ + +/* + * The kern.darkboot sysctl can be controlled from kexts or userspace. If + * processes want to change the sysctl value, they require the + * 'com.apple.kern.darkboot' entitlement. + * + * Operating the kern.darkboot sysctl is done using the commands below: + * + * - MEMORY_MAINTENANCE_DARK_BOOT_UNSET + * Unset the kern.darkboot sysctl (kern.darkboot=0). + * - MEMORY_MAINTENANCE_DARK_BOOT_SET + * Set the kern.darkboot sysctl (kern.darkboot=1). 
+ * - MEMORY_MAINTENANCE_DARK_BOOT_SET_PERSISTENT + * Set the kern.darkboot sysctl (kern.darkboot=1) and save its + * value into the 'darkboot' NVRAM variable. + * + * Example: + * sysctl kern.darkboot=2 + */ +#define MEMORY_MAINTENANCE_DARK_BOOT_UNSET (0) +#define MEMORY_MAINTENANCE_DARK_BOOT_SET (1) +#define MEMORY_MAINTENANCE_DARK_BOOT_SET_PERSISTENT (2) + +#define MEMORY_MAINTENANCE_DARK_BOOT_NVRAM_NAME "darkboot" + +#endif /* _SYS_MEMORY_MAINTENANCE_H_ */ + diff --git a/bsd/sys/mman.h b/bsd/sys/mman.h index acdbeb59f..06c76abf5 100644 --- a/bsd/sys/mman.h +++ b/bsd/sys/mman.h @@ -119,6 +119,32 @@ #define MAP_HASSEMAPHORE 0x0200 /* region may contain semaphores */ #define MAP_NOCACHE 0x0400 /* don't cache pages for this mapping */ #define MAP_JIT 0x0800 /* Allocate a region that will be used for JIT purposes */ + +/* + * Mapping type + */ +#define MAP_FILE 0x0000 /* map from file (default) */ +#define MAP_ANON 0x1000 /* allocated from memory, swap space */ +#define MAP_ANONYMOUS MAP_ANON + +/* + * The MAP_RESILIENT_* flags can be used when the caller wants to map some + * possibly unreliable memory and be able to access it safely, possibly + * getting the wrong contents rather than raising any exception. + * For safety reasons, such mappings have to be read-only (PROT_READ access + * only). + * + * MAP_RESILIENT_CODESIGN: + * accessing this mapping will not generate code-signing violations, + * even if the contents are tainted. + * MAP_RESILIENT_MEDIA: + * accessing this mapping will not generate an exception if the contents + * are not available (unreachable removable or remote media, access beyond + * end-of-file, ...). Missing contents will be replaced with zeroes. 
+ */ +#define MAP_RESILIENT_CODESIGN 0x2000 /* no code-signing failures */ +#define MAP_RESILIENT_MEDIA 0x4000 /* no backing-store failures */ + #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ /* @@ -143,11 +169,6 @@ #define MS_KILLPAGES 0x0004 /* invalidate pages, leave mapped */ #define MS_DEACTIVATE 0x0008 /* deactivate pages, leave mapped */ -/* - * Mapping type - */ -#define MAP_FILE 0x0000 /* map from file (default) */ -#define MAP_ANON 0x1000 /* allocated from memory, swap space */ #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ @@ -171,6 +192,7 @@ #define MADV_FREE_REUSABLE 7 /* pages can be reused (by anyone) */ #define MADV_FREE_REUSE 8 /* caller wants to reuse those pages */ #define MADV_CAN_REUSE 9 +#define MADV_PAGEOUT 10 /* page out now (internal only) */ /* * Return bits from mincore diff --git a/bsd/sys/mount_internal.h b/bsd/sys/mount_internal.h index 965d0a630..e73e4b9af 100644 --- a/bsd/sys/mount_internal.h +++ b/bsd/sys/mount_internal.h @@ -137,6 +137,7 @@ struct mount { pending_io_t mnt_pending_read_size __attribute__((aligned(sizeof(pending_io_t)))); /* byte count of pending reads */ struct timeval mnt_last_write_issued_timestamp; struct timeval mnt_last_write_completed_timestamp; + int64_t mnt_max_swappin_available; lck_rw_t mnt_rwlock; /* mutex readwrite lock */ lck_mtx_t mnt_renamelock; /* mutex that serializes renames that change shape of tree */ @@ -206,6 +207,8 @@ struct mount { #define MNT_IOFLAGS_UNMAP_SUPPORTED 0x00000002 #define MNT_IOFLAGS_IOSCHED_SUPPORTED 0x00000004 #define MNT_IOFLAGS_CSUNMAP_SUPPORTED 0x00000008 +#define MNT_IOFLAGS_SWAPPIN_SUPPORTED 0x00000010 +#define MNT_IOFLAGS_FUSION_DRIVE 0x00000020 /* * ioqueue depth for devices that don't report one @@ -413,6 +416,7 @@ struct user32_statfs { __BEGIN_DECLS +extern boolean_t root_is_CF_drive; extern uint32_t mount_generation; extern TAILQ_HEAD(mntlist, mount) mountlist; void mount_list_lock(void); diff --git a/bsd/sys/munge.h b/bsd/sys/munge.h index 
122e3c527..d1ab96096 100644 --- a/bsd/sys/munge.h +++ b/bsd/sys/munge.h @@ -85,6 +85,7 @@ void munge_wwlw(void *args); void munge_wwlll(void *args); void munge_wwllww(void *args); void munge_wlw(void *args); +void munge_wlww(void *args); void munge_wlwwwll(void *args); void munge_wlwwwllw(void *args); void munge_wlwwlwlw(void *args); @@ -107,6 +108,7 @@ void munge_wwwwwwlw(void *args); void munge_wwwwwwll(void *args); void munge_wsw(void *args); void munge_wws(void *args); +void munge_wwws(void *args); void munge_wwwsw(void *args); void munge_llllll(void *args); void munge_l(void *args); @@ -114,5 +116,4 @@ void munge_ll(void *args); void munge_lw(void *args); void munge_lwww(void *args); void munge_wwlwww(void *args); - #endif /* __MUNGE_H__ */ diff --git a/bsd/sys/pgo.h b/bsd/sys/pgo.h new file mode 100644 index 000000000..8f7909b82 --- /dev/null +++ b/bsd/sys/pgo.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2014 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_PGO_H_ +#define _SYS_PGO_H_ + +#include +#include +#include +#include + +#define PGO_HIB (1) +#define PGO_WAIT_FOR_UNLOAD (2) +#define PGO_METADATA (4) + +#define PGO_ALL_FLAGS (PGO_HIB | PGO_WAIT_FOR_UNLOAD | PGO_METADATA) + + +/** + * This is a serialization format for metadata related to a profile data buffer. + * + * If metadata is present, this footer will appear at the end of the file, so + * the last four bytes of the file will be the ASCII string "meta". + * + * The metadata is stored in an environment-string style buffer. The buffer + * consists of key-value pairs, which are delimited by null bytes. Each + * key-value pair is a string of the form "FOO=bar". Everything before the + * first equal sign is the key, everything after is the value. + * + * All members are in network byte order. + */ +struct pgo_metadata_footer { + /** + * number of pairs. + * + * This should be htonl(n), where n is the number of key-value pairs in the + * metadata buffer + */ + uint32_t number_of_pairs; + + /** + * pointer to the metadata buffer + * + * This should be htonl(offset), where offset is the backwards offset from + * the end of the file to the metadata buffer. 
+ */ + uint32_t offset_to_pairs; + + /** + * magic number + * + * This should be htonl(0x6d657461); + */ + uint32_t magic; +}; + +#ifndef KERNEL + +ssize_t grab_pgo_data( + uuid_t *uuid, + int flags, + unsigned char *buffer, + ssize_t size); + + +#endif + +#endif diff --git a/bsd/sys/priv.h b/bsd/sys/priv.h index 27d7eb34e..ff38da81d 100644 --- a/bsd/sys/priv.h +++ b/bsd/sys/priv.h @@ -84,6 +84,8 @@ #define PRIV_HW_DEBUG_DATA 1004 /* Extract hw-specific debug data (e.g. ECC data) */ #define PRIV_SELECTIVE_FORCED_IDLE 1005 /* Configure and control Selective Forced Idle (SFI) subsystem */ #define PRIV_PROC_TRACE_INSPECT 1006 /* Request trace memory of arbitrary process to be inspected */ +#define PRIV_DARKBOOT 1007 /* Manipulate the darkboot flag */ +#define PRIV_WORK_INTERVAL 1008 /* Express details about a work interval */ /* * Virtual memory privileges. diff --git a/bsd/sys/proc.h b/bsd/sys/proc.h index 8c4ebb790..4d3b2cbd3 100644 --- a/bsd/sys/proc.h +++ b/bsd/sys/proc.h @@ -82,6 +82,10 @@ #endif #include +#ifdef XNU_KERNEL_PRIVATE +#include /* COALITION_NUM_TYPES */ +#endif + #if defined(XNU_KERNEL_PRIVATE) || !defined(KERNEL) struct session; @@ -178,10 +182,6 @@ struct extern_proc { #define P_AFFINITY 0x00010000 /* xxx */ #define P_TRANSLATED 0x00020000 /* xxx */ #define P_CLASSIC P_TRANSLATED /* xxx */ -/* -#define P_FSTRACE 0x10000 / * tracing via file system (elsewhere?) * / -#define P_SSTEP 0x20000 / * process needs single-step fixup ??? 
* / -*/ #define P_DELAYIDLESLEEP 0x00040000 /* Process is marked to delay idle sleep on disk IO */ #define P_CHECKOPENEVT 0x00080000 /* check if a vnode has the OPENEVT flag set on open */ @@ -193,7 +193,7 @@ struct extern_proc { #define P_THCWD 0x01000000 /* process has thread cwd */ #define P_RESV9 0x02000000 /* (P_VFORK)process has vfork children */ -#define P_RESV10 0x04000000 /* used to be P_NOATTACH */ +#define P_RESV10 0x04000000 /* reserved flag */ #define P_RESV11 0x08000000 /* (P_INVFORK) proc in vfork */ #define P_NOSHLIB 0x10000000 /* no shared libs are in use for proc */ @@ -269,6 +269,8 @@ extern int proc_ppid(proc_t); extern int proc_noremotehang(proc_t); /* returns 1 if the process is marked for force quota */ extern int proc_forcequota(proc_t); +/* returns 1 if the process is chrooted */ +extern int proc_chrooted(proc_t); /* this routine returns 1 if the process is running with 64bit address space, else 0 */ extern int proc_is64bit(proc_t); @@ -329,6 +331,8 @@ extern int proc_pidbackgrounded(pid_t pid, uint32_t* state); */ extern uint64_t proc_uniqueid(proc_t); +extern void proc_set_responsible_pid(proc_t target_proc, pid_t responsible_pid); + #endif /* KERNEL_PRIVATE */ #ifdef XNU_KERNEL_PRIVATE @@ -345,7 +349,7 @@ extern int proc_pidoriginatoruuid(uuid_t uuid_buf, uint32_t buffersize); extern uint64_t proc_was_throttled(proc_t); extern uint64_t proc_did_throttle(proc_t); -extern uint64_t proc_coalitionid(proc_t); +extern void proc_coalitionids(proc_t, uint64_t [COALITION_NUM_TYPES]); #endif /* XNU_KERNEL_PRIVATE*/ diff --git a/bsd/sys/proc_info.h b/bsd/sys/proc_info.h index e8ca29c4f..443861ddc 100644 --- a/bsd/sys/proc_info.h +++ b/bsd/sys/proc_info.h @@ -37,12 +37,17 @@ #include #include #include +#include #include #include #include #include #include +#ifdef PRIVATE +#include /* COALITION_NUM_TYPES */ +#endif + __BEGIN_DECLS @@ -118,12 +123,20 @@ struct proc_archinfo { }; struct proc_pidcoalitioninfo { - uint64_t coalition_id; + uint64_t 
coalition_id[COALITION_NUM_TYPES]; uint64_t reserved1; uint64_t reserved2; uint64_t reserved3; }; +struct proc_originatorinfo { + uuid_t originator_uuid; /* UUID of the originator process */ + pid_t originator_pid; /* pid of the originator process */ + uint64_t p_reserve2; + uint64_t p_reserve3; + uint64_t p_reserve4; +}; + #endif @@ -286,6 +299,7 @@ struct proc_fileinfo { #define PROC_FP_SHARED 1 /* shared by more than one fd */ #define PROC_FP_CLEXEC 2 /* close on exec */ #define PROC_FP_GUARDED 4 /* guarded fd */ +#define PROC_FP_CLFORK 8 /* close on fork */ #define PROC_FI_GUARD_CLOSE (1u << 0) #define PROC_FI_GUARD_DUP (1u << 1) @@ -605,8 +619,23 @@ struct kqueue_info { uint32_t kq_state; uint32_t rfu_1; /* reserved */ }; -#define PROC_KQUEUE_SELECT 1 -#define PROC_KQUEUE_SLEEP 2 + +/* keep in sync with KQ_* in sys/eventvar.h */ +#define PROC_KQUEUE_SELECT 0x01 +#define PROC_KQUEUE_SLEEP 0x02 +#define PROC_KQUEUE_32 0x08 +#define PROC_KQUEUE_64 0x10 +#define PROC_KQUEUE_QOS 0x20 + +#ifdef PRIVATE +struct kevent_extinfo { + struct kevent_qos_s kqext_kev; + uint64_t kqext_sdata; + int kqext_status; + int kqext_sfflags; + uint64_t kqext_reserved[2]; +}; +#endif /* PRIVATE */ struct kqueue_fdinfo { struct proc_fileinfo pfi; @@ -748,6 +777,11 @@ struct proc_fileportinfo { #define PROC_PIDFDATALKINFO 8 #define PROC_PIDFDATALKINFO_SIZE (sizeof(struct appletalk_fdinfo)) +#ifdef PRIVATE +#define PROC_PIDFDKQUEUE_EXTINFO 9 +#define PROC_PIDFDKQUEUE_EXTINFO_SIZE (sizeof(struct kevent_extinfo)) +#endif /* PRIVATE */ + /* Flavors for proc_pidfileportinfo */ #define PROC_PIDFILEPORTVNODEPATHINFO 2 /* out: vnode_fdinfowithpath */ @@ -800,6 +834,16 @@ struct proc_fileportinfo { #define PROC_PIDORIGINATOR_BGSTATE 0x2 #define PROC_PIDORIGINATOR_BGSTATE_SIZE (sizeof(uint32_t)) +#define PROC_PIDORIGINATOR_PID_UUID 0x3 +#define PROC_PIDORIGINATOR_PID_UUID_SIZE (sizeof(struct proc_originatorinfo)) + +/* Flavors for proc_listcoalitions */ +#define LISTCOALITIONS_ALL_COALS 1 +#define 
LISTCOALITIONS_ALL_COALS_SIZE (sizeof(struct procinfo_coalinfo)) + +#define LISTCOALITIONS_SINGLE_TYPE 2 +#define LISTCOALITIONS_SINGLE_TYPE_SIZE (sizeof(struct procinfo_coalinfo)) + /* __proc_info() call numbers */ #define PROC_INFO_CALL_LISTPIDS 0x1 #define PROC_INFO_CALL_PIDINFO 0x2 @@ -811,6 +855,7 @@ struct proc_fileportinfo { #define PROC_INFO_CALL_DIRTYCONTROL 0x8 #define PROC_INFO_CALL_PIDRUSAGE 0x9 #define PROC_INFO_CALL_PIDORIGINATORINFO 0xa +#define PROC_INFO_CALL_LISTCOALITIONS 0xb #endif /* PRIVATE */ @@ -832,6 +877,7 @@ extern int fill_pshminfo(struct pshmnode * pshm, struct pshm_info * pinfo); extern int fill_pseminfo(struct psemnode * psem, struct psem_info * pinfo); extern int fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo); extern int fill_kqueueinfo(struct kqueue * kq, struct kqueue_info * kinfo); +extern int pid_kqueue_extinfo(proc_t, struct kqueue * kq, user_addr_t buffer, uint32_t buffersize, int32_t * retval); extern int fill_procworkqueue(proc_t, struct proc_workqueueinfo *); #endif /* XNU_KERNEL_PRIVATE */ diff --git a/bsd/sys/proc_internal.h b/bsd/sys/proc_internal.h index 96c7c3392..a3d8487e5 100644 --- a/bsd/sys/proc_internal.h +++ b/bsd/sys/proc_internal.h @@ -100,22 +100,6 @@ __END_DECLS */ struct label; -/* - * Added by SPARTA, Inc. - */ -/* - * Login context. - */ -struct lctx { - LIST_ENTRY(lctx) lc_list; /* List of all login contexts. */ - LIST_HEAD(, proc) lc_members; /* Pointer to lc members. */ - int lc_mc; /* Member Count. */ - pid_t lc_id; /* Login context ID. */ - lck_mtx_t lc_mtx; /* Mutex to protect members */ - - struct label *lc_label; /* Login context MAC label. */ -}; - /* * One structure allocated per session. */ @@ -362,8 +346,6 @@ struct proc { u_short p_acflag; /* Accounting flags. */ volatile u_short p_vfs_iopolicy; /* VFS iopolicy flags. (atomic bit ops) */ - struct lctx *p_lctx; /* Pointer to login context. */ - LIST_ENTRY(proc) p_lclist; /* List of processes in lctx. 
*/ user_addr_t p_threadstart; /* pthread start fn */ user_addr_t p_wqthread; /* pthread workqueue fn */ int p_pthsize; /* pthread size */ @@ -374,6 +356,8 @@ struct proc { int p_wqsize; /* allocated size */ boolean_t p_wqiniting; /* semaphore to serialze wq_open */ lck_spin_t p_wqlock; /* lock to protect work queue */ + struct kqueue * p_wqkqueue; /* private workq kqueue */ + struct timeval p_start; /* starting time */ void * p_rcall; int p_ractive; @@ -406,12 +390,17 @@ struct proc { uint64_t p_memstat_userdata; /* user state */ uint64_t p_memstat_idledeadline; /* time at which process became clean */ #if CONFIG_JETSAM - int32_t p_memstat_memlimit; /* cached memory limit */ + int32_t p_memstat_memlimit; /* cached memory limit, toggles between active and inactive limits */ + int32_t p_memstat_memlimit_active; /* memory limit enforced when process is in active jetsam state */ + int32_t p_memstat_memlimit_inactive; /* memory limit enforced when process is in inactive jetsam state */ #endif #if CONFIG_FREEZE uint32_t p_memstat_suspendedfootprint; /* footprint at time of suspensions */ #endif /* CONFIG_FREEZE */ #endif /* CONFIG_MEMORYSTATUS */ + + /* cached proc-specific data required for corpse inspection */ + pid_t p_responsible_pid; /* pid responsible for this process */ }; #define PGRPID_DEAD 0xdeaddead @@ -460,11 +449,11 @@ struct proc { #define P_LLIMWAIT 0x00040000 #define P_LWAITED 0x00080000 #define P_LINSIGNAL 0x00100000 -#define P_UNUSED 0x00200000 /* Unused */ +#define P_LRETURNWAIT 0x00200000 /* process is completing spawn/vfork-exec/fork */ #define P_LRAGE_VNODES 0x00400000 #define P_LREGISTER 0x00800000 /* thread start fns registered */ #define P_LVMRSRCOWNER 0x01000000 /* can handle the resource ownership of */ -/* old P_LPTERMINATE 0x02000000 */ +#define P_LRETURNWAITER 0x02000000 /* thread is waiting on P_LRETURNWAIT being cleared */ #define P_LTERM_DECRYPTFAIL 0x04000000 /* process terminating due to key failure to decrypt */ #define P_LTERM_JETSAM 
0x08000000 /* process is being jetsam'd */ #define P_JETSAM_VMPAGESHORTAGE 0x00000000 /* jetsam: lowest jetsam priority proc, killed due to vm page shortage */ @@ -491,6 +480,7 @@ struct proc { /* additional process flags */ #define P_LADVLOCK 0x01 +#define P_LXBKIDLEINPROG 0x02 /* p_vfs_iopolicy flags */ #define P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY 0x0001 @@ -525,6 +515,9 @@ struct proc { #ifdef KERNEL #include /* user_timeval, user_itimerval */ +/* This packing breaks symmetry with userspace side (struct extern_proc + * of proc.h) for the ARMV7K ABI where 64-bit types are 64-bit aligned + */ #pragma pack(4) struct user32_extern_proc { union { @@ -653,18 +646,6 @@ extern vm_offset_t * execargs_cache; #define SESS_LEADER(p, sessp) ((sessp)->s_leader == (p)) -/* Lock and unlock a login context. */ -#define LCTX_LOCK(lc) lck_mtx_lock(&(lc)->lc_mtx) -#define LCTX_UNLOCK(lc) lck_mtx_unlock(&(lc)->lc_mtx) -#define LCTX_LOCKED(lc) -#define LCTX_LOCK_ASSERT(lc, type) -#define ALLLCTX_LOCK lck_mtx_lock(&alllctx_lock) -#define ALLLCTX_UNLOCK lck_mtx_unlock(&alllctx_lock) -extern lck_mtx_t alllctx_lock; -extern lck_grp_t * lctx_lck_grp; -extern lck_grp_attr_t * lctx_lck_grp_attr; -extern lck_attr_t * lctx_lck_attr; - #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; extern u_long pidhash; @@ -710,17 +691,12 @@ __private_extern__ struct proc *proc_find_zombref(pid_t); /* Find zombie by id. __private_extern__ void proc_drop_zombref(struct proc * p); /* Find zombie by id. 
*/ -extern struct lctx *lcfind(pid_t); /* Find a login context by id */ -extern struct lctx *lccreate(void); /* Create a new login context */ - extern int chgproccnt(uid_t uid, int diff); -extern void enterlctx(struct proc *p, struct lctx *l, int create); extern void pinsertchild(struct proc *parent, struct proc *child); extern int enterpgrp(struct proc *p, pid_t pgid, int mksess); extern void fixjobc(struct proc *p, struct pgrp *pgrp, int entering); extern int inferior(struct proc *p); extern int leavepgrp(struct proc *p); -extern void leavelctx(struct proc *p); extern void resetpriority(struct proc *); extern void setrunnable(struct proc *); extern void setrunqueue(struct proc *); @@ -731,7 +707,7 @@ extern int msleep0(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, int t extern void vfork_return(struct proc *child, int32_t *retval, int rval); extern int exit1(struct proc *, int, int *); extern int exit1_internal(struct proc *, int, int *, boolean_t, boolean_t, int); -extern int fork1(proc_t, thread_t *, int, coalition_t); +extern int fork1(proc_t, thread_t *, int, coalition_t *); extern void vfork_exit_internal(struct proc *p, int rv, int forced); extern void proc_reparentlocked(struct proc *child, struct proc * newparent, int cansignal, int locked); extern int pgrp_iterate(struct pgrp * pgrp, int flags, int (*callout)(proc_t , void *), void *arg, int (*filterfn)(proc_t , void *), void *filterarg); @@ -795,6 +771,10 @@ extern lck_mtx_t * pthread_list_mlock; #endif /* PSYNCH */ struct uthread * current_uthread(void); +void proc_set_return_wait(struct proc *); +void proc_clear_return_wait(proc_t p, thread_t child_thread); +void proc_wait_to_return(void); + /* return 1 if process is forcing case-sensitive HFS+ access, 0 for default */ extern int proc_is_forcing_hfs_case_sensitivity(proc_t); diff --git a/bsd/sys/protosw.h b/bsd/sys/protosw.h index b7d319d18..ac2b4f8c5 100644 --- a/bsd/sys/protosw.h +++ b/bsd/sys/protosw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -85,6 +85,7 @@ struct sockaddr; struct socket; struct sockopt; struct socket_filter; +struct uio; #ifdef XNU_KERNEL_PRIVATE struct domain_old; #endif /* XNU_KERNEL_PRIVATE */ @@ -256,6 +257,8 @@ struct protosw { #define PR_ATTACHED 0x800 /* protocol is attached to a domain */ #define PR_MULTICONN 0x1000 /* supports multiple connect calls */ #define PR_EVCONNINFO 0x2000 /* protocol generates conninfo event */ +#define PR_PRECONN_WRITE 0x4000 /* protocol supports preconnect write */ +#define PR_DATA_IDEMPOTENT 0x8000 /* protocol supports idempotent data at connectx-time */ #define PR_OLD 0x10000000 /* added via net_add_proto */ /* pseudo-public domain flags */ @@ -384,6 +387,7 @@ struct ifnet; struct stat; struct ucred; struct uio; +struct recv_msg_elem; #ifdef XNU_KERNEL_PRIVATE /* @@ -450,14 +454,17 @@ struct pr_usrreqs { int (*pru_connect2)(struct socket *, struct socket *); int (*pru_connectx)(struct socket *, struct sockaddr_list **, struct sockaddr_list **, struct proc *, uint32_t, - associd_t, connid_t *, uint32_t, void *, uint32_t); + sae_associd_t, sae_connid_t *, uint32_t, void *, uint32_t, + struct uio *, user_ssize_t *); int (*pru_control)(struct socket *, u_long, caddr_t, struct ifnet *, struct proc *); int (*pru_detach)(struct socket *); int (*pru_disconnect)(struct socket *); - int (*pru_disconnectx)(struct socket *, associd_t, connid_t); + int (*pru_disconnectx)(struct socket *, + sae_associd_t, sae_connid_t); int (*pru_listen)(struct socket *, struct proc *); - int (*pru_peeloff)(struct socket *, associd_t, struct socket **); + int (*pru_peeloff)(struct socket *, + sae_associd_t, struct socket **); int (*pru_peeraddr)(struct socket *, struct sockaddr **); int (*pru_rcvd)(struct socket *, int); int (*pru_rcvoob)(struct socket *, struct mbuf *, int); @@ -474,13 +481,13 @@ struct pr_usrreqs { int 
(*pru_sopoll)(struct socket *, int, struct ucred *, void *); int (*pru_soreceive)(struct socket *, struct sockaddr **, struct uio *, struct mbuf **, struct mbuf **, int *); - int (*pru_soreceive_list)(struct socket *, struct sockaddr **, - struct uio **, u_int, struct mbuf **, struct mbuf **, int *); + int (*pru_soreceive_list)(struct socket *, struct recv_msg_elem *, u_int, + int *); int (*pru_sosend)(struct socket *, struct sockaddr *, struct uio *, struct mbuf *, struct mbuf *, int); - int (*pru_sosend_list)(struct socket *, struct sockaddr *, - struct uio **, u_int, struct mbuf *, struct mbuf *, int); + int (*pru_sosend_list)(struct socket *, struct uio **, u_int, int); int (*pru_socheckopt)(struct socket *, struct sockopt *); + int (*pru_preconnect)(struct socket *so); }; /* Values for pru_flags */ @@ -499,11 +506,12 @@ extern int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, extern int pru_connect2_notsupp(struct socket *so1, struct socket *so2); #ifdef XNU_KERNEL_PRIVATE extern int pru_connectx_notsupp(struct socket *, struct sockaddr_list **, - struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, - uint32_t, void *, uint32_t); -extern int pru_disconnectx_notsupp(struct socket *, associd_t, connid_t); + struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t, + sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *); +extern int pru_disconnectx_notsupp(struct socket *, sae_associd_t, + sae_connid_t); extern int pru_socheckopt_null(struct socket *, struct sockopt *); -extern int pru_peeloff_notsupp(struct socket *, associd_t, struct socket **); +extern int pru_peeloff_notsupp(struct socket *, sae_associd_t, struct socket **); #endif /* XNU_KERNEL_PRIVATE */ extern int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p); @@ -522,14 +530,13 @@ extern int pru_shutdown_notsupp(struct socket *so); extern int pru_sockaddr_notsupp(struct socket *so, struct 
sockaddr **nam); extern int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags); -extern int pru_sosend_list_notsupp(struct socket *so, struct sockaddr *addr, - struct uio **uio, u_int, struct mbuf *top, struct mbuf *control, int flags); +extern int pru_sosend_list_notsupp(struct socket *so, struct uio **uio, + u_int, int flags); extern int pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); -extern int pru_soreceive_list_notsupp(struct socket *so, - struct sockaddr **paddr, struct uio **uio, u_int, struct mbuf **mp0, - struct mbuf **controlp, int *flagsp); +extern int pru_soreceive_list_notsupp(struct socket *, struct recv_msg_elem *, u_int, + int *); extern int pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, void *); #ifdef XNU_KERNEL_PRIVATE diff --git a/bsd/sys/pthread_shims.h b/bsd/sys/pthread_shims.h index d0050e416..872173b09 100644 --- a/bsd/sys/pthread_shims.h +++ b/bsd/sys/pthread_shims.h @@ -50,6 +50,8 @@ struct uthread; typedef void (*sched_call_t)(int type, thread_t thread); #endif +typedef struct workq_reqthreads_req_s {unsigned long priority; int count;} *workq_reqthreads_req_t; + /* * Increment each time new reserved slots are used. When the pthread * kext registers this table, it will include the version of the xnu @@ -100,8 +102,14 @@ typedef struct pthread_functions_s { /* New pthreadctl system. 
*/ int (*bsdthread_ctl)(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval); + /* Request threads to deliver kevents */ + thread_t (*workq_reqthreads)(struct proc *p, int requests_count, workq_reqthreads_req_t requests); + + /* Resolve a pthread_priority_t to a QoS/relative pri */ + integer_t (*thread_qos_from_pthread_priority)(unsigned long pthread_priority, unsigned long *flags); + /* padding for future */ - void* _pad[97]; + void* _pad[95]; } *pthread_functions_t; typedef struct pthread_callbacks_s { diff --git a/bsd/sys/ptrace.h b/bsd/sys/ptrace.h index fda2515a5..cf589929b 100644 --- a/bsd/sys/ptrace.h +++ b/bsd/sys/ptrace.h @@ -67,6 +67,11 @@ #include #include +enum { + ePtAttachDeprecated __deprecated_enum_msg("PT_ATTACH is deprecated. See PT_ATTACHEXC") = 10 +}; + + #define PT_TRACE_ME 0 /* child declares it's being traced */ #define PT_READ_I 1 /* read word in child's I space */ #define PT_READ_D 2 /* read word in child's D space */ @@ -77,7 +82,7 @@ #define PT_CONTINUE 7 /* continue the child */ #define PT_KILL 8 /* kill the child process */ #define PT_STEP 9 /* single step the child */ -#define PT_ATTACH 10 /* trace some running process */ +#define PT_ATTACH ePtAttachDeprecated /* trace some running process */ #define PT_DETACH 11 /* stop tracing a process */ #define PT_SIGEXC 12 /* signals as exceptions for current_proc */ #define PT_THUPDATE 13 /* signal for thread# */ diff --git a/bsd/sys/reboot.h b/bsd/sys/reboot.h index 18e3662e9..5f51c9952 100644 --- a/bsd/sys/reboot.h +++ b/bsd/sys/reboot.h @@ -84,8 +84,7 @@ #define RB_SAFEBOOT 0x100 /* booting safe */ #define RB_UPSDELAY 0x200 /* Delays restart by 5 minutes */ #define RB_QUICK 0x400 /* quick and ungraceful reboot with file system caches flushed*/ -#define RB_PANIC 0 /* reboot due to panic */ -#define RB_BOOT 1 /* reboot due to boot() */ +#define RB_PANIC 0x800 /* panic the kernel */ #endif /* __APPLE_API_PRIVATE */ @@ -131,7 +130,7 @@ #include 
__BEGIN_DECLS -int boot(int, int, char *); +int reboot_kernel(int, char *); __END_DECLS #define PROC_SHUTDOWN_LOG "/var/log/kernel-shutdown.log" diff --git a/bsd/sys/resource.h b/bsd/sys/resource.h index 6a4164fa8..993907e54 100644 --- a/bsd/sys/resource.h +++ b/bsd/sys/resource.h @@ -120,10 +120,12 @@ typedef __uint64_t rlim_t; #define PRIO_DARWIN_ROLE 6 /* Second argument is a PID */ -#define PRIO_DARWIN_ROLE_DEFAULT 0x0 /* Default state */ +#define PRIO_DARWIN_ROLE_DEFAULT 0x0 /* Reset to default state */ #define PRIO_DARWIN_ROLE_UI_FOCAL 0x1 /* On screen, focal UI */ -#define PRIO_DARWIN_ROLE_UI 0x2 /* On screen, non-focal UI */ +#define PRIO_DARWIN_ROLE_UI 0x2 /* On screen UI, focal unknown */ #define PRIO_DARWIN_ROLE_NON_UI 0x3 /* Off screen, non-focal UI */ +#define PRIO_DARWIN_ROLE_UI_NON_FOCAL 0x4 /* On screen, non-focal UI */ +#define PRIO_DARWIN_ROLE_TAL_LAUNCH 0x5 /* Throttled-launch (for OS X TAL resume) */ #endif /* PRIVATE */ diff --git a/bsd/sys/select.h b/bsd/sys/select.h index a4c33d0c8..1fa3f7605 100644 --- a/bsd/sys/select.h +++ b/bsd/sys/select.h @@ -108,11 +108,8 @@ #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ #ifdef KERNEL -#ifdef KERNEL_PRIVATE -#include -#endif #include - +#include #include /* @@ -121,8 +118,8 @@ */ #ifdef KERNEL_PRIVATE struct selinfo { - struct wait_queue si_wait_queue; /* wait_queue for wait/wakeup */ - struct klist si_note; /* JMM - temporary separation */ + struct waitq si_waitq; /* waitq for wait/wakeup */ + struct klist si_note; /* JMM - temporary separation */ u_int si_flags; /* see below */ }; diff --git a/bsd/sys/signal.h b/bsd/sys/signal.h index 2ff005609..2d0fc43e8 100644 --- a/bsd/sys/signal.h +++ b/bsd/sys/signal.h @@ -71,6 +71,7 @@ #include #include +#include #define __DARWIN_NSIG 32 /* counting 0; could be 33 (mask is 1-32) */ diff --git a/bsd/sys/signalvar.h b/bsd/sys/signalvar.h index cd5c2d133..6d8488807 100644 --- a/bsd/sys/signalvar.h +++ b/bsd/sys/signalvar.h @@ -188,10 +188,16 @@ int 
sigprop[NSIG + 1] = { #define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP)) +#define SIGRESTRICTMASK (sigmask(SIGILL) | sigmask(SIGTRAP) | sigmask(SIGABRT) | \ + sigmask(SIGFPE) | sigmask(SIGBUS) | sigmask(SIGSEGV) | \ + sigmask(SIGSYS)) + +extern unsigned sigrestrict_arg; + /* * Machine-independent functions: */ -int coredump(struct proc *p, uint32_t reserve_mb, int ignore_ulimit); + void execsigs(struct proc *p, thread_t thread); void gsignal(int pgid, int sig); int issignal_locked(struct proc *p); @@ -203,6 +209,7 @@ void siginit(struct proc *p); void trapsignal(struct proc *p, int sig, unsigned code); void pt_setrunnable(struct proc *p); int hassigprop(int sig, int prop); +int setsigvec(proc_t, thread_t, int signum, struct __kern_sigaction *, boolean_t in_sigstart); /* * Machine-dependent functions: @@ -219,9 +226,6 @@ void threadsignal(thread_t sig_actthread, int signum, int thread_issignal(proc_t p, thread_t th, sigset_t mask); void psignal_vfork(struct proc *p, task_t new_task, thread_t thread, int signum); -void psignal_vtalarm(struct proc *); -void psignal_xcpu(struct proc *); -void psignal_sigprof(struct proc *); void signal_setast(thread_t sig_actthread); void pgsigio(pid_t pgid, int signalnum); @@ -230,4 +234,17 @@ int sig_try_locked(struct proc *p); #endif /* BSD_KERNEL_PRIVATE */ + +#ifdef XNU_KERNEL_PRIVATE + +/* Functions exported to Mach as well */ + +#define COREDUMP_IGNORE_ULIMIT 0x0001 /* Ignore the process's core file ulimit. */ +#define COREDUMP_FULLFSYNC 0x0002 /* Run F_FULLFSYNC on the core file's vnode */ + +int coredump(struct proc *p, uint32_t reserve_mb, int coredump_flags); + +#endif /* XNU_KERNEL_PRIVATE */ + + #endif /* !_SYS_SIGNALVAR_H_ */ diff --git a/bsd/sys/socket.h b/bsd/sys/socket.h index 3b6197bf6..8afe6c4da 100644 --- a/bsd/sys/socket.h +++ b/bsd/sys/socket.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -329,12 +329,33 @@ struct so_tcdbg { #if MPTCP #define SO_MPTCP_FASTJOIN 0x1111 /* fast join MPTCP */ #endif /* MPTCP */ - -#define SO_AWDL_UNRESTRICTED 0x1113 /* try to use AWDL in restricted mode */ #endif /* PRIVATE */ - #define SO_NUMRCVPKT 0x1112 /* number of datagrams in receive socket buffer */ +#ifdef PRIVATE +#define SO_AWDL_UNRESTRICTED 0x1113 /* try to use AWDL in restricted mode */ +#define SO_EXTENDED_BK_IDLE 0x1114 /* extended time to keep socket idle after app is suspended (int) */ +#endif /* PRIVATE */ +typedef __uint32_t sae_associd_t; +#define SAE_ASSOCID_ANY 0 +#define SAE_ASSOCID_ALL ((sae_associd_t)(-1ULL)) + +typedef __uint32_t sae_connid_t; +#define SAE_CONNID_ANY 0 +#define SAE_CONNID_ALL ((sae_connid_t)(-1ULL)) + +/* connectx() flag parameters */ +#define CONNECT_RESUME_ON_READ_WRITE 0x1 /* resume connect() on read/write */ +#define CONNECT_DATA_IDEMPOTENT 0x2 /* data is idempotent */ + +/* sockaddr endpoints */ +typedef struct sa_endpoints { + unsigned int sae_srcif; /* optional source interface */ + struct sockaddr *sae_srcaddr; /* optional source address */ + socklen_t sae_srcaddrlen; /* size of source address */ + struct sockaddr *sae_dstaddr; /* destination address */ + socklen_t sae_dstaddrlen; /* size of destination address */ +} sa_endpoints_t; #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ /* @@ -792,6 +813,46 @@ struct user32_msghdr_x { user32_size_t msg_datalen; /* byte length of buffer in msg_iov */ }; +/* + * In-kernel representation of "struct sa_endpoints" from + * userspace. Has enough precision for 32-bit or + * 64-bit clients, but does not need to be packed. 
+ */ + +struct user_sa_endpoints { + unsigned int sae_srcif; /* optional source interface */ + user_addr_t sae_srcaddr; /* optional source address */ + socklen_t sae_srcaddrlen; /* size of source address */ + user_addr_t sae_dstaddr; /* destination address */ + socklen_t sae_dstaddrlen; /* size of destination address */ +}; + +/* + * LP64 user version of struct sa_endpoints + * WARNING - keep in sync with struct sa_endpoints + */ + +struct user64_sa_endpoints { + unsigned int sae_srcif; /* optional source interface */ + user64_addr_t sae_srcaddr; /* optional source address */ + socklen_t sae_srcaddrlen; /* size of source address */ + user64_addr_t sae_dstaddr; /* destination address */ + socklen_t sae_dstaddrlen; /* size of destination address */ +}; + +/* + * ILP32 user version of struct sa_endpoints + * WARNING - keep in sync with struct sa_endpoints + */ + +struct user32_sa_endpoints { + unsigned int sae_srcif; /* optional source interface */ + user32_addr_t sae_srcaddr; /* optional source address */ + socklen_t sae_srcaddrlen; /* size of source address */ + user32_addr_t sae_dstaddr; /* destination address */ + socklen_t sae_dstaddrlen; /* size of destination address */ +}; + #endif /* XNU_KERNEL_PRIVATE */ #define MSG_OOB 0x1 /* process out-of-band data */ @@ -997,20 +1058,13 @@ struct user32_sf_hdtr { #ifdef PRIVATE #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -typedef __uint32_t associd_t; -#define ASSOCID_ANY 0 -#define ASSOCID_ALL ((associd_t)(-1ULL)) - -typedef __uint32_t connid_t; -#define CONNID_ANY 0 -#define CONNID_ALL ((connid_t)(-1ULL)) /* * Structure for SIOCGASSOCIDS */ struct so_aidreq { __uint32_t sar_cnt; /* number of associations */ - associd_t *sar_aidp; /* array of association IDs */ + sae_associd_t *sar_aidp; /* array of association IDs */ }; #ifdef BSD_KERNEL_PRIVATE @@ -1029,20 +1083,20 @@ struct so_aidreq64 { * Structure for SIOCGCONNIDS */ struct so_cidreq { - associd_t scr_aid; /* association ID */ + sae_associd_t 
scr_aid; /* association ID */ __uint32_t scr_cnt; /* number of connections */ - connid_t *scr_cidp; /* array of connection IDs */ + sae_connid_t *scr_cidp; /* array of connection IDs */ }; #ifdef BSD_KERNEL_PRIVATE struct so_cidreq32 { - associd_t scr_aid; + sae_associd_t scr_aid; __uint32_t scr_cnt; user32_addr_t scr_cidp; }; struct so_cidreq64 { - associd_t scr_aid; + sae_associd_t scr_aid; __uint32_t scr_cnt; user64_addr_t scr_cidp __attribute__((aligned(8))); }; @@ -1052,7 +1106,7 @@ struct so_cidreq64 { * Structure for SIOCGCONNINFO */ struct so_cinforeq { - connid_t scir_cid; /* connection ID */ + sae_connid_t scir_cid; /* connection ID */ __uint32_t scir_flags; /* see flags below */ __uint32_t scir_ifindex; /* (last) outbound interface */ __int32_t scir_error; /* most recent error */ @@ -1067,7 +1121,7 @@ struct so_cinforeq { #ifdef BSD_KERNEL_PRIVATE struct so_cinforeq32 { - connid_t scir_cid; + sae_connid_t scir_cid; __uint32_t scir_flags; __uint32_t scir_ifindex; __int32_t scir_error; @@ -1081,7 +1135,7 @@ struct so_cinforeq32 { }; struct so_cinforeq64 { - connid_t scir_cid; + sae_connid_t scir_cid; __uint32_t scir_flags; __uint32_t scir_ifindex; __int32_t scir_error; @@ -1116,7 +1170,7 @@ struct so_cinforeq64 { * Structure for SIOC{S,G}CONNORDER */ struct so_cordreq { - connid_t sco_cid; /* connection ID */ + sae_connid_t sco_cid; /* connection ID */ __uint32_t sco_rank; /* rank (0 means unspecified) */ }; @@ -1170,10 +1224,8 @@ struct kev_socket_closed { #ifndef KERNEL __BEGIN_DECLS -extern int connectx(int s, struct sockaddr *, socklen_t, struct sockaddr *, - socklen_t, __uint32_t, associd_t, connid_t *); -extern int disconnectx(int s, associd_t, connid_t); -extern int peeloff(int s, associd_t); + +extern int peeloff(int s, sae_associd_t); extern int socket_delegate(int, int, int, pid_t); /* @@ -1181,7 +1233,7 @@ extern int socket_delegate(int, int, int, pid_t); * several datagrams at once in the array of message headers "msgp". 
* * recvmsg_x() can be used only with protocols handlers that have been specially - * modified to handle sending and receiving several datagrams at once. + * modified to support sending and receiving several datagrams at once. * * The size of the array "msgp" is given by the argument "cnt". * @@ -1201,11 +1253,7 @@ extern int socket_delegate(int, int, int, pid_t); * recvmsg_x() may return with less than "cnt" datagrams received based on * the low water mark and the amount of data pending in the socket buffer. * - * Address and ancillary data are not supported so the following fields - * must be set to zero on input: - * "msg_name", "msg_namelen", "msg_control" and "msg_controllen". - * - * recvmsg_x() returns the number of datagrams that have been received , + * recvmsg_x() returns the number of datagrams that have been received, * or -1 if an error occurred. * * NOTE: This a private system call, the API is subject to change. @@ -1217,7 +1265,7 @@ ssize_t recvmsg_x(int s, const struct msghdr_x *msgp, u_int cnt, int flags); * several datagrams at once in the array of message headers "msgp". * * sendmsg_x() can be used only with protocols handlers that have been specially - * modified to support to handle sending and receiving several datagrams at once. + * modified to support sending and receiving several datagrams at once. * * The size of the array "msgp" is given by the argument "cnt". 
* @@ -1250,6 +1298,7 @@ __END_DECLS #ifndef KERNEL __BEGIN_DECLS + int accept(int, struct sockaddr * __restrict, socklen_t * __restrict) __DARWIN_ALIAS_C(accept); int bind(int, const struct sockaddr *, socklen_t) __DARWIN_ALIAS(bind); @@ -1280,6 +1329,9 @@ int sendfile(int, int, off_t, off_t *, struct sf_hdtr *, int); #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) void pfctlinput(int, struct sockaddr *); +int connectx(int , const sa_endpoints_t *, sae_associd_t, unsigned int, + const struct iovec *, unsigned int, size_t *, sae_connid_t *); +int disconnectx(int , sae_associd_t, sae_connid_t); #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ __END_DECLS #endif /* !KERNEL */ diff --git a/bsd/sys/socketvar.h b/bsd/sys/socketvar.h index 33ddf98e2..e0b810b0c 100644 --- a/bsd/sys/socketvar.h +++ b/bsd/sys/socketvar.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -150,6 +150,7 @@ struct msg_state { struct socket { int so_zone; /* zone we were allocated from */ short so_type; /* generic type, see socket.h */ + u_short so_error; /* error affecting connection */ u_int32_t so_options; /* from socket call, see socket.h */ short so_linger; /* time to linger while closing */ short so_state; /* internal state flags SS_*, below */ @@ -175,7 +176,6 @@ struct socket { connections */ short so_qlimit; /* max number queued connections */ short so_timeo; /* connection timeout */ - u_short so_error; /* error affecting connection */ pid_t so_pgid; /* pgid for signals */ u_int32_t so_oobmark; /* chars to oob mark */ /* @@ -193,8 +193,8 @@ struct socket { struct mbuf *sb_lastrecord; /* first mbuf of last record */ struct socket *sb_so; /* socket back ptr for kexts */ struct selinfo sb_sel; /* process selecting rd/wr */ - u_int32_t sb_flags; /* flags, see below */ struct timeval sb_timeo; /* timeout for read/write */ + u_int32_t 
sb_flags; /* flags, see below */ u_int32_t sb_idealsize; /* Ideal size for the sb based on bandwidth and delay */ void (*sb_upcall)(struct socket *, void *arg, int waitf); @@ -203,6 +203,7 @@ struct socket { u_int32_t sb_waiters; /* # of data/space waiters */ thread_t sb_cfil_thread; /* content filter thread */ u_int32_t sb_cfil_refs; /* # of nested calls */ + u_int32_t sb_preconn_hiwat;/* preconnect hiwat mark */ } so_rcv, so_snd; #define SB_MAX (8192*1024) /* default for max chars in sockbuf */ #define LOW_SB_MAX (2*9*1024) /* lower limit on max socket buffer @@ -220,7 +221,6 @@ struct socket { #define SB_AUTOSIZE 0x400 /* automatically size socket buffer */ #define SB_TRIM 0x800 /* Trim the socket buffer */ #define SB_NOCOMPRESS 0x1000 /* do not compress socket buffer */ - caddr_t so_tpcb; /* Misc. protocol control block, used by some kexts */ @@ -230,28 +230,33 @@ struct socket { /* NB: generation count must not be first; easiest to make it last. */ so_gen_t so_gencnt; /* generation count */ TAILQ_HEAD(, eventqelt) so_evlist; - boolean_t cached_in_sock_layer; /* bundled with inpcb and tcpcb */ STAILQ_ENTRY(socket) so_cache_ent; /* socache entry */ - u_int32_t cache_timestamp; /* time socket was cached */ caddr_t so_saved_pcb; /* Saved pcb when cacheing */ + u_int32_t cache_timestamp; /* time socket was cached */ + + pid_t last_pid; /* pid of most recent accessor */ + u_int64_t last_upid; /* upid of most recent accessor */ + struct mbuf *so_temp; /* Holding area for outbound frags */ /* Plug-in support - make the socket interface overridable */ struct mbuf *so_tail; struct socket_filter_entry *so_filt; /* NKE hook */ u_int32_t so_flags; /* Flags */ #define SOF_NOSIGPIPE 0x00000001 -#define SOF_NOADDRAVAIL 0x00000002 /* EADDRNOTAVAIL if src addr is gone */ -#define SOF_PCBCLEARING 0x00000004 /* pru_disconnect done; don't call pru_detach */ -#define SOF_DEFUNCT 0x00000008 /* socket marked as inactive */ -#define SOF_CLOSEWAIT 0x00000010 /* blocked in close 
awaiting some events */ -#define SOF_REUSESHAREUID 0x00000040 /* Allows SO_REUSEADDR/SO_REUSEPORT - for multiple so_uid */ -#define SOF_MULTIPAGES 0x00000080 /* jumbo clusters may be used for sosend */ -#define SOF_ABORTED 0x00000100 /* soabort was already called once */ -#define SOF_OVERFLOW 0x00000200 /* socket was dropped as overflow of listen q */ -#define SOF_NOTIFYCONFLICT 0x00000400 /* notify that a bind was done on a - port already in use */ -#define SOF_UPCALLCLOSEWAIT 0x00000800 /* block close until upcall returns */ +#define SOF_NOADDRAVAIL 0x00000002 /* EADDRNOTAVAIL if src addr is gone */ +#define SOF_PCBCLEARING 0x00000004 /* pru_disconnect done; don't + call pru_detach */ +#define SOF_DEFUNCT 0x00000008 /* socket marked as inactive */ +#define SOF_CLOSEWAIT 0x00000010 /* blocked in close awaiting some events */ +#define SOF_REUSESHAREUID 0x00000040 /* Allows SO_REUSEADDR/SO_REUSEPORT + for multiple so_uid */ +#define SOF_MULTIPAGES 0x00000080 /* jumbo clusters may be used for sosend */ +#define SOF_ABORTED 0x00000100 /* soabort was already called once */ +#define SOF_OVERFLOW 0x00000200 /* socket was dropped as overflow of + listen q */ +#define SOF_NOTIFYCONFLICT 0x00000400 /* notify that a bind was done on a + port already in use */ +#define SOF_UPCALLCLOSEWAIT 0x00000800 /* block close until upcall returns */ #define SOF_BINDRANDOMPORT 0x00001000 /* Randomized port number for bind */ #define SOF_NPX_SETOPTSHUT 0x00002000 /* Non POSIX extension to allow setsockopt(2) after shut down */ @@ -261,7 +266,7 @@ struct socket { #define SOF_SUSPENDED 0x00020000 /* i/f output queue is suspended */ #define SOF_INCOMP_INPROGRESS 0x00040000 /* incomp socket is being processed */ #define SOF_NOTSENT_LOWAT 0x00080000 /* A different lowat on not sent - data has been set */ + data has been set */ #define SOF_KNOTE 0x00100000 /* socket is on the EV_SOCK klist */ #define SOF_USELRO 0x00200000 /* TCP must use LRO on these sockets */ #define SOF_ENABLE_MSGS 0x00400000 
/* TCP must enable message delivery */ @@ -279,25 +284,26 @@ struct socket { int so_usecount; /* refcounting of socket use */; int so_retaincnt; u_int32_t so_filteruse; /* usecount for the socket filters */ - u_int32_t so_traffic_mgt_flags; /* traffic_mgt socket config */ + u_int16_t so_traffic_class; + u_int8_t so_traffic_mgt_flags; /* traffic_mgt socket config */ + u_int8_t so_restrictions; thread_t so_send_filt_thread; - u_int32_t so_restrictions; /* for debug pruposes */ #define SO_LCKDBG_MAX 4 /* number of debug locking Link Registers recorded */ void *lock_lr[SO_LCKDBG_MAX]; /* locking calling history */ - int next_lock_lr; void *unlock_lr[SO_LCKDBG_MAX]; /* unlocking caller history */ - int next_unlock_lr; + u_int8_t next_lock_lr; + u_int8_t next_unlock_lr; + + u_int16_t so_pktheadroom; /* headroom before packet payload */ + + u_int32_t so_ifdenied_notifies; /* # of notifications generated */ struct label *so_label; /* MAC label for socket */ struct label *so_peerlabel; /* cached MAC label for socket peer */ thread_t so_background_thread; /* thread that marked this socket background */ - int so_traffic_class; - - u_int64_t last_upid; /* upid of most recent accessor */ - pid_t last_pid; /* pid of most recent accessor */ struct data_stats so_tc_stats[SO_TC_STATS_MAX]; struct klist so_klist; /* klist for EV_SOCK events */ @@ -308,21 +314,28 @@ struct socket { u_int32_t so_eventmask; /* event mask */ - u_int64_t e_upid; /* upid of the effective owner */ pid_t e_pid; /* pid of the effective owner */ + u_int64_t e_upid; /* upid of the effective owner */ uuid_t last_uuid; /* uuid of most recent accessor */ uuid_t e_uuid; /* uuid of effective owner */ uuid_t so_vuuid; /* UUID of the Voucher originator */ int32_t so_policy_gencnt; /* UUID policy gencnt */ - u_int32_t so_ifdenied_notifies; /* # of notifications generated */ u_int32_t so_flags1; #define SOF1_POST_FALLBACK_SYNC 0x00000001 /* fallback to TCP */ #define SOF1_AWDL_PRIVILEGED 0x00000002 #define SOF1_IF_2KCL 
0x00000004 /* interface prefers 2 KB clusters */ #define SOF1_DEFUNCTINPROG 0x00000008 +#define SOF1_DATA_IDEMPOTENT 0x00000010 /* idempotent data for TFO */ +#define SOF1_PRECONNECT_DATA 0x00000020 /* request for preconnect data */ +#define SOF1_EXTEND_BK_IDLE_WANTED 0x00000040 /* option set */ +#define SOF1_EXTEND_BK_IDLE_INPROG 0x00000080 /* socket */ +#define SOF1_CACHED_IN_SOCK_LAYER 0x00000100 /* bundled with inpcb and + tcpcb */ + + u_int64_t so_extended_bk_start; }; /* Control message accessor in mbufs */ @@ -492,6 +505,27 @@ struct xsockstat_n { u_int32_t xst_kind; /* XSO_STATS */ struct data_stats xst_tc_stats[SO_TC_STATS_MAX]; }; + +/* + * Global socket statistics + */ +struct soextbkidlestat { + u_int32_t so_xbkidle_maxperproc; + u_int32_t so_xbkidle_time; + u_int32_t so_xbkidle_rcvhiwat; + int32_t so_xbkidle_notsupp; + int32_t so_xbkidle_toomany; + int32_t so_xbkidle_wantok; + int32_t so_xbkidle_active; + int32_t so_xbkidle_nocell; + int32_t so_xbkidle_notime; + int32_t so_xbkidle_forced; + int32_t so_xbkidle_resumed; + int32_t so_xbkidle_expired; + int32_t so_xbkidle_resched; + int32_t so_xbkidle_nodlgtd; + int32_t so_xbkidle_drained; +}; #endif /* PRIVATE */ #pragma pack() @@ -636,6 +670,7 @@ struct so_procinfo { pid_t spi_epid; uuid_t spi_uuid; uuid_t spi_euuid; + int spi_delegated; }; extern u_int32_t sb_max; @@ -652,9 +687,13 @@ extern uint32_t tcp_autosndbuf_max; extern u_int32_t sotcdb; extern u_int32_t net_io_policy_throttled; extern u_int32_t net_io_policy_log; +extern u_int32_t net_io_policy_throttle_best_effort; #if CONFIG_PROC_UUID_POLICY extern u_int32_t net_io_policy_uuid; #endif /* CONFIG_PROC_UUID_POLICY */ + +extern struct soextbkidlestat soextbkidlestat; + #endif /* BSD_KERNEL_PRIVATE */ struct mbuf; @@ -662,6 +701,18 @@ struct sockaddr; struct ucred; struct uio; +#define SOCK_MSG_SA 0x01 +#define SOCK_MSG_CONTROL 0x02 +#define SOCK_MSG_DATA 0x04 + +struct recv_msg_elem { + struct uio *uio; + struct sockaddr *psa; + struct mbuf 
*controlp; + int which; + int flags; +}; + /* * From uipc_socket and friends */ @@ -679,6 +730,7 @@ extern int sodisconnect(struct socket *so); extern void sofree(struct socket *so); extern void sofreelastref(struct socket *, int); extern void soisconnected(struct socket *so); +extern boolean_t socanwrite(struct socket *so); extern void soisconnecting(struct socket *so); extern void soisdisconnected(struct socket *so); extern void soisdisconnecting(struct socket *so); @@ -691,13 +743,13 @@ extern int sooptcopyout(struct sockopt *sopt, void *data, size_t len); extern int soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); extern int soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc); +extern void soreserve_preconnect(struct socket *so, unsigned int pre_cc); extern void sorwakeup(struct socket *so); extern int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags); -extern int sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uio, - u_int uiocnt, struct mbuf *top, struct mbuf *control, int flags); -extern int soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uio, - u_int uiocnt, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); +extern int sosend_list(struct socket *so, struct uio **uio, u_int uiocnt, int flags); +extern int soreceive_list(struct socket *so, struct recv_msg_elem *msgarray, u_int msgcnt, + int *flags); extern void sonullevent(struct socket *so, void *arg, uint32_t hint); __END_DECLS @@ -771,11 +823,11 @@ extern int soconnect(struct socket *so, struct sockaddr *nam); extern int soconnectlock(struct socket *so, struct sockaddr *nam, int dolock); extern int soconnect2(struct socket *so1, struct socket *so2); extern int soconnectxlocked(struct socket *so, struct sockaddr_list **src_sl, - struct sockaddr_list **dst_sl, struct proc *, uint32_t, associd_t, - connid_t 
*, uint32_t, void *, u_int32_t); -extern int sodisconnectx(struct socket *so, associd_t, connid_t); -extern int sodisconnectxlocked(struct socket *so, associd_t, connid_t); -extern int sopeelofflocked(struct socket *so, associd_t, struct socket **); + struct sockaddr_list **dst_sl, struct proc *, uint32_t, sae_associd_t, + sae_connid_t *, uint32_t, void *, u_int32_t, uio_t, user_ssize_t*); +extern int sodisconnectx(struct socket *so, sae_associd_t, sae_connid_t); +extern int sodisconnectxlocked(struct socket *so, sae_associd_t, sae_connid_t); +extern int sopeelofflocked(struct socket *so, sae_associd_t, struct socket **); extern void soevupcall(struct socket *, u_int32_t); /* flags for socreate_internal */ #define SOCF_ASYNC 0x1 /* non-blocking socket */ @@ -792,12 +844,17 @@ extern void somultipages(struct socket *, boolean_t); extern void soif2kcl(struct socket *, boolean_t); extern int sosetdefunct(struct proc *, struct socket *, int level, boolean_t); extern int sodefunct(struct proc *, struct socket *, int level); +extern int soresume(struct proc *, struct socket *, int); +extern void resume_proc_sockets(proc_t); +extern int so_check_extended_bk_idle_time(struct socket *); +extern void so_drain_extended_bk_idle(struct socket *); extern void sohasoutofband(struct socket *so); extern void sodisconnectwakeup(struct socket *so); extern int soisthrottled(struct socket *so); extern int soisprivilegedtraffic(struct socket *so); extern int soissrcbackground(struct socket *so); extern int soissrcrealtime(struct socket *so); +extern int soissrcbesteffort(struct socket *so); extern int solisten(struct socket *so, int backlog); extern struct socket *sodropablereq(struct socket *head); extern int socket_lock(struct socket *so, int refcount); @@ -845,6 +902,14 @@ extern struct sockaddr_list *sockaddrlist_dup(const struct sockaddr_list *, #define PKT_SCF_IPV6 0x00000001 /* IPv6 packet */ #define PKT_SCF_TCP_ACK 0x00000002 /* Pure TCP ACK */ +/* + * Flags for connectx(2) 
user-protocol request routine. + */ + +#define CONNREQF_MPTCP 0x1 /* called internally by MPTCP */ +#define CONNREQF_UIO 0x2 /* there's data */ +#define CONNREQF_IDEM 0x4 /* data is idempotent */ + extern void set_packet_service_class(struct mbuf *, struct socket *, mbuf_svc_class_t, u_int32_t); extern void so_tc_update_stats(struct mbuf *, struct socket *, @@ -905,6 +970,7 @@ extern void evsofree(struct socket *); extern int tcp_notsent_lowat_check(struct socket *so); extern user_ssize_t uio_array_resid(struct uio **, u_int); +extern user_ssize_t recv_msg_array_resid(struct recv_msg_elem *, u_int); void sotoxsocket_n(struct socket *, struct xsocket_n *); void sbtoxsockbuf_n(struct sockbuf *, struct xsockbuf_n *); diff --git a/bsd/sys/sockio.h b/bsd/sys/sockio.h index d020b9573..96f2519e5 100644 --- a/bsd/sys/sockio.h +++ b/bsd/sys/sockio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -226,9 +226,7 @@ #define SIOCGIFQUEUESTATS _IOWR('i', 147, struct if_qstatsreq) #define SIOCSIFTHROTTLE _IOWR('i', 148, struct if_throttlereq) #define SIOCGIFTHROTTLE _IOWR('i', 149, struct if_throttlereq) -#endif /* PRIVATE */ -#ifdef PRIVATE #define SIOCGASSOCIDS _IOWR('s', 150, struct so_aidreq) /* get associds */ #define SIOCGCONNIDS _IOWR('s', 151, struct so_cidreq) /* get connids */ #define SIOCGCONNINFO _IOWR('s', 152, struct so_cinforeq) /* get conninfo */ @@ -242,9 +240,7 @@ #endif /* BSD_KERNEL_PRIVATE */ #define SIOCSCONNORDER _IOWR('s', 153, struct so_cordreq) /* set conn order */ #define SIOCGCONNORDER _IOWR('s', 154, struct so_cordreq) /* get conn order */ -#endif /* PRIVATE */ -#ifdef PRIVATE #define SIOCSIFLOG _IOWR('i', 155, struct ifreq) #define SIOCGIFLOG _IOWR('i', 156, struct ifreq) #define SIOCGIFDELEGATE _IOWR('i', 157, struct ifreq) @@ -254,5 +250,27 @@ #define SIOCSIFEXPENSIVE _IOWR('i', 161, struct ifreq) /* mark 
interface expensive */ #define SIOCGIF2KCL _IOWR('i', 162, struct ifreq) /* interface prefers 2 KB clusters */ #define SIOCSIF2KCL _IOWR('i', 163, struct ifreq) +#define SIOCGSTARTDELAY _IOWR('i', 164, struct ifreq) + +#define SIOCAIFAGENTID _IOWR('i', 165, struct if_agentidreq) /* Add netagent id */ +#define SIOCDIFAGENTID _IOWR('i', 166, struct if_agentidreq) /* Delete netagent id */ +#define SIOCGIFAGENTIDS _IOWR('i', 167, struct if_agentidsreq) /* Get netagent ids */ +#define SIOCGIFAGENTDATA _IOWR('i', 168, struct netagent_req) /* Get netagent data */ +#ifdef BSD_KERNEL_PRIVATE +#define SIOCGIFAGENTIDS32 _IOWR('i', 167, struct if_agentidsreq32) +#define SIOCGIFAGENTIDS64 _IOWR('i', 167, struct if_agentidsreq64) +#define SIOCGIFAGENTDATA32 _IOWR('i', 168, struct netagent_req32) +#define SIOCGIFAGENTDATA64 _IOWR('i', 168, struct netagent_req64) +#endif /* BSD_KERNEL_PRIVATE */ + +#define SIOCSIFINTERFACESTATE _IOWR('i', 169, struct ifreq) /* set interface state */ +#define SIOCGIFINTERFACESTATE _IOWR('i', 170, struct ifreq) /* get interface state */ +#define SIOCSIFPROBECONNECTIVITY _IOWR('i', 171, struct ifreq) /* Start/Stop probes to check connectivity */ +#define SIOCGIFPROBECONNECTIVITY _IOWR('i', 172, struct ifreq) /* check if connectivity probes are enabled */ + +#define SIOCGIFFUNCTIONALTYPE _IOWR('i', 173, struct ifreq) /* get interface functional type */ +#define SIOCSIFNETSIGNATURE _IOWR('i', 174, struct if_nsreq) +#define SIOCGIFNETSIGNATURE _IOWR('i', 175, struct if_nsreq) #endif /* PRIVATE */ + #endif /* !_SYS_SOCKIO_H_ */ diff --git a/bsd/sys/spawn_internal.h b/bsd/sys/spawn_internal.h index 7054f6bb9..e794747db 100644 --- a/bsd/sys/spawn_internal.h +++ b/bsd/sys/spawn_internal.h @@ -41,11 +41,13 @@ #define _SYS_SPAWN_INTERNAL_H_ #include /* __offsetof(), __darwin_size_t */ +#include #include /* PATH_MAX */ #include #include #include #include +#include /* COALITION_NUM_TYPES */ /* * Allowable posix_spawn() port action types @@ -117,6 +119,17 @@ 
typedef struct _posix_spawn_mac_policy_extensions { #define PS_MAC_EXTENSIONS_INIT_COUNT 2 +/* + * Coalition posix spawn attributes + */ +struct _posix_spawn_coalition_info { + struct { + uint64_t psci_id; + uint32_t psci_role; + uint32_t psci_reserved1; + uint64_t psci_reserved2; + } psci_info[COALITION_NUM_TYPES]; +}; /* * A posix_spawnattr structure contains all of the attribute elements that @@ -124,6 +137,7 @@ typedef struct _posix_spawn_mac_policy_extensions { * presence of a bit in the flags field. All fields are initialized to the * appropriate default values by posix_spawnattr_init(). */ + typedef struct _posix_spawnattr { short psa_flags; /* spawn attribute flags */ short flags_padding; /* get the flags to be int aligned */ @@ -135,15 +149,16 @@ typedef struct _posix_spawnattr { int psa_apptype; /* app type and process spec behav */ uint64_t psa_cpumonitor_percent; /* CPU usage monitor percentage */ uint64_t psa_cpumonitor_interval; /* CPU usage monitor interval, in seconds */ - uint64_t psa_coalitionid; /* coalition to spawn into */ + uint64_t psa_reserved; - short psa_jetsam_flags; /* jetsam flags */ - short short_padding; /* Padding for alignment issues */ - int psa_priority; /* jetsam relative importance */ - int psa_high_water_mark; /* jetsam resident page count limit */ - int int_padding; /* Padding for alignment issues */ + short psa_jetsam_flags; /* jetsam flags */ + short short_padding; /* Padding for alignment issues */ + int psa_priority; /* jetsam relative importance */ + int psa_memlimit_active; /* jetsam memory limit (in MB) when process is active */ + int psa_memlimit_inactive; /* jetsam memory limit (in MB) when process is inactive */ uint64_t psa_qos_clamp; /* QoS Clamp to set on the new process */ + uint64_t psa_darwin_role; /* PRIO_DARWIN_ROLE to set on the new process */ /* * NOTE: Extensions array pointers must stay at the end so that @@ -152,16 +167,25 @@ typedef struct _posix_spawnattr { */ _posix_spawn_port_actions_t psa_ports; /* 
special/exception ports */ _posix_spawn_mac_policy_extensions_t psa_mac_extensions; /* MAC policy-specific extensions. */ + struct _posix_spawn_coalition_info *psa_coalition_info; /* coalition info */ + void *reserved; } *_posix_spawnattr_t; /* - * Jetsam flags + * Jetsam flags eg: psa_jetsam_flags */ #define POSIX_SPAWN_JETSAM_SET 0x8000 -#define POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY 0x1 -#define POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND 0x2 -#define POSIX_SPAWN_JETSAM_MEMLIMIT_FATAL 0x4 +#define POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY 0x01 +#define POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND 0x02 +#define POSIX_SPAWN_JETSAM_MEMLIMIT_FATAL 0x04 /* to be deprecated */ + +/* + * Additional flags available for use with + * the posix_spawnattr_setjetsam_ext() call + */ +#define POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL 0x04 /* if set, limit is fatal when the process is active */ +#define POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL 0x08 /* if set, limit is fatal when the process is inactive */ /* * Deprecated posix_spawn psa_flags values @@ -215,6 +239,10 @@ typedef struct _posix_spawnattr { #define POSIX_SPAWN_PROC_CLAMP_MAINTENANCE 0x00000003 #define POSIX_SPAWN_PROC_CLAMP_LAST 0x00000004 +/* Setting to indicate no change to darwin role */ +#define POSIX_SPAWN_DARWIN_ROLE_NONE 0x00000000 +/* Other possible values are specified by PRIO_DARWIN_ROLE in sys/resource.h */ + /* * Allowable posix_spawn() file actions */ @@ -308,7 +336,11 @@ struct _posix_spawn_args_desc { _posix_spawn_mac_policy_extensions_t mac_extensions; /* pointer to policy-specific * attributes */ + __darwin_size_t coal_info_size; + struct _posix_spawn_coalition_info *coal_info; /* pointer to coalition info */ + __darwin_size_t reserved_size; + void *reserved; }; #ifdef KERNEL @@ -328,6 +360,10 @@ struct user32__posix_spawn_args_desc { uint32_t port_actions; /* pointer to block */ uint32_t mac_extensions_size; uint32_t mac_extensions; + uint32_t coal_info_size; + uint32_t coal_info; + uint32_t reserved_size; 
+ uint32_t reserved; }; struct user__posix_spawn_args_desc { @@ -339,6 +375,10 @@ struct user__posix_spawn_args_desc { user_addr_t port_actions; /* pointer to block */ user_size_t mac_extensions_size; /* size of MAC-specific attrs. */ user_addr_t mac_extensions; /* pointer to block */ + user_size_t coal_info_size; + user_addr_t coal_info; + user_size_t reserved_size; + user_addr_t reserved; }; diff --git a/bsd/sys/stackshot.h b/bsd/sys/stackshot.h new file mode 100644 index 000000000..50a1eb500 --- /dev/null +++ b/bsd/sys/stackshot.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_STACKSHOT_H +#define _SYS_STACKSHOT_H + +#include + +#define STACKSHOT_CONFIG_TYPE 1 + +typedef struct stackshot_config { + /* Input options */ + int sc_pid; /* PID to trace, or -1 for the entire system */ + uint32_t sc_flags; /* Stackshot flags */ + uint64_t sc_since_timestamp; /* Get traces of threads that have run since this time (NOT YET SUPPORTED) */ + + /* Stackshot results */ + uint64_t sc_buffer; /* Pointer to stackshot buffer */ + uint32_t sc_size; /* Length of the stackshot buffer */ + + /* Internals */ + uint64_t sc_out_buffer_addr; /* Location where the kernel should copy the address of the newly mapped buffer in user space */ + uint64_t sc_out_size_addr; /* Location where the kernel should copy the size of the stackshot buffer */ +} stackshot_config_t; + +#ifndef KERNEL + +#if !LIBSYSCALL_INTERFACE +typedef struct stackshot_config stackshot_config_t; +#endif + +stackshot_config_t * stackshot_config_create(void); +int stackshot_config_set_pid(stackshot_config_t * stackshot_config, int pid); +int stackshot_config_set_flags(stackshot_config_t * stackshot_config, uint32_t flags); +int stackshot_capture_with_config(stackshot_config_t * stackshot_config); +void * stackshot_config_get_stackshot_buffer(stackshot_config_t * stackshot_config); +uint32_t stackshot_config_get_stackshot_size(stackshot_config_t * stackshot_config); +int stackshot_config_set_size_hint(stackshot_config_t * stackshot_config, uint32_t suggested_size); +int stackshot_config_dealloc_buffer(stackshot_config_t * stackshot_config); +int stackshot_config_dealloc(stackshot_config_t * stackshot_config); + +#endif /* KERNEL */ + +#endif /* _SYS_STACKSHOT_H */ diff --git a/bsd/sys/stat.h b/bsd/sys/stat.h index 3614aaac2..c9fac7e25 100644 --- a/bsd/sys/stat.h +++ b/bsd/sys/stat.h @@ -480,17 +480,18 @@ extern void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp); /* * Super-user changeable flags. 
*/ +#define SF_SUPPORTED 0x001f0000 /* mask of superuser supported flags */ #define SF_SETTABLE 0xffff0000 /* mask of superuser changeable flags */ #define SF_ARCHIVED 0x00010000 /* file is archived */ #define SF_IMMUTABLE 0x00020000 /* file may not be changed */ #define SF_APPEND 0x00040000 /* writes to file may only append */ #define SF_RESTRICTED 0x00080000 /* restricted access */ +#define SF_NOUNLINK 0x00100000 /* Item may not be removed, renamed or mounted on */ /* * The following two bits are reserved for FreeBSD. They are not * implemented in Mac OS X. */ -/* #define SF_NOUNLINK 0x00100000 */ /* file may not be removed or renamed */ /* #define SF_SNAPSHOT 0x00200000 */ /* snapshot inode */ /* NOTE: There is no SF_HIDDEN bit. */ diff --git a/bsd/sys/sysctl.h b/bsd/sys/sysctl.h index f224e999a..d597067de 100644 --- a/bsd/sys/sysctl.h +++ b/bsd/sys/sysctl.h @@ -521,7 +521,7 @@ SYSCTL_DECL(_user); #define KERN_SPECULATIVE_READS 64 /* int: whether speculative reads are disabled */ #define KERN_OSVERSION 65 /* for build number i.e. 9A127 */ #define KERN_SAFEBOOT 66 /* are we booted safe? 
*/ -#define KERN_LCTX 67 /* node: login context */ + /* 67 was KERN_LCTX (login context) */ #define KERN_RAGEVNODE 68 #define KERN_TTY 69 /* node: tty settings */ #define KERN_CHECKOPENEVT 70 /* spi: check the VOPENEVT flag on vnodes at open time */ @@ -586,6 +586,10 @@ SYSCTL_DECL(_user); #define KERN_KDSET_TYPEFILTER 22 #define KERN_KDBUFWAIT 23 #define KERN_KDCPUMAP 24 +#define KERN_KDWAIT_BG_TRACE_RESET 25 +#define KERN_KDSET_BG_TYPEFILTER 26 +#define KERN_KDWRITEMAP_V3 27 +#define KERN_KDWRITETR_V3 28 #define CTL_KERN_NAMES { \ { 0, 0 }, \ @@ -655,7 +659,7 @@ SYSCTL_DECL(_user); { "speculative_reads_disabled", CTLTYPE_INT }, \ { "osversion", CTLTYPE_STRING }, \ { "safeboot", CTLTYPE_INT }, \ - { "lctx", CTLTYPE_NODE }, \ + { "dummy", CTLTYPE_INT }, /* deprecated: lctx */ \ { "rage_vnode", CTLTYPE_INT }, \ { "tty", CTLTYPE_NODE }, \ { "check_openevt", CTLTYPE_INT }, \ @@ -681,13 +685,6 @@ SYSCTL_DECL(_user); #define KERN_PROC_RUID 6 /* by real uid */ #define KERN_PROC_LCID 7 /* by login context id */ -/* - * KERN_LCTX subtypes - */ -#define KERN_LCTX_ALL 0 /* everything */ -#define KERN_LCTX_LCID 1 /* by login context id */ - - #if defined(XNU_KERNEL_PRIVATE) || !defined(KERNEL) /* * KERN_PROC subtype ops return arrays of augmented proc structures: @@ -735,20 +732,10 @@ struct kinfo_proc { #define EPROC_SLEADER 0x02 /* session leader */ #define COMAPT_MAXLOGNAME 12 char e_login[COMAPT_MAXLOGNAME]; /* short setlogin() name */ -#if CONFIG_LCTX - pid_t e_lcid; - int32_t e_spare[3]; -#else int32_t e_spare[4]; -#endif } kp_eproc; }; -struct kinfo_lctx { - pid_t id; /* Login Context ID */ - int mc; /* Member Count */ -}; - #endif /* defined(XNU_KERNEL_PRIVATE) || !defined(KERNEL) */ #ifdef BSD_KERNEL_PRIVATE @@ -803,12 +790,7 @@ struct user32_kinfo_proc { short e_xswrss; int32_t e_flag; char e_login[COMAPT_MAXLOGNAME]; /* short setlogin() name */ -#if CONFIG_LCTX - pid_t e_lcid; - int32_t e_spare[3]; -#else int32_t e_spare[4]; -#endif } kp_eproc; }; struct 
user64_kinfo_proc { @@ -832,12 +814,7 @@ struct user64_kinfo_proc { short e_xswrss; int32_t e_flag; char e_login[COMAPT_MAXLOGNAME]; /* short setlogin() name */ -#if CONFIG_LCTX - pid_t e_lcid; - int32_t e_spare[3]; -#else int32_t e_spare[4]; -#endif } kp_eproc; }; diff --git a/bsd/sys/sysent.h b/bsd/sys/sysent.h index e69e93b2b..297de2cc2 100644 --- a/bsd/sys/sysent.h +++ b/bsd/sys/sysent.h @@ -42,7 +42,7 @@ typedef void sy_munge_t(void *); struct sysent { /* system call table */ sy_call_t *sy_call; /* implementing function */ -#if CONFIG_REQUIRES_U32_MUNGING +#if CONFIG_REQUIRES_U32_MUNGING || (__arm__ && (__BIGGEST_ALIGNMENT__ > 4)) sy_munge_t *sy_arg_munge32; /* system call arguments munger for 32-bit process */ #endif int32_t sy_return_type; /* system call return types */ @@ -57,7 +57,7 @@ extern struct sysent sysent[]; #endif /* __INIT_SYSENT_C__ */ extern int nsysent; -#define NUM_SYSENT 490 /* Current number of defined syscalls */ +#define NUM_SYSENT 500 /* Current number of defined syscalls */ /* * Valid values for sy_cancel diff --git a/bsd/sys/systm.h b/bsd/sys/systm.h index df3379053..2cbbba27f 100644 --- a/bsd/sys/systm.h +++ b/bsd/sys/systm.h @@ -123,6 +123,7 @@ extern const char copyright[]; /* system copyright */ extern int boothowto; /* reboot flags, from console subsystem */ extern int show_space; +extern int minimalboot; extern int nblkdev; /* number of entries in bdevsw */ extern int nchrdev; /* number of entries in cdevsw */ @@ -228,7 +229,7 @@ typedef struct __throttle_info_handle *throttle_info_handle_t; int throttle_info_ref_by_mask(uint64_t throttle_mask, throttle_info_handle_t *throttle_info_handle); void throttle_info_rel_by_mask(throttle_info_handle_t throttle_info_handle); void throttle_info_update_by_mask(void *throttle_info_handle, int flags); -void throttle_info_disable_throttle(int devno); +void throttle_info_disable_throttle(int devno, boolean_t isfusion); /* * 'throttle_info_handle' acquired via 'throttle_info_ref_by_mask' * 
'policy' should be specified as either IOPOL_UTILITY or IPOL_THROTTLE, diff --git a/bsd/sys/time.h b/bsd/sys/time.h index fdf777617..da5e4d784 100644 --- a/bsd/sys/time.h +++ b/bsd/sys/time.h @@ -80,6 +80,11 @@ #include #include #include + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#include +#endif /* !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) */ + #ifdef KERNEL #include #include diff --git a/bsd/sys/ubc.h b/bsd/sys/ubc.h index b3389d4b0..720ae9818 100644 --- a/bsd/sys/ubc.h +++ b/bsd/sys/ubc.h @@ -89,6 +89,7 @@ int ubc_cs_generation_check(vnode_t); int cs_entitlements_blob_get(proc_t, void **, size_t *); int cs_blob_get(proc_t, void **, size_t *); const char *cs_identity_get(proc_t); + #endif /* cluster IO routines */ @@ -118,6 +119,9 @@ void cluster_zero(upl_t, upl_offset_t, int, buf_t); int cluster_copy_upl_data(uio_t, upl_t, int, int *); int cluster_copy_ubc_data(vnode_t, uio_t, int *, int); +typedef struct cl_direct_read_lock cl_direct_read_lock_t; +cl_direct_read_lock_t *cluster_lock_direct_read(vnode_t vp, lck_rw_type_t exclusive); +void cluster_unlock_direct_read(cl_direct_read_lock_t *lck); /* UPL routines */ int ubc_create_upl(vnode_t, off_t, int, upl_t *, upl_page_info_t **, int); @@ -134,6 +138,8 @@ upl_size_t ubc_upl_maxbufsize(void); int is_file_clean(vnode_t, off_t); +errno_t mach_to_bsd_errno(kern_return_t mach_err); + __END_DECLS #endif /* _SYS_UBC_H_ */ diff --git a/bsd/sys/ubc_internal.h b/bsd/sys/ubc_internal.h index 90424d745..f5b04763a 100644 --- a/bsd/sys/ubc_internal.h +++ b/bsd/sys/ubc_internal.h @@ -44,14 +44,13 @@ #include #include #include +#include #include #include #include -#include - #define UBC_INFO_NULL ((struct ubc_info *) 0) @@ -95,6 +94,7 @@ struct cl_writebehind { struct cl_wextent cl_clusters[MAX_CLUSTERS]; /* packed write behind clusters */ }; +struct cs_hash; struct cs_blob { struct cs_blob *csb_next; @@ -107,10 +107,11 @@ struct cs_blob { vm_size_t csb_mem_size; vm_offset_t csb_mem_offset; 
vm_address_t csb_mem_kaddr; - unsigned char csb_sha1[SHA1_RESULTLEN]; - unsigned int csb_sigpup; + unsigned char csb_cdhash[CS_CDHASH_LEN]; + struct cs_hash *csb_hashtype; const char *csb_teamid; - unsigned int csb_platform_binary; + unsigned int csb_platform_binary:1; + unsigned int csb_platform_path:1; }; /* @@ -195,7 +196,7 @@ int UBCINFOEXISTS(const struct vnode *); /* code signing */ struct cs_blob; -int ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, vm_size_t, int); +int ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, vm_size_t, int, struct cs_blob **); int ubc_cs_sigpup_add(vnode_t, vm_address_t, vm_size_t); struct cs_blob *ubc_get_cs_blobs(vnode_t); void ubc_get_cs_mtime(vnode_t, struct timespec *); diff --git a/bsd/sys/uio_internal.h b/bsd/sys/uio_internal.h index 959eb501c..e1535ae82 100644 --- a/bsd/sys/uio_internal.h +++ b/bsd/sys/uio_internal.h @@ -144,7 +144,7 @@ struct uio { off_t uio_offset; enum uio_seg uio_segflg; enum uio_rw uio_rw; - user_ssize_t uio_resid_64; + user_size_t uio_resid_64; int uio_size; /* size for use with kfree */ int uio_max_iovs; /* max number of iovecs this uio_t can hold */ u_int32_t uio_flags; diff --git a/bsd/sys/user.h b/bsd/sys/user.h index 8fc6c578a..4e5235754 100644 --- a/bsd/sys/user.h +++ b/bsd/sys/user.h @@ -65,6 +65,7 @@ #define _SYS_USER_H_ #include +struct waitq_set; #ifndef KERNEL /* stuff that *used* to be included by user.h, or is now needed */ #include @@ -144,7 +145,7 @@ struct uthread { union { struct _select_data { u_int64_t abstime; - char * wql; + uint64_t *wqp; int count; struct select_nocancel_args *args; /* original syscall arguments */ int32_t *retval; /* place to store return val */ @@ -158,12 +159,12 @@ struct uthread { struct _kevent { struct _kqueue_scan scan;/* space for the generic data */ struct fileproc *fp; /* fileproc we hold iocount on */ - int fd; /* filedescriptor for kq */ - int32_t *retval; /* place to store return val */ + int fd; /* filedescriptor for kq */ + 
unsigned int eventflags; /* flags to determine kevent size/direction */ + int eventcount; /* user-level event count */ + int eventout; /* number of events output */ + int32_t *retval; /* place to store return val */ user_addr_t eventlist; /* user-level event list address */ - size_t eventsize; /* kevent or kevent64_s */ - int eventcount; /* user-level event count */ - int eventout; /* number of events output */ } ss_kevent; /* saved state for kevent() */ struct _kauth { @@ -198,8 +199,8 @@ struct uthread { struct proc * uu_proc; thread_t uu_thread; void * uu_userstate; - wait_queue_set_t uu_wqset; /* cached across select calls */ - size_t uu_allocsize; /* ...size of select cache */ + struct waitq_set *uu_wqset; /* waitq state cached across select calls */ + size_t uu_wqstate_sz; /* ...size of uu_wqset buffer */ int uu_flag; sigset_t uu_siglist; /* signals pending for the thread */ sigset_t uu_sigwait; /* sigwait on this thread*/ @@ -235,6 +236,16 @@ struct uthread { void * uu_vps[32]; void * uu_pcs[32][10]; #endif + +#if PROC_REF_DEBUG +#define NUM_PROC_REFS_TO_TRACK 32 +#define PROC_REF_STACK_DEPTH 10 + int uu_proc_refcount; + int uu_pindex; + void * uu_proc_ps[NUM_PROC_REFS_TO_TRACK]; + uintptr_t uu_proc_pcs[NUM_PROC_REFS_TO_TRACK][PROC_REF_STACK_DEPTH]; +#endif + #if CONFIG_DTRACE uint32_t t_dtrace_errno; /* Most recent errno */ siginfo_t t_dtrace_siginfo; @@ -277,7 +288,6 @@ struct uthread { #endif /* CONFIG_DTRACE */ void * uu_threadlist; char * pth_name; - struct label * uu_label; /* MAC label */ /* Document Tracking struct used to track a "tombstone" for a document */ struct doc_tombstone *t_tombstone; diff --git a/bsd/sys/vnode.h b/bsd/sys/vnode.h index 309842aee..e243a7a2f 100644 --- a/bsd/sys/vnode.h +++ b/bsd/sys/vnode.h @@ -170,6 +170,7 @@ enum vtagtype { #define IO_SYSCALL_DISPATCH 0x100000 /* I/O was originated from a file table syscall */ #define IO_SWAP_DISPATCH 0x200000 /* I/O was originated from the swap layer */ #define IO_SKIP_ENCRYPTION 
0x400000 /* Skips en(de)cryption on the IO. Must be initiated from kernel */ +#define IO_EVTONLY 0x800000 /* the i/o is being done on an fd that's marked O_EVTONLY */ /* * Component Name: this structure describes the pathname @@ -247,6 +248,14 @@ struct vnode_fsparam { #define VNCREATE_FLAVOR 0 #define VCREATESIZE sizeof(struct vnode_fsparam) +#ifdef KERNEL_PRIVATE +/* + * For use with SPI to create trigger vnodes. + */ +struct vnode_trigger_param; +#define VNCREATE_TRIGGER (('T' << 8) + ('V')) +#define VNCREATE_TRIGGER_SIZE sizeof(struct vnode_trigger_param) +#endif /* KERNEL_PRIVATE */ #ifdef KERNEL_PRIVATE @@ -450,9 +459,6 @@ struct vnode_trigger_param { uint32_t vnt_flags; /* optional flags (see below) */ }; -#define VNCREATE_TRIGGER (('T' << 8) + ('V')) -#define VNCREATE_TRIGGER_SIZE sizeof(struct vnode_trigger_param) - /* * vnode trigger flags (vnt_flags) * @@ -683,7 +689,8 @@ struct vnode_attr { /* * Flags for va_dataprotect_flags */ -#define VA_DP_RAWENCRYPTED 0x0001 +#define VA_DP_RAWENCRYPTED 0x0001 +#define VA_DP_RAWUNENCRYPTED 0x0002 #endif @@ -801,6 +808,37 @@ __BEGIN_DECLS */ errno_t vnode_create(uint32_t, uint32_t, void *, vnode_t *); +#if KERNEL_PRIVATE +/*! + @function vnode_create_empty + @abstract Create an empty, uninitialized vnode. + @discussion Returns with an iocount held on the vnode which must eventually be + dropped with vnode_put(). The next operation performed on the vnode must be + vnode_initialize (or vnode_put if the vnode is not needed anymore). + This interface is provided as a mechanism to pre-flight obtaining a vnode for + certain filesystem operations which may need to get a vnode without filesystem + locks held. It is imperative that nothing be done with the vnode till the + succeeding vnode_initialize (or vnode_put as the case may be) call. + @param vpp Pointer to a vnode pointer, to be filled in with newly created vnode. + @return 0 for success, error code otherwise. + */ +errno_t vnode_create_empty(vnode_t *); + +/*! 
+ @function vnode_initialize + @abstract Initialize a vnode obtained by vnode_create_empty + @discussion Does not drop iocount held on the vnode which must eventually be + dropped with vnode_put(). In case of an error however, the vnode's iocount is + dropped and the vnode must not be referenced again by the caller. + @param flavor Should be VNCREATE_FLAVOR. + @param size Size of the struct vnode_fsparam in "data". + @param data Pointer to a struct vnode_fsparam containing initialization information. + @param vpp Pointer to a vnode pointer, to be filled in with newly created vnode. + @return 0 for success, error code otherwise. + */ +errno_t vnode_initialize(uint32_t, uint32_t, void *, vnode_t *); +#endif /* KERNEL_PRIVATE */ + /*! @function vnode_addfsref @abstract Mark a vnode as being stored in a filesystem hash. @@ -1116,6 +1154,58 @@ void vnode_setnoreadahead(vnode_t); */ void vnode_clearnoreadahead(vnode_t); +/*! + @function vnode_isfastdevicecandidate + @abstract Check if a vnode is a candidate to store on the fast device of a composite disk system + @param vp The vnode which you want to test. + @return Nonzero if the vnode is marked as a fast-device candidate + @return void. + */ +int vnode_isfastdevicecandidate(vnode_t); + +/*! + @function vnode_setfastdevicecandidate + @abstract Mark a vnode as a candidate to store on the fast device of a composite disk system + @abstract If the vnode is a directory, all its children will inherit this bit. + @param vp The vnode which you want marked. + @return void. + */ +void vnode_setfastdevicecandidate(vnode_t); + +/*! + @function vnode_clearfastdevicecandidate + @abstract Clear the status of a vnode being a candidate to store on the fast device of a composite disk system. + @param vp The vnode whose flag to clear. + @return void. + */ +void vnode_clearfastdevicecandidate(vnode_t); + +/*! 
+ @function vnode_isautocandidate + @abstract Check if a vnode was automatically selected to be fast-dev candidate (see vnode_setfastdevicecandidate) + @param vp The vnode which you want to test. + @return Nonzero if the vnode was automatically marked as a fast-device candidate + @return void. + */ +int vnode_isautocandidate(vnode_t); + +/*! + @function vnode_setautocandidate + @abstract Mark a vnode as an automatically selected candidate for storing on the fast device of a composite disk system + @abstract If the vnode is a directory, all its children will inherit this bit. + @param vp The vnode which you want marked. + @return void. + */ +void vnode_setautocandidate(vnode_t); + +/*! + @function vnode_clearautocandidate + @abstract Clear the status of a vnode being an automatic candidate (see above) + @param vp The vnode whose flag to clear. + @return void. + */ +void vnode_clearautocandidate(vnode_t); + /* left only for compat reasons as User code depends on this from getattrlist, for ex */ /*! @@ -1481,20 +1571,6 @@ int vnode_recycle(vnode_t); #endif /* BSD_KERNEL_PRIVATE */ -/*! - @function vnode_notify - @abstract Send a notification up to VFS. - @param vp Vnode for which to provide notification. - @param vap Attributes for that vnode, to be passed to fsevents. - @discussion Filesystem determines which attributes to pass up using - vfs_get_notify_attributes(&vap). The most specific events possible should be passed, - e.g. VNODE_EVENT_FILE_CREATED on a directory rather than just VNODE_EVENT_WRITE, but - a less specific event can be passed up if more specific information is not available. - Will not reenter the filesystem. - @return 0 for success, else an error code. - */ -int vnode_notify(vnode_t, uint32_t, struct vnode_attr*); - /*! @function vnode_ismonitored @abstract Check whether a file has watchers that would make it useful to query a server @@ -1516,15 +1592,6 @@ int vnode_ismonitored(vnode_t); int vnode_isdyldsharedcache(vnode_t); -/*!
- @function vfs_get_notify_attributes - @abstract Determine what attributes are required to send up a notification with vnode_notify(). - @param vap Structure to initialize and activate required attributes on. - @discussion Will not reenter the filesystem. - @return 0 for success, nonzero for error (currently always succeeds). - */ -int vfs_get_notify_attributes(struct vnode_attr *vap); - /*! @function vn_getpath_fsenter @abstract Attempt to get a vnode's path, willing to enter the filesystem. @@ -1637,11 +1704,35 @@ errno_t vnode_close(vnode_t, int, vfs_context_t); */ int vn_getpath(struct vnode *vp, char *pathbuf, int *len); +/*! + @function vnode_notify + @abstract Send a notification up to VFS. + @param vp Vnode for which to provide notification. + @param vap Attributes for that vnode, to be passed to fsevents. + @discussion Filesystem determines which attributes to pass up using + vfs_get_notify_attributes(&vap). The most specific events possible should be passed, + e.g. VNODE_EVENT_FILE_CREATED on a directory rather than just VNODE_EVENT_WRITE, but + a less specific event can be passed up if more specific information is not available. + Will not reenter the filesystem. + @return 0 for success, else an error code. + */ +int vnode_notify(vnode_t, uint32_t, struct vnode_attr*); + +/*! + @function vfs_get_notify_attributes + @abstract Determine what attributes are required to send up a notification with vnode_notify(). + @param vap Structure to initialize and activate required attributes on. + @discussion Will not reenter the filesystem. + @return 0 for success, nonzero for error (currently always succeeds). + */ +int vfs_get_notify_attributes(struct vnode_attr *vap); + /* * Flags for the vnode_lookup and vnode_open */ #define VNODE_LOOKUP_NOFOLLOW 0x01 #define VNODE_LOOKUP_NOCROSSMOUNT 0x02 +#define VNODE_LOOKUP_CROSSMOUNTNOWAIT 0x04 /*! @function vnode_lookup @abstract Convert a path into a vnode. 
diff --git a/bsd/sys/vnode_internal.h b/bsd/sys/vnode_internal.h index abc1566a4..73722ba63 100644 --- a/bsd/sys/vnode_internal.h +++ b/bsd/sys/vnode_internal.h @@ -260,6 +260,13 @@ struct vnode { #define VNEEDSSNAPSHOT 0x1000000 #define VNOCS 0x2000000 /* is there no code signature available */ #define VISDIRTY 0x4000000 /* vnode will need IO if reclaimed */ +#define VFASTDEVCANDIDATE 0x8000000 /* vnode is a candidate to store on a fast device */ +#define VAUTOCANDIDATE 0x10000000 /* vnode was automatically marked as a fast-dev candidate */ +/* + 0x20000000 not used + 0x40000000 not used + 0x80000000 not used. +*/ /* * This structure describes vnode data which is specific to a file descriptor. @@ -552,6 +559,22 @@ void vfsinit(void); void vnode_lock(vnode_t); void vnode_unlock(vnode_t); +void vn_print_state(vnode_t /* vp */, const char * /* fmt */, ...) + __printflike(2,3); + +#if DEVELOPMENT || DEBUG +#define VNASSERT(exp, vp, msg) \ +do { \ + if (__improbable(!(exp))) { \ + vn_print_state(vp, "VNASSERT failed %s:%d\n", __FILE__, \ + __LINE__); \ + panic msg; \ + } \ +} while (0) +#else +#define VNASSERT(exp, vp, msg) +#endif /* DEVELOPMENT || DEBUG */ + /* * XXX exported symbols; should be static */ @@ -570,6 +593,7 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag, int *numdirent, vfs_context_t ctxp); void vnode_setswapmount(vnode_t); +int64_t vnode_getswappin_avail(vnode_t); #if CONFIG_TRIGGERS /* VFS Internal Vnode Trigger Interfaces (Private) */ diff --git a/bsd/sys/work_interval.h b/bsd/sys/work_interval.h new file mode 100644 index 000000000..cc9ba9fb7 --- /dev/null +++ b/bsd/sys/work_interval.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). 
You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_WORK_INTERVAL_H +#define _SYS_WORK_INTERVAL_H + +#include +#include +#include + +__BEGIN_DECLS + +/* + * Trusted clients with deadline-sensitive work may report information + * about the execution of their work using the work interval facility. + * This is intended to be a higher-level semantic than realtime scheduling, + * which operates at the level of thread block/unblock. A high level + * operation may have many blocking points, including IPC to other tasks, + * and this metric will capture the overall time to complete a unit of + * work. + * + * A work interval is defined by several timestamps, namely (S)tart, + * (F)inish, (D)eadline, and (N)ext start. + * + * ... ----+==================+--------+--+==== ...
+ * | | | | + * S F D N + * + * \__________________/ + * Active + * \___________________________/ + * Work Interval + * + * \_________/ + * | + * report information here ---------+ + * + * Definitions: + * + * Start: Absolute time when the current deadline-oriented work began. Due + * to scheduling latency, preemption, and blocking points, the + * thread controlling the work interval may actually begin + * executing after this ideal time (which may be the previous work + * interval's "next start") + * Finish: Absolute time when the current deadline-oriented work finished. + * This will typically be a timestamp taken before reporting using + * the work interval interface. + * Deadline: Absolute time by which the current work was expected to finish. + * In cases where the amount of computation (or preemption, or time + * spent blocked) causes the active period to take longer than + * expected, F may be greater than D. + * Next start: Absolute time when the next deadline-oriented work is + * expected to begin. This is typically the same as Deadline. + * Active: The fraction of the work interval spent completing the work. In + * cases where the Finish time exceeded the Deadline, this fraction + * will be >1.0. + * + * Basic Use: + * + * Clients should report information for a work interval after finishing + * work for the current interval but before the next work interval begins. + * + * If Finish far exceeds the previously expected Deadline, the + * caller may adjust Next Start to align to a multiple of the period + * (and skip over several work intervals that could not be + * executed). + * + * Caution (!): + * + * Because the information supplied via this facility directly influences power + * management decisions, clients should strive to be as accurate as possible. + * Failure to do so will adversely impact system power and performance. 
+ * + */ +#ifndef KERNEL + +typedef struct work_interval *work_interval_t; + +/* Create a new work interval handle (currently for the current thread only). Flags is unused */ +int work_interval_create(work_interval_t *interval_handle, uint32_t flags); + +/* Notify the power management subsystem that the work for a current interval has completed */ +int work_interval_notify(work_interval_t interval_handle, uint64_t start, uint64_t finish, uint64_t deadline, uint64_t next_start, uint32_t flags); + +/* Notify, with "finish" implicitly set to the current time */ +int work_interval_notify_simple(work_interval_t interval_handle, uint64_t start, uint64_t deadline, uint64_t next_start); + +/* Deallocate work interval (currently for the current thread only) */ +int work_interval_destroy(work_interval_t interval_handle); + +#endif /* KERNEL */ + +#if PRIVATE + +/* Private interface between Libsyscall and xnu */ +#define WORK_INTERVAL_OPERATION_CREATE 0x00000001 /* arg is a uint64_t * that accepts work interval ID as an OUT param */ +#define WORK_INTERVAL_OPERATION_DESTROY 0x00000002 +#define WORK_INTERVAL_OPERATION_NOTIFY 0x00000003 /* arg is a work_interval_notification_t */ + +struct work_interval_notification { + uint64_t start; + uint64_t finish; + uint64_t deadline; + uint64_t next_start; + uint32_t flags; + uint32_t unused1; +}; +typedef struct work_interval_notification *work_interval_notification_t; + +int __work_interval_ctl(uint32_t operation, uint64_t work_interval_id, void *arg, size_t len); + +#endif /* PRIVATE */ + +__END_DECLS + +#endif /* _SYS_WORK_INTERVAL_H */ diff --git a/bsd/uuid/Makefile b/bsd/uuid/Makefile index 10f9cecaa..1e5f59ecc 100644 --- a/bsd/uuid/Makefile +++ b/bsd/uuid/Makefile @@ -24,9 +24,6 @@ EXPORT_MI_LIST = ${KERNELFILES} EXPORT_MI_DIR = uuid -# /System/Library/Frameworks/System.framework/PrivateHeaders -INSTALL_MI_LCL_LIST = ${DATAFILES} - # /System/Library/Frameworks/Kernel.framework/PrivateHeaders INSTALL_KF_MI_LCL_LIST = ${KERNELFILES} 
diff --git a/bsd/uxkern/ux_exception.c b/bsd/uxkern/ux_exception.c index c3313018c..795a40808 100644 --- a/bsd/uxkern/ux_exception.c +++ b/bsd/uxkern/ux_exception.c @@ -76,8 +76,7 @@ extern mach_msg_return_t mach_msg_send(mach_msg_header_t *msg, mach_msg_option_t option, mach_msg_size_t send_size, mach_msg_timeout_t send_timeout, mach_port_name_t notify); extern thread_t convert_port_to_thread(ipc_port_t port); -extern void ipc_port_release(ipc_port_t); - +extern void ipc_port_release_send(ipc_port_t port); @@ -256,7 +255,7 @@ catch_mach_exception_raise( (void *) &thread_port) == MACH_MSG_SUCCESS)) { if (IPC_PORT_VALID(thread_port)) { th_act = convert_port_to_thread(thread_port); - ipc_port_release(thread_port); + ipc_port_release_send(thread_port); } else { th_act = THREAD_NULL; } diff --git a/bsd/vfs/kpi_vfs.c b/bsd/vfs/kpi_vfs.c index 1a71f6b69..19a4be3d1 100644 --- a/bsd/vfs/kpi_vfs.c +++ b/bsd/vfs/kpi_vfs.c @@ -1754,6 +1754,52 @@ vnode_clearnoreadahead(vnode_t vp) vnode_unlock(vp); } +int +vnode_isfastdevicecandidate(vnode_t vp) +{ + return ((vp->v_flag & VFASTDEVCANDIDATE)? 1 : 0); +} + +void +vnode_setfastdevicecandidate(vnode_t vp) +{ + vnode_lock_spin(vp); + vp->v_flag |= VFASTDEVCANDIDATE; + vnode_unlock(vp); +} + +void +vnode_clearfastdevicecandidate(vnode_t vp) +{ + vnode_lock_spin(vp); + vp->v_flag &= ~VFASTDEVCANDIDATE; + vnode_unlock(vp); +} + +int +vnode_isautocandidate(vnode_t vp) +{ + return ((vp->v_flag & VAUTOCANDIDATE)? 
1 : 0); +} + +void +vnode_setautocandidate(vnode_t vp) +{ + vnode_lock_spin(vp); + vp->v_flag |= VAUTOCANDIDATE; + vnode_unlock(vp); +} + +void +vnode_clearautocandidate(vnode_t vp) +{ + vnode_lock_spin(vp); + vp->v_flag &= ~VAUTOCANDIDATE; + vnode_unlock(vp); +} + + + /* mark vnode_t to skip vflush() is SKIPSYSTEM */ void @@ -1833,7 +1879,7 @@ vnode_setname(vnode_t vp, char * name) void vnode_vfsname(vnode_t vp, char * buf) { - strncpy(buf, vp->v_mount->mnt_vtable->vfc_name, MFSNAMELEN); + strlcpy(buf, vp->v_mount->mnt_vtable->vfc_name, MFSNAMELEN); } /* return the FS type number */ @@ -2459,6 +2505,11 @@ vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) goto out; } + /* Never allow the setting of any unsupported superuser flags. */ + if (VATTR_IS_ACTIVE(vap, va_flags)) { + vap->va_flags &= (SF_SUPPORTED | UF_SETTABLE); + } + error = VNOP_SETATTR(vp, vap, ctx); if ((error == 0) && !VATTR_ALL_SUPPORTED(vap)) @@ -3326,7 +3377,7 @@ VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t ct * We have to be able to use the root filesystem's device vnode even when * devfs isn't mounted (yet/anymore), so we can't go looking at its mount * structure. If there is no data pointer, it doesn't matter whether - * the device is 64-bit ready. Any command (like DKIOCSYNCHRONIZECACHE) + * the device is 64-bit ready. Any command (like DKIOCSYNCHRONIZE) * which passes NULL for its data pointer can therefore be used during * mount or unmount of the root filesystem. 
* @@ -3826,11 +3877,6 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s } else { _err = VNOP_RENAME(fdvp, *fvpp, fcnp, tdvp, *tvpp, tcnp, ctx); } -#if CONFIG_MACF - if (_err == 0) { - mac_vnode_notify_rename(ctx, *fvpp, tdvp, tcnp); - } -#endif /* * If moved to a new directory that is restricted, @@ -3850,6 +3896,12 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s } } +#if CONFIG_MACF + if (_err == 0) { + mac_vnode_notify_rename(ctx, *fvpp, tdvp, tcnp); + } +#endif + #if CONFIG_APPLEDOUBLE /* * Rename any associated extended attribute file (._ AppleDouble file). @@ -4891,6 +4943,9 @@ VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, if ((vp->v_flag & VLOCKLOCAL)) { /* Advisory locking done at this layer */ _err = lf_advlock(&a); + } else if (flags & F_OFD_LOCK) { + /* Non-local locking doesn't work for OFD locks */ + _err = err_advlock(&a); } else { /* Advisory locking done by underlying filesystem */ _err = (*vp->v_op[vnop_advlock_desc.vdesc_offset])(&a); diff --git a/bsd/vfs/vfs_attrlist.c b/bsd/vfs/vfs_attrlist.c index 54d0323f8..38d7a3f24 100644 --- a/bsd/vfs/vfs_attrlist.c +++ b/bsd/vfs/vfs_attrlist.c @@ -473,7 +473,7 @@ struct getattrlist_attrtab { static struct getattrlist_attrtab getattrlist_common_tab[] = { {ATTR_CMN_NAME, VATTR_BIT(va_name), sizeof(struct attrreference), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_DEVID, 0, sizeof(dev_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_FSID, VATTR_BIT(va_fsid), sizeof(fsid_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_FSID, 0, sizeof(fsid_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_OBJTYPE, 0, sizeof(fsobj_type_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_OBJTAG, 0, sizeof(fsobj_tag_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_OBJID, VATTR_BIT(va_fileid) | VATTR_BIT(va_linkid), sizeof(fsobj_id_t), KAUTH_VNODE_READ_ATTRIBUTES}, @@ -1446,14 +1446,6 @@ attr_pack_common(vfs_context_t ctx, struct vnode *vp, struct attrlist 
*alp, } else if (VATTR_IS_SUPPORTED(vap, va_fsid64)) { ATTR_PACK8((*abp), vap->va_fsid64); abp->actual.commonattr |= ATTR_CMN_FSID; - } else if (VATTR_IS_SUPPORTED(vap, va_fsid)) { - fsid_t fsid; - - /* va_fsid is 32 bits */ - fsid.val[0] = vap->va_fsid; - fsid.val[1] = 0; - ATTR_PACK8((*abp), fsid); - abp->actual.commonattr |= ATTR_CMN_FSID; } else if (!return_valid || pack_invalid) { fsid_t fsid = {{0}}; @@ -2493,17 +2485,6 @@ vfs_attr_pack(vnode_t vp, uio_t uio, struct attrlist *alp, uint64_t options, error = getattrlist_setupvattr_all(alp, vap, v_type, &fixedsize, proc_is64bit(vfs_context_proc(ctx))); - /* - * Ugly hack to correctly report fsids. vs_fsid is 32 bits and - * there is va_fsid64 as well but filesystems have to say that - * both are supported so that the value can be used correctly. - * So we set va_fsid if the filesystem has only set va_fsid64. - */ - - if ((alp->commonattr & ATTR_CMN_FSID) && - VATTR_IS_SUPPORTED(vap, va_fsid64)) - VATTR_SET_SUPPORTED(vap, va_fsid); - if (error) { VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: setup for request failed"); @@ -3238,9 +3219,18 @@ readdirattr(vnode_t dvp, struct fd_vn_data *fvd, uio_t auio, } /* - * We have an iocount on the directory already + * We have an iocount on the directory already. + * + * Note that we supply NOCROSSMOUNT to the namei call as we attempt to acquire + * a vnode for this particular entry. This is because the native call will + * (likely) attempt to emit attributes based on its own metadata in order to avoid + * creating vnodes where possible. If the native call is not going to walk + * up the vnode mounted-on chain in order to find the top-most mount point, then we + * should not either in this emulated readdir+getattrlist() approach. We + * will be responsible for setting DIR_MNTSTATUS_MNTPOINT on that directory that + * contains a mount point. 
*/ - NDINIT(&nd, LOOKUP, OP_GETATTR, AUDITVNPATH1 | USEDVP, + NDINIT(&nd, LOOKUP, OP_GETATTR, (AUDITVNPATH1 | USEDVP | NOCROSSMOUNT), UIO_SYSSPACE, CAST_USER_ADDR_T(name_buffer), ctx); nd.ni_dvp = dvp; @@ -3738,7 +3728,8 @@ setattrlist_internal(vnode_t vp, struct setattrlist_args *uap, proc_t p, vfs_con } if (al.commonattr & ATTR_CMN_CHGTIME) { ATTR_UNPACK_TIME(va.va_change_time, proc_is64); - VATTR_SET_ACTIVE(&va, va_change_time); + al.commonattr &= ~ATTR_CMN_CHGTIME; + /*quietly ignore change time; advisory in man page*/ } if (al.commonattr & ATTR_CMN_ACCTIME) { ATTR_UNPACK_TIME(va.va_access_time, proc_is64); @@ -3772,6 +3763,10 @@ setattrlist_internal(vnode_t vp, struct setattrlist_args *uap, proc_t p, vfs_con if (al.commonattr & ATTR_CMN_FLAGS) { ATTR_UNPACK(va.va_flags); VATTR_SET_ACTIVE(&va, va_flags); +#if CONFIG_MACF + if ((error = mac_vnode_check_setflags(ctx, vp, va.va_flags)) != 0) + goto out; +#endif } if (al.commonattr & ATTR_CMN_EXTENDED_SECURITY) { @@ -3834,18 +3829,20 @@ setattrlist_internal(vnode_t vp, struct setattrlist_args *uap, proc_t p, vfs_con volname = cursor; ATTR_UNPACK(ar); /* attr_length cannot be 0! 
*/ - if ((ar.attr_dataoffset < 0) || (ar.attr_length == 0)) { + if ((ar.attr_dataoffset < 0) || (ar.attr_length == 0) || + (ar.attr_length > uap->bufferSize) || + (uap->bufferSize - ar.attr_length < (unsigned)ar.attr_dataoffset)) { VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: bad offset supplied (2) ", ar.attr_dataoffset); error = EINVAL; goto out; } - volname += ar.attr_dataoffset; - if ((volname + ar.attr_length) > bufend) { + if (volname >= bufend - ar.attr_dataoffset - ar.attr_length) { error = EINVAL; VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: volume name too big for caller buffer"); goto out; } + volname += ar.attr_dataoffset; /* guarantee NUL termination */ volname[ar.attr_length - 1] = 0; } diff --git a/bsd/vfs/vfs_bio.c b/bsd/vfs/vfs_bio.c index c6e919d9e..9c4b20a0f 100644 --- a/bsd/vfs/vfs_bio.c +++ b/bsd/vfs/vfs_bio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -110,7 +110,6 @@ #include #include -#include int bcleanbuf(buf_t bp, boolean_t discard); static int brecover_data(buf_t bp); @@ -366,9 +365,14 @@ buf_markfua(buf_t bp) { } #if CONFIG_PROTECT -void -buf_setcpaddr(buf_t bp, struct cprotect *entry) { - bp->b_attr.ba_cpentry = entry; +cpx_t bufattr_cpx(bufattr_t bap) +{ + return bap->ba_cpx; +} + +void bufattr_setcpx(bufattr_t bap, cpx_t cpx) +{ + bap->ba_cpx = cpx; } void @@ -376,46 +380,38 @@ buf_setcpoff (buf_t bp, uint64_t foffset) { bp->b_attr.ba_cp_file_off = foffset; } -void * -bufattr_cpaddr(bufattr_t bap) { - return (bap->ba_cpentry); -} - uint64_t bufattr_cpoff(bufattr_t bap) { - return (bap->ba_cp_file_off); -} - -void -bufattr_setcpaddr(bufattr_t bap, void *cp_entry_addr) { - bap->ba_cpentry = cp_entry_addr; + return bap->ba_cp_file_off; } void bufattr_setcpoff(bufattr_t bap, uint64_t foffset) { - bap->ba_cp_file_off = foffset; + bap->ba_cp_file_off = foffset; } -#else -void * -bufattr_cpaddr(bufattr_t bap 
__unused) { - return NULL; -} +#else // !CONFIG_PROTECT uint64_t bufattr_cpoff(bufattr_t bap __unused) { return 0; } -void -bufattr_setcpaddr(bufattr_t bap __unused, void *cp_entry_addr __unused) { -} - void bufattr_setcpoff(__unused bufattr_t bap, __unused uint64_t foffset) { return; } -#endif /* CONFIG_PROTECT */ + +struct cpx *bufattr_cpx(__unused bufattr_t bap) +{ + return NULL; +} + +void bufattr_setcpx(__unused bufattr_t bap, __unused struct cpx *cpx) +{ +} + +#endif /* !CONFIG_PROTECT */ bufattr_t bufattr_alloc() { @@ -685,6 +681,8 @@ buf_callback(buf_t bp) errno_t buf_setcallback(buf_t bp, void (*callback)(buf_t, void *), void *transaction) { + assert(!ISSET(bp->b_flags, B_FILTER) && ISSET(bp->b_lflags, BL_BUSY)); + if (callback) bp->b_flags |= (B_CALL | B_ASYNC); else @@ -920,6 +918,8 @@ void buf_setfilter(buf_t bp, void (*filter)(buf_t, void *), void *transaction, void (**old_iodone)(buf_t, void *), void **old_transaction) { + assert(ISSET(bp->b_lflags, BL_BUSY)); + if (old_iodone) *old_iodone = bp->b_iodone; if (old_transaction) @@ -1317,9 +1317,10 @@ buf_strategy(vnode_t devvp, void *ap) #if CONFIG_PROTECT /* Capture f_offset in the bufattr*/ - if (bp->b_attr.ba_cpentry != 0) { + cpx_t cpx = bufattr_cpx(buf_attr(bp)); + if (cpx) { /* No need to go here for older EAs */ - if(bp->b_attr.ba_cpentry->cp_flags & CP_OFF_IV_ENABLED) { + if(cpx_use_offset_for_iv(cpx)) { off_t f_offset; if ((error = VNOP_BLKTOOFF(bp->b_vp, bp->b_lblkno, &f_offset))) return error; @@ -1337,7 +1338,7 @@ buf_strategy(vnode_t devvp, void *ap) * each I/O to IOFlashStorage. But from our perspective * we have only issued a single I/O. 
*/ - bufattr_setcpoff (&(bp->b_attr), (u_int64_t)f_offset); + buf_setcpoff(bp, f_offset); CP_DEBUG((CPDBG_OFFSET_IO | DBG_FUNC_NONE), (uint32_t) f_offset, (uint32_t) bp->b_lblkno, (uint32_t) bp->b_blkno, (uint32_t) bp->b_bcount, 0); } } @@ -2447,7 +2448,7 @@ buf_brelse_shadow(buf_t bp) lck_mtx_lock_spin(buf_mtxp); - bp_head = (buf_t)bp->b_orig; + __IGNORE_WCASTALIGN(bp_head = (buf_t)bp->b_orig); if (bp_head->b_whichq != -1) panic("buf_brelse_shadow: bp_head on freelist %d\n", bp_head->b_whichq); @@ -3104,6 +3105,25 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int size_t contig_bytes; int bmap_flags; +#if DEVELOPMENT || DEBUG + /* + * Apple implemented file systems use UBC exclusively; they should + * not call in here. + */ + const char* excldfs[] = {"hfs", "afpfs", "smbfs", "acfs", + "exfat", "msdos", "webdav", NULL}; + + for (int i = 0; excldfs[i] != NULL; i++) { + if (vp->v_mount && + !strcmp(vp->v_mount->mnt_vfsstat.f_fstypename, + excldfs[i])) { + panic("%s %s calls buf_getblk", + excldfs[i], + operation == BLK_READ ? 
"BLK_READ" : "BLK_WRITE"); + } + } +#endif + if ( (bp->b_upl) ) panic("bp already has UPL: %p",bp); @@ -3355,7 +3375,7 @@ allocbuf(buf_t bp, int size) *(void **)(&bp->b_datap) = grab_memory_for_meta_buf(nsize); } else { bp->b_datap = (uintptr_t)NULL; - kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size, VM_KERN_MEMORY_FILE); CLR(bp->b_flags, B_ZALLOC); } bcopy((void *)elem, (caddr_t)bp->b_datap, bp->b_bufsize); @@ -3368,7 +3388,7 @@ allocbuf(buf_t bp, int size) if ((vm_size_t)bp->b_bufsize < desired_size) { /* reallocate to a bigger size */ bp->b_datap = (uintptr_t)NULL; - kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size, VM_KERN_MEMORY_FILE); bcopy((const void *)elem, (caddr_t)bp->b_datap, bp->b_bufsize); kmem_free(kernel_map, elem, bp->b_bufsize); } else { @@ -3384,7 +3404,7 @@ allocbuf(buf_t bp, int size) *(void **)(&bp->b_datap) = grab_memory_for_meta_buf(nsize); SET(bp->b_flags, B_ZALLOC); } else - kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size, VM_KERN_MEMORY_FILE); } if (bp->b_datap == 0) @@ -3660,8 +3680,6 @@ bcleanbuf(buf_t bp, boolean_t discard) buf_release_credentials(bp); - bp->b_redundancy_flags = 0; - /* If discarding, just move to the empty queue */ if (discard) { lck_mtx_lock_spin(buf_mtxp); @@ -3676,6 +3694,7 @@ bcleanbuf(buf_t bp, boolean_t discard) bp->b_bufsize = 0; bp->b_datap = (uintptr_t)NULL; bp->b_upl = (void *)NULL; + bp->b_fsprivate = (void *)NULL; /* * preserve the state of whether this buffer * was allocated on the fly or not... 
@@ -3688,6 +3707,7 @@ bcleanbuf(buf_t bp, boolean_t discard) #endif bp->b_lflags = BL_BUSY; bp->b_flags = (bp->b_flags & B_HDRALLOC); + bp->b_redundancy_flags = 0; bp->b_dev = NODEV; bp->b_blkno = bp->b_lblkno = 0; bp->b_iodone = NULL; @@ -4160,6 +4180,7 @@ alloc_io_buf(vnode_t vp, int priv) bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_upl = NULL; + bp->b_fsprivate = (void *)NULL; bp->b_vp = vp; bzero(&bp->b_attr, sizeof(struct bufattr)); diff --git a/bsd/vfs/vfs_cache.c b/bsd/vfs/vfs_cache.c index 1575aafea..36b1d24e6 100644 --- a/bsd/vfs/vfs_cache.c +++ b/bsd/vfs/vfs_cache.c @@ -1252,11 +1252,11 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, } if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) { - - if (mp->mnt_realrootvp == NULLVP || mp->mnt_generation != mount_generation || - mp->mnt_realrootvp_vid != mp->mnt_realrootvp->v_id) - break; - vp = mp->mnt_realrootvp; + vnode_t tmp_vp = mp->mnt_realrootvp; + if (tmp_vp == NULLVP || mp->mnt_generation != mount_generation || + mp->mnt_realrootvp_vid != tmp_vp->v_id) + break; + vp = tmp_vp; } #if CONFIG_TRIGGERS @@ -1265,10 +1265,8 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, * trigger in hand, resolve it. Note that we don't need to * leave the fast path if the mount has already happened. 
*/ - if ((vp->v_resolve != NULL) && - (vp->v_resolve->vr_resolve_func != NULL)) { + if (vp->v_resolve) break; - } #endif /* CONFIG_TRIGGERS */ @@ -1711,6 +1709,25 @@ cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cn ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0); else ncp->nc_name = strname; + + // + // If the bytes of the name associated with the vnode differ, + // use the name associated with the vnode since the file system + // may have set that explicitly in the case of a lookup on a + // case-insensitive file system where the case of the looked up + // name differs from what is on disk. For more details, see: + // FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // + const char *vn_name = vp ? vp->v_name : NULL; + unsigned int len = vn_name ? strlen(vn_name) : 0; + if (vn_name && ncp && ncp->nc_name && strncmp(ncp->nc_name, vn_name, len) != 0) { + unsigned int hash = hash_string(vn_name, len); + + vfs_removename(ncp->nc_name); + ncp->nc_name = add_name_internal(vn_name, len, hash, FALSE, 0); + ncp->nc_hashval = hash; + } + /* * make us the newest entry in the cache * i.e. 
we'll be the last to be stolen diff --git a/bsd/vfs/vfs_cluster.c b/bsd/vfs/vfs_cluster.c index 621825949..57fe3431a 100644 --- a/bsd/vfs/vfs_cluster.c +++ b/bsd/vfs/vfs_cluster.c @@ -95,6 +95,8 @@ #include +#include + #if 0 #undef KERNEL_DEBUG #define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT @@ -138,12 +140,25 @@ struct clios { int io_wanted; /* someone is sleeping waiting for a change in state */ }; +struct cl_direct_read_lock { + LIST_ENTRY(cl_direct_read_lock) chain; + int32_t ref_count; + vnode_t vp; + lck_rw_t rw_lock; +}; + +#define CL_DIRECT_READ_LOCK_BUCKETS 61 + +static LIST_HEAD(cl_direct_read_locks, cl_direct_read_lock) + cl_direct_read_locks[CL_DIRECT_READ_LOCK_BUCKETS]; + +static lck_spin_t cl_direct_read_spin_lock; + static lck_grp_t *cl_mtx_grp; static lck_attr_t *cl_mtx_attr; static lck_grp_attr_t *cl_mtx_grp_attr; static lck_mtx_t *cl_transaction_mtxp; - #define IO_UNKNOWN 0 #define IO_DIRECT 1 #define IO_CONTIG 2 @@ -236,7 +251,12 @@ int (*bootcache_contains_block)(dev_t device, u_int64_t blkno) = NULL; #define MAX_IO_REQUEST_SIZE (1024 * 1024 * 512) #define MAX_IO_CONTIG_SIZE MAX_UPL_SIZE_BYTES #define MAX_VECTS 16 -#define MIN_DIRECT_WRITE_SIZE (4 * PAGE_SIZE) +/* + * The MIN_DIRECT_WRITE_SIZE governs how much I/O should be issued before we consider + * allowing the caller to bypass the buffer cache. For small I/Os (less than 16k), + * we have not historically allowed the write to bypass the UBC. 
+ */ +#define MIN_DIRECT_WRITE_SIZE (16384) #define WRITE_THROTTLE 6 #define WRITE_THROTTLE_SSD 2 @@ -287,6 +307,11 @@ cluster_init(void) { if (cl_transaction_mtxp == NULL) panic("cluster_init: failed to allocate cl_transaction_mtxp"); + + lck_spin_init(&cl_direct_read_spin_lock, cl_mtx_grp, cl_mtx_attr); + + for (int i = 0; i < CL_DIRECT_READ_LOCK_BUCKETS; ++i) + LIST_INIT(&cl_direct_read_locks[i]); } @@ -507,6 +532,142 @@ cluster_iostate_wait(struct clios *iostate, u_int target, const char *wait_name) lck_mtx_unlock(&iostate->io_mtxp); } +static void cluster_handle_associated_upl(struct clios *iostate, upl_t upl, + upl_offset_t upl_offset, upl_size_t size) +{ + if (!size) + return; + + upl_t associated_upl = upl_associated_upl(upl); + + if (!associated_upl) + return; + +#if 0 + printf("1: %d %d\n", upl_offset, upl_offset + size); +#endif + + /* + * The associated UPL is page aligned to file offsets whereas the + * UPL it's attached to has different alignment requirements. The + * upl_offset that we have refers to @upl. The code that follows + * has to deal with the first and last pages in this transaction + * which might straddle pages in the associated UPL. To keep + * track of these pages, we use the mark bits: if the mark bit is + * set, we know another transaction has completed its part of that + * page and so we can unlock that page here. + * + * The following illustrates what we have to deal with: + * + * MEM u <------------ 1 PAGE ------------> e + * +-------------+----------------------+----------------- + * | |######################|################# + * +-------------+----------------------+----------------- + * FILE | <--- a ---> o <------------ 1 PAGE ------------> + * + * So here we show a write to offset @o. The data that is to be + * written is in a buffer that is not page aligned; it has offset + * @a in the page. The upl that carries the data starts in memory + * at @u. The associated upl starts in the file at offset @o. 
A + * transaction will always end on a page boundary (like @e above) + * except for the very last transaction in the group. We cannot + * unlock the page at @o in the associated upl until both the + * transaction ending at @e and the following transaction (that + * starts at @e) has completed. + */ + + /* + * We record whether or not the two UPLs are aligned as the mark + * bit in the first page of @upl. + */ + upl_page_info_t *pl = UPL_GET_INTERNAL_PAGE_LIST(upl); + bool is_unaligned = upl_page_get_mark(pl, 0); + + if (is_unaligned) { + upl_page_info_t *assoc_pl = UPL_GET_INTERNAL_PAGE_LIST(associated_upl); + + upl_offset_t upl_end = upl_offset + size; + assert(upl_end >= PAGE_SIZE); + + upl_size_t assoc_upl_size = upl_get_size(associated_upl); + + /* + * In the very first transaction in the group, upl_offset will + * not be page aligned, but after that it will be and in that + * case we want the preceding page in the associated UPL hence + * the minus one. + */ + assert(upl_offset); + if (upl_offset) + upl_offset = trunc_page_32(upl_offset - 1); + + lck_mtx_lock_spin(&iostate->io_mtxp); + + // Look at the first page... + if (upl_offset + && !upl_page_get_mark(assoc_pl, upl_offset >> PAGE_SHIFT)) { + /* + * The first page isn't marked so let another transaction + * completion handle it. + */ + upl_page_set_mark(assoc_pl, upl_offset >> PAGE_SHIFT, true); + upl_offset += PAGE_SIZE; + } + + // And now the last page... + + /* + * This needs to be > rather than >= because if it's equal, it + * means there's another transaction that is sharing the last + * page. + */ + if (upl_end > assoc_upl_size) + upl_end = assoc_upl_size; + else { + upl_end = trunc_page_32(upl_end); + const int last_pg = (upl_end >> PAGE_SHIFT) - 1; + + if (!upl_page_get_mark(assoc_pl, last_pg)) { + /* + * The last page isn't marked so mark the page and let another + * transaction completion handle it. 
+ */ + upl_page_set_mark(assoc_pl, last_pg, true); + upl_end -= PAGE_SIZE; + } + } + + lck_mtx_unlock(&iostate->io_mtxp); + +#if 0 + printf("2: %d %d\n", upl_offset, upl_end); +#endif + + if (upl_end <= upl_offset) + return; + + size = upl_end - upl_offset; + } else { + assert(!(upl_offset & PAGE_MASK)); + assert(!(size & PAGE_MASK)); + } + + boolean_t empty; + + /* + * We can unlock these pages now and as this is for a + * direct/uncached write, we want to dump the pages too. + */ + kern_return_t kr = upl_abort_range(associated_upl, upl_offset, size, + UPL_ABORT_DUMP_PAGES, &empty); + + assert(!kr); + + if (!kr && empty) { + upl_set_associated_upl(upl, NULL); + upl_deallocate(associated_upl); + } +} static int cluster_ioerror(upl_t upl, int upl_offset, int abort_size, int error, int io_flags, vnode_t vp) @@ -570,7 +731,7 @@ cluster_iodone(buf_t bp, void *callback_arg) struct clios *iostate; boolean_t transaction_complete = FALSE; - cbp_head = (buf_t)(bp->b_trans_head); + __IGNORE_WCASTALIGN(cbp_head = (buf_t)(bp->b_trans_head)); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START, cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0); @@ -581,7 +742,7 @@ cluster_iodone(buf_t bp, void *callback_arg) lck_mtx_lock_spin(cl_transaction_mtxp); bp->b_flags |= B_TDONE; - + if (bp->b_flags & B_TWANTED) { CLR(bp->b_flags, B_TWANTED); need_wakeup = TRUE; @@ -656,6 +817,14 @@ cluster_iodone(buf_t bp, void *callback_arg) cbp = cbp_next; } + + if (ISSET(b_flags, B_COMMIT_UPL)) { + cluster_handle_associated_upl(iostate, + cbp_head->b_upl, + upl_offset, + transaction_size); + } + if (error == 0 && total_resid) error = EIO; @@ -702,14 +871,13 @@ cluster_iodone(buf_t bp, void *callback_arg) } if (b_flags & B_COMMIT_UPL) { - - pg_offset = upl_offset & PAGE_MASK; + pg_offset = upl_offset & PAGE_MASK; commit_size = (pg_offset + transaction_size + (PAGE_SIZE - 1)) & ~PAGE_MASK; if (error) upl_flags = cluster_ioerror(upl, upl_offset - pg_offset, commit_size, error, b_flags, vp); 
else { - upl_flags = UPL_COMMIT_FREE_ON_EMPTY; + upl_flags = UPL_COMMIT_FREE_ON_EMPTY; if ((b_flags & B_PHYS) && (b_flags & B_READ)) upl_flags |= UPL_COMMIT_SET_DIRTY; @@ -1037,6 +1205,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no io_flags |= B_PASSIVE; if (flags & CL_ENCRYPTED) io_flags |= B_ENCRYPTED_IO; + if (vp->v_flag & VSYSTEM) io_flags |= B_META; @@ -1049,7 +1218,37 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no * read in from the file */ zero_offset = upl_offset + non_rounded_size; + } else if (!ISSET(flags, CL_READ) && ISSET(flags, CL_DIRECT_IO)) { + assert(ISSET(flags, CL_COMMIT)); + + // For a direct/uncached write, we need to lock pages... + + upl_t cached_upl; + + /* + * Create a UPL to lock the pages in the cache whilst the + * write is in progress. + */ + ubc_create_upl(vp, f_offset, non_rounded_size, &cached_upl, + NULL, UPL_SET_LITE); + + /* + * Attach this UPL to the other UPL so that we can find it + * later. + */ + upl_set_associated_upl(upl, cached_upl); + + if (upl_offset & PAGE_MASK) { + /* + * The two UPLs are not aligned, so mark the first page in + * @upl so that cluster_handle_associated_upl can handle + * it accordingly. 
+ */ + upl_page_info_t *pl = UPL_GET_INTERNAL_PAGE_LIST(upl); + upl_page_set_mark(pl, 0, true); + } } + while (size) { daddr64_t blkno; daddr64_t lblkno; @@ -1330,6 +1529,10 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no commit_offset = upl_offset & ~PAGE_MASK; } + + // Associated UPL is currently only used in the direct write path + assert(!upl_associated_upl(upl)); + if ( (flags & CL_COMMIT) && pg_count) { ubc_upl_commit_range(upl, commit_offset, pg_count * PAGE_SIZE, UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY); @@ -1426,9 +1629,13 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no if (flags & CL_PAGEOUT) { u_int i; - for (i = 0; i < pg_count; i++) { - if (buf_invalblkno(vp, lblkno + i, 0) == EBUSY) - panic("BUSY bp found in cluster_io"); + /* + * since blocks are in offsets of 0x1000, scale + * iteration to (PAGE_SIZE * pg_count) of blks. + */ + for (i = 0; i < (PAGE_SIZE * pg_count)/0x1000; i++) { + if (buf_invalblkno(vp, lblkno + i, 0) == EBUSY) + panic("BUSY bp found in cluster_io"); } } if (flags & CL_ASYNC) { @@ -1553,34 +1760,41 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no } } if (error) { - int abort_size; + int abort_size; io_size = 0; - + if (cbp_head) { - /* - * first wait until all of the outstanding I/O - * for this partial transaction has completed - */ - cluster_wait_IO(cbp_head, (flags & CL_ASYNC)); + /* + * Wait until all of the outstanding I/O + * for this partial transaction has completed + */ + cluster_wait_IO(cbp_head, (flags & CL_ASYNC)); /* * Rewind the upl offset to the beginning of the * transaction. 
*/ upl_offset = cbp_head->b_uploffset; + } - for (cbp = cbp_head; cbp;) { - buf_t cbp_next; - - size += cbp->b_bcount; - io_size += cbp->b_bcount; + if (ISSET(flags, CL_COMMIT)) { + cluster_handle_associated_upl(iostate, upl, upl_offset, + upl_end_offset - upl_offset); + } - cbp_next = cbp->b_trans_next; - free_io_buf(cbp); - cbp = cbp_next; - } + // Free all the IO buffers in this transaction + for (cbp = cbp_head; cbp;) { + buf_t cbp_next; + + size += cbp->b_bcount; + io_size += cbp->b_bcount; + + cbp_next = cbp->b_trans_next; + free_io_buf(cbp); + cbp = cbp_next; } + if (iostate) { int need_wakeup = 0; @@ -1608,12 +1822,13 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no if (need_wakeup) wakeup((caddr_t)&iostate->io_wanted); } + if (flags & CL_COMMIT) { int upl_flags; - pg_offset = upl_offset & PAGE_MASK; + pg_offset = upl_offset & PAGE_MASK; abort_size = (upl_end_offset - upl_offset + PAGE_MASK) & ~PAGE_MASK; - + upl_flags = cluster_ioerror(upl, upl_offset - pg_offset, abort_size, error, io_flags, vp); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE, @@ -2101,7 +2316,7 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in upl_size_t upl_size, vector_upl_size = 0; vm_size_t upl_needed_size; mach_msg_type_number_t pages_in_pl; - int upl_flags; + upl_control_flags_t upl_flags; kern_return_t kret; mach_msg_type_number_t i; int force_data_sync; @@ -2252,13 +2467,15 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START, (int)upl_offset, upl_needed_size, (int)iov_base, io_size, 0); + vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? 
current_map() : kernel_map; for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) { pages_in_pl = 0; upl_size = upl_needed_size; upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC | - UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE; + UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE + | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_FILE); - kret = vm_map_get_upl(current_map(), + kret = vm_map_get_upl(map, (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)), &upl_size, &upl, @@ -2343,14 +2560,6 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in */ } - /* - * Now look for pages already in the cache - * and throw them away. - * uio->uio_offset is page aligned within the file - * io_size is a multiple of PAGE_SIZE - */ - ubc_range_op(vp, uio->uio_offset, uio->uio_offset + io_size, UPL_ROP_DUMP, NULL); - /* * we want push out these writes asynchronously so that we can overlap * the preparation of the next I/O @@ -2492,7 +2701,7 @@ cluster_write_contig(vnode_t vp, struct uio *uio, off_t newEOF, int *write_type, upl_size_t upl_size; vm_size_t upl_needed_size; mach_msg_type_number_t pages_in_pl; - int upl_flags; + upl_control_flags_t upl_flags; kern_return_t kret; struct clios iostate; int error = 0; @@ -2531,9 +2740,11 @@ cluster_write_contig(vnode_t vp, struct uio *uio, off_t newEOF, int *write_type, pages_in_pl = 0; upl_size = upl_needed_size; upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC | - UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE; + UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE + | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_FILE); - kret = vm_map_get_upl(current_map(), + vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? 
current_map() : kernel_map; + kret = vm_map_get_upl(map, (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)), &upl_size, &upl[cur_upl], NULL, &pages_in_pl, &upl_flags, 0); @@ -3378,7 +3589,7 @@ cluster_read_ext(vnode_t vp, struct uio *uio, off_t filesize, int xflags, int (* flags |= IO_NOCACHE; if ((vp->v_flag & VRAOFF) || speculative_reads_disabled) flags |= IO_RAOFF; - + if (flags & IO_SKIP_ENCRYPTION) flags |= IO_ENCRYPTED; /* @@ -3991,6 +4202,73 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file return (retval); } +/* + * We don't want another read/write lock for every vnode in the system + * so we keep a hash of them here. There should never be very many of + * these around at any point in time. + */ +cl_direct_read_lock_t *cluster_lock_direct_read(vnode_t vp, lck_rw_type_t type) +{ + struct cl_direct_read_locks *head + = &cl_direct_read_locks[(uintptr_t)vp / sizeof(*vp) + % CL_DIRECT_READ_LOCK_BUCKETS]; + + struct cl_direct_read_lock *lck, *new_lck = NULL; + + for (;;) { + lck_spin_lock(&cl_direct_read_spin_lock); + + LIST_FOREACH(lck, head, chain) { + if (lck->vp == vp) { + ++lck->ref_count; + lck_spin_unlock(&cl_direct_read_spin_lock); + if (new_lck) { + // Someone beat us to it, ditch the allocation + lck_rw_destroy(&new_lck->rw_lock, cl_mtx_grp); + FREE(new_lck, M_TEMP); + } + lck_rw_lock(&lck->rw_lock, type); + return lck; + } + } + + if (new_lck) { + // Use the lock we allocated + LIST_INSERT_HEAD(head, new_lck, chain); + lck_spin_unlock(&cl_direct_read_spin_lock); + lck_rw_lock(&new_lck->rw_lock, type); + return new_lck; + } + + lck_spin_unlock(&cl_direct_read_spin_lock); + + // Allocate a new lock + MALLOC(new_lck, cl_direct_read_lock_t *, sizeof(*new_lck), + M_TEMP, M_WAITOK); + lck_rw_init(&new_lck->rw_lock, cl_mtx_grp, cl_mtx_attr); + new_lck->vp = vp; + new_lck->ref_count = 1; + + // Got to go round again + } +} + +void cluster_unlock_direct_read(cl_direct_read_lock_t *lck) +{ + lck_rw_done(&lck->rw_lock); + + 
lck_spin_lock(&cl_direct_read_spin_lock); + if (lck->ref_count == 1) { + LIST_REMOVE(lck, chain); + lck_spin_unlock(&cl_direct_read_spin_lock); + lck_rw_destroy(&lck->rw_lock, cl_mtx_grp); + FREE(lck, M_TEMP); + } else { + --lck->ref_count; + lck_spin_unlock(&cl_direct_read_spin_lock); + } +} + static int cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, u_int32_t *read_length, int flags, int (*callback)(buf_t, void *), void *callback_arg) @@ -4002,7 +4280,7 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, upl_size_t upl_size, vector_upl_size = 0; vm_size_t upl_needed_size; unsigned int pages_in_pl; - int upl_flags; + upl_control_flags_t upl_flags; kern_return_t kret; unsigned int i; int force_data_sync; @@ -4032,6 +4310,7 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, off_t v_upl_uio_offset = 0; int vector_upl_index=0; upl_t vector_upl = NULL; + cl_direct_read_lock_t *lock = NULL; user_addr_t orig_iov_base = 0; user_addr_t last_iov_base = 0; @@ -4267,7 +4546,7 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, goto wait_for_dreads; } - /* + /* * Don't re-check the UBC data if we are looking for uncached IO * or asking for encrypted blocks. */ @@ -4278,6 +4557,19 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, io_size = 0; + if (!lock) { + /* + * We hold a lock here between the time we check the + * cache and the time we issue I/O. This saves us + * from having to lock the pages in the cache. Not + * all clients will care about this lock but some + * clients may want to guarantee stability between + * here and when the I/O is issued in which case they + * will take the lock exclusively. 
+ */ + lock = cluster_lock_direct_read(vp, LCK_RW_TYPE_SHARED); + } + ubc_range_op(vp, uio->uio_offset, uio->uio_offset + xsize, UPL_ROP_ABSENT, (int *)&io_size); if (io_size == 0) { @@ -4322,17 +4614,18 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, else no_zero_fill = 0; + vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? current_map() : kernel_map; for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) { pages_in_pl = 0; upl_size = upl_needed_size; - upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE; - + upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE + | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_FILE); if (no_zero_fill) upl_flags |= UPL_NOZEROFILL; if (force_data_sync) upl_flags |= UPL_FORCE_DATA_SYNC; - kret = vm_map_create_upl(current_map(), + kret = vm_map_create_upl(map, (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)), &upl_size, &upl, NULL, &pages_in_pl, &upl_flags); @@ -4417,7 +4710,6 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START, upl, (int)upl_offset, (int)uio->uio_offset, io_size, 0); - if(!useVectorUPL) { if (no_zero_fill) io_flag &= ~CL_PRESERVE; @@ -4447,6 +4739,12 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, } last_iov_base = iov_base + io_size; + if (lock) { + // We don't need to wait for the I/O to complete + cluster_unlock_direct_read(lock); + lock = NULL; + } + /* * update the uio structure */ @@ -4493,6 +4791,11 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg); reset_vector_run_state(); } + + // We don't need to wait for the I/O to complete + if (lock) + cluster_unlock_direct_read(lock); + /* 
* make sure all async reads that are part of this stream * have completed before we return @@ -4545,7 +4848,7 @@ cluster_read_contig(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, upl_size_t upl_size; vm_size_t upl_needed_size; mach_msg_type_number_t pages_in_pl; - int upl_flags; + upl_control_flags_t upl_flags; kern_return_t kret; struct clios iostate; int error= 0; @@ -4599,13 +4902,15 @@ cluster_read_contig(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, pages_in_pl = 0; upl_size = upl_needed_size; - upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE; + upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE + | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_FILE); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 92)) | DBG_FUNC_START, (int)upl_offset, (int)upl_size, (int)iov_base, io_size, 0); - kret = vm_map_get_upl(current_map(), + vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? current_map() : kernel_map; + kret = vm_map_get_upl(map, (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)), &upl_size, &upl[cur_upl], NULL, &pages_in_pl, &upl_flags, 0); @@ -4751,7 +5056,7 @@ cluster_io_type(struct uio *uio, int *io_type, u_int32_t *io_length, u_int32_t m user_addr_t iov_base = 0; upl_t upl; upl_size_t upl_size; - int upl_flags; + upl_control_flags_t upl_flags; int retval = 0; /* @@ -4775,9 +5080,10 @@ cluster_io_type(struct uio *uio, int *io_type, u_int32_t *io_length, u_int32_t m else upl_size = (u_int32_t)iov_len; - upl_flags = UPL_QUERY_OBJECT_TYPE; - - if ((vm_map_get_upl(current_map(), + upl_flags = UPL_QUERY_OBJECT_TYPE | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_FILE); + + vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? 
current_map() : kernel_map; + if ((vm_map_get_upl(map, (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)), &upl_size, &upl, NULL, NULL, &upl_flags, 0)) != KERN_SUCCESS) { /* @@ -5875,15 +6181,15 @@ is_file_clean(vnode_t vp, off_t filesize) * single hashtable entry. Each hashtable entry is aligned to this * size within the file. */ -#define DRT_BITVECTOR_PAGES 256 +#define DRT_BITVECTOR_PAGES ((1024 * 1024) / PAGE_SIZE) /* * File offset handling. * - * DRT_ADDRESS_MASK is dependent on DRT_BITVECTOR_PAGES; - * the correct formula is (~(DRT_BITVECTOR_PAGES * PAGE_SIZE) - 1) + * DRT_ADDRESS_MASK is dependent on DRT_BITVECTOR_PAGES; + * the correct formula is (~((DRT_BITVECTOR_PAGES * PAGE_SIZE) - 1)) */ -#define DRT_ADDRESS_MASK (~((1 << 20) - 1)) +#define DRT_ADDRESS_MASK (~((DRT_BITVECTOR_PAGES * PAGE_SIZE) - 1)) #define DRT_ALIGN_ADDRESS(addr) ((addr) & DRT_ADDRESS_MASK) /* @@ -5981,7 +6287,15 @@ is_file_clean(vnode_t vp, off_t filesize) */ struct vfs_drt_hashentry { u_int64_t dhe_control; - u_int32_t dhe_bitvector[DRT_BITVECTOR_PAGES / 32]; +/* +* dhe_bitvector was declared as dhe_bitvector[DRT_BITVECTOR_PAGES / 32]; +* DRT_BITVECTOR_PAGES is defined as ((1024 * 1024) / PAGE_SIZE) +* Since PAGE_SIZE is only known at boot time, +* -define MAX_DRT_BITVECTOR_PAGES for smallest supported page size (4k) +* -declare dhe_bitvector array for largest possible length +*/ +#define MAX_DRT_BITVECTOR_PAGES (1024 * 1024)/( 4 * 1024) + u_int32_t dhe_bitvector[MAX_DRT_BITVECTOR_PAGES/32]; }; /* @@ -6117,7 +6431,7 @@ vfs_drt_alloc_map(struct vfs_drt_clustermap **cmapp) */ kret = kmem_alloc(kernel_map, (vm_offset_t *)&cmap, - (nsize == DRT_HASH_SMALL_MODULUS) ? DRT_SMALL_ALLOCATION : DRT_LARGE_ALLOCATION); + (nsize == DRT_HASH_SMALL_MODULUS) ? 
DRT_SMALL_ALLOCATION : DRT_LARGE_ALLOCATION, VM_KERN_MEMORY_FILE); if (kret != KERN_SUCCESS) return(kret); cmap->scm_magic = DRT_SCM_MAGIC; diff --git a/bsd/vfs/vfs_fsevents.c b/bsd/vfs/vfs_fsevents.c index 6a90031c6..d2f32bd74 100644 --- a/bsd/vfs/vfs_fsevents.c +++ b/bsd/vfs/vfs_fsevents.c @@ -172,14 +172,14 @@ is_ignored_directory(const char *path) { return 0; } -#define IS_TLD(x) strnstr((char *) path, x, MAXPATHLEN) +#define IS_TLD(x) strnstr(__DECONST(char *, path), x, MAXPATHLEN) if (IS_TLD("/.Spotlight-V100/") || IS_TLD("/.MobileBackups/") || IS_TLD("/Backups.backupdb/")) { return 1; } #undef IS_TLD - + return 0; } @@ -782,7 +782,8 @@ add_fsevent(int type, vfs_context_t ctx, ...) continue; } - if ( watcher->event_list[type] == FSE_REPORT + if ( type < watcher->num_events + && watcher->event_list[type] == FSE_REPORT && watcher_cares_about_dev(watcher, dev)) { if (watcher_add_event(watcher, kfse) != 0) { @@ -1534,7 +1535,7 @@ fmod_watch(fs_event_watcher *watcher, struct uio *uio) // its type or which device it is for) // kfse = watcher->event_queue[watcher->rd]; - if (!kfse || kfse->type == FSE_INVALID || kfse->refcount < 1) { + if (!kfse || kfse->type == FSE_INVALID || kfse->type >= watcher->num_events || kfse->refcount < 1) { break; } @@ -1641,62 +1642,25 @@ fseventsf_write(__unused struct fileproc *fp, __unused struct uio *uio, } #pragma pack(push, 4) -typedef struct ext_fsevent_dev_filter_args { - uint32_t num_devices; - user_addr_t devices; -} ext_fsevent_dev_filter_args; -#pragma pack(pop) - -#define NEW_FSEVENTS_DEVICE_FILTER _IOW('s', 100, ext_fsevent_dev_filter_args) - -typedef struct old_fsevent_dev_filter_args { - uint32_t num_devices; - int32_t devices; -} old_fsevent_dev_filter_args; - -#define OLD_FSEVENTS_DEVICE_FILTER _IOW('s', 100, old_fsevent_dev_filter_args) - -#if __LP64__ -/* need this in spite of the padding due to alignment of devices */ typedef struct fsevent_dev_filter_args32 { - uint32_t num_devices; - uint32_t devices; - int32_t 
pad1; + uint32_t num_devices; + user32_addr_t devices; } fsevent_dev_filter_args32; -#endif +typedef struct fsevent_dev_filter_args64 { + uint32_t num_devices; + user64_addr_t devices; +} fsevent_dev_filter_args64; +#pragma pack(pop) + +#define FSEVENTS_DEVICE_FILTER_32 _IOW('s', 100, fsevent_dev_filter_args32) +#define FSEVENTS_DEVICE_FILTER_64 _IOW('s', 100, fsevent_dev_filter_args64) static int fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx) { fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data; int ret = 0; - ext_fsevent_dev_filter_args *devfilt_args, _devfilt_args; - - if (proc_is64bit(vfs_context_proc(ctx))) { - devfilt_args = (ext_fsevent_dev_filter_args *)data; - } - else if (cmd == OLD_FSEVENTS_DEVICE_FILTER) { - old_fsevent_dev_filter_args *udev_filt_args = (old_fsevent_dev_filter_args *)data; - - devfilt_args = &_devfilt_args; - memset(devfilt_args, 0, sizeof(ext_fsevent_dev_filter_args)); - - devfilt_args->num_devices = udev_filt_args->num_devices; - devfilt_args->devices = CAST_USER_ADDR_T(udev_filt_args->devices); - } - else { -#if __LP64__ - fsevent_dev_filter_args32 *udev_filt_args = (fsevent_dev_filter_args32 *)data; -#else - fsevent_dev_filter_args *udev_filt_args = (fsevent_dev_filter_args *)data; -#endif - - devfilt_args = &_devfilt_args; - memset(devfilt_args, 0, sizeof(ext_fsevent_dev_filter_args)); - - devfilt_args->num_devices = udev_filt_args->num_devices; - devfilt_args->devices = CAST_USER_ADDR_T(udev_filt_args->devices); - } + fsevent_dev_filter_args64 *devfilt_args, _devfilt_args; OSAddAtomic(1, &fseh->active); if (fseh->flags & FSEH_CLOSING) { @@ -1725,8 +1689,29 @@ fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx break; } - case OLD_FSEVENTS_DEVICE_FILTER: - case NEW_FSEVENTS_DEVICE_FILTER: { + case FSEVENTS_DEVICE_FILTER_32: { + if (proc_is64bit(vfs_context_proc(ctx))) { + ret = EINVAL; + break; + } + fsevent_dev_filter_args32 *devfilt_args32 = 
(fsevent_dev_filter_args32 *)data; + + devfilt_args = &_devfilt_args; + memset(devfilt_args, 0, sizeof(fsevent_dev_filter_args64)); + devfilt_args->num_devices = devfilt_args32->num_devices; + devfilt_args->devices = CAST_USER_ADDR_T(devfilt_args32->devices); + goto handle_dev_filter; + } + + case FSEVENTS_DEVICE_FILTER_64: + if (!proc_is64bit(vfs_context_proc(ctx))) { + ret = EINVAL; + break; + } + devfilt_args = (fsevent_dev_filter_args64 *)data; + + handle_dev_filter: + { int new_num_devices; dev_t *devices_not_to_watch, *tmp=NULL; @@ -2107,7 +2092,7 @@ fseventswrite(__unused dev_t dev, struct uio *uio, __unused int ioflag) lck_mtx_lock(&event_writer_lock); if (write_buffer == NULL) { - if (kmem_alloc(kernel_map, (vm_offset_t *)&write_buffer, WRITE_BUFFER_SIZE)) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&write_buffer, WRITE_BUFFER_SIZE, VM_KERN_MEMORY_FILE)) { lck_mtx_unlock(&event_writer_lock); return ENOMEM; } @@ -2172,21 +2157,22 @@ static const struct fileops fsevents_fops = { fseventsf_drain }; -typedef struct ext_fsevent_clone_args { - user_addr_t event_list; - int32_t num_events; - int32_t event_queue_depth; - user_addr_t fd; -} ext_fsevent_clone_args; +typedef struct fsevent_clone_args32 { + user32_addr_t event_list; + int32_t num_events; + int32_t event_queue_depth; + user32_addr_t fd; +} fsevent_clone_args32; -typedef struct old_fsevent_clone_args { - uint32_t event_list; - int32_t num_events; - int32_t event_queue_depth; - uint32_t fd; -} old_fsevent_clone_args; +typedef struct fsevent_clone_args64 { + user64_addr_t event_list; + int32_t num_events; + int32_t event_queue_depth; + user64_addr_t fd; +} fsevent_clone_args64; -#define OLD_FSEVENTS_CLONE _IOW('s', 1, old_fsevent_clone_args) +#define FSEVENTS_CLONE_32 _IOW('s', 1, fsevent_clone_args32) +#define FSEVENTS_CLONE_64 _IOW('s', 1, fsevent_clone_args64) static int fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, struct proc *p) @@ -2194,38 +2180,32 @@ 
fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, s struct fileproc *f; int fd, error; fsevent_handle *fseh = NULL; - ext_fsevent_clone_args *fse_clone_args, _fse_clone; + fsevent_clone_args64 *fse_clone_args, _fse_clone; int8_t *event_list; int is64bit = proc_is64bit(p); switch (cmd) { - case OLD_FSEVENTS_CLONE: { - old_fsevent_clone_args *old_args = (old_fsevent_clone_args *)data; + case FSEVENTS_CLONE_32: { + if (is64bit) { + return EINVAL; + } + fsevent_clone_args32 *args32 = (fsevent_clone_args32 *)data; fse_clone_args = &_fse_clone; - memset(fse_clone_args, 0, sizeof(ext_fsevent_clone_args)); + memset(fse_clone_args, 0, sizeof(fsevent_clone_args64)); - fse_clone_args->event_list = CAST_USER_ADDR_T(old_args->event_list); - fse_clone_args->num_events = old_args->num_events; - fse_clone_args->event_queue_depth = old_args->event_queue_depth; - fse_clone_args->fd = CAST_USER_ADDR_T(old_args->fd); + fse_clone_args->event_list = CAST_USER_ADDR_T(args32->event_list); + fse_clone_args->num_events = args32->num_events; + fse_clone_args->event_queue_depth = args32->event_queue_depth; + fse_clone_args->fd = CAST_USER_ADDR_T(args32->fd); goto handle_clone; } - - case FSEVENTS_CLONE: - if (is64bit) { - fse_clone_args = (ext_fsevent_clone_args *)data; - } else { - fsevent_clone_args *ufse_clone = (fsevent_clone_args *)data; - - fse_clone_args = &_fse_clone; - memset(fse_clone_args, 0, sizeof(ext_fsevent_clone_args)); - fse_clone_args->event_list = CAST_USER_ADDR_T(ufse_clone->event_list); - fse_clone_args->num_events = ufse_clone->num_events; - fse_clone_args->event_queue_depth = ufse_clone->event_queue_depth; - fse_clone_args->fd = CAST_USER_ADDR_T(ufse_clone->fd); + case FSEVENTS_CLONE_64: + if (!is64bit) { + return EINVAL; } + fse_clone_args = (fsevent_clone_args64 *)data; handle_clone: if (fse_clone_args->num_events < 0 || fse_clone_args->num_events > 4096) { diff --git a/bsd/vfs/vfs_journal.c b/bsd/vfs/vfs_journal.c index 
628e5e7dc..23b21860a 100644 --- a/bsd/vfs/vfs_journal.c +++ b/bsd/vfs/vfs_journal.c @@ -339,6 +339,13 @@ do_journal_io(journal *jnl, off_t *offset, void *data, size_t len, int direction buf_t bp; off_t max_iosize; struct bufattr *bap; + boolean_t was_vm_privileged = FALSE; + boolean_t need_vm_privilege = FALSE; + + if (jnl->fsmount) { + if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) + need_vm_privilege = TRUE; + } if (*offset < 0 || *offset > jnl->jhdr->size) { panic("jnl: do_jnl_io: bad offset 0x%llx (max 0x%llx)\n", *offset, jnl->jhdr->size); @@ -406,12 +413,26 @@ do_journal_io(journal *jnl, off_t *offset, void *data, size_t len, int direction buf_markfua(bp); } + if (need_vm_privilege == TRUE) { + /* + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... + * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } DTRACE_IO1(journal__start, buf_t, bp); err = VNOP_STRATEGY(bp); if (!err) { err = (int)buf_biowait(bp); } DTRACE_IO1(journal__done, buf_t, bp); + + if (need_vm_privilege == TRUE && was_vm_privileged == FALSE) + set_vm_privilege(FALSE); + free_io_buf(bp); if (err) { @@ -471,7 +492,21 @@ write_journal_header(journal *jnl, int updating_start, uint32_t sequence_num) // writes. // if (!updating_start && (jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) { - ret = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, &context); + + dk_synchronize_t sync_request = { + .options = DK_SYNCHRONIZE_OPTION_BARRIER, + }; + + /* + * If device doesn't support barrier-only flush, or + * the journal is on a different device, use full flush. 
+ */ + if (!(jnl->flags & JOURNAL_FEATURE_BARRIER) || (jnl->jdev != jnl->fsdev)) { + sync_request.options = 0; + jnl->flush_counter++; + } + + ret = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, &context); } if (ret != 0) { // @@ -513,7 +548,21 @@ write_journal_header(journal *jnl, int updating_start, uint32_t sequence_num) // may seem obscure, it's not. // if (updating_start && (jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) { - VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, &context); + + dk_synchronize_t sync_request = { + .options = DK_SYNCHRONIZE_OPTION_BARRIER, + }; + + /* + * If device doesn't support barrier-only flush, or + * the journal is on a different device, use full flush. + */ + if (!(jnl->flags & JOURNAL_FEATURE_BARRIER) || (jnl->jdev != jnl->fsdev)) { + sync_request.options = 0; + jnl->flush_counter++; + } + + VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, &context); } return 0; @@ -785,6 +834,8 @@ update_fs_block(journal *jnl, void *block_ptr, off_t fs_block, size_t bsize) { int ret; struct buf *oblock_bp=NULL; + boolean_t was_vm_privileged = FALSE; + // first read the block we want. ret = buf_meta_bread(jnl->fsdev, (daddr64_t)fs_block, bsize, NOCRED, &oblock_bp); @@ -813,11 +864,25 @@ update_fs_block(journal *jnl, void *block_ptr, off_t fs_block, size_t bsize) // copy the journal data over top of it memcpy((char *)buf_dataptr(oblock_bp), block_ptr, bsize); - if ((ret = VNOP_BWRITE(oblock_bp)) != 0) { + if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) { + /* + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... 
+ * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } + ret = VNOP_BWRITE(oblock_bp); + + if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + + if (ret != 0) { printf("jnl: %s: update_fs_block: failed to update block %lld (ret %d)\n", jnl->jdev_name, fs_block,ret); return ret; } - // and now invalidate it so that if someone else wants to read // it in a different size they'll be able to do it. ret = buf_meta_bread(jnl->fsdev, (daddr64_t)fs_block, bsize, NOCRED, &oblock_bp); @@ -1138,7 +1203,7 @@ replay_journal(journal *jnl) orig_jnl_start = jnl->jhdr->start; // allocate memory for the header_block. we'll read each blhdr into this - if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&buff, jnl->jhdr->blhdr_size)) { + if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&buff, jnl->jhdr->blhdr_size, VM_KERN_MEMORY_FILE)) { printf("jnl: %s: replay_journal: no memory for block buffer! 
(%d bytes)\n", jnl->jdev_name, jnl->jhdr->blhdr_size); return -1; @@ -1273,7 +1338,7 @@ replay_journal(journal *jnl) if (blhdr->flags & BLHDR_CHECK_CHECKSUMS) { check_block_checksums = 1; - if (kmem_alloc(kernel_map, (vm_offset_t *)&block_ptr, max_bsize)) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&block_ptr, max_bsize, VM_KERN_MEMORY_FILE)) { goto bad_replay; } } else { @@ -1423,7 +1488,7 @@ replay_journal(journal *jnl) max_bsize = (max_bsize + PAGE_SIZE) & ~(PAGE_SIZE - 1); } - if (kmem_alloc(kernel_map, (vm_offset_t *)&block_ptr, max_bsize)) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&block_ptr, max_bsize, VM_KERN_MEMORY_FILE)) { goto bad_replay; } @@ -1573,6 +1638,10 @@ get_io_info(struct vnode *devvp, size_t phys_blksz, journal *jnl, struct vfs_con if (features & DK_FEATURE_UNMAP) { jnl->flags |= JOURNAL_USE_UNMAP; } + + if (features & DK_FEATURE_BARRIER) { + jnl->flags |= JOURNAL_FEATURE_BARRIER; + } } // @@ -1715,7 +1784,7 @@ journal_create(struct vnode *jvp, get_io_info(jvp, phys_blksz, jnl, &context); - if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) { + if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz, VM_KERN_MEMORY_FILE)) { printf("jnl: %s: create: could not allocate space for header buffer (%u bytes)\n", jdev_name, phys_blksz); goto bad_kmem_alloc; } @@ -1893,7 +1962,7 @@ journal_open(struct vnode *jvp, get_io_info(jvp, phys_blksz, jnl, &context); - if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) { + if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz, VM_KERN_MEMORY_FILE)) { printf("jnl: %s: create: could not allocate space for header buffer (%u bytes)\n", jdev_name, phys_blksz); goto bad_kmem_alloc; } @@ -2006,8 +2075,8 @@ journal_open(struct vnode *jvp, // take care of replaying the journal if necessary if (flags & JOURNAL_RESET) { - printf("jnl: %s: journal start/end pointers reset! 
(jnl %p; s 0x%llx e 0x%llx)\n", - jdev_name, jnl, jnl->jhdr->start, jnl->jhdr->end); + printf("jnl: %s: journal start/end pointers reset! (s 0x%llx e 0x%llx)\n", + jdev_name, jnl->jhdr->start, jnl->jhdr->end); jnl->jhdr->start = jnl->jhdr->end; } else if (replay_journal(jnl) != 0) { printf("jnl: %s: journal_open: Error replaying the journal!\n", jdev_name); @@ -2129,7 +2198,7 @@ journal_is_clean(struct vnode *jvp, memset(&jnl, 0, sizeof(jnl)); - if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl.header_buf, phys_blksz)) { + if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl.header_buf, phys_blksz, VM_KERN_MEMORY_FILE)) { printf("jnl: %s: is_clean: could not allocate space for header buffer (%d bytes)\n", jdev_name, phys_blksz); ret = ENOMEM; goto cleanup_jdev_name; @@ -2272,8 +2341,7 @@ journal_close(journal *jnl) } else { // if we're here the journal isn't valid any more. // so make sure we don't leave any locked blocks lying around - printf("jnl: %s: close: journal %p, is invalid. aborting outstanding transactions\n", jnl->jdev_name, jnl); - + printf("jnl: %s: close: journal is invalid. 
aborting outstanding transactions\n", jnl->jdev_name); if (jnl->active_tr || jnl->cur_tr) { transaction *tr; @@ -2478,7 +2546,8 @@ static errno_t journal_allocate_transaction(journal *jnl) { transaction *tr; - boolean_t was_vm_privileged; + boolean_t was_vm_privileged = FALSE; + kern_return_t retval; if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) { /* @@ -2496,13 +2565,16 @@ journal_allocate_transaction(journal *jnl) tr->tbuffer_size = jnl->tbuffer_size; - if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&tr->tbuffer, tr->tbuffer_size)) { + retval = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&tr->tbuffer, tr->tbuffer_size, VM_KERN_MEMORY_FILE); + + if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + + if (retval) { FREE_ZONE(tr, sizeof(transaction), M_JNL_TR); jnl->active_tr = NULL; return ENOMEM; } - if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE)) - set_vm_privilege(FALSE); // journal replay code checksum check depends on this. memset(tr->tbuffer, 0, BLHDR_CHECKSUM_SIZE); @@ -2601,6 +2673,7 @@ int journal_modify_block_start(journal *jnl, struct buf *bp) { transaction *tr; + boolean_t was_vm_privileged = FALSE; CHECK_JOURNAL(jnl); @@ -2611,6 +2684,17 @@ journal_modify_block_start(journal *jnl, struct buf *bp) return EINVAL; } + if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) { + /* + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... + * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } + // XXXdbg - for debugging I want this to be true. later it may // not be necessary. 
if ((buf_flags(bp) & B_META) == 0) { @@ -2651,7 +2735,7 @@ journal_modify_block_start(journal *jnl, struct buf *bp) printf("jnl: %s: phys blksz got bigger (was: %d/%d now %d)\n", jnl->jdev_name, jnl->header_buf_size, jnl->jhdr->jhdr_size, phys_blksz); - if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&new_header_buf, phys_blksz)) { + if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&new_header_buf, phys_blksz, VM_KERN_MEMORY_FILE)) { printf("jnl: modify_block_start: %s: create: phys blksz change (was %d, now %d) but could not allocate space for new header\n", jnl->jdev_name, jnl->jhdr->jhdr_size, phys_blksz); bad = 1; @@ -2673,6 +2757,9 @@ journal_modify_block_start(journal *jnl, struct buf *bp) if (bad) { panic("jnl: mod block start: bufsize %d not a multiple of block size %d\n", buf_size(bp), jnl->jhdr->jhdr_size); + + if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); return -1; } } @@ -2681,6 +2768,9 @@ journal_modify_block_start(journal *jnl, struct buf *bp) if (tr->total_bytes+buf_size(bp) >= (jnl->jhdr->size - jnl->jhdr->jhdr_size)) { panic("jnl: transaction too big (%d >= %lld bytes, bufsize %d, tr %p bp %p)\n", tr->total_bytes, (tr->jnl->jhdr->size - jnl->jhdr->jhdr_size), buf_size(bp), tr, bp); + + if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); return -1; } @@ -2702,6 +2792,9 @@ journal_modify_block_start(journal *jnl, struct buf *bp) } buf_setflags(bp, B_LOCKED); + if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + return 0; } @@ -2844,7 +2937,7 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(buf_t bp, vo // through prev->binfo[0].bnum. that's a skanky way to do things but // avoids having yet another linked list of small data structures to manage. 
- if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&nblhdr, tr->tbuffer_size)) { + if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&nblhdr, tr->tbuffer_size, VM_KERN_MEMORY_FILE)) { panic("jnl: end_tr: no space for new block tr @ %p (total bytes: %d)!\n", tr, tr->total_bytes); } @@ -2883,7 +2976,16 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(buf_t bp, vo vnode_t vp; vp = buf_vnode(bp); - vnode_ref(vp); + if (vnode_ref(vp)) { + // Nobody checks the return values, so... + jnl->flags |= JOURNAL_INVALID; + + buf_brelse(bp); + + // We're probably here due to a force unmount, so EIO is appropriate + return EIO; + } + bsize = buf_size(bp); blhdr->binfo[i].bnum = (off_t)(buf_blkno(bp)); @@ -2923,7 +3025,8 @@ journal_kill_block(journal *jnl, struct buf *bp) free_old_stuff(jnl); if (jnl->flags & JOURNAL_INVALID) { - return EINVAL; + buf_brelse(bp); + return 0; } tr = jnl->active_tr; @@ -2972,15 +3075,17 @@ journal_kill_block(journal *jnl, struct buf *bp) buf_markinvalid(bp); buf_brelse(bp); - break; + return 0; } } - - if (i < blhdr->num_blocks) { - break; - } } + /* + * We did not find the block in any transaction buffer but we still + * need to release it or else it will be left locked forever. 
+ */ + buf_brelse(bp); + return 0; } @@ -3041,7 +3146,7 @@ trim_realloc(journal *jnl, struct jnl_trim_list *trim) { void *new_extents; uint32_t new_allocated_count; - boolean_t was_vm_privileged; + boolean_t was_vm_privileged = FALSE; if (jnl_kdebug) KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REALLOC | DBG_FUNC_START, VM_KERNEL_ADDRPERM(trim), 0, trim->allocated_count, trim->extent_count, 0); @@ -3590,7 +3695,7 @@ static int journal_trim_flush(journal *jnl, transaction *tr) { int errno = 0; - boolean_t was_vm_privileged; + boolean_t was_vm_privileged = FALSE; if (jnl_kdebug) KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_FLUSH | DBG_FUNC_START, VM_KERNEL_ADDRPERM(jnl), tr, 0, tr->trim.extent_count, 0); @@ -4039,13 +4144,23 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba size_t tbuffer_offset; int bufs_written = 0; int ret_val = 0; + boolean_t was_vm_privileged = FALSE; KERNEL_DEBUG(0xbbbbc028|DBG_FUNC_START, jnl, tr, 0, 0, 0); + if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) { + /* + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... 
+ * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } end = jnl->jhdr->end; for (blhdr = tr->blhdr; blhdr; blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum)) { - boolean_t was_vm_privileged; amt = blhdr->bytes_used; @@ -4054,22 +4169,9 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba blhdr->checksum = 0; blhdr->checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE); - if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) { - /* - * if we block waiting for memory, and there is enough pressure to - * cause us to try and create a new swap file, we may end up deadlocking - * due to waiting for the journal on the swap file creation path... - * by making ourselves vm_privileged, we give ourselves the best chance - * of not blocking - */ - was_vm_privileged = set_vm_privilege(TRUE); - } - if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, blhdr->num_blocks * sizeof(struct buf *))) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, blhdr->num_blocks * sizeof(struct buf *), VM_KERN_MEMORY_FILE)) { panic("can't allocate %zd bytes for bparray\n", blhdr->num_blocks * sizeof(struct buf *)); } - if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE)) - set_vm_privilege(FALSE); - tbuffer_offset = jnl->jhdr->blhdr_size; for (i = 1; i < blhdr->num_blocks; i++) { @@ -4092,8 +4194,8 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba lblkno = buf_lblkno(bp); if (vp == NULL && lblkno == blkno) { - printf("jnl: %s: end_tr: bad news! bp @ %p w/null vp and l/blkno = %qd/%qd. aborting the transaction (tr %p jnl %p).\n", - jnl->jdev_name, bp, lblkno, blkno, tr, jnl); + printf("jnl: %s: end_tr: bad news! buffer w/null vp and l/blkno = %qd/%qd. 
aborting the transaction.\n", + jnl->jdev_name, lblkno, blkno); ret_val = -1; goto bad_journal; } @@ -4107,17 +4209,17 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba size_t contig_bytes; if (VNOP_BLKTOOFF(vp, lblkno, &f_offset)) { - printf("jnl: %s: end_tr: vnop_blktooff failed @ %p, jnl %p\n", jnl->jdev_name, bp, jnl); + printf("jnl: %s: end_tr: vnop_blktooff failed\n", jnl->jdev_name); ret_val = -1; goto bad_journal; } if (VNOP_BLOCKMAP(vp, f_offset, buf_count(bp), &blkno, &contig_bytes, NULL, 0, NULL)) { - printf("jnl: %s: end_tr: can't blockmap the bp @ %p, jnl %p\n", jnl->jdev_name, bp, jnl); + printf("jnl: %s: end_tr: can't blockmap the buffer", jnl->jdev_name); ret_val = -1; goto bad_journal; } if ((uint32_t)contig_bytes < buf_count(bp)) { - printf("jnl: %s: end_tr: blk not physically contiguous on disk@ %p, jnl %p\n", jnl->jdev_name, bp, jnl); + printf("jnl: %s: end_tr: blk not physically contiguous on disk\n", jnl->jdev_name); ret_val = -1; goto bad_journal; } @@ -4287,6 +4389,8 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba bad_journal: if (ret_val == -1) { + abort_transaction(jnl, tr); // cleans up list of extents to be trimmed + /* * 'flush_aborted' is protected by the flushing condition... 
we need to * set it before dropping the condition so that it will be @@ -4304,12 +4408,14 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba jnl->flags |= JOURNAL_INVALID; jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] &= ~0x8000000000000000LL; - abort_transaction(jnl, tr); // cleans up list of extents to be trimmed journal_unlock(jnl); } else unlock_condition(jnl, &jnl->flushing); + if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + KERNEL_DEBUG(0xbbbbc028|DBG_FUNC_END, jnl, tr, bufs_written, ret_val, 0); return (ret_val); @@ -4395,13 +4501,24 @@ abort_transaction(journal *jnl, transaction *tr) bp_vp = buf_vnode(tbp); - buf_setfilter(tbp, NULL, NULL, NULL, NULL); - - if (buf_shadow(tbp)) + if (buf_shadow(tbp)) { sbp = tbp; - else + buf_setfilter(tbp, NULL, NULL, NULL, NULL); + } else { + assert(ISSET(buf_flags(tbp), B_LOCKED)); + sbp = NULL; + do { + errno = buf_acquire(tbp, BAC_REMOVE, 0, 0); + } while (errno == EAGAIN); + + if (!errno) { + buf_setfilter(tbp, NULL, NULL, NULL, NULL); + buf_brelse(tbp); + } + } + if (bp_vp) { errno = buf_meta_bread(bp_vp, buf_lblkno(tbp), @@ -4430,8 +4547,8 @@ abort_transaction(journal *jnl, transaction *tr) */ vnode_rele_ext(bp_vp, 0, 1); } else { - printf("jnl: %s: abort_tr: could not find block %lld vp %p!\n", - jnl->jdev_name, blhdr->binfo[i].bnum, tbp); + printf("jnl: %s: abort_tr: could not find block %lld for vnode!\n", + jnl->jdev_name, blhdr->binfo[i].bnum); if (bp) { buf_brelse(bp); } @@ -4569,9 +4686,11 @@ journal_end_transaction(journal *jnl) * guarantees consistent journal content on the disk. 
*/ int -journal_flush(journal *jnl, boolean_t wait_for_IO) +journal_flush(journal *jnl, journal_flush_options_t options) { boolean_t drop_lock = FALSE; + errno_t error = 0; + uint32_t flush_count; CHECK_JOURNAL(jnl); @@ -4588,13 +4707,16 @@ journal_flush(journal *jnl, boolean_t wait_for_IO) drop_lock = TRUE; } + if (ISSET(options, JOURNAL_FLUSH_FULL)) + flush_count = jnl->flush_counter; + // if we're not active, flush any buffered transactions if (jnl->active_tr == NULL && jnl->cur_tr) { transaction *tr = jnl->cur_tr; jnl->cur_tr = NULL; - if (wait_for_IO) { + if (ISSET(options, JOURNAL_WAIT_FOR_IO)) { wait_condition(jnl, &jnl->flushing, "journal_flush"); wait_condition(jnl, &jnl->asyncIO, "journal_flush"); } @@ -4620,10 +4742,26 @@ journal_flush(journal *jnl, boolean_t wait_for_IO) */ wait_condition(jnl, &jnl->flushing, "journal_flush"); } - if (wait_for_IO) { + if (ISSET(options, JOURNAL_WAIT_FOR_IO)) { wait_condition(jnl, &jnl->asyncIO, "journal_flush"); } + if (ISSET(options, JOURNAL_FLUSH_FULL)) { + + dk_synchronize_t sync_request = { + .options = 0, + }; + + // We need a full cache flush. If it has not been done, do it here. + if (flush_count == jnl->flush_counter) + error = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, vfs_context_kernel()); + + // If external journal partition is enabled, flush filesystem data partition. 
+ if (jnl->jdev != jnl->fsdev) + error = VNOP_IOCTL(jnl->fsdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, vfs_context_kernel()); + + } + KERNEL_DEBUG(DBG_JOURNAL_FLUSH | DBG_FUNC_END, jnl, 0, 0, 0, 0); return 0; @@ -4752,7 +4890,7 @@ int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbu tr = jnl->active_tr; CHECK_TRANSACTION(tr); jnl->active_tr = NULL; - ret = journal_flush(jnl, TRUE); + ret = journal_flush(jnl, JOURNAL_WAIT_FOR_IO); jnl->active_tr = tr; if (ret) { @@ -4812,6 +4950,10 @@ int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbu return ret; } +uint32_t journal_current_txn(journal *jnl) +{ + return jnl->sequence_num + (jnl->active_tr || jnl->cur_tr ? 0 : 1); +} #else // !JOURNALING - so provide stub functions @@ -4900,7 +5042,7 @@ journal_end_transaction(__unused journal *jnl) } int -journal_flush(__unused journal *jnl, __unused boolean_t wait_for_IO) +journal_flush(__unused journal *jnl, __unused journal_flush_options_t options) { return EINVAL; } diff --git a/bsd/vfs/vfs_journal.h b/bsd/vfs/vfs_journal.h index 5b9578b37..42fd81e5c 100644 --- a/bsd/vfs/vfs_journal.h +++ b/bsd/vfs/vfs_journal.h @@ -187,6 +187,7 @@ typedef struct journal { volatile off_t old_start[16]; // this is how we do lazy start update int last_flush_err; // last error from flushing the cache + uint32_t flush_counter; // a monotonically increasing value assigned on track cache flush } journal; /* internal-only journal flags (top 16 bits) */ @@ -196,6 +197,7 @@ typedef struct journal { #define JOURNAL_NEED_SWAP 0x00080000 // swap any data read from disk #define JOURNAL_DO_FUA_WRITES 0x00100000 // do force-unit-access writes #define JOURNAL_USE_UNMAP 0x00200000 // device supports UNMAP (TRIM) +#define JOURNAL_FEATURE_BARRIER 0x00400000 // device supports barrier-only flush /* journal_open/create options are always in the low-16 bits */ @@ -338,7 +340,13 @@ int journal_request_immediate_flush (journal *jnl); int 
journal_end_transaction(journal *jnl); int journal_active(journal *jnl); -int journal_flush(journal *jnl, boolean_t wait_for_IO); + +typedef enum journal_flush_options { + JOURNAL_WAIT_FOR_IO = 0x01, // Flush journal and metadata blocks, wait for async IO to complete. + JOURNAL_FLUSH_FULL = 0x02, // Flush track cache to media +} journal_flush_options_t; + +int journal_flush(journal *jnl, journal_flush_options_t options); void *journal_owner(journal *jnl); // compare against current_thread() int journal_uses_fua(journal *jnl); void journal_lock(journal *jnl); @@ -365,6 +373,8 @@ void journal_unlock(journal *jnl); int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbuffer_size, errno_t (*callback)(void *), void *callback_arg); +uint32_t journal_current_txn(journal *jnl); + __END_DECLS #endif /* __APPLE_API_UNSTABLE */ diff --git a/bsd/vfs/vfs_lookup.c b/bsd/vfs/vfs_lookup.c index 4beff12a6..09bd470a9 100644 --- a/bsd/vfs/vfs_lookup.c +++ b/bsd/vfs/vfs_lookup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -90,7 +90,7 @@ #include #include #include - +#include /* to get the prototype for strstr() in sys/dtrace_glue.h */ #if CONFIG_MACF #include #endif @@ -371,6 +371,7 @@ namei(struct nameidata *ndp) if ( (error = lookup(ndp)) ) { goto error_out; } + /* * Check for symbolic link */ @@ -633,15 +634,6 @@ lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int goto nextname; } -#if CONFIG_TRIGGERS - if (dp->v_resolve) { - error = vnode_trigger_resolve(dp, ndp, ctx); - if (error) { - goto out; - } - } -#endif /* CONFIG_TRIGGERS */ - /* * Take into account any additional components consumed by * the underlying filesystem. 
@@ -1315,73 +1307,90 @@ lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vn uint32_t depth = 0; vnode_t mounted_on_dp; int current_mount_generation = 0; +#if CONFIG_TRIGGERS + vnode_t triggered_dp = NULLVP; + int retry_cnt = 0; +#define MAX_TRIGGER_RETRIES 1 +#endif - mounted_on_dp = dp; - current_mount_generation = mount_generation; - - while ((dp->v_type == VDIR) && dp->v_mountedhere && - ((cnp->cn_flags & NOCROSSMOUNT) == 0)) { + if (dp->v_type != VDIR || cnp->cn_flags & NOCROSSMOUNT) + return 0; - if (dp->v_mountedhere->mnt_lflag & MNT_LFORCE) { - break; // don't traverse into a forced unmount - } + mounted_on_dp = dp; #if CONFIG_TRIGGERS - /* - * For a trigger vnode, call its resolver when crossing its mount (if requested) - */ - if (dp->v_resolve) { - (void) vnode_trigger_resolve(dp, ndp, ctx); - } +restart: #endif - vnode_lock(dp); - - if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) { + current_mount_generation = mount_generation; + while (dp->v_mountedhere) { + vnode_lock_spin(dp); + if ((mp = dp->v_mountedhere)) { mp->mnt_crossref++; vnode_unlock(dp); + } else { + vnode_unlock(dp); + break; + } + if (ISSET(mp->mnt_lflag, MNT_LFORCE)) { + mount_dropcrossref(mp, dp, 0); + break; // don't traverse into a forced unmount + } - if (vfs_busy(mp, vbusyflags)) { - mount_dropcrossref(mp, dp, 0); - if (vbusyflags == LK_NOWAIT) { - error = ENOENT; - goto out; - } - - continue; - } - - error = VFS_ROOT(mp, &tdp, ctx); + if (vfs_busy(mp, vbusyflags)) { mount_dropcrossref(mp, dp, 0); - vfs_unbusy(mp); - - if (error) { + if (vbusyflags == LK_NOWAIT) { + error = ENOENT; goto out; } - vnode_put(dp); - ndp->ni_vp = dp = tdp; - depth++; + continue; + } + + error = VFS_ROOT(mp, &tdp, ctx); -#if CONFIG_TRIGGERS - /* - * Check if root dir is a trigger vnode - */ - if (dp->v_resolve) { - error = vnode_trigger_resolve(dp, ndp, ctx); - if (error) { - goto out; - } - } -#endif + mount_dropcrossref(mp, dp, 0); + vfs_unbusy(mp); - } else { - 
vnode_unlock(dp); + if (error) { + goto out; + } + + vnode_put(dp); + ndp->ni_vp = dp = tdp; + if (dp->v_type != VDIR) { +#if DEVELOPMENT || DEBUG + panic("%s : Root of filesystem not a directory\n", + __FUNCTION__); +#else break; +#endif } + depth++; } +#if CONFIG_TRIGGERS + /* + * The triggered_dp check here is required but is susceptible to a + * (unlikely) race in which trigger mount is done from here and is + * unmounted before we get past vfs_busy above. We retry to deal with + * that case but it has the side effect of unwanted retries for + * "special" processes which don't want to trigger mounts. + */ + if (dp->v_resolve && retry_cnt < MAX_TRIGGER_RETRIES) { + error = vnode_trigger_resolve(dp, ndp, ctx); + if (error) + goto out; + if (dp == triggered_dp) + retry_cnt += 1; + else + retry_cnt = 0; + triggered_dp = dp; + goto restart; + } +#endif /* CONFIG_TRIGGERS */ + if (depth) { mp = mounted_on_dp->v_mountedhere; diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index 898319ab0..9fec68cd4 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -140,8 +140,6 @@ #include #endif -#define PANIC_PRINTS_VNODES - extern lck_grp_t *vnode_lck_grp; extern lck_attr_t *vnode_lck_attr; @@ -230,6 +228,12 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *); static void record_vp(vnode_t vp, int count); #endif +#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG) +extern int bootarg_no_vnode_jetsam; /* from bsd_init.c default value is 0 */ +#endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */ + +boolean_t root_is_CF_drive = FALSE; + #if CONFIG_TRIGGERS static int vnode_resolver_create(mount_t, vnode_t, struct vnode_trigger_param *, boolean_t external); static void vnode_resolver_detach(vnode_t); @@ -651,7 +655,7 @@ static void vnode_iterate_panic_hook(panic_hook_t *hook_) if (panic_phys_range_before(hook->vp, &phys, &range)) { kdb_log("vp = %p, phys = %p, prev (%p: %p-%p)\n", - hook->mp, phys, range.type, range.phys_start, + hook->vp, phys, range.type, range.phys_start, range.phys_start + range.len); } else { kdb_log("vp = %p, phys = %p, prev (!)\n", hook->vp, phys); @@ -1122,6 +1126,13 @@ vfs_mountroot(void) */ vfs_init_io_attributes(rootvp, mp); + if ((mp->mnt_ioflags & MNT_IOFLAGS_FUSION_DRIVE) && + (mp->mnt_ioflags & MNT_IOFLAGS_IOSCHED_SUPPORTED)) { + /* + * only for CF + */ + root_is_CF_drive = TRUE; + } /* * Shadow the VFC_VFSNATIVEXATTR flag to MNTK_EXTENDED_ATTRS. */ @@ -1683,12 +1694,33 @@ vnode_list_add(vnode_t vp) #if DIAGNOSTIC lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); #endif + +again: + /* * if it is already on a list or non zero references return */ if (VONLIST(vp) || (vp->v_usecount != 0) || (vp->v_iocount != 0) || (vp->v_lflag & VL_TERMINATE)) return; + /* + * In vclean, we might have deferred ditching locked buffers + * because something was still referencing them (indicated by + * usecount). We can ditch them now. 
+ */ + if (ISSET(vp->v_lflag, VL_DEAD) + && (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))) { + ++vp->v_iocount; // Probably not necessary, but harmless +#ifdef JOE_DEBUG + record_vp(vp, 1); +#endif + vnode_unlock(vp); + buf_invalidateblks(vp, BUF_INVALIDATE_LOCKED, 0, 0); + vnode_lock(vp); + vnode_dropiocount(vp); + goto again; + } + vnode_list_lock(); if ((vp->v_flag & VRAGE) && !(vp->v_lflag & VL_DEAD)) { @@ -2002,7 +2034,13 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags) vnode_lock_spin(vp); - if ((vp->v_id != vid) || ((vp->v_lflag & (VL_DEAD | VL_TERMINATE)))) { + // If vnode is already terminating, wait for it... + while (vp->v_id == vid && ISSET(vp->v_lflag, VL_TERMINATE)) { + vp->v_lflag |= VL_TERMWANT; + msleep(&vp->v_lflag, &vp->v_lock, PVFS, "vflush", NULL); + } + + if ((vp->v_id != vid) || ISSET(vp->v_lflag, VL_DEAD)) { vnode_unlock(vp); mount_lock(mp); continue; @@ -2165,12 +2203,6 @@ vclean(vnode_t vp, int flags) vp->v_lflag |= VL_TERMINATE; - /* - * remove the vnode from any mount list - * it might be on... - */ - insmntque(vp, (struct mount *)0); - #if NAMEDSTREAMS is_namedstream = vnode_isnamedstream(vp); #endif @@ -2197,8 +2229,16 @@ vclean(vnode_t vp, int flags) else #endif { - VNOP_FSYNC(vp, MNT_WAIT, ctx); - buf_invalidateblks(vp, BUF_WRITE_DATA | BUF_INVALIDATE_LOCKED, 0, 0); + VNOP_FSYNC(vp, MNT_WAIT, ctx); + + /* + * If the vnode is still in use (by the journal for + * example) we don't want to invalidate locked buffers + * here. In that case, either the journal will tidy them + * up, or we will deal with it when the usecount is + * finally released in vnode_rele_internal. + */ + buf_invalidateblks(vp, BUF_WRITE_DATA | (active ? 0 : BUF_INVALIDATE_LOCKED), 0, 0); } if (UBCINFOEXISTS(vp)) /* @@ -2260,6 +2300,14 @@ vclean(vnode_t vp, int flags) vnode_lock(vp); + /* + * Remove the vnode from any mount list it might be on. 
It is not + * safe to do this any earlier because unmount needs to wait for + * any vnodes to terminate and it cannot do that if it cannot find + * them. + */ + insmntque(vp, (struct mount *)0); + vp->v_mount = dead_mountp; vp->v_op = dead_vnodeop_p; vp->v_tag = VT_NON; @@ -3071,6 +3119,8 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp) u_int64_t temp; u_int32_t features; vfs_context_t ctx = vfs_context_current(); + dk_corestorage_info_t cs_info; + boolean_t cs_present = FALSE;; int isssd = 0; int isvirtual = 0; @@ -3243,19 +3293,31 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp) if (features & DK_FEATURE_FORCE_UNIT_ACCESS) mp->mnt_ioflags |= MNT_IOFLAGS_FUA_SUPPORTED; - + + if (VNOP_IOCTL(devvp, DKIOCCORESTORAGE, (caddr_t)&cs_info, 0, ctx) == 0) + cs_present = TRUE; + if (features & DK_FEATURE_UNMAP) { mp->mnt_ioflags |= MNT_IOFLAGS_UNMAP_SUPPORTED; - if (VNOP_IOCTL(devvp, _DKIOCCORESTORAGE, NULL, 0, ctx) == 0) + if (cs_present == TRUE) mp->mnt_ioflags |= MNT_IOFLAGS_CSUNMAP_SUPPORTED; } + if (cs_present == TRUE) { + /* + * for now we'll use the following test as a proxy for + * the underlying drive being FUSION in nature + */ + if ((cs_info.flags & DK_CORESTORAGE_PIN_YOUR_METADATA)) + mp->mnt_ioflags |= MNT_IOFLAGS_FUSION_DRIVE; + } + #if CONFIG_IOSCHED if (iosched_enabled && (features & DK_FEATURE_PRIORITY)) { mp->mnt_ioflags |= MNT_IOFLAGS_IOSCHED_SUPPORTED; - throttle_info_disable_throttle(mp->mnt_devbsdunit); + throttle_info_disable_throttle(mp->mnt_devbsdunit, (mp->mnt_ioflags & MNT_IOFLAGS_FUSION_DRIVE) != 0); } -#endif /* CONFIG_IOSCHED */ +#endif /* CONFIG_IOSCHED */ return (error); } @@ -3751,6 +3813,44 @@ SYSCTL_NODE(_vfs_generic, VFS_CONF, conf, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_vfs_generic_conf, ""); +/* + * Print vnode state. + */ +void +vn_print_state(struct vnode *vp, const char *fmt, ...) 
+{ + va_list ap; + char perm_str[] = "(VM_KERNEL_ADDRPERM pointer)"; + char fs_name[MFSNAMELEN]; + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + printf("vp 0x%0llx %s: ", (uint64_t)VM_KERNEL_ADDRPERM(vp), perm_str); + printf("tag %d, type %d\n", vp->v_tag, vp->v_type); + /* Counts .. */ + printf(" iocount %d, usecount %d, kusecount %d references %d\n", + vp->v_iocount, vp->v_usecount, vp->v_kusecount, vp->v_references); + printf(" writecount %d, numoutput %d\n", vp->v_writecount, + vp->v_numoutput); + /* Flags */ + printf(" flag 0x%x, lflag 0x%x, listflag 0x%x\n", vp->v_flag, + vp->v_lflag, vp->v_listflag); + + if (vp->v_mount == NULL || vp->v_mount == dead_mountp) { + strlcpy(fs_name, "deadfs", MFSNAMELEN); + } else { + vfs_name(vp->v_mount, fs_name); + } + + printf(" v_data 0x%0llx %s\n", + (vp->v_data ? (uint64_t)VM_KERNEL_ADDRPERM(vp->v_data) : 0), + perm_str); + printf(" v_mount 0x%0llx %s vfs_name %s\n", + (vp->v_mount ? (uint64_t)VM_KERNEL_ADDRPERM(vp->v_mount) : 0), + perm_str, fs_name); +} + long num_reusedvnodes = 0; @@ -4050,9 +4150,15 @@ new_vnode(vnode_t *vpp) vnode_list_unlock(); tablefull("vnode"); log(LOG_EMERG, "%d desired, %d numvnodes, " - "%d free, %d dead, %d rage\n", - desiredvnodes, numvnodes, freevnodes, deadvnodes, ragevnodes); + "%d free, %d dead, %d async, %d rage\n", + desiredvnodes, numvnodes, freevnodes, deadvnodes, async_work_vnodes, ragevnodes); #if CONFIG_JETSAM + +#if DEVELOPMENT || DEBUG + if (bootarg_no_vnode_jetsam) + panic("vnode table is full\n"); +#endif /* DEVELOPMENT || DEBUG */ + /* * Running out of vnodes tends to make a system unusable. Start killing * processes that jetsam knows are killable. 
@@ -4265,6 +4371,17 @@ vnode_put(vnode_t vp) return(retval); } +static inline void +vn_set_dead(vnode_t vp) +{ + vp->v_mount = NULL; + vp->v_op = dead_vnodeop_p; + vp->v_tag = VT_NON; + vp->v_data = NULL; + vp->v_type = VBAD; + vp->v_lflag |= VL_DEAD; +} + int vnode_put_locked(vnode_t vp) { @@ -4444,6 +4561,8 @@ vnode_getiocount(vnode_t vp, unsigned int vid, int vflags) int withvid = vflags & VNODE_WITHID; for (;;) { + int sleepflg = 0; + /* * if it is a dead vnode with deadfs */ @@ -4476,7 +4595,8 @@ vnode_getiocount(vnode_t vp, unsigned int vid, int vflags) /* * If this vnode is getting drained, there are some cases where - * we can't block. + * we can't block or, in case of tty vnodes, want to be + * interruptible. */ if (vp->v_lflag & VL_DRAIN) { /* @@ -4498,15 +4618,24 @@ vnode_getiocount(vnode_t vp, unsigned int vid, int vflags) * failed because an unmount is in progress. */ if (withvid && (vp->v_mount) && vfs_isunmount(vp->v_mount)) - return(ENODEV); + return (ENODEV); + + if (vnode_istty(vp)) { + sleepflg = PCATCH; + } } vnode_lock_convert(vp); if (vp->v_lflag & VL_TERMINATE) { + int error; + vp->v_lflag |= VL_TERMWANT; - msleep(&vp->v_lflag, &vp->v_lock, PVFS, "vnode getiocount", NULL); + error = msleep(&vp->v_lflag, &vp->v_lock, + (PVFS | sleepflg), "vnode getiocount", NULL); + if (error) + return (error); } else msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_getiocount", NULL); } @@ -4637,16 +4766,13 @@ vnode_reclaim_internal(struct vnode * vp, int locked, int reuse, int flags) vnode_unlock(vp); } -/* USAGE: - * The following api creates a vnode and associates all the parameter specified in vnode_fsparam - * structure and returns a vnode handle with a reference. device aliasing is handled here so checkalias - * is obsoleted by this. 
- */ -int -vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp) +static int +vnode_create_internal(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp, + int init_vnode) { int error; int insert = 1; + int existing_vnode; vnode_t vp; vnode_t nvp; vnode_t dvp; @@ -4656,34 +4782,68 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp) #if CONFIG_TRIGGERS struct vnode_trigger_param *tinfo = NULL; #endif - if (param == NULL) - return (EINVAL); - - /* Do quick sanity check on the parameters. */ - if (param->vnfs_vtype == VBAD) { - return EINVAL; + if (*vpp) { + vp = *vpp; + *vpp = NULLVP; + existing_vnode = 1; + } else { + existing_vnode = 0; } -#if CONFIG_TRIGGERS - if ((flavor == VNCREATE_TRIGGER) && (size == VNCREATE_TRIGGER_SIZE)) { - tinfo = (struct vnode_trigger_param *)data; + if (init_vnode) { + /* Do quick sanity check on the parameters. */ + if ((param == NULL) || (param->vnfs_vtype == VBAD)) { + error = EINVAL; + goto error_out; + } - /* Validate trigger vnode input */ - if ((param->vnfs_vtype != VDIR) || - (tinfo->vnt_resolve_func == NULL) || - (tinfo->vnt_flags & ~VNT_VALID_MASK)) { - return (EINVAL); +#if CONFIG_TRIGGERS + if ((flavor == VNCREATE_TRIGGER) && (size == VNCREATE_TRIGGER_SIZE)) { + tinfo = (struct vnode_trigger_param *)data; + + /* Validate trigger vnode input */ + if ((param->vnfs_vtype != VDIR) || + (tinfo->vnt_resolve_func == NULL) || + (tinfo->vnt_flags & ~VNT_VALID_MASK)) { + error = EINVAL; + goto error_out; + } + /* Fall through a normal create (params will be the same) */ + flavor = VNCREATE_FLAVOR; + size = VCREATESIZE; } - /* Fall through a normal create (params will be the same) */ - flavor = VNCREATE_FLAVOR; - size = VCREATESIZE; - } #endif - if ((flavor != VNCREATE_FLAVOR) || (size != VCREATESIZE)) - return (EINVAL); + if ((flavor != VNCREATE_FLAVOR) || (size != VCREATESIZE)) { + error = EINVAL; + goto error_out; + } + } - if ( (error = new_vnode(&vp)) ) - return(error); + if 
(!existing_vnode) { + if ((error = new_vnode(&vp)) ) { + return (error); + } + if (!init_vnode) { + /* Make it so that it can be released by a vnode_put) */ + vn_set_dead(vp); + *vpp = vp; + return (0); + } + } else { + /* + * A vnode obtained by vnode_create_empty has been passed to + * vnode_initialize - Unset VL_DEAD set by vn_set_dead. After + * this point, it is set back on any error. + * + * N.B. vnode locking - We make the same assumptions as the + * "unsplit" vnode_create did - i.e. it is safe to update the + * vnode's fields without the vnode lock. This vnode has been + * out and about with the filesystem and hopefully nothing + * was done to the vnode between the vnode_create_empty and + * now when it has come in through vnode_initialize. + */ + vp->v_lflag &= ~VL_DEAD; + } dvp = param->vnfs_dvp; cnp = param->vnfs_cnp; @@ -4702,12 +4862,7 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp) #ifdef JOE_DEBUG record_vp(vp, 1); #endif - vp->v_mount = NULL; - vp->v_op = dead_vnodeop_p; - vp->v_tag = VT_NON; - vp->v_data = NULL; - vp->v_type = VBAD; - vp->v_lflag |= VL_DEAD; + vn_set_dead(vp); vnode_put(vp); return(error); @@ -4735,12 +4890,7 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp) error = vnode_resolver_create(param->vnfs_mp, vp, tinfo, FALSE); if (error) { printf("vnode_create: vnode_resolver_create() err %d\n", error); - vp->v_mount = NULL; - vp->v_op = dead_vnodeop_p; - vp->v_tag = VT_NON; - vp->v_data = NULL; - vp->v_type = VBAD; - vp->v_lflag |= VL_DEAD; + vn_set_dead(vp); #ifdef JOE_DEBUG record_vp(vp, 1); #endif @@ -4862,6 +5012,58 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp) vp->v_flag |= VRAGE; } return (0); + +error_out: + if (existing_vnode) { + vnode_put(vp); + } + return (error); +} + +/* USAGE: + * The following api creates a vnode and associates all the parameter specified in vnode_fsparam + * structure and returns a vnode handle with a reference. 
device aliasing is handled here so checkalias + * is obsoleted by this. + */ +int +vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp) +{ + *vpp = NULLVP; + return (vnode_create_internal(flavor, size, data, vpp, 1)); +} + +int +vnode_create_empty(vnode_t *vpp) +{ + *vpp = NULLVP; + return (vnode_create_internal(VNCREATE_FLAVOR, VCREATESIZE, NULL, + vpp, 0)); +} + +int +vnode_initialize(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp) +{ + if (*vpp == NULLVP) { + panic("NULL vnode passed to vnode_initialize"); + } +#if DEVELOPMENT || DEBUG + /* + * We lock to check that vnode is fit for unlocked use in + * vnode_create_internal. + */ + vnode_lock_spin(*vpp); + VNASSERT(((*vpp)->v_iocount == 1), *vpp, + ("vnode_initialize : iocount not 1, is %d", (*vpp)->v_iocount)); + VNASSERT(((*vpp)->v_usecount == 0), *vpp, + ("vnode_initialize : usecount not 0, is %d", (*vpp)->v_usecount)); + VNASSERT(((*vpp)->v_lflag & VL_DEAD), *vpp, + ("vnode_initialize : v_lflag does not have VL_DEAD, is 0x%x", + (*vpp)->v_lflag)); + VNASSERT(((*vpp)->v_data == NULL), *vpp, + ("vnode_initialize : v_data not NULL")); + vnode_unlock(*vpp); +#endif + return (vnode_create_internal(flavor, size, data, vpp, 1)); } int @@ -5167,6 +5369,9 @@ vnode_lookup(const char *path, int flags, vnode_t *vpp, vfs_context_t ctx) if (flags & VNODE_LOOKUP_NOCROSSMOUNT) ndflags |= NOCROSSMOUNT; + if (flags & VNODE_LOOKUP_CROSSMOUNTNOWAIT) + ndflags |= CN_NBMOUNTLOOK; + /* XXX AUDITVNPATH1 needed ? */ NDINIT(&nd, LOOKUP, OP_LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); @@ -5202,6 +5407,9 @@ vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_ if (lflags & VNODE_LOOKUP_NOCROSSMOUNT) ndflags |= NOCROSSMOUNT; + if (lflags & VNODE_LOOKUP_CROSSMOUNTNOWAIT) + ndflags |= CN_NBMOUNTLOOK; + /* XXX AUDITVNPATH1 needed ? 
*/ NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); @@ -8065,6 +8273,20 @@ vnode_setswapmount(vnode_t vp) } +int64_t +vnode_getswappin_avail(vnode_t vp) +{ + int64_t max_swappin_avail = 0; + + mount_lock(vp->v_mount); + if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_SWAPPIN_SUPPORTED) + max_swappin_avail = vp->v_mount->mnt_max_swappin_available; + mount_unlock(vp->v_mount); + + return (max_swappin_avail); +} + + void vn_setunionwait(vnode_t vp) { @@ -8130,7 +8352,7 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int * if (error == EBUSY) *restart_flag = 1; if (error != 0) - goto outsc; + return (error); /* * set up UIO @@ -8298,7 +8520,8 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int * if (open_flag) VNOP_CLOSE(vp, FREAD, ctx); - uio_free(auio); + if (auio) + uio_free(auio); FREE(rbuf, M_TEMP); vnode_resume(vp); @@ -8320,10 +8543,9 @@ lock_vnode_and_post(vnode_t vp, int kevent_num) } } - -#ifdef PANIC_PRINTS_VNODES - void panic_print_vnodes(void); +/* define PANIC_PRINTS_VNODES only if investigation is required. 
*/ +#ifdef PANIC_PRINTS_VNODES static const char *__vtype(uint16_t vtype) { @@ -8360,7 +8582,8 @@ static const char *__vtype(uint16_t vtype) static char *__vpath(vnode_t vp, char *str, int len, int depth) { int vnm_len; - char *dst, *src; + const char *src; + char *dst; if (len <= 0) return str; @@ -8371,15 +8594,13 @@ static char *__vpath(vnode_t vp, char *str, int len, int depth) /* follow mount vnodes to get the full path */ if ((vp->v_flag & VROOT)) { if (vp->v_mount != NULL && vp->v_mount->mnt_vnodecovered) { - if (len < 1) - return str + len; return __vpath(vp->v_mount->mnt_vnodecovered, str, len, depth+1); } return str + len; } - src = (char *)vp->v_name; + src = vp->v_name; vnm_len = strlen(src); if (vnm_len > len) { /* truncate the name to fit in the string */ diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c index e2b135a7b..a949a717d 100644 --- a/bsd/vfs/vfs_syscalls.c +++ b/bsd/vfs/vfs_syscalls.c @@ -99,6 +99,7 @@ #include #include #include +#include #include #include #include @@ -115,6 +116,7 @@ #include #include +#include #if CONFIG_MACF #include @@ -738,7 +740,7 @@ mount_common(char *fstypename, vnode_t pvp, vnode_t vp, if ( (error = namei(&nd)) ) goto out1; - strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN); + strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN); devvp = nd.ni_vp; nameidone(&nd); @@ -989,6 +991,8 @@ mount_common(char *fstypename, vnode_t pvp, vnode_t vp, /* Now that mount is setup, notify the listeners */ vfs_notify_mount(pvp); + IOBSDMountChange(mp, kIOMountChangeMount); + } else { /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. 
*/ if (mp->mnt_vnodelist.tqh_first != NULL) { @@ -1524,8 +1528,8 @@ relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, placed = TRUE; - strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN); - strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN); + strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN); + strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN); /* Forbid future moves */ mount_lock(mp); @@ -1550,7 +1554,7 @@ relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, return 0; out3: - strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN); + strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN); mount_lock(mp); mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED); @@ -1920,6 +1924,8 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) } } + IOBSDMountChange(mp, kIOMountChangeUnmount); + #if CONFIG_TRIGGERS vfs_nested_trigger_unmounts(mp, flags, ctx); did_vflush = 1; @@ -3295,15 +3301,16 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, int flags, oflags; int type, indx, error; struct flock lf; - int no_controlling_tty = 0; - int deny_controlling_tty = 0; - struct session *sessp = SESSION_NULL; + struct vfs_context context; oflags = uflags; if ((oflags & O_ACCMODE) == O_ACCMODE) return(EINVAL); + flags = FFLAGS(uflags); + CLR(flags, FENCRYPTED); + CLR(flags, FUNENCRYPTED); AUDIT_ARG(fflags, oflags); AUDIT_ARG(mode, vap->va_mode); @@ -3314,68 +3321,26 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, } uu->uu_dupfd = -indx - 1; - if (!(p->p_flag & P_CONTROLT)) { - sessp = proc_session(p); - no_controlling_tty = 1; - /* - * If conditions would warrant getting a controlling tty if - * the device being opened is a tty (see ttyopen in tty.c), - * but the open flags deny it, set a flag in the session to - * prevent it. 
- */ - if (SESS_LEADER(p, sessp) && - sessp->s_ttyvp == NULL && - (flags & O_NOCTTY)) { - session_lock(sessp); - sessp->s_flags |= S_NOCTTY; - session_unlock(sessp); - deny_controlling_tty = 1; - } - } - if ((error = vn_open_auth(ndp, &flags, vap))) { if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */ if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) { fp_drop(p, indx, NULL, 0); *retval = indx; - if (deny_controlling_tty) { - session_lock(sessp); - sessp->s_flags &= ~S_NOCTTY; - session_unlock(sessp); - } - if (sessp != SESSION_NULL) - session_rele(sessp); return (0); } } if (error == ERESTART) error = EINTR; fp_free(p, indx, fp); - - if (deny_controlling_tty) { - session_lock(sessp); - sessp->s_flags &= ~S_NOCTTY; - session_unlock(sessp); - } - if (sessp != SESSION_NULL) - session_rele(sessp); return (error); } uu->uu_dupfd = 0; vp = ndp->ni_vp; - fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY); + fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED); fp->f_fglob->fg_ops = &vnops; fp->f_fglob->fg_data = (caddr_t)vp; -#if CONFIG_PROTECT - if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) { - if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) { - fp->f_fglob->fg_flag |= FENCRYPTED; - } - } -#endif - if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; @@ -3402,33 +3367,6 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0)) goto bad; - /* - * If the open flags denied the acquisition of a controlling tty, - * clear the flag in the session structure that prevented the lower - * level code from assigning one. - */ - if (deny_controlling_tty) { - session_lock(sessp); - sessp->s_flags &= ~S_NOCTTY; - session_unlock(sessp); - } - - /* - * If a controlling tty was set by the tty line discipline, then we - * want to set the vp of the tty into the session structure. 
We have - * a race here because we can't get to the vp for the tp in ttyopen, - * because it's not passed as a parameter in the open path. - */ - if (no_controlling_tty && (p->p_flag & P_CONTROLT)) { - vnode_t ttyvp; - - session_lock(sessp); - ttyvp = sessp->s_ttyvp; - sessp->s_ttyvp = vp; - sessp->s_ttyvid = vnode_vid(vp); - session_unlock(sessp); - } - /* * For directories we hold some additional information in the fd. */ @@ -3440,6 +3378,18 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, vnode_put(vp); + /* + * The first terminal open (without a O_NOCTTY) by a session leader + * results in it being set as the controlling terminal. + */ + if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) && + !(flags & O_NOCTTY)) { + int tmp = 0; + + (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY, + (caddr_t)&tmp, ctx); + } + proc_fdlock(p); if (flags & O_CLOEXEC) *fdflags(p, indx) |= UF_EXCLOSE; @@ -3451,19 +3401,9 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, *retval = indx; - if (sessp != SESSION_NULL) - session_rele(sessp); return (0); bad: - if (deny_controlling_tty) { - session_lock(sessp); - sessp->s_flags &= ~S_NOCTTY; - session_unlock(sessp); - } - if (sessp != SESSION_NULL) - session_rele(sessp); - - struct vfs_context context = *vfs_context_current(); + context = *vfs_context_current(); context.vc_ucred = fp->f_fglob->fg_cred; if ((fp->f_fglob->fg_flag & FHASLOCK) && @@ -3629,15 +3569,27 @@ int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, * 2. set a flag to mark it as requiring open-raw-encrypted semantics. */ if (flags & O_CREAT) { - VATTR_SET(&va, va_dataprotect_class, class); + /* lower level kernel code validates that the class is valid before applying it. 
*/ + if (class != PROTECTION_CLASS_DEFAULT) { + /* + * PROTECTION_CLASS_DEFAULT implies that we make the class for this + * file behave the same as open (2) + */ + VATTR_SET(&va, va_dataprotect_class, class); + } } - if (dpflags & O_DP_GETRAWENCRYPTED) { + if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) { if ( flags & (O_RDWR | O_WRONLY)) { /* Not allowed to write raw encrypted bytes */ return EINVAL; } - VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED); + if (uap->dpflags & O_DP_GETRAWENCRYPTED) { + VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED); + } + if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) { + VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED); + } } error = open1(vfs_context_current(), &nd, uap->flags, &va, @@ -3816,9 +3768,6 @@ mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval) } switch (uap->mode & S_IFMT) { - case S_IFMT: /* used by badsect to flag bad sectors */ - VATTR_SET(&va, va_type, VBAD); - break; case S_IFCHR: VATTR_SET(&va, va_type, VCHR); break; @@ -4353,17 +4302,18 @@ symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd, error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx); #if CONFIG_MACF - if (error == 0) + if (error == 0 && vp) error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx); #endif /* do fallback attribute handling */ - if (error == 0) + if (error == 0 && vp) error = vnode_setattr_fallback(vp, &va, ctx); if (error == 0) { int update_flags = 0; + /*check if a new vnode was created, else try to get one*/ if (vp == NULL) { nd.ni_cnd.cn_nameiop = LOOKUP; #if CONFIG_TRIGGERS @@ -4544,10 +4494,12 @@ unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp, if (!batched) { error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL); if (error) { - if (error == ENOENT && - retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) { - do_retry = 1; - retry_count++; + if (error == ENOENT) { + assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES); + if 
(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) { + do_retry = 1; + retry_count++; + } } goto out; } @@ -4612,16 +4564,18 @@ unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp, goto out; } goto lookup_continue; - } else if (error == ENOENT && batched && - retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) { - /* - * For compound VNOPs, the authorization callback may - * return ENOENT in case of racing hardlink lookups - * hitting the name cache, redrive the lookup. - */ - do_retry = 1; - retry_count += 1; - goto out; + } else if (error == ENOENT && batched) { + assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES); + if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) { + /* + * For compound VNOPs, the authorization callback may + * return ENOENT in case of racing hardlink lookups + * hitting the name cache, redrive the lookup. + */ + do_retry = 1; + retry_count += 1; + goto out; + } } } @@ -6713,15 +6667,17 @@ renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from, if (!batched) { error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL); if (error) { - if (error == ENOENT && - retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) { - /* - * We encountered a race where after doing the namei, tvp stops - * being valid. If so, simply re-drive the rename call from the - * top. - */ - do_retry = 1; - retry_count += 1; + if (error == ENOENT) { + assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES); + if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) { + /* + * We encountered a race where after doing the namei, tvp stops + * being valid. If so, simply re-drive the rename call from the + * top. + */ + do_retry = 1; + retry_count += 1; + } } goto out1; } @@ -6994,10 +6950,12 @@ renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from, * ENOENT in case of racing hardlink lookups hitting the name * cache, redrive the lookup. 
*/ - if (batched && error == ENOENT && - retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) { - do_retry = 1; - retry_count += 1; + if (batched && error == ENOENT) { + assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES); + if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) { + do_retry = 1; + retry_count += 1; + } } goto out1; @@ -7424,10 +7382,12 @@ rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath, if (!batched) { error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL); if (error) { - if (error == ENOENT && - restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) { - restart_flag = 1; - restart_count += 1; + if (error == ENOENT) { + assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES); + if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) { + restart_flag = 1; + restart_count += 1; + } } goto out; } @@ -7484,16 +7444,18 @@ rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath, if (error == EKEEPLOOKING) { goto continue_lookup; - } else if (batched && error == ENOENT && - restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) { - /* - * For compound VNOPs, the authorization callback - * may return ENOENT in case of racing hard link lookups - * redrive the lookup. - */ - restart_flag = 1; - restart_count += 1; - goto out; + } else if (batched && error == ENOENT) { + assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES); + if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) { + /* + * For compound VNOPs, the authorization callback + * may return ENOENT in case of racing hard link lookups + * redrive the lookup. 
+ */ + restart_flag = 1; + restart_count += 1; + goto out; + } } #if CONFIG_APPLEDOUBLE /* @@ -9308,11 +9270,12 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long memp = NULL; + /* * ensure the buffer is large enough for underlying calls */ #ifndef HFSIOC_GETPATH -typedef char pn_t[MAXPATHLEN]; + typedef char pn_t[MAXPATHLEN]; #define HFSIOC_GETPATH _IOWR('h', 13, pn_t) #endif @@ -9324,7 +9287,6 @@ typedef char pn_t[MAXPATHLEN]; size = MAXPATHLEN; } - if (size > sizeof (stkbuf)) { if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM; data = memp; @@ -9719,27 +9681,30 @@ ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval) /* Get the vnode for the file we are getting info on: */ if ((error = file_vnode(uap->fd, &vp))) - goto done; + return error; fd = uap->fd; if ((error = vnode_getwithref(vp))) { - goto done; + file_drop(fd); + return error; } #if CONFIG_MACF - error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd); - if (error) { - goto done; + if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) { + file_drop(fd); + vnode_put(vp); + return error; } #endif error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx); -done: - if (fd != -1) - file_drop(fd); + file_drop(fd); - if (vp) + /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/ + if (vp) { vnode_put(vp); + } + return error; } /* end of fsctl system call */ @@ -9959,7 +9924,12 @@ fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval) return (EINVAL); if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { - return (error); + if (error == EPERM) { + /* if the string won't fit in attrname, copyinstr emits EPERM */ + return (ENAMETOOLONG); + } + /* Otherwise return the default error from copyinstr to detect ERANGE, etc */ + return error; } if (xattr_protected(attrname)) return(EPERM); diff --git a/bsd/vfs/vfs_utfconv.c b/bsd/vfs/vfs_utfconv.c index 
f785b0d8c..8639edc99 100644 --- a/bsd/vfs/vfs_utfconv.c +++ b/bsd/vfs/vfs_utfconv.c @@ -62,13 +62,13 @@ /* Surrogate Pair Constants */ #define SP_HALF_SHIFT 10 -#define SP_HALF_BASE 0x0010000UL -#define SP_HALF_MASK 0x3FFUL +#define SP_HALF_BASE 0x0010000u +#define SP_HALF_MASK 0x3FFu -#define SP_HIGH_FIRST 0xD800UL -#define SP_HIGH_LAST 0xDBFFUL -#define SP_LOW_FIRST 0xDC00UL -#define SP_LOW_LAST 0xDFFFUL +#define SP_HIGH_FIRST 0xD800u +#define SP_HIGH_LAST 0xDBFFu +#define SP_LOW_FIRST 0xDC00u +#define SP_LOW_LAST 0xDFFFu #include "vfs_utfconvdata.h" @@ -148,7 +148,7 @@ static int unicode_decompose(u_int16_t character, u_int16_t *convertedChars); static u_int16_t unicode_combine(u_int16_t base, u_int16_t combining); -static void priortysort(u_int16_t* characters, int count); +static void prioritysort(u_int16_t* characters, int count); static u_int16_t ucs_to_sfm(u_int16_t ucs_ch, int lastchar); @@ -196,7 +196,7 @@ utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash, int fl u_int16_t * chp = NULL; u_int16_t sequence[8]; int extra = 0; - int charcnt; + size_t charcnt; int swapbytes = (flags & UTF_REVERSE_ENDIAN); int decompose = (flags & UTF_DECOMPOSED); size_t len; @@ -266,7 +266,7 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, u_int16_t * chp = NULL; u_int16_t sequence[8]; int extra = 0; - int charcnt; + size_t charcnt; int swapbytes = (flags & UTF_REVERSE_ENDIAN); int nullterm = ((flags & UTF_NO_NULL_TERM) == 0); int decompose = (flags & UTF_DECOMPOSED); @@ -378,6 +378,23 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, return (result); } +// Pushes a character taking account of combining character sequences +static void push(uint16_t ucs_ch, int *combcharcnt, uint16_t **ucsp) +{ + /* + * Make multiple combining character sequences canonical + */ + if (unicode_combinable(ucs_ch)) { + ++*combcharcnt; /* start tracking a run */ + } else if (*combcharcnt) { + if (*combcharcnt > 1) { + 
prioritysort(*ucsp - *combcharcnt, *combcharcnt); + } + *combcharcnt = 0; /* start over */ + } + + *(*ucsp)++ = ucs_ch; +} /* * utf8_decodestr - Decodes a UTF-8 string back to Unicode @@ -417,13 +434,12 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, unsigned int byte; int combcharcnt = 0; int result = 0; - int decompose, precompose, swapbytes, escaping; + int decompose, precompose, escaping; int sfmconv; int extrabytes; decompose = (flags & UTF_DECOMPOSED); precompose = (flags & UTF_PRECOMPOSED); - swapbytes = (flags & UTF_REVERSE_ENDIAN); escaping = (flags & UTF_ESCAPE_ILLEGAL); sfmconv = (flags & UTF_SFM_CONVERSIONS); @@ -497,7 +513,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST; if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST) goto escape4; - *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch; + push(ucs_ch, &combcharcnt, &ucsp); if (ucsp >= bufend) goto toolong; ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST; @@ -505,7 +521,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, --ucsp; goto escape4; } - *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch; + *ucsp++ = ucs_ch; continue; default: result = EINVAL; @@ -516,30 +532,22 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, u_int16_t sequence[8]; int count, i; - /* Before decomposing a new unicode character, sort - * previous combining characters, if any, and reset - * the counter. - */ - if (combcharcnt > 1) { - priortysort(ucsp - combcharcnt, combcharcnt); - } - combcharcnt = 0; - count = unicode_decompose(ucs_ch, sequence); + for (i = 0; i < count; ++i) { - ucs_ch = sequence[i]; - *ucsp++ = swapbytes ? 
OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch; if (ucsp >= bufend) goto toolong; + + push(sequence[i], &combcharcnt, &ucsp); } - combcharcnt += count - 1; - continue; + + continue; } } else if (precompose && (ucsp != bufstart)) { u_int16_t composite, base; if (unicode_combinable(ucs_ch)) { - base = swapbytes ? OSSwapInt16(*(ucsp - 1)) : *(ucsp - 1); + base = ucsp[-1]; composite = unicode_combine(base, ucs_ch); if (composite) { --ucsp; @@ -553,19 +561,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, if (ucs_ch == altslash) ucs_ch = '/'; - /* - * Make multiple combining character sequences canonical - */ - if (unicode_combinable(ucs_ch)) { - ++combcharcnt; /* start tracking a run */ - } else if (combcharcnt) { - if (combcharcnt > 1) { - priortysort(ucsp - combcharcnt, combcharcnt); - } - combcharcnt = 0; /* start over */ - } - - *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch; + push(ucs_ch, &combcharcnt, &ucsp); continue; /* @@ -593,23 +589,32 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, /* Make a previous combining sequence canonical. */ if (combcharcnt > 1) { - priortysort(ucsp - combcharcnt, combcharcnt); + prioritysort(ucsp - combcharcnt, combcharcnt); } combcharcnt = 0; ucs_ch = '%'; - *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch; + *ucsp++ = ucs_ch; ucs_ch = hexdigits[byte >> 4]; - *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch; + *ucsp++ = ucs_ch; ucs_ch = hexdigits[byte & 0x0F]; - *ucsp++ = swapbytes ? 
OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch; + *ucsp++ = ucs_ch; } /* * Make a previous combining sequence canonical */ if (combcharcnt > 1) { - priortysort(ucsp - combcharcnt, combcharcnt); + prioritysort(ucsp - combcharcnt, combcharcnt); } + + if (flags & UTF_REVERSE_ENDIAN) { + uint16_t *p = bufstart; + while (p < ucsp) { + *p = OSSwapInt16(*p); + ++p; + } + } + exit: *ucslen = (u_int8_t*)ucsp - (u_int8_t*)bufstart; @@ -804,7 +809,7 @@ utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr, if (unicode_bytes <= sizeof(unicodebuf)) unistr = &unicodebuf[0]; else - MALLOC(unistr, u_int16_t *, unicode_bytes, M_TEMP, M_WAITOK); + MALLOC(unistr, uint16_t *, unicode_bytes, M_TEMP, M_WAITOK); /* Normalize the string. */ result = utf8_decodestr(inbufstart, inbuflen, unistr, &unicode_bytes, @@ -1014,12 +1019,12 @@ unicode_combine(u_int16_t base, u_int16_t combining) /* - * priortysort - order combining chars into canonical order + * prioritysort - order combining chars into canonical order * * Similar to CFUniCharPrioritySort */ static void -priortysort(u_int16_t* characters, int count) +prioritysort(u_int16_t* characters, int count) { u_int32_t p1, p2; u_int16_t *ch1, *ch2; diff --git a/bsd/vfs/vfs_vnops.c b/bsd/vfs/vfs_vnops.c index 9b431080f..ca14ddec6 100644 --- a/bsd/vfs/vfs_vnops.c +++ b/bsd/vfs/vfs_vnops.c @@ -115,7 +115,7 @@ int ubc_setcred(struct vnode *, struct proc *); #include #endif -extern void sigpup_attach_vnode(vnode_t); /* XXX */ +#include static int vn_closefile(struct fileglob *fp, vfs_context_t ctx); static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, @@ -195,8 +195,6 @@ vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx) kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, (uintptr_t)vp, 0); - sigpup_attach_vnode(vp); - return 0; bad: @@ -308,6 +306,12 @@ vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, return error; } +/* + * This is the number of times we'll loop in 
vn_open_auth without explicitly + * yielding the CPU when we determine we have to retry. + */ +#define RETRY_NO_YIELD_COUNT 5 + /* * Open a file with authorization, updating the contents of the structures * pointed to by ndp, fmodep, and vap as necessary to perform the requested @@ -367,6 +371,7 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) boolean_t need_vnop_open; boolean_t batched; boolean_t ref_failed; + int nretries = 0; again: vp = NULL; @@ -446,10 +451,9 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) if (error) { /* - * Check for a creation or unlink race. + * Check for a create race. */ - if (((error == EEXIST) && !(fmode & O_EXCL)) || - ((error == ENOENT) && (fmode & O_CREAT))){ + if ((error == EEXIST) && !(fmode & O_EXCL)){ if (vp) vnode_put(vp); goto again; @@ -571,21 +575,32 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) } #if CONFIG_PROTECT - /* - * Perform any content protection access checks prior to calling - * into the filesystem, if the raw encrypted mode was not - * requested. - * - * If the va_dataprotect_flags are NOT active, or if they are, - * but they do not have the VA_DP_RAWENCRYPTED bit set, then we need - * to perform the checks. 
- */ - if (!(VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) || - ((vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) == 0)) { - error = cp_handle_open (vp, fmode); - if (error) { + // If raw encrypted mode is requested, handle that here + if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags) + && ISSET(vap->va_dataprotect_flags, VA_DP_RAWENCRYPTED)) { + fmode |= FENCRYPTED; + } + if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags) + && ISSET(vap->va_dataprotect_flags, VA_DP_RAWUNENCRYPTED)) { + /* Don't allow unencrypted io request from user space unless entitled */ + boolean_t entitled = FALSE; +#if !SECURE_KERNEL + entitled = IOTaskHasEntitlement(current_task(), "com.apple.private.security.file-unencrypt-access"); +#endif + if (!entitled) { + error = EPERM; goto bad; } + fmode |= FUNENCRYPTED; + } + + /* + * Perform any content protection access checks prior to calling + * into the filesystem. + */ + error = cp_handle_open (vp, fmode); + if (error) { + goto bad; } #endif @@ -649,6 +664,27 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) * EREDRIVEOPEN: means that we were hit by the tty allocation race. */ if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) { + /* + * We'll retry here but it may be possible that we get + * into a retry "spin" inside the kernel and not allow + * threads, which need to run in order for the retry + * loop to end, to run. An example is an open of a + * terminal which is getting revoked and we spin here + * without yielding because namei and VNOP_OPEN are + * successful but vnode_ref fails. The revoke needs + * threads with an iocount to run but if we spin here we + * may possibly be blocking other threads from running. + * + * We start yielding the CPU after some number of + * retries for increasing durations. Note that this is + * still a loop without an exit condition. + */ + nretries += 1; + if (nretries > RETRY_NO_YIELD_COUNT) { + /* Every hz/100 secs is 10 msecs ... 
*/ + tsleep(&nretries, PVFS, "vn_open_auth_retry", + MIN((nretries * (hz/100)), hz)); + } goto again; } } @@ -968,6 +1004,12 @@ vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) if (fp->f_fglob->fg_flag & FENCRYPTED) { ioflag |= IO_ENCRYPTED; } + if (fp->f_fglob->fg_flag & FUNENCRYPTED) { + ioflag |= IO_SKIP_ENCRYPTION; + } + if (fp->f_fglob->fg_flag & O_EVTONLY) { + ioflag |= IO_EVTONLY; + } if (fp->f_fglob->fg_flag & FNORDAHEAD) ioflag |= IO_RAOFF; @@ -980,7 +1022,7 @@ vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) } count = uio_resid(uio); - if (vnode_isswap(vp)) { + if (vnode_isswap(vp) && !(IO_SKIP_ENCRYPTION & ioflag)) { /* special case for swap files */ error = vn_read_swapfile(vp, uio); } else { @@ -1044,6 +1086,8 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) ioflag |= IO_NODIRECT; if (fp->f_fglob->fg_flag & FSINGLE_WRITER) ioflag |= IO_SINGLE_WRITER; + if (fp->f_fglob->fg_flag & O_EVTONLY) + ioflag |= IO_EVTONLY; /* * Treat synchronous mounts and O_FSYNC on the fd as equivalent. @@ -1254,8 +1298,11 @@ vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat6 sb64->st_atimespec = va.va_access_time; sb64->st_mtimespec = va.va_modify_time; sb64->st_ctimespec = va.va_change_time; - sb64->st_birthtimespec = - VATTR_IS_SUPPORTED(&va, va_create_time) ? 
va.va_create_time : va.va_change_time; + if (VATTR_IS_SUPPORTED(&va, va_create_time)) { + sb64->st_birthtimespec = va.va_create_time; + } else { + sb64->st_birthtimespec.tv_sec = sb64->st_birthtimespec.tv_nsec = 0; + } sb64->st_blksize = va.va_iosize; sb64->st_flags = va.va_flags; sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512; @@ -1476,26 +1523,32 @@ vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx) static int vn_closefile(struct fileglob *fg, vfs_context_t ctx) { - struct vnode *vp = (struct vnode *)fg->fg_data; + struct vnode *vp = fg->fg_data; int error; - struct flock lf; if ( (error = vnode_getwithref(vp)) == 0 ) { - - if ((fg->fg_flag & FHASLOCK) && - FILEGLOB_DTYPE(fg) == DTYPE_VNODE) { - lf.l_whence = SEEK_SET; - lf.l_start = 0; - lf.l_len = 0; - lf.l_type = F_UNLCK; - - (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx, NULL); + if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE && + ((fg->fg_flag & FHASLOCK) != 0 || + (fg->fg_lflags & FG_HAS_OFDLOCK) != 0)) { + struct flock lf = { + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0, + .l_type = F_UNLCK + }; + + if ((fg->fg_flag & FHASLOCK) != 0) + (void) VNOP_ADVLOCK(vp, (caddr_t)fg, + F_UNLCK, &lf, F_FLOCK, ctx, NULL); + + if ((fg->fg_lflags & FG_HAS_OFDLOCK) != 0) + (void) VNOP_ADVLOCK(vp, (caddr_t)fg, + F_UNLCK, &lf, F_OFD_LOCK, ctx, NULL); } error = vn_close(vp, fg->fg_flag, ctx); - - (void)vnode_put(vp); + (void) vnode_put(vp); } - return(error); + return (error); } /* diff --git a/bsd/vfs/vfs_xattr.c b/bsd/vfs/vfs_xattr.c index a6fc32251..be1898b45 100644 --- a/bsd/vfs/vfs_xattr.c +++ b/bsd/vfs/vfs_xattr.c @@ -598,7 +598,7 @@ vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) } iosize = bufsize = MIN(datasize, NS_IOBUFSIZE); - if (kmem_alloc(kernel_map, (vm_offset_t *)&bufptr, bufsize)) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&bufptr, bufsize, VM_KERN_MEMORY_FILE)) { return (ENOMEM); } auio = uio_create(1, 0, UIO_SYSSPACE, 
UIO_READ); @@ -941,7 +941,7 @@ default_getnamedstream(vnode_t vp, vnode_t *svpp, const char *name, enum nsopera size_t iosize; iosize = bufsize = MIN(datasize, NS_IOBUFSIZE); - if (kmem_alloc(kernel_map, (vm_offset_t *)&bufptr, bufsize)) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&bufptr, bufsize, VM_KERN_MEMORY_FILE)) { error = ENOMEM; goto out; } @@ -3215,7 +3215,7 @@ shift_data_down(vnode_t xvp, off_t start, size_t len, off_t delta, vfs_context_t } orig_chunk = chunk; - if (kmem_alloc(kernel_map, (vm_offset_t *)&buff, chunk)) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&buff, chunk, VM_KERN_MEMORY_FILE)) { return ENOMEM; } @@ -3270,7 +3270,7 @@ shift_data_up(vnode_t xvp, off_t start, size_t len, off_t delta, vfs_context_t c orig_chunk = chunk; end = start + len; - if (kmem_alloc(kernel_map, (vm_offset_t *)&buff, chunk)) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&buff, chunk, VM_KERN_MEMORY_FILE)) { return ENOMEM; } diff --git a/bsd/vm/dp_backing_file.c b/bsd/vm/dp_backing_file.c index 31b87e8a0..e17287ff7 100644 --- a/bsd/vm/dp_backing_file.c +++ b/bsd/vm/dp_backing_file.c @@ -602,7 +602,7 @@ macx_swapinfo( kern_return_t kr; error = 0; - if (COMPRESSED_PAGER_IS_ACTIVE) { + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { if (vm_swap_up == TRUE) { diff --git a/bsd/vm/vm_compressor_backing_file.c b/bsd/vm/vm_compressor_backing_file.c index 9a663e9e8..7ec5873db 100644 --- a/bsd/vm/vm_compressor_backing_file.c +++ b/bsd/vm/vm_compressor_backing_file.c @@ -39,13 +39,16 @@ #include #include #include +#include void vm_swapfile_open(const char *path, vnode_t *vp); void vm_swapfile_close(uint64_t path, vnode_t vp); -int vm_swapfile_preallocate(vnode_t vp, uint64_t *size); +int vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin); uint64_t vm_swapfile_get_blksize(vnode_t vp); uint64_t vm_swapfile_get_transfer_size(vnode_t vp); int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags); +int 
vm_record_file_write(struct vnode *vp, uint64_t offset, char *buf, int size); + void vm_swapfile_open(const char *path, vnode_t *vp) @@ -96,7 +99,7 @@ vm_swapfile_close(uint64_t path_addr, vnode_t vp) } int -vm_swapfile_preallocate(vnode_t vp, uint64_t *size) +vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin) { int error = 0; uint64_t file_size = 0; @@ -126,7 +129,6 @@ vm_swapfile_preallocate(vnode_t vp, uint64_t *size) } } #endif - error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx); if (error) { @@ -138,10 +140,24 @@ vm_swapfile_preallocate(vnode_t vp, uint64_t *size) if (error) { printf("vnode_size (new file) for swap file failed: %d\n", error); + goto done; } - assert(file_size == *size); + if (pin != NULL && *pin != FALSE) { + + assert(vnode_tag(vp) == VT_HFS); + + error = hfs_pin_vnode(VTOHFS(vp), vp, HFS_PIN_IT | HFS_DATALESS_PIN, NULL, ctx); + + if (error) { + printf("hfs_pin_vnode for swap files failed: %d\n", error); + /* this is not fatal, carry on with files wherever they landed */ + *pin = FALSE; + error = 0; + } + } + vnode_lock_spin(vp); SET(vp->v_flag, VSWAP); vnode_unlock(vp); @@ -149,6 +165,23 @@ vm_swapfile_preallocate(vnode_t vp, uint64_t *size) return error; } + +int +vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size) +{ + int error = 0; + vfs_context_t ctx; + + ctx = vfs_context_kernel(); + + error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset, + UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); + + return (error); +} + + + int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags) { @@ -158,10 +191,12 @@ vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flag kern_return_t kr = KERN_SUCCESS; upl_t upl = NULL; unsigned int count = 0; - int upl_create_flags = 0, upl_control_flags = 0; + upl_control_flags_t upl_create_flags = 0; + int upl_control_flags = 0; upl_size_t upl_size = 0; - upl_create_flags = 
UPL_SET_INTERNAL | UPL_SET_LITE; + upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE + | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK); #if ENCRYPTED_SWAP upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED; diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c index 06b5d4e1b..099a70fb6 100644 --- a/bsd/vm/vm_unix.c +++ b/bsd/vm/vm_unix.c @@ -104,6 +104,18 @@ int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t); int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *); +#if DEVELOPMENT || DEBUG +extern int radar_20146450; +SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, ""); + +extern int macho_printf; +SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, ""); + +extern int apple_protect_pager_data_request_debug; +SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, ""); + +#endif /* DEVELOPMENT || DEBUG */ + SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, ""); SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, ""); SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, ""); @@ -147,6 +159,7 @@ extern int allow_stack_exec, allow_data_exec; SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, ""); SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, ""); + #endif /* !SECURE_KERNEL */ static const char *prot_values[] = { @@ -192,9 +205,11 @@ SYSCTL_INT(_vm, OID_AUTO, enforce_shared_cache_dir, CTLFLAG_RW | CTLFLAG_LOCKED, static int64_t last_unnest_log_time = 0; static int 
shared_region_unnest_log_count = 0; -void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) { - struct timeval tv; - const char *pcommstr; +void log_unnest_badness( + vm_map_t m, + vm_map_offset_t s, + vm_map_offset_t e) { + struct timeval tv; if (shared_region_unnest_logging == 0) return; @@ -211,9 +226,7 @@ void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) { } } - pcommstr = current_proc()->p_comm; - - printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e); + printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, current_proc()->p_pid, (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m)); } int @@ -248,7 +261,7 @@ vslock( vm_map_page_mask(map)), vm_map_round_page(addr+len, vm_map_page_mask(map)), - VM_PROT_READ | VM_PROT_WRITE, + VM_PROT_READ | VM_PROT_WRITE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_BSD), FALSE); switch (kret) { @@ -947,6 +960,7 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret) } } + task_reference(target); #if CONFIG_MEMORYSTATUS @@ -1234,8 +1248,10 @@ _shared_region_map_and_slide( } #if CONFIG_MACF + /* pass in 0 for the offset argument because AMFI does not need the offset + of the shared cache */ error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()), - fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot); + fp->f_fglob, VM_PROT_ALL, MAP_FILE, 0, &maxprot); if (error) { goto done; } @@ -1562,6 +1578,10 @@ extern unsigned int vm_page_purgeable_wired_count; SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_purgeable_wired_count, 0, "Wired 
purgeable page count"); +extern unsigned int vm_pageout_purged_objects; +SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_pageout_purged_objects, 0, "System purged object count"); + extern int madvise_free_debug; SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)"); diff --git a/config/BSDKernel.exports b/config/BSDKernel.exports index 86f815241..f858726ab 100644 --- a/config/BSDKernel.exports +++ b/config/BSDKernel.exports @@ -6,8 +6,8 @@ _VNOP_STRATEGY _VNOP_WRITE __FREE __FREE_ZONE -__MALLOC -__MALLOC_ZONE +__MALLOC:__MALLOC_external +__MALLOC_ZONE:__MALLOC_ZONE_external _advisory_read _advisory_read_ext _bcd2bin_data @@ -468,6 +468,7 @@ _nop_write _nulldev _nullop _physio +_proc_chrooted _proc_exiting _proc_find _proc_forcequota @@ -650,6 +651,7 @@ _vfs_flags _vfs_fsadd _vfs_fsprivate _vfs_fsremove +_vfs_get_notify_attributes _vfs_getnewfsid _vfs_getvfs _vfs_init_io_attributes @@ -728,6 +730,7 @@ _vnode_iterate _vnode_lookup _vnode_mount _vnode_mountedhere +_vnode_notify _vnode_open _vnode_put _vnode_putname diff --git a/config/IOKit.exports b/config/IOKit.exports index 0a112a032..29c12c6f5 100644 --- a/config/IOKit.exports +++ b/config/IOKit.exports @@ -19,7 +19,6 @@ _IOFreePageable _IOGetTime _IOIteratePageableMaps _IOKitBSDInit -_IOKitResetTime _IOLibInit _IOLockAlloc _IOLockFree @@ -45,11 +44,6 @@ _IOMappedWrite16 _IOMappedWrite32 _IOMappedWrite64 _IOMappedWrite8 -_IOMapperIOVMAlloc -_IOMapperIOVMFree -_IOMapperInsertPPNPages -_IOMapperInsertPage -_IOMapperInsertUPLPages _IONDRVLibrariesInitialize _IONetworkNamePrefixMatching _IOPageableMapForAddress @@ -82,6 +76,7 @@ _IOSimpleLockTryLock:_lck_spin_try_lock _IOSimpleLockUnlock:_lck_spin_unlock _IOSizeToAlignment _IOSleep +_IOSleepWithLeeway _IOSystemShutdownNotification _IOZeroTvalspec _OSKernelStackRemaining @@ -235,13 +230,18 @@ __ZN11IOResourcesD0Ev __ZN11IOResourcesD2Ev 
__ZN12IODMACommand10gMetaClassE __ZN12IODMACommand10superClassE +__ZN12IODMACommand10withRefConEPv __ZN12IODMACommand10writeBytesEyPKvy __ZN12IODMACommand12cloneCommandEPv __ZN12IODMACommand12getAlignmentEv +__ZN12IODMACommand14initWithRefConEPv __ZN12IODMACommand17getNumAddressBitsEv +__ZN12IODMACommand18getAlignmentLengthEv __ZN12IODMACommand19setMemoryDescriptorEPK18IOMemoryDescriptorb __ZN12IODMACommand21clearMemoryDescriptorEb +__ZNK12IODMACommand21getIOMemoryDescriptorEv __ZN12IODMACommand26getPreparedOffsetAndLengthEPyS0_ +__ZN12IODMACommand28getAlignmentInternalSegmentsEv __ZN12IODMACommand4freeEv __ZN12IODMACommand7prepareEyybb __ZN12IODMACommand8completeEbb @@ -596,18 +596,6 @@ __ZN16IODMAEventSourceC2EPK11OSMetaClass __ZN16IODMAEventSourceC2Ev __ZN16IODMAEventSourceD0Ev __ZN16IODMAEventSourceD2Ev -__ZN16IOKitDiagnostics10gMetaClassE -__ZN16IOKitDiagnostics10superClassE -__ZN16IOKitDiagnostics11diagnosticsEv -__ZN16IOKitDiagnostics9MetaClassC1Ev -__ZN16IOKitDiagnostics9MetaClassC2Ev -__ZN16IOKitDiagnostics9metaClassE -__ZN16IOKitDiagnosticsC1EPK11OSMetaClass -__ZN16IOKitDiagnosticsC1Ev -__ZN16IOKitDiagnosticsC2EPK11OSMetaClass -__ZN16IOKitDiagnosticsC2Ev -__ZN16IOKitDiagnosticsD0Ev -__ZN16IOKitDiagnosticsD2Ev __ZN16IOPMinformeeList10gMetaClassE __ZN16IOPMinformeeList10initializeEv __ZN16IOPMinformeeList10nextInListEP12IOPMinformee @@ -654,7 +642,6 @@ __ZN17IOBigMemoryCursorC2Ev __ZN17IOBigMemoryCursorD0Ev __ZN17IOBigMemoryCursorD2Ev __ZN17IOPolledInterface10gMetaClassE -__ZN17IOPolledInterface15checkAllForWorkEv __ZN17IOPolledInterfaceC2EPK11OSMetaClass __ZN17IOPolledInterfaceD2Ev __ZN17IOPowerConnection10gMetaClassE @@ -1012,7 +999,6 @@ __ZN8IOMapper17setMapperRequiredEb __ZN8IOMapper19copyMapperForDeviceEP9IOService __ZN8IOMapper28copyMapperForDeviceWithIndexEP9IOServicej __ZN8IOMapper19waitForSystemMapperEv -__ZN8IOMapper13iovmMapMemoryEP8OSObjectjjjP13upl_page_infoPK21IODMAMapSpecification __ZN8IOMapper4freeEv __ZN8IOMapper5startEP9IOService 
__ZN8IOMapper7gSystemE @@ -1256,9 +1242,6 @@ __ZNK15IORegistryPlane9MetaClass5allocEv __ZNK15IORegistryPlane9serializeEP11OSSerialize __ZNK16IODMAEventSource12getMetaClassEv __ZNK16IODMAEventSource9MetaClass5allocEv -__ZNK16IOKitDiagnostics12getMetaClassEv -__ZNK16IOKitDiagnostics9MetaClass5allocEv -__ZNK16IOKitDiagnostics9serializeEP11OSSerialize __ZNK16IOPMinformeeList12getMetaClassEv __ZNK16IOPMinformeeList9MetaClass5allocEv __ZNK16IORangeAllocator12getMetaClassEv @@ -1319,7 +1302,6 @@ __ZNK28IOFilterInterruptEventSource9MetaClass5allocEv __ZNK29IOInterleavedMemoryDescriptor12getMetaClassEv __ZNK29IOInterleavedMemoryDescriptor9MetaClass5allocEv __ZNK8IOMapper12getMetaClassEv -__ZNK8IOMapper13getBypassMaskEPy __ZNK8IOMapper9MetaClass5allocEv __ZNK9IOCommand12getMetaClassEv __ZNK9IOCommand9MetaClass5allocEv @@ -1360,7 +1342,6 @@ __ZTV15IOPMPowerSource __ZTV15IORegistryEntry __ZTV15IORegistryPlane __ZTV16IODMAEventSource -__ZTV16IOKitDiagnostics __ZTV16IOPMinformeeList __ZTV16IORangeAllocator __ZTV17IOBigMemoryCursor @@ -1413,7 +1394,6 @@ __ZTVN15IOPMPowerSource9MetaClassE __ZTVN15IORegistryEntry9MetaClassE __ZTVN15IORegistryPlane9MetaClassE __ZTVN16IODMAEventSource9MetaClassE -__ZTVN16IOKitDiagnostics9MetaClassE __ZTVN16IOPMinformeeList9MetaClassE __ZTVN16IORangeAllocator9MetaClassE __ZTVN17IOBigMemoryCursor9MetaClassE @@ -1499,6 +1479,7 @@ _gIOParentMatchKey _gIOPathMatchKey _gIOPlatformActiveActionKey _gIOPlatformHaltRestartActionKey +_gIOPlatformPanicActionKey _gIOPlatformQuiesceActionKey _gIOPlatformSleepActionKey _gIOPlatformWakeActionKey diff --git a/config/IOKit.x86_64.exports b/config/IOKit.x86_64.exports index f053710c8..3aadfffa4 100644 --- a/config/IOKit.x86_64.exports +++ b/config/IOKit.x86_64.exports @@ -47,12 +47,12 @@ __ZN12IODMACommand14OutputLittle32EPS_NS_9Segment64EPvj __ZN12IODMACommand14OutputLittle64EPS_NS_9Segment64EPvj __ZN12IODMACommand15genIOVMSegmentsEPFbPS_NS_9Segment64EPvjEPyS2_Pj __ZN12IODMACommand15genIOVMSegmentsEPyPvPj 
+__ZN12IODMACommand16createCopyBufferEjy +__ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvjEPKNS_14SegmentOptionsEjP8IOMapperS2_ +__ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvjEPKNS_14SegmentOptionsEjP8IOMapperS2_ +__ZN12IODMACommand24prepareWithSpecificationEPFbPS_NS_9Segment64EPvjEPKNS_14SegmentOptionsEjP8IOMapperyybb __ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvjEhyNS_14MappingOptionsEyjP8IOMapperS2_ __ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvjEhyNS_14MappingOptionsEyjP8IOMapperS2_ -__ZN12IODMACommand22_RESERVEDIODMACommand3Ev -__ZN12IODMACommand22_RESERVEDIODMACommand4Ev -__ZN12IODMACommand22_RESERVEDIODMACommand5Ev -__ZN12IODMACommand22_RESERVEDIODMACommand6Ev __ZN12IODMACommand22_RESERVEDIODMACommand7Ev __ZN12IODMACommand22_RESERVEDIODMACommand8Ev __ZN12IODMACommand22_RESERVEDIODMACommand9Ev @@ -179,7 +179,6 @@ __ZN16IODMAEventSource14dmaEventSourceEP8OSObjectP9IOServicePFvS1_PS_P12IODMACom __ZN16IODMAEventSource15startDMACommandEP12IODMACommandjyy __ZN16IODMAEventSource16notifyDMACommandEP12IODMACommandiyy __ZN16IODMAEventSource4initEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandiyyES8_j -__ZN16IOKitDiagnostics12updateOffsetEP12OSDictionaryjPKc __ZN16IORangeAllocator10deallocateEyy __ZN16IORangeAllocator12allocElementEj __ZN16IORangeAllocator13allocateRangeEyy @@ -366,25 +365,22 @@ __ZN29IOInterleavedMemoryDescriptor19setMemoryDescriptorEP18IOMemoryDescriptoryy __ZN29IOInterleavedMemoryDescriptor22clearMemoryDescriptorsEj __ZN29IOInterleavedMemoryDescriptor7prepareEj __ZN29IOInterleavedMemoryDescriptor8completeEj -__ZN8IOMapper10allocTableEy -__ZN8IOMapper10iovmInsertEjjP13upl_page_infoj -__ZN8IOMapper10iovmInsertEjjPjj -__ZN8IOMapper11NewARTTableEyPPvPj -__ZN8IOMapper12FreeARTTableEP6OSDatay +__ZN8IOMapper18_RESERVEDIOMapper0Ev +__ZN8IOMapper18_RESERVEDIOMapper1Ev +__ZN8IOMapper18_RESERVEDIOMapper2Ev +__ZN8IOMapper18_RESERVEDIOMapper3Ev __ZN8IOMapper18_RESERVEDIOMapper4Ev 
__ZN8IOMapper18_RESERVEDIOMapper5Ev __ZN8IOMapper18_RESERVEDIOMapper6Ev __ZN8IOMapper18_RESERVEDIOMapper7Ev __ZN8IOMapper18_RESERVEDIOMapper8Ev __ZN8IOMapper18_RESERVEDIOMapper9Ev -__ZN8IOMapper18iovmFreeDMACommandEP12IODMACommandjj __ZN8IOMapper19_RESERVEDIOMapper10Ev __ZN8IOMapper19_RESERVEDIOMapper11Ev __ZN8IOMapper19_RESERVEDIOMapper12Ev __ZN8IOMapper19_RESERVEDIOMapper13Ev __ZN8IOMapper19_RESERVEDIOMapper14Ev __ZN8IOMapper19_RESERVEDIOMapper15Ev -__ZN8IOMapper19iovmAllocDMACommandEP12IODMACommandj __ZN8IOSyncer10gMetaClassE __ZN8IOSyncer10superClassE __ZN8IOSyncer13privateSignalEv diff --git a/config/Libkern.exports b/config/Libkern.exports index 04d952f8f..6f67de350 100644 --- a/config/Libkern.exports +++ b/config/Libkern.exports @@ -84,6 +84,7 @@ __ZN11OSSerialize12withCapacityEj __ZN11OSSerialize14addXMLStartTagEPK15OSMetaClassBasePKc __ZN11OSSerialize14ensureCapacityEj __ZN11OSSerialize16initWithCapacityEj +__ZN11OSSerialize18binaryWithCapacityEjPvS0_:__ZN11OSSerialize18binaryWithCapacityEjPFPK15OSMetaClassBasePvPS_P12OSCollectionPK8OSSymbolS2_ES3_ __ZN11OSSerialize20previouslySerializedEPK15OSMetaClassBase __ZN11OSSerialize20setCapacityIncrementEj __ZN11OSSerialize4freeEv @@ -390,6 +391,7 @@ __ZN9OSBooleanD2Ev __ZNK10OSIterator12getMetaClassEv __ZNK10OSIterator9MetaClass5allocEv __ZNK11OSMetaClass12getClassNameEv +__ZNK11OSMetaClass18getClassNameSymbolEv __ZNK11OSMetaClass12getClassSizeEv __ZNK11OSMetaClass12getMetaClassEv __ZNK11OSMetaClass12taggedRetainEPKv @@ -727,3 +729,4 @@ _vsnprintf _vsscanf _zError _zlibVersion +___llvm_profile_runtime diff --git a/config/MACFramework.exports b/config/MACFramework.exports index aa8401775..d6f7ad04d 100644 --- a/config/MACFramework.exports +++ b/config/MACFramework.exports @@ -9,9 +9,9 @@ _mac_label_set _mac_audit_text _mac_iokit_check_hid_control - -_mac_thread_get_threadlabel -_mac_thread_get_uthreadlabel +_mac_iokit_check_nvram_delete +_mac_iokit_check_nvram_get +_mac_iokit_check_nvram_set _sbuf_cat _sbuf_data 
diff --git a/config/MASTER b/config/MASTER index 7e0b5f60c..d612020b7 100644 --- a/config/MASTER +++ b/config/MASTER @@ -53,7 +53,7 @@ ####################################################################### # -options INET # # +options INET # # options HW_AST # Hardware ast support # options HW_FOOTPRINT # Cache footprint support # @@ -75,7 +75,7 @@ options AH_ALL_CRYPTO # AH all crypto algs # options IPCOMP_ZLIB # IP compression using zlib # options PF # Packet Filter # options PF_ALTQ # PF ALTQ (Alternate Queueing) # -options PF_ECN # PF use ECN marking # +options PF_ECN # PF use ECN marking # options PFLOG # PF log interface # options PKTSCHED_CBQ # CBQ packet scheduler # options PKTSCHED_HFSC # H-FSC packet scheduler # @@ -99,10 +99,10 @@ options ICMP_BANDLIM # ICMP bandwidth limiting sysctl options IFNET_INPUT_SANITY_CHK # allow dlil/ifnet input sanity check # options MULTIPATH # Multipath domain # options MPTCP # Multipath TCP # -options SYSV_SEM # SVID semaphores # -options SYSV_MSG # SVID messages # -options SYSV_SHM # SVID shared mem # -options PSYNCH # pthread synch # +options SYSV_SEM # SVID semaphores # +options SYSV_MSG # SVID messages # +options SYSV_SHM # SVID shared mem # +options PSYNCH # pthread synch # options FLOW_DIVERT # options NECP # options CONTENT_FILTER # # @@ -115,12 +115,12 @@ options OLD_SEMWAIT_SIGNAL # old semwait_signal handler # # 4.4 general kernel # -options SOCKETS # socket support # -options DIAGNOSTIC # diagnostics # -options GPROF # build profiling # +options SOCKETS # socket support # +options DIAGNOSTIC # diagnostics # +options GPROF # build profiling # options PROFILE # kernel profiling # options SENDFILE # sendfile # -options NETWORKING # networking layer # +options NETWORKING # networking layer # options CONFIG_FSE # file system events # options CONFIG_IMAGEBOOT # local image boot # options CONFIG_MBUF_JUMBO # jumbo cluster pool # @@ -132,18 +132,18 @@ options CONFIG_WORKQUEUE # # # 4.4 filesystems # -options HFS # 
HFS/HFS+ support # -options MOCKFS # Boot from an executable # +options HFS # HFS/HFS+ support # +options MOCKFS # Boot from an executable # options FIFO # fifo support # options FDESC # fdesc_fs support # options DEVFS # devfs support # -options JOURNALING # journaling support # -options HFS_COMPRESSION # hfs compression # -options CONFIG_HFS_STD # hfs standard support # -options CONFIG_HFS_TRIM # hfs trims unused blocks # -options CONFIG_HFS_MOUNT_UNMAP #hfs trims blocks at mount # -options CONFIG_HFS_DIRLINK #allow directory hardlink creation # -options CONFIG_DEV_KMEM # /dev/kmem device for reading KVA # +options JOURNALING # journaling support # +options HFS_COMPRESSION # hfs compression # +options CONFIG_HFS_STD # hfs standard support # +options CONFIG_HFS_TRIM # hfs trims unused blocks # +options CONFIG_HFS_MOUNT_UNMAP # hfs trims blocks at mount # +options CONFIG_HFS_DIRLINK # allow directory hardlink creation # +options CONFIG_DEV_KMEM # /dev/kmem device for reading KVA # # # file system features @@ -167,30 +167,30 @@ options NFSSERVER # Be an NFS server # # # Machine Independent Apple Features # -profile # build a profiling kernel # +profile # build a profiling kernel # # # IPv6 Support # -options "INET6" # kernel IPv6 Support # -options IPV6SEND # Secure Neighbor Discovery # -options IPSEC # IP security # -options IPSEC_ESP # IP security # -options "IPV6FIREWALL" # IPv6 Firewall Feature # +options "INET6" # kernel IPv6 Support # +options IPV6SEND # Secure Neighbor Discovery # +options IPSEC # IP security # +options IPSEC_ESP # IP security # +options "IPV6FIREWALL" # IPv6 Firewall Feature # options "IPV6FIREWALL_DEFAULT_TO_ACCEPT" #IPv6 Firewall Feature # #options "IPV6FIREWALL_VERBOSE" #IPv6 Firewall Feature # -pseudo-device gif 1 # -pseudo-device dummy 2 # -pseudo-device stf 1 # +pseudo-device gif 1 # +pseudo-device dummy 2 # +pseudo-device stf 1 # -options CRYPTO # -options CRYPTO_SHA2 # -options ENCRYPTED_SWAP # +options CRYPTO # +options CRYPTO_SHA2 
# +options ENCRYPTED_SWAP # -options ZLIB # inflate/deflate support # +options ZLIB # inflate/deflate support # -options IF_BRIDGE # +options IF_BRIDGE # # # configurable kernel event related resources @@ -215,13 +215,13 @@ options CONFIG_KN_HASHSIZE=20 # options CONFIG_VNODES=263168 # options CONFIG_VNODES=263168 # options CONFIG_VNODES=10240 # -options CONFIG_VNODES=750 # +options CONFIG_VNODES=750 # -options CONFIG_VNODE_FREE_MIN=500 # -options CONFIG_VNODE_FREE_MIN=300 # -options CONFIG_VNODE_FREE_MIN=200 # -options CONFIG_VNODE_FREE_MIN=100 # -options CONFIG_VNODE_FREE_MIN=75 # +options CONFIG_VNODE_FREE_MIN=500 # +options CONFIG_VNODE_FREE_MIN=300 # +options CONFIG_VNODE_FREE_MIN=200 # +options CONFIG_VNODE_FREE_MIN=100 # +options CONFIG_VNODE_FREE_MIN=75 # options CONFIG_NC_HASH=5120 # options CONFIG_NC_HASH=4096 # @@ -236,15 +236,6 @@ options CONFIG_VFS_NAMES=2048 # options CONFIG_MAX_CLUSTERS=8 # options CONFIG_MAX_CLUSTERS=4 # -# -# configurable kauth credential related resources -# -options KAUTH_CRED_PRIMES_COUNT=7 # -options KAUTH_CRED_PRIMES_COUNT=3 # - -options KAUTH_CRED_PRIMES="{97, 241, 397, 743, 1499, 3989, 7499}" # -options KAUTH_CRED_PRIMES="{5, 17, 97}" # - # # configurable options for minumum number of buffers for kernel memory # @@ -260,9 +251,9 @@ options CONFIG_MIN_NIOBUF=32 # # # set maximum space used for packet buffers # -options CONFIG_NMBCLUSTERS="((1024 * 1024) / MCLBYTES)" # -options CONFIG_NMBCLUSTERS="((1024 * 512) / MCLBYTES)" # -options CONFIG_NMBCLUSTERS="((1024 * 256) / MCLBYTES)" # +options CONFIG_NMBCLUSTERS="((1024 * 1024) / MCLBYTES)" # +options CONFIG_NMBCLUSTERS="((1024 * 512) / MCLBYTES)" # +options CONFIG_NMBCLUSTERS="((1024 * 256) / MCLBYTES)" # # # Configure size of TCP hash table @@ -282,8 +273,8 @@ options CONFIG_ICMP_BANDLIM=50 # # CONFIG_AIO_PROCESS_MAX - process limit of async IO requests. # CONFIG_AIO_THREAD_COUNT - number of async IO worker threads created. 
# -options CONFIG_AIO_MAX=360 # -options CONFIG_AIO_MAX=180 # +options CONFIG_AIO_MAX=360 # +options CONFIG_AIO_MAX=180 # options CONFIG_AIO_MAX=90 # options CONFIG_AIO_MAX=45 # options CONFIG_AIO_MAX=20 # @@ -296,15 +287,15 @@ options CONFIG_AIO_PROCESS_MAX=12 # options CONFIG_AIO_PROCESS_MAX=8 # options CONFIG_AIO_PROCESS_MAX=4 # -options CONFIG_AIO_THREAD_COUNT=16 # +options CONFIG_AIO_THREAD_COUNT=16 # options CONFIG_AIO_THREAD_COUNT=8 # options CONFIG_AIO_THREAD_COUNT=4 # options CONFIG_AIO_THREAD_COUNT=3 # options CONFIG_AIO_THREAD_COUNT=2 # -options CONFIG_MAXVIFS=32 # -options CONFIG_MAXVIFS=16 # -options CONFIG_MAXVIFS=2 # +options CONFIG_MAXVIFS=32 # +options CONFIG_MAXVIFS=16 # +options CONFIG_MAXVIFS=2 # options CONFIG_MFCTBLSIZ=256 # options CONFIG_MFCTBLSIZ=128 # @@ -316,6 +307,12 @@ options CONFIG_MFCTBLSIZ=16 # options CONFIG_MSG_BSIZE=4096 # options CONFIG_MSG_BSIZE=16384 # +# +# maximum size of the per-process Mach IPC table +# +options CONFIG_IPC_TABLE_ENTRIES_STEPS=64 # 137898 entries # +options CONFIG_IPC_TABLE_ENTRIES_STEPS=256 # 300714 entries # + # # configurable kernel - use these options to strip strings from panic # and printf calls. @@ -343,35 +340,41 @@ options CONFIG_ENFORCE_LIBRARY_VALIDATION # # # code decryption... 
used on embedded for app protection, DSMOS on desktop # -options CONFIG_CODE_DECRYPTION # +options CONFIG_CODE_DECRYPTION # # # User Content Protection, used on embedded # -options CONFIG_PROTECT # +options CONFIG_PROTECT # # # enable per-process memory priority tracking # -options CONFIG_MEMORYSTATUS # +options CONFIG_MEMORYSTATUS # # # enable jetsam - used on embedded # -options CONFIG_JETSAM # +options CONFIG_JETSAM # + +# +# enable new wait queue implementation stats / debugging +# +options CONFIG_WAITQ_STATS # +options CONFIG_WAITQ_DEBUG # # # enable freezing of suspended processes - used on embedded # -options CONFIG_FREEZE # +options CONFIG_FREEZE # -options CHECK_CS_VALIDATION_BITMAP # +options CHECK_CS_VALIDATION_BITMAP # # # enable detectiion of file cache thrashing - used on platforms with # dynamic VM compression enabled # -options CONFIG_PHANTOM_CACHE # +options CONFIG_PHANTOM_CACHE # # # memory pressure event support @@ -390,28 +393,23 @@ options CONFIG_IOSCHED # options IMPORTANCE_INHERITANCE # options IMPORTANCE_DEBUG # -options CONFIG_TELEMETRY # +options CONFIG_TELEMETRY # options CONFIG_PROC_UUID_POLICY # -# -# In-kernel tests -# -options CONFIG_IN_KERNEL_TESTS # - # # ECC data logging # -options CONFIG_ECC_LOGGING # +options CONFIG_ECC_LOGGING # # # Ethernet (ARP) # -pseudo-device ether # +pseudo-device ether # # # Network loopback device # -pseudo-device loop # +pseudo-device loop # # # UCB pseudo terminal service # @@ -459,12 +457,13 @@ pseudo-device profile_prvd 1 init profile_init # # options HIBERNATION # system hibernation # -options IOKITCPP # C++ implementation # -options IOKITSTATS # IOKit statistics # -options CONFIG_SLEEP # # +options IOKITCPP # C++ implementation # +options IOKITSTATS # IOKit statistics # +options IOTRACKING # IOKit tracking # +options CONFIG_SLEEP # # options CONFIG_MAX_THREADS=64 # IOConfigThread threads -options NO_KEXTD # -options NO_KERNEL_HID # +options NO_KEXTD # +options NO_KERNEL_HID # # # Libkern 
configuration options @@ -485,21 +484,19 @@ options CONFIG_STATIC_CPPINIT # Static library initializes kext cpp ru # CONFIG_KEXT_BASEMENT - alloc post boot loaded kexts after prelinked kexts # -options CONFIG_KEXT_BASEMENT # # +options CONFIG_KEXT_BASEMENT # # # # security configuration options # -options CONFIG_LCTX # Login Context - -options CONFIG_MACF # Mandatory Access Control Framework # +options CONFIG_MACF # Mandatory Access Control Framework # options CONFIG_MACF_SOCKET_SUBSET # MAC socket subest (no labels) # -#options CONFIG_MACF_SOCKET # MAC socket labels # -#options CONFIG_MACF_NET # mbuf # -#options CONFIG_MACF_DEBUG # debug # +#options CONFIG_MACF_SOCKET # MAC socket labels # +#options CONFIG_MACF_NET # mbuf # +#options CONFIG_MACF_DEBUG # debug # -options CONFIG_AUDIT # Kernel auditing # +options CONFIG_AUDIT # Kernel auditing # # @@ -534,13 +531,13 @@ options MACH_ASSERT # # # options MACH_DEBUG # IPC debugging interface # options MACH_IPC_DEBUG # Enable IPC debugging calls # -options MACH_VM_DEBUG # # +options MACH_VM_DEBUG # # # # MACH_MP_DEBUG control the possible dead locks that may occur by controlling # that IPL level has been raised down to SPL0 after some calls to # hardclock device driver. # -options MACH_MP_DEBUG # # +options MACH_MP_DEBUG # # # # ZONE_DEBUG keeps track of all zalloc()ed elements to perform further # operations on each element. @@ -584,13 +581,13 @@ options MACH_LDEBUG # # # options KDEBUG # kernel tracing # options IST_KDEBUG # limited tracing # -options NO_KDEBUG # no kernel tracing # +options NO_KDEBUG # no kernel tracing # # # CONFIG_DTRACE enables code needed to support DTrace. Currently this is # only used for delivery of traps/interrupts to DTrace. # -options CONFIG_DTRACE # # +options CONFIG_DTRACE # # # kernel performance tracing options KPERF # @@ -598,13 +595,13 @@ options KPC # # MACH_COUNTERS enables code that handles various counters in the system. 
# -options MACH_COUNTERS # # +options MACH_COUNTERS # # # DEVELOPMENT define for development builds -options DEVELOPMENT # dev kernel # +options DEVELOPMENT # dev kernel # # DEBUG kernel -options DEBUG # general debugging code # +options DEBUG # general debugging code # ########################################################## # @@ -654,11 +651,6 @@ options CONFIG_ZLEAK_TRACE_MAP_NUM=4096 # # vc_progress_white - make the progress gear white instead of black options CONFIG_VC_PROGRESS_WHITE # -# -# Context switched counters -# -options CONFIG_COUNTERS # - # # Timeshare scheduler implementations # @@ -668,41 +660,55 @@ options CONFIG_SCHED_GRRR # options CONFIG_SCHED_GRRR_CORE # options CONFIG_SCHED_MULTIQ # options CONFIG_SCHED_TIMESHARE_CORE # -options CONFIG_SCHED_FAIRSHARE_CORE # -options CONFIG_SCHED_IDLE_IN_PLACE # +options CONFIG_SCHED_IDLE_IN_PLACE # +options CONFIG_SCHED_SFI # options CONFIG_GZALLOC # +options CONFIG_SCHED_DEFERRED_AST # # Enable allocation of contiguous physical memory through vm_map_enter_cpm() -options VM_CPM # +options VM_CPM # -options CONFIG_SKIP_PRECISE_USER_KERNEL_TIME # +options CONFIG_SKIP_PRECISE_USER_KERNEL_TIME # # # Switch to disable cpu, wakeup and high memory watermark monitors # -options CONFIG_NOMONITORS # +options CONFIG_NOMONITORS # -options MACH_KDP # KDP # -options CONFIG_SERIAL_KDP # KDP over serial # +options MACH_KDP # KDP # +options CONFIG_SERIAL_KDP # KDP over serial # options CONFIG_KDP_INTERACTIVE_DEBUGGING # +# +# Kernel Power On Self Tests +# +options CONFIG_XNUPOST # + +# +# Kernel proc reference instrumentation +# +options PROC_REF_DEBUG # + # # Kernel Voucher Attr Manager for Activity Trace # -options CONFIG_ATM # +options CONFIG_ATM # # # Kernel Voucher Attr Manager for BANK # -options CONFIG_BANK # +options CONFIG_BANK # # Group related tasks together into coalitions options CONFIG_COALITIONS # +# Enable support for sysdiagnose notifications +options CONFIG_SYSDIAGNOSE # + # Configurable Security 
Restrictions -options CONFIG_CSR # +options CONFIG_CSR # # # Console options diff --git a/config/MASTER.x86_64 b/config/MASTER.x86_64 index 6e356ba91..3baa99963 100644 --- a/config/MASTER.x86_64 +++ b/config/MASTER.x86_64 @@ -18,8 +18,8 @@ # # KERNEL_BASE = [ intel medium config_requires_u32_munging ] # KERNEL_RELEASE = [ KERNEL_BASE ] -# KERNEL_DEV = [ KERNEL_BASE development mach_assert ] -# KERNEL_DEBUG = [ KERNEL_BASE debug mach_assert ] +# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug] +# KERNEL_DEBUG = [ KERNEL_BASE debug mach_assert config_waitq_stats config_waitq_debug ] # BSD = [ mach_bsd sysv_sem sysv_msg sysv_shm config_imageboot config_workqueue psynch config_proc_uuid_policy ] # FILESYS = [ devfs hfs journaling fdesc config_dev_kmem config_fse quota namedstreams fifo config_volfs hfs_compression config_hfs_std config_hfs_alloc_rbtree config_hfs_trim config_imgsrc_access config_triggers config_ext_resolver config_searchfs config_hfs_dirlink config_appledouble ] # NFS = [ nfsclient nfsserver ] @@ -29,22 +29,28 @@ # PKTSCHED = [ pktsched_cbq pktsched_fairq pktsched_hfsc pktsched_priq ] # CLASSQ = [ classq_blue classq_red classq_rio ] # MULTIPATH = [ multipath mptcp ] -# IOKIT = [ iokit iokitcpp hibernation config_sleep iokitstats hypervisor ] -# LIBKERN = [ libkerncpp config_kxld config_kec_fips zlib crypto_sha2 ] -# PERF_DBG = [ config_dtrace mach_kdp config_serial_kdp kdp_interactive_debugging kperf kpc config_counters zleaks config_gzalloc ] -# MACH_BASE = [ mach config_kext_basement mdebug ipc_debug config_mca config_vmx config_mtrr config_lapic config_telemetry importance_inheritance config_atm config_bank config_coalitions hypervisor config_iosched ] +# IOKIT_BASE = [ iokit iokitcpp hibernation config_sleep iokitstats hypervisor ] +# IOKIT_RELEASE = [ IOKIT_BASE ] +# IOKIT_DEV = [ IOKIT_BASE iotracking ] +# IOKIT_DEBUG = [ IOKIT_BASE iotracking ] +# LIBKERN_BASE = [ libkerncpp config_kxld config_kec_fips zlib 
crypto_sha2 ] +# LIBKERN_RELEASE =[ LIBKERN_BASE ] +# LIBKERN_DEV = [ LIBKERN_BASE iotracking ] +# LIBKERN_DEBUG = [ LIBKERN_BASE iotracking ] +# PERF_DBG = [ config_dtrace mach_kdp config_serial_kdp kdp_interactive_debugging kperf kpc zleaks config_gzalloc ] +# MACH_BASE = [ mach config_kext_basement mdebug ipc_debug config_mca config_vmx config_mtrr config_lapic config_telemetry importance_inheritance config_atm config_bank config_coalitions hypervisor config_iosched config_sysdiagnose ] # MACH_RELEASE = [ MACH_BASE ] # MACH_DEV = [ MACH_BASE task_zone_info ] # MACH_DEBUG = [ MACH_BASE task_zone_info ] -# SCHED_BASE = [ config_sched_traditional config_sched_multiq ] +# SCHED_BASE = [ config_sched_traditional config_sched_multiq config_sched_sfi ] # SCHED_RELEASE = [ SCHED_BASE ] # SCHED_DEV = [ SCHED_BASE ] # SCHED_DEBUG = [ SCHED_BASE config_sched_grrr config_sched_proto ] # VM = [ vm_pressure_events memorystatus dynamic_codesigning config_code_decryption encrypted_swap phantom_cache] # SECURITY = [ config_macf config_audit config_csr ] -# RELEASE = [ KERNEL_RELEASE BSD FILESYS NFS NETWORKING PF VPN IOKIT LIBKERN PERF_DBG MACH_RELEASE SCHED_RELEASE VM SECURITY ] -# DEVELOPMENT = [ KERNEL_DEV BSD FILESYS NFS NETWORKING PF VPN IOKIT LIBKERN PERF_DBG MACH_DEV SCHED_DEV VM SECURITY ] -# DEBUG = [ KERNEL_DEBUG BSD FILESYS NFS NETWORKING PF VPN IOKIT LIBKERN PERF_DBG MACH_DEBUG SCHED_DEBUG VM SECURITY ] +# RELEASE = [ KERNEL_RELEASE BSD FILESYS NFS NETWORKING PF VPN IOKIT_RELEASE LIBKERN_RELEASE PERF_DBG MACH_RELEASE SCHED_RELEASE VM SECURITY ] +# DEVELOPMENT = [ KERNEL_DEV BSD FILESYS NFS NETWORKING PF VPN IOKIT_DEV LIBKERN_DEV PERF_DBG MACH_DEV SCHED_DEV VM SECURITY ] +# DEBUG = [ KERNEL_DEBUG BSD FILESYS NFS NETWORKING PF VPN IOKIT_DEBUG LIBKERN_DEBUG PERF_DBG MACH_DEBUG SCHED_DEBUG VM SECURITY ] # ###################################################################### # diff --git a/config/Mach.exports b/config/Mach.exports index 1cca2a9b7..1ea2e2030 100644 --- 
a/config/Mach.exports +++ b/config/Mach.exports @@ -55,3 +55,5 @@ _thread_policy_set _thread_reference _thread_terminate _thread_wakeup_prim +_vm_kernel_addrperm_external +_vm_kernel_unslide_or_perm_external diff --git a/config/MasterVersion b/config/MasterVersion index a2049c677..3aded3442 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -14.5.0 +15.0.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. diff --git a/config/Private.exports b/config/Private.exports index d7995f81e..e04f8d26b 100644 --- a/config/Private.exports +++ b/config/Private.exports @@ -1,3 +1,4 @@ +_PE_i_can_has_debugger __ZN16IOPlatformExpert* __ZNK16IOPlatformExpert* __ZTV16IOPlatformExpert @@ -10,12 +11,12 @@ __ZTV5IOCPU __ZN24IOCPUInterruptController* __ZNK24IOCPUInterruptController* __ZTV24IOCPUInterruptController +_PE_i_can_has_kernel_configuration _assert_wait_deadline_with_leeway _assert_wait_timeout_with_leeway _audio_active _b_to_q _bdevsw -_boot _bootcache_contains_block _bsd_hostname _bsd_set_dependency_capable @@ -52,20 +53,32 @@ _convert_task_to_port _cp_key_store_action _cp_register_wraps _cpu_to_processor -_cs_enforcement _cs_blob_reset_cache -_cs_require_lv +_cs_debug +_cs_enforcement _cs_entitlement_flags _cs_entitlements_blob_get _cs_get_cdhash _cs_identity_get -_cs_register_cscsr -_csfg_get_teamid +_cs_require_lv +_cs_restricted +_csblob_find_blob_bytes +_csblob_get_cdhash +_csblob_get_entitlements +_csblob_get_identity +_csblob_get_platform_binary +_csblob_get_flags +_csblob_get_teamid +_csfg_get_cdhash _csfg_get_path _csfg_get_platform_binary -_csproc_get_teamid +_csfg_get_teamid +_csproc_get_blob _csproc_get_platform_binary +_csproc_get_teamid +_csvnode_get_blob _csvnode_get_teamid +_csvnode_print_debug _ctl_enqueuembuf_list _ctl_id_by_name _ctl_name_by_id @@ -74,6 +87,8 @@ _fd_rdwr _get_aiotask _gpu_accumulate_time _gpu_describe 
+_gpu_fceiling_cb_register +_gpu_submission_telemetry _hz _ifnet_allocate_extended _ifnet_bandwidths @@ -89,7 +104,7 @@ _ifnet_enqueue _ifnet_flowid _ifnet_get_delegate _ifnet_get_inuse_address_list -_ifnet_get_ipsec_offload_frames +_ifnet_get_keepalive_offload_frames _ifnet_get_local_ports _ifnet_get_local_ports_extended _ifnet_get_rcvq_maxlen @@ -102,6 +117,7 @@ _ifnet_inet_defrouter_llreachinfo _ifnet_input_extended _ifnet_latencies _ifnet_link_quality +_ifnet_link_status_report _ifnet_notice_master_elected _ifnet_notice_node_absence _ifnet_notice_node_presence @@ -143,8 +159,12 @@ _kdp_unregister_link _kdp_unregister_send_receive _kern_asl_msg _kern_asl_msg_va -_kmem_alloc_kobject -_kmem_alloc_pageable +_kern_stack_snapshot_with_reason +_kernel_debug_string +_kevent_qos_internal +_kmem_alloc_kobject:_kmem_alloc_kobject_external +_kmem_alloc_pageable:_kmem_alloc_pageable_external +_kx_qsort _linesw _log _logwakeup @@ -176,6 +196,7 @@ _mbuf_get_traffic_class_index _mbuf_get_traffic_class_max_count _mbuf_is_service_class_privileged:_mbuf_is_traffic_class_privileged _mbuf_pkthdr_aux_flags +_mbuf_get_unsent_data_bytes _mcl_to_paddr _mountroot_post_hook _net_add_domain:_net_add_domain_old @@ -183,29 +204,8 @@ _net_add_proto:_net_add_proto_old _net_del_domain:_net_del_domain_old _net_del_proto:_net_del_proto_old _netboot_root -_perf_monitor_register_* -_perf_monitor_unregister _pffinddomain:_pffinddomain_old _pffindproto:_pffindproto_old -_pmc_config_set_interrupt_threshold -_pmc_config_set_value -_pmc_create_config -_pmc_find_by_name -_pmc_free_config -_pmc_free_pmc_list -_pmc_get_accessible_core_list -_pmc_get_name -_pmc_get_pmc_list -_pmc_register -_pmc_reservation_free -_pmc_reservation_read -_pmc_reservation_start -_pmc_reservation_stop -_pmc_reservation_write -_pmc_reserve -_pmc_reserve_task -_pmc_reserve_thread -_pmc_unregister _port_name_to_task _port_name_to_thread _post_sys_powersource @@ -213,6 +213,7 @@ _prng_factory_register _proc_getexecutablevnode 
_proc_pidbackgrounded _proc_pidversion +_proc_set_responsible_pid _proc_task _proc_uniqueid _pru_abort_notsupp @@ -268,6 +269,7 @@ _soreserve _sorwakeup _sosend _strnstr +_sysdiagnose_notify_user _termioschars _thread_call_allocate_with_priority _thread_call_cancel_wait @@ -282,6 +284,7 @@ _throttle_info_mount_rel _throttle_info_ref_by_mask _throttle_info_rel_by_mask _throttle_info_release +_throttle_info_reset_window _throttle_info_update _throttle_info_update_by_mask _throttle_lowpri_io @@ -317,7 +320,6 @@ _vfs_context_bind _vfs_context_get_special_port _vfs_context_set_special_port _vfs_devvp -_vfs_get_notify_attributes _vfs_getattr _vfs_getbyid _vfs_mntlabel @@ -337,15 +339,16 @@ _vm_map_page_shift _vm_map_page_size _vm_map_round_page_mask _vm_map_trunc_page_mask -_vm_map_wire_and_extract +_vm_map_wire_and_extract:_vm_map_wire_and_extract_external _vm_page_wire_count _vn_getpath_fsenter _vn_searchfs_inappropriate_name +_vnode_create_empty +_vnode_initialize _vnode_isdyldsharedcache _vnode_ismonitored _vnode_istty _vnode_lookup_continue_needed -_vnode_notify _vnop_compound_mkdir_desc _vnop_compound_open_desc _vnop_compound_remove_desc diff --git a/config/Private.x86_64.exports b/config/Private.x86_64.exports index 8df705eb6..35ecccdf5 100644 --- a/config/Private.x86_64.exports +++ b/config/Private.x86_64.exports @@ -14,11 +14,15 @@ _apply_func_phys _bufattr_delayidlesleep _cpu_to_lapic _cpuid_features +_cpuid_leaf7_features _cpuid_info _csr_check _csr_get_active_config -_csr_get_pending_config _csr_set_allow_all +_hv_ept_pmap_create +_hv_get* +_hv_release* +_hv_set* _lapic_end_of_interrupt _lapic_get_cmci_vector _lapic_unmask_perfcnt_interrupt @@ -46,6 +50,4 @@ _xts_encrypt _xts_start _aes_decrypt _PE_reboot_on_panic -_hv_set* -_hv_get* -_hv_release* + diff --git a/config/Unsupported.exports b/config/Unsupported.exports index 7d3ed4251..6cea97213 100644 --- a/config/Unsupported.exports +++ b/config/Unsupported.exports @@ -1,3 +1,4 @@ +_PE_i_can_has_debugger 
_Debugger _FastUnicodeCompare _KUNCExecute @@ -7,7 +8,6 @@ _KUNCUserNotificationDisplayFromBundle _KUNCUserNotificationDisplayNotice _NDR_record _PE_kputc -__Z22OSFlushObjectTrackListv __ZN11IOMemoryMap9wireRangeEjyy __ZN15IOWatchDogTimer10gMetaClassE __ZN15IOWatchDogTimer10superClassE @@ -50,6 +50,7 @@ __ZTV16IOPlatformDevice __ZTV9IODTNVRAM __ZTVN15IOWatchDogTimer9MetaClassE __doprnt +__doprnt_log __dtrace_register_anon_DOF _aes_decrypt_cbc _aes_decrypt_key @@ -60,7 +61,6 @@ _aes_encrypt_key _aes_encrypt_key128 _aes_encrypt_key256 _appleClut8 -_boot _cons_cinput _cons_ops _conslog_putc @@ -72,8 +72,6 @@ _delay_for_interval _des_ecb_encrypt _des_ecb_key_sched _gIODTSharedInterrupts -_gOSObjectTrackList -_gOSObjectTrackThread _gPEClockFrequencyInfo _gPESerialBaud _get_bsdtask_info @@ -88,7 +86,7 @@ _host_get_exception_ports _host_priv_self _hz _ipc_kernel_map -_kalloc +_kalloc:_kalloc_external _kauth_cred_issuser _kauth_cred_label_update _kauth_guid_equal @@ -100,7 +98,7 @@ _kernel_map _kernel_pmap _kev_post_msg _kfree -_kmem_alloc +_kmem_alloc:_kmem_alloc_external _kmem_free _kmputc _lck_mtx_assert @@ -174,7 +172,7 @@ _vm_deallocate _vm_map _vm_map_deallocate _vm_map_unwire -_vm_map_wire +_vm_map_wire:_vm_map_wire_external _set_vm_privilege _vm_protect _vm_region diff --git a/config/Unsupported.x86_64.exports b/config/Unsupported.x86_64.exports index 4eb17cafa..0f3ed92d1 100644 --- a/config/Unsupported.x86_64.exports +++ b/config/Unsupported.x86_64.exports @@ -42,3 +42,4 @@ _sock_retain _tmrCvt _tsc_get_info _PE_state + diff --git a/config/Unused.exports b/config/Unused.exports index e4a00cd8a..4acf84e35 100644 --- a/config/Unused.exports +++ b/config/Unused.exports @@ -6,3 +6,4 @@ _atm_mana* _bank_mana* _ipc_importance_mana* _user_data_mana* +_arm_hardware_page_size diff --git a/iokit/.clang-format b/iokit/.clang-format new file mode 100644 index 000000000..cd99c24e5 --- /dev/null +++ b/iokit/.clang-format @@ -0,0 +1,30 @@ +# See top level .clang-format for 
explanation of options +AlignEscapedNewlinesLeft: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: false +AlwaysBreakBeforeMultilineStrings: true +BinPackArguments: true +BinPackParameters: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Allman +ColumnLimit: 132 +IndentCaseLabels: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: false +PointerAlignment: Middle +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +TabWidth: 4 +UseTab: Never diff --git a/iokit/Families/IONVRAM/IONVRAMController.cpp b/iokit/Families/IONVRAM/IONVRAMController.cpp index 7a866c5af..4d8829507 100644 --- a/iokit/Families/IONVRAM/IONVRAMController.cpp +++ b/iokit/Families/IONVRAM/IONVRAMController.cpp @@ -38,13 +38,11 @@ OSDefineAbstractStructors(IONVRAMController, IOService); // init // // **************************************************************************** -bool IONVRAMController::start(IOService *provider) +void IONVRAMController::registerService(IOOptionBits options) { - if(!super::start(provider)) return false; - - getPlatform()->registerNVRAMController(this); - - return true; + super::registerService(options); + + getPlatform()->registerNVRAMController(this); } // **************************************************************************** diff --git a/iokit/IOKit/IOBSD.h b/iokit/IOKit/IOBSD.h index 8938656ca..f2aadfed8 100644 --- a/iokit/IOKit/IOBSD.h +++ b/iokit/IOKit/IOBSD.h @@ -38,4 +38,35 @@ #define kIOBSDMinorKey "BSD Minor" // (an OSNumber) #define kIOBSDUnitKey "BSD 
Unit" // (an OSNumber) + +#ifdef XNU_KERNEL_PRIVATE + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct IOPolledFileIOVars; +struct mount; + +enum +{ + kIOMountChangeMount = 0x00000101, + kIOMountChangeUnmount = 0x00000102, + kIOMountChangeWillResize = 0x00000201, + kIOMountChangeDidResize = 0x00000202, +}; +extern void IOBSDMountChange(struct mount * mp, uint32_t op); +extern boolean_t IOTaskHasEntitlement(task_t task, const char * entitlement); + +extern struct IOPolledFileIOVars * gIOPolledCoreFileVars; + +#ifdef __cplusplus +} +#endif + +#endif /* XNU_KERNEL_PRIVATE */ + #endif /* !_IOBSD_H */ diff --git a/iokit/IOKit/IOBufferMemoryDescriptor.h b/iokit/IOKit/IOBufferMemoryDescriptor.h index fdb0a398c..486ce4e13 100644 --- a/iokit/IOKit/IOBufferMemoryDescriptor.h +++ b/iokit/IOKit/IOBufferMemoryDescriptor.h @@ -120,7 +120,7 @@ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor OSMetaClassDeclareReservedUnused(IOBufferMemoryDescriptor, 15); protected: - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; public: @@ -270,7 +270,7 @@ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor #ifndef __LP64__ virtual void * getVirtualSegment(IOByteCount offset, - IOByteCount * length) APPLE_KEXT_DEPRECATED; /* use getBytesNoCopy() instead */ + IOByteCount * length) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED; /* use getBytesNoCopy() instead */ #endif /* !__LP64__ */ }; diff --git a/iokit/IOKit/IOCPU.h b/iokit/IOKit/IOCPU.h index 615d103af..a9ae2e605 100644 --- a/iokit/IOKit/IOCPU.h +++ b/iokit/IOKit/IOCPU.h @@ -78,17 +78,19 @@ class IOCPU : public IOService public: static void initCPUs(void); - virtual bool start(IOService *provider); - virtual OSObject *getProperty(const OSSymbol *aKey) const; - virtual bool setProperty(const OSSymbol *aKey, OSObject *anObject); - virtual bool serializeProperties(OSSerialize *serialize) const; - virtual IOReturn setProperties(OSObject *properties); + virtual bool 
start(IOService *provider) APPLE_KEXT_OVERRIDE; + virtual OSObject *getProperty(const OSSymbol *aKey) const APPLE_KEXT_OVERRIDE; + virtual bool setProperty(const OSSymbol *aKey, OSObject *anObject) APPLE_KEXT_OVERRIDE; + virtual bool serializeProperties(OSSerialize *serialize) const APPLE_KEXT_OVERRIDE; + virtual IOReturn setProperties(OSObject *properties) APPLE_KEXT_OVERRIDE; virtual void initCPU(bool boot) = 0; virtual void quiesceCPU(void) = 0; virtual kern_return_t startCPU(vm_offset_t start_paddr, vm_offset_t arg_paddr) = 0; virtual void haltCPU(void) = 0; virtual void signalCPU(IOCPU *target); + virtual void signalCPUDeferred(IOCPU * target); + virtual void signalCPUCancel(IOCPU * target); virtual void enableCPUTimeBase(bool enable); virtual UInt32 getCPUNumber(void); @@ -113,6 +115,7 @@ void IOCPUSleepKernel(void); extern "C" kern_return_t IOCPURunPlatformQuiesceActions(void); extern "C" kern_return_t IOCPURunPlatformActiveActions(void); extern "C" kern_return_t IOCPURunPlatformHaltRestartActions(uint32_t message); +extern "C" kern_return_t IOCPURunPlatformPanicActions(uint32_t message); class IOCPUInterruptController : public IOInterruptController { @@ -137,17 +140,17 @@ class IOCPUInterruptController : public IOInterruptController virtual IOReturn registerInterrupt(IOService *nub, int source, void *target, IOInterruptHandler handler, - void *refCon); + void *refCon) APPLE_KEXT_OVERRIDE; virtual IOReturn getInterruptType(IOService *nub, int source, - int *interruptType); + int *interruptType) APPLE_KEXT_OVERRIDE; - virtual IOReturn enableInterrupt(IOService *nub, int source); - virtual IOReturn disableInterrupt(IOService *nub, int source); - virtual IOReturn causeInterrupt(IOService *nub, int source); + virtual IOReturn enableInterrupt(IOService *nub, int source) APPLE_KEXT_OVERRIDE; + virtual IOReturn disableInterrupt(IOService *nub, int source) APPLE_KEXT_OVERRIDE; + virtual IOReturn causeInterrupt(IOService *nub, int source) APPLE_KEXT_OVERRIDE; virtual 
IOReturn handleInterrupt(void *refCon, IOService *nub, - int source); + int source) APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(IOCPUInterruptController, 0); OSMetaClassDeclareReservedUnused(IOCPUInterruptController, 1); diff --git a/iokit/IOKit/IOCatalogue.h b/iokit/IOKit/IOCatalogue.h index 63781fc75..693e0ef7e 100644 --- a/iokit/IOKit/IOCatalogue.h +++ b/iokit/IOKit/IOCatalogue.h @@ -81,7 +81,7 @@ class IOCatalogue : public OSObject @function free @abstract Cleans up the database and deallocates memory allocated at initialization. This is never called in normal operation of the system. */ - void free( void ); + void free( void ) APPLE_KEXT_OVERRIDE; /*! @function findDrivers @@ -215,7 +215,7 @@ class IOCatalogue : public OSObject @param s The serializer object. @result Returns false if unable to serialize database, most likely due to memory shortage. */ - virtual bool serialize(OSSerialize * s) const; + virtual bool serialize(OSSerialize * s) const APPLE_KEXT_OVERRIDE; bool serializeData(IOOptionBits kind, OSSerialize * s) const; diff --git a/iokit/IOKit/IOCommand.h b/iokit/IOKit/IOCommand.h index 136c41bd5..fc77b627f 100644 --- a/iokit/IOKit/IOCommand.h +++ b/iokit/IOKit/IOCommand.h @@ -71,7 +71,7 @@ class IOCommand : public OSObject OSDeclareDefaultStructors(IOCommand) public: - virtual bool init(void); + virtual bool init(void) APPLE_KEXT_OVERRIDE; /*! @var fCommandChain This variable is used by the current 'owner' to queue the command. During the life cycle of a command it moves through a series of queues. This is the queue pointer for it. Only valid while 'ownership' is clear. For instance a IOCommandPool uses this pointer to maintain its list of free commands. May be manipulated using the kern/queue.h macros */ diff --git a/iokit/IOKit/IOCommandGate.h b/iokit/IOKit/IOCommandGate.h index d38c88670..26624116f 100644 --- a/iokit/IOKit/IOCommandGate.h +++ b/iokit/IOKit/IOCommandGate.h @@ -38,14 +38,14 @@ /*! 
@class IOCommandGate : public IOEventSource - @abstract Single-threaded work-loop client request mechanism. - @discussion An IOCommandGate instance is an extremely light way mechanism -that executes an action on the driver's work-loop. 'On the work-loop' is -actually a lie but the work-loop single threaded semantic is maintained for this -event source. Using the work-loop gate rather than execution by the workloop. -The command gate tests for a potential self dead lock by checking if the -runCommand request is made from the work-loop's thread, it doesn't check for a -mutual dead lock though where a pair of work loop's dead lock each other. + @abstract Single-threaded work loop client request mechanism. + @discussion An IOCommandGate instance is an extremely lightweight mechanism +that executes an action on the driver's work loop. Although the code does not +technically execute on the work loop itself, a single-threaded work loop semantic +is maintained for this event source using the work loop gate. The command gate +tests for a potential self dead lock by checking if the runCommand request is +made from the work loop's thread, it doesn't check for a mutual dead lock though +where a pair of work loop's dead lock each other.

The IOCommandGate is a lighter weight version of the IOCommandQueue and should be used in preference. Generally use a command queue whenever you need a @@ -113,17 +113,17 @@ compiler warning. Defaults to zero, see $link IOEventSource::setAction. virtual bool init(OSObject *owner, Action action = 0); // Superclass overrides - virtual void free(); - virtual void setWorkLoop(IOWorkLoop *inWorkLoop); + virtual void free() APPLE_KEXT_OVERRIDE; + virtual void setWorkLoop(IOWorkLoop *inWorkLoop) APPLE_KEXT_OVERRIDE; /*! @function runCommand - @abstract Single thread a command with the target work-loop. + @abstract Single thread a command with the target work loop. @discussion Client function that causes the current action to be called in -a single threaded manner. Beware the work-loop's gate is recursive and command +a single threaded manner. Beware the work loop's gate is recursive and command gates can cause direct or indirect re-entrancy. When the executing on a -client's thread runCommand will sleep until the work-loop's gate opens for +client's thread runCommand will sleep until the work loop's gate opens for execution of client actions, the action is single threaded against all other -work-loop event sources. If the command is disabled the attempt to run a command will be stalled until enable is called. +work loop event sources. If the command is disabled the attempt to run a command will be stalled until enable is called. @param arg0 Parameter for action of command gate, defaults to 0. @param arg1 Parameter for action of command gate, defaults to 0. @param arg2 Parameter for action of command gate, defaults to 0. @@ -134,28 +134,28 @@ work-loop event sources. If the command is disabled the attempt to run a comman void *arg2 = 0, void *arg3 = 0); /*! @function runAction - @abstract Single thread a call to an action with the target work-loop. + @abstract Single thread a call to an action with the target work loop. 
@discussion Client function that causes the given action to be called in -a single threaded manner. Beware the work-loop's gate is recursive and command +a single threaded manner. Beware the work loop's gate is recursive and command gates can cause direct or indirect re-entrancy. When the executing on a -client's thread runAction will sleep until the work-loop's gate opens for +client's thread runAction will sleep until the work loop's gate opens for execution of client actions, the action is single threaded against all other -work-loop event sources. If the command is disabled the attempt to run a command will be stalled until enable is called. - @param action Pointer to function to be executed in work-loop context. +work loop event sources. If the command is disabled the attempt to run a command will be stalled until enable is called. + @param action Pointer to function to be executed in the context of the work loop. @param arg0 Parameter for action parameter, defaults to 0. @param arg1 Parameter for action parameter, defaults to 0. @param arg2 Parameter for action parameter, defaults to 0. @param arg3 Parameter for action parameter, defaults to 0. - @result kIOReturnSuccess if successful. kIOReturnBadArgument if action is not defined, kIOReturnAborted if a disabled command gate is free()ed before being reenabled. + @result The return value of action if it was called, kIOReturnBadArgument if action is not defined, kIOReturnAborted if a disabled command gate is free()ed before being reenabled. */ virtual IOReturn runAction(Action action, void *arg0 = 0, void *arg1 = 0, void *arg2 = 0, void *arg3 = 0); /*! @function attemptCommand - @abstract Single thread a command with the target work-loop. + @abstract Single thread a command with the target work loop. @discussion Client function that causes the current action to be called in -a single threaded manner. When the executing on a client's thread attemptCommand will fail if the work-loop's gate is closed. 
+a single threaded manner. When the executing on a client's thread attemptCommand will fail if the work loop's gate is closed. @param arg0 Parameter for action of command gate, defaults to 0. @param arg1 Parameter for action of command gate, defaults to 0. @param arg2 Parameter for action of command gate, defaults to 0. @@ -166,12 +166,12 @@ a single threaded manner. When the executing on a client's thread attemptComman void *arg2 = 0, void *arg3 = 0); /*! @function attemptAction - @abstract Single thread a call to an action with the target work-loop. + @abstract Single thread a call to an action with the target work loop. @discussion Client function that causes the given action to be called in -a single threaded manner. Beware the work-loop's gate is recursive and command +a single threaded manner. Beware the work loop's gate is recursive and command gates can cause direct or indirect re-entrancy. When the executing on a -client's thread attemptCommand will fail if the work-loop's gate is closed. - @param action Pointer to function to be executed in work-loop context. +client's thread attemptCommand will fail if the work loop's gate is closed. + @param action Pointer to function to be executed in context of the work loop. @param arg0 Parameter for action parameter, defaults to 0. @param arg1 Parameter for action parameter, defaults to 0. @param arg2 Parameter for action parameter, defaults to 0. @@ -201,12 +201,12 @@ client's thread attemptCommand will fail if the work-loop's gate is closed. /*! @function disable @abstract Disable the command gate @discussion When a command gate is disabled all future calls to runAction and runCommand will stall until the gate is enable()d later. This can be used to block client threads when a system sleep is requested. The IOWorkLoop thread itself will never stall, even when making runAction/runCommand calls. This call must be made from a gated context, to clear potential race conditions. 
*/ - virtual void disable(); + virtual void disable() APPLE_KEXT_OVERRIDE; /*! @function enable @abstract Enable command gate, this will unblock any blocked Commands and Actions. @discussion Enable the command gate. The attemptAction/attemptCommand calls will now be enabled and can succeeed. Stalled runCommand/runAction calls will be woken up. */ - virtual void enable(); + virtual void enable() APPLE_KEXT_OVERRIDE; /*! @function commandSleep @abstract Put a thread that is currently holding the command gate to sleep. diff --git a/iokit/IOKit/IOCommandPool.h b/iokit/IOKit/IOCommandPool.h index 91069f3d3..442815761 100644 --- a/iokit/IOKit/IOCommandPool.h +++ b/iokit/IOKit/IOCommandPool.h @@ -108,7 +108,7 @@ class IOCommandPool : public OSObject * Free all of this object's outstanding resources. */ - virtual void free(void); + virtual void free(void) APPLE_KEXT_OVERRIDE; public: diff --git a/iokit/IOKit/IOCommandQueue.h b/iokit/IOKit/IOCommandQueue.h index 1dfc5270e..124369d41 100644 --- a/iokit/IOKit/IOCommandQueue.h +++ b/iokit/IOKit/IOCommandQueue.h @@ -59,9 +59,9 @@ class IOCommandQueue : public IOEventSource int producerIndex, consumerIndex; int size; - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; - virtual bool checkForWork(); + virtual bool checkForWork() APPLE_KEXT_OVERRIDE; public: static IOCommandQueue *commandQueue(OSObject *inOwner, diff --git a/iokit/IOKit/IOConditionLock.h b/iokit/IOKit/IOConditionLock.h index f628feebc..a3a044069 100644 --- a/iokit/IOKit/IOConditionLock.h +++ b/iokit/IOKit/IOConditionLock.h @@ -54,7 +54,7 @@ class IOConditionLock : public OSObject public: static IOConditionLock *withCondition(int condition, bool inIntr = true); virtual bool initWithCondition(int condition, bool inIntr = true); - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; virtual bool tryLock(); // acquire lock, no waiting virtual int lock(); // acquire lock (enter critical section) diff --git a/iokit/IOKit/IODMACommand.h 
b/iokit/IOKit/IODMACommand.h index a680aec74..54e0815bb 100644 --- a/iokit/IOKit/IODMACommand.h +++ b/iokit/IOKit/IODMACommand.h @@ -31,6 +31,7 @@ #include #include class IOMapper; +class IOBufferMemoryDescriptor; /**************************** class IODMACommand ***************************/ @@ -46,6 +47,20 @@ class IOMapper; The IODMACommand can be used in a 'weak-linked' manner. To do this you must avoid using any static member functions. Use the, much slower but safe, weakWithSpecification function. On success a dma command instance will be returned. This instance can then be used to clone as many commands as is needed. Remember deriving from this class can not be done weakly, that is no weak subclassing! */ + +enum +{ + kIODMAMapOptionMapped = 0x00000000, + kIODMAMapOptionBypassed = 0x00000001, + kIODMAMapOptionNonCoherent = 0x00000002, + kIODMAMapOptionUnmapped = 0x00000003, + kIODMAMapOptionTypeMask = 0x0000000f, + + kIODMAMapOptionNoCacheStore = 0x00000010, // Memory in descriptor + kIODMAMapOptionOnChip = 0x00000020, // Indicates DMA is on South Bridge + kIODMAMapOptionIterateOnly = 0x00000040 // DMACommand will be used as a cursor only +}; + class IODMACommand : public IOCommand { OSDeclareDefaultStructors(IODMACommand); @@ -78,14 +93,25 @@ friend class IODMAEventSource; @constant kMaxMappingOptions Internal use only */ enum MappingOptions { - kMapped = 0x00000000, - kBypassed = 0x00000001, - kNonCoherent = 0x00000002, - kTypeMask = 0x0000000f, - - kNoCacheStore = 0x00000010, // Memory in descriptor - kOnChip = 0x00000020, // Indicates DMA is on South Bridge - kIterateOnly = 0x00000040 // DMACommand will be used as a cursor only + kMapped = kIODMAMapOptionMapped, + kBypassed = kIODMAMapOptionBypassed, + kNonCoherent = kIODMAMapOptionNonCoherent, + kUnmapped = kIODMAMapOptionUnmapped, + kTypeMask = kIODMAMapOptionTypeMask, + + kNoCacheStore = kIODMAMapOptionNoCacheStore, // Memory in descriptor + kOnChip = kIODMAMapOptionOnChip, // Indicates DMA is on South 
Bridge + kIterateOnly = kIODMAMapOptionIterateOnly // DMACommand will be used as a cursor only + }; + + struct SegmentOptions { + uint8_t fStructSize; + uint8_t fNumAddressBits; + uint64_t fMaxSegmentSize; + uint64_t fMaxTransferSize; + uint32_t fAlignment; + uint32_t fAlignmentLength; + uint32_t fAlignmentInternalSegments; }; /*! @enum SynchronizeOptions @@ -187,7 +213,8 @@ friend class IODMAEventSource; @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. @param alignment Alignment restriction, in bytes, on I/O bus addresses. Defaults to single byte alignment. @param mapper For mapping types kMapped & kBypassed mapper is used to define the hardware that will perform the mapping, defaults to the system mapper. - @result Returns a new memory cursor if successfully created and initialized, 0 otherwise. + @param refCon Reference Constant + @result Returns a new IODMACommand if successfully created and initialized, 0 otherwise. */ static IODMACommand * withSpecification(SegmentFunction outSegFunc, @@ -210,6 +237,7 @@ friend class IODMAEventSource; @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. @param alignment Alignment restriction, in bytes, on I/O bus addresses. Defaults to single byte alignment. @param mapper For mapping types kMapped & kBypassed mapper is used to define the hardware that will perform the mapping, defaults to the system mapper. + @param refCon Reference Constant @result kIOReturnSuccess if everything is OK, otherwise kIOReturnBadArgument if newCommand is NULL, kIOReturnUnsupported if the kernel doesn't export IODMACommand or IOReturnError if the new command fails to init, q.v. initWithSpecification. */ // Note that the function has the attribute always_inline. 
@@ -228,11 +256,27 @@ friend class IODMAEventSource; IOMapper *mapper = 0, void *refCon = 0) __attribute__((always_inline)); + static IODMACommand * + withSpecification(SegmentFunction outSegFunc, + const SegmentOptions * segmentOptions, + uint32_t mappingOptions, + IOMapper * mapper, + void * refCon); + + +/*! @function withRefCon + @abstract Creates and initializes an unspecified IODMACommand. + @discussion Factory function to create and initialize an unspecified IODMACommand. prepareWithSpecification() must be used to prepare the IODMACommand before use. + @param refCon Reference Constant + @result Returns a new IODMACommand if successfully created and initialized, 0 otherwise. +*/ + static IODMACommand * withRefCon(void * refCon); + /*! @function cloneCommand @abstract Creates a new command based on the specification of the current one. @discussion Factory function to create and initialise an IODMACommand in one operation. The current command's specification will be duplicated in the new object, but however none of its state will be duplicated. This means that it is safe to clone a command even if it is currently active and running, however you must be certain that the command to be duplicated does have a valid reference for the duration. - @result Returns a new memory cursor if successfully created and initialised, 0 otherwise. + @result Returns a new IODMACommand if successfully created and initialised, 0 otherwise. */ virtual IODMACommand *cloneCommand(void *refCon = 0); @@ -245,6 +289,7 @@ friend class IODMAEventSource; @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. @param alignment Alignment restriction, in bytes, on I/O bus addresses. Defaults to single byte alignment. @param mapper For mapping types kMapped & kBypassed mapper is used to define the hardware that will perform the mapping, defaults to the system mapper. 
+ @param refCon Reference Constant @result Can fail if the mapping type is not recognised, if one of the 3 mandatory parameters are set to 0, if a 32 bit output function is selected when more than 32 bits of address is required or, if kBypassed is requested on a machine that doesn't support bypassing. Returns true otherwise. */ virtual bool initWithSpecification( SegmentFunction outSegFunc, @@ -278,6 +323,11 @@ friend class IODMAEventSource; */ virtual const IOMemoryDescriptor *getMemoryDescriptor() const; +/*! @function getIOMemoryDescriptor + @abstract Get the memory descriptor to be used for DMA +*/ + IOMemoryDescriptor * getIOMemoryDescriptor() const; + /*! @function prepare @abstract Prepare the memory for an I/O transfer. @discussion Allocate the mapping resources neccessary for this transfer, specifying a sub range of the IOMemoryDescriptor that will be the target of the I/O. The complete() method frees these resources. Data may be copied to buffers for kIODirectionOut memory descriptors, depending on hardware mapping resource availabilty or alignment restrictions. It should be noted that the this function may block and should only be called on the clients context, i.e never call this routine while gated; also the call itself is not thread safe though this should be an issue as each IODMACommand is independant. 
@@ -362,15 +412,20 @@ friend class IODMAEventSource; UInt32 *numSegments) { return genIOVMSegments(offset, segments, numSegments); }; - IOReturn - genIOVMSegments(SegmentFunction segmentFunction, - UInt64 *offsetP, - void *segmentsP, - UInt32 *numSegmentsP); - - virtual void free(); + IOReturn + genIOVMSegments(SegmentFunction segmentFunction, + UInt64 *offsetP, + void *segmentsP, + UInt32 *numSegmentsP); + + virtual void free() APPLE_KEXT_OVERRIDE; private: + IOReturn setSpecification(SegmentFunction outSegFunc, + const SegmentOptions * segmentOptions, + uint32_t mappingOptions, + IOMapper * mapper); + typedef IOReturn (*InternalSegmentFunction)( void *reference, IODMACommand *target, @@ -428,12 +483,11 @@ friend class IODMAEventSource; bool flushCache = true, bool synchronize = true); - static IOReturn transferSegment( - void *reference, - IODMACommand *target, - Segment64 segment, - void *segments, - UInt32 segmentIndex); + static IOReturn transferSegment(void *reference, + IODMACommand *target, + Segment64 segment, + void *segments, + UInt32 segmentIndex); /*! @function getPreparedOffsetAndLength @abstract Returns the offset and length into the target IOMemoryDescriptor of a prepared IODDMACommand. @@ -444,17 +498,49 @@ friend class IODMAEventSource; virtual IOReturn getPreparedOffsetAndLength(UInt64 * offset, UInt64 * length); - UInt8 getNumAddressBits(void); - UInt32 getAlignment(void); - + UInt8 getNumAddressBits(void); + UInt32 getAlignment(void); + uint32_t getAlignmentLength(void); + uint32_t getAlignmentInternalSegments(void); + + +/*! @function initWithRefCon + @abstract Secondary initializer for the IODMACommand class. + @param refCon Reference Constant + @result Can fail if super init fails. Returns true otherwise. 
+*/ + + virtual + bool initWithRefCon(void * refCon = 0); + + virtual + bool initWithSpecification(SegmentFunction outSegFunc, + const SegmentOptions * segmentOptions, + uint32_t mappingOptions, + IOMapper * mapper, + void * refCon); + + virtual + IOReturn prepareWithSpecification(SegmentFunction outSegFunc, + const SegmentOptions * segmentOptions, + uint32_t mappingOptions, + IOMapper * mapper, + uint64_t offset, + uint64_t length, + bool flushCache = true, + bool synchronize = true); + + virtual + IOBufferMemoryDescriptor * createCopyBuffer(IODirection direction, UInt64 length); + private: OSMetaClassDeclareReservedUsed(IODMACommand, 0); OSMetaClassDeclareReservedUsed(IODMACommand, 1); OSMetaClassDeclareReservedUsed(IODMACommand, 2); - OSMetaClassDeclareReservedUnused(IODMACommand, 3); - OSMetaClassDeclareReservedUnused(IODMACommand, 4); - OSMetaClassDeclareReservedUnused(IODMACommand, 5); - OSMetaClassDeclareReservedUnused(IODMACommand, 6); + OSMetaClassDeclareReservedUsed(IODMACommand, 3); + OSMetaClassDeclareReservedUsed(IODMACommand, 4); + OSMetaClassDeclareReservedUsed(IODMACommand, 5); + OSMetaClassDeclareReservedUsed(IODMACommand, 6); OSMetaClassDeclareReservedUnused(IODMACommand, 7); OSMetaClassDeclareReservedUnused(IODMACommand, 8); OSMetaClassDeclareReservedUnused(IODMACommand, 9); @@ -478,9 +564,8 @@ friend class IODMAEventSource; Maximum size of a transfer that this memory cursor is allowed to generate */ UInt64 fMaxTransferSize; -/*! @var fBypassMask - Mask to be ored into the address to bypass the given iommu's mapping. */ - UInt64 fBypassMask; + UInt32 fAlignMaskLength; + UInt32 fAlignMaskInternalSegments; /*! @var fMapper Client defined mapper. */ @@ -507,7 +592,7 @@ friend class IODMAEventSource; /*! @var fMappingOptions What type of I/O virtual address mapping is required for this command */ - MappingOptions fMappingOptions; + uint32_t fMappingOptions; /*! 
@var fActive fActive indicates that this DMA command is currently prepared and ready to go */ diff --git a/iokit/IOKit/IODMAController.h b/iokit/IOKit/IODMAController.h index 4fab6e080..e5b8f106d 100644 --- a/iokit/IOKit/IODMAController.h +++ b/iokit/IOKit/IODMAController.h @@ -57,6 +57,7 @@ class IODMAController : public IOService virtual IOByteCount getFIFODepth(UInt32 dmaIndex, IODirection direction) = 0; virtual IOReturn setFIFODepth(UInt32 dmaIndex, IOByteCount depth) = 0; virtual IOByteCount validFIFODepth(UInt32 dmaIndex, IOByteCount depth, IODirection direction) = 0; + virtual IOReturn setFrameSize(UInt32 dmaIndex, UInt8 byteCount) = 0; virtual IOReturn setDMAConfig(UInt32 dmaIndex, IOService *provider, UInt32 reqIndex) = 0; virtual bool validDMAConfig(UInt32 dmaIndex, IOService *provider, UInt32 reqIndex) = 0; @@ -64,7 +65,7 @@ class IODMAController : public IOService static const OSSymbol *createControllerName(UInt32 phandle); static IODMAController *getController(IOService *provider, UInt32 dmaIndex); - virtual bool start(IOService *provider); + virtual bool start(IOService *provider) APPLE_KEXT_OVERRIDE; }; diff --git a/iokit/IOKit/IODMAEventSource.h b/iokit/IOKit/IODMAEventSource.h index 58d851bf9..9aa34a93f 100644 --- a/iokit/IOKit/IODMAEventSource.h +++ b/iokit/IOKit/IODMAEventSource.h @@ -66,6 +66,8 @@ class IODMAEventSource : public IOEventSource virtual IOReturn setFIFODepth(IOByteCount depth); virtual IOByteCount validFIFODepth(IOByteCount depth, IODirection direction); + virtual IOReturn setFrameSize(UInt8 byteCount); + virtual IOReturn setDMAConfig(UInt32 dmaIndex); virtual bool validDMAConfig(UInt32 dmaIndex); @@ -84,8 +86,8 @@ class IODMAEventSource : public IOEventSource Action completion = 0, Action notification = 0, UInt32 dmaIndex = 0); - virtual bool checkForWork(void); - virtual void free(void); + virtual bool checkForWork(void) APPLE_KEXT_OVERRIDE; + virtual void free(void) APPLE_KEXT_OVERRIDE; }; #endif /* _IOKIT_IODMAEVENTSOURCE_H */ 
diff --git a/iokit/IOKit/IODataQueue.h b/iokit/IOKit/IODataQueue.h index ecb92459b..8af46e5b3 100644 --- a/iokit/IOKit/IODataQueue.h +++ b/iokit/IOKit/IODataQueue.h @@ -72,7 +72,7 @@ class IODataQueue : public OSObject void * notifyMsg; - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! * @function sendDataAvailableNotification diff --git a/iokit/IOKit/IODeviceTreeSupport.h b/iokit/IOKit/IODeviceTreeSupport.h index 9b39a2dc2..531202f41 100644 --- a/iokit/IOKit/IODeviceTreeSupport.h +++ b/iokit/IOKit/IODeviceTreeSupport.h @@ -48,6 +48,7 @@ extern const OSSymbol * gIODTPHandleKey; extern const OSSymbol * gIODTCompatibleKey; extern const OSSymbol * gIODTTypeKey; extern const OSSymbol * gIODTModelKey; +extern const OSSymbol * gIODTTargetTypeKey; extern const OSSymbol * gIODTAAPLInterruptsKey; extern const OSSymbol * gIODTDefaultInterruptController; diff --git a/iokit/IOKit/IOEventSource.h b/iokit/IOKit/IOEventSource.h index 10a392afc..66ee9054a 100644 --- a/iokit/IOKit/IOEventSource.h +++ b/iokit/IOKit/IOEventSource.h @@ -161,7 +161,7 @@ successfully. */ virtual bool init(OSObject *owner, IOEventSource::Action action = 0); - virtual void free( void ); + virtual void free( void ) APPLE_KEXT_OVERRIDE; /*! @function checkForWork @abstract Virtual member function used by IOWorkLoop for work diff --git a/iokit/IOKit/IOFilterInterruptEventSource.h b/iokit/IOKit/IOFilterInterruptEventSource.h index de05c90d5..60154944b 100644 --- a/iokit/IOKit/IOFilterInterruptEventSource.h +++ b/iokit/IOKit/IOFilterInterruptEventSource.h @@ -71,7 +71,7 @@ class IOFilterInterruptEventSource : public IOInterruptEventSource virtual bool init(OSObject *inOwner, IOInterruptEventSource::Action inAction = 0, IOService *inProvider = 0, - int inIntIndex = 0); + int inIntIndex = 0) APPLE_KEXT_OVERRIDE; static IOInterruptEventSource * interruptEventSource(OSObject *inOwner, @@ -136,11 +136,11 @@ successfully. */ /*! 
@function normalInterruptOccurred @abstract Override $link IOInterruptEventSource::normalInterruptOccured to make a filter callout. */ - virtual void normalInterruptOccurred(void *self, IOService *prov, int ind); + virtual void normalInterruptOccurred(void *self, IOService *prov, int ind) APPLE_KEXT_OVERRIDE; /*! @function disableInterruptOccurred @abstract Override $link IOInterruptEventSource::disableInterruptOccurred to make a filter callout. */ - virtual void disableInterruptOccurred(void *self, IOService *prov, int ind); + virtual void disableInterruptOccurred(void *self, IOService *prov, int ind) APPLE_KEXT_OVERRIDE; private: OSMetaClassDeclareReservedUnused(IOFilterInterruptEventSource, 0); diff --git a/iokit/IOKit/IOHibernatePrivate.h b/iokit/IOKit/IOHibernatePrivate.h index caf03f074..a9d25fa98 100644 --- a/iokit/IOKit/IOHibernatePrivate.h +++ b/iokit/IOKit/IOHibernatePrivate.h @@ -303,7 +303,7 @@ typedef struct hibernate_statistics_t hibernate_statistics_t; void IOHibernateSystemInit(IOPMrootDomain * rootDomain); IOReturn IOHibernateSystemSleep(void); -IOReturn IOHibernateOpenForDebugData(void); +void IOOpenDebugDataFile(const char *fname, uint64_t size); IOReturn IOHibernateIOKitSleep(void); IOReturn IOHibernateSystemHasSlept(void); IOReturn IOHibernateSystemWake(void); @@ -315,34 +315,6 @@ void IOHibernateSystemRestart(void); #endif /* __cplusplus */ -#ifdef _SYS_CONF_H_ -typedef void (*kern_get_file_extents_callback_t)(void * ref, uint64_t start, uint64_t size); - -struct kern_direct_file_io_ref_t * -kern_open_file_for_direct_io(const char * name, boolean_t create_file, - kern_get_file_extents_callback_t callback, - void * callback_ref, - - off_t set_file_size, - - off_t write_file_offset, - caddr_t write_file_addr, - vm_size_t write_file_len, - - dev_t * partition_device_result, - dev_t * image_device_result, - uint64_t * partitionbase_result, - uint64_t * maxiocount_result, - uint32_t * oflags); -int -kern_write_file(struct kern_direct_file_io_ref_t 
* ref, off_t offset, caddr_t addr, vm_size_t len, int ioflag); -void -kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, - off_t write_offset, caddr_t addr, vm_size_t write_length, - off_t discard_offset, off_t discard_end); -#endif /* _SYS_CONF_H_ */ - - void vm_compressor_do_warmup(void); @@ -358,8 +330,6 @@ hibernate_alloc_page_lists( kern_return_t hibernate_setup(IOHibernateImageHeader * header, - uint32_t free_page_ratio, - uint32_t free_page_time, boolean_t vmflush, hibernate_page_list_t * page_list, hibernate_page_list_t * page_list_wired, diff --git a/iokit/IOKit/IOInterleavedMemoryDescriptor.h b/iokit/IOKit/IOInterleavedMemoryDescriptor.h index e5c2a943f..e1c122aef 100644 --- a/iokit/IOKit/IOInterleavedMemoryDescriptor.h +++ b/iokit/IOKit/IOInterleavedMemoryDescriptor.h @@ -48,7 +48,7 @@ class IOInterleavedMemoryDescriptor : public IOMemoryDescriptor IOByteCount * _descriptorLengths; bool _descriptorPrepared; - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; public: @@ -100,7 +100,7 @@ class IOInterleavedMemoryDescriptor : public IOMemoryDescriptor virtual addr64_t getPhysicalSegment( IOByteCount offset, IOByteCount * length, - IOOptionBits options = 0 ); + IOOptionBits options = 0 ) APPLE_KEXT_OVERRIDE; /*! @function prepare @abstract Prepare the memory for an I/O transfer. @@ -108,7 +108,7 @@ class IOInterleavedMemoryDescriptor : public IOMemoryDescriptor @param forDirection The direction of the I/O to be performed, or kIODirectionNone for the direction specified by the memory descriptor. @result An IOReturn code. */ - virtual IOReturn prepare(IODirection forDirection = kIODirectionNone); + virtual IOReturn prepare(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE; /*! @function complete @abstract Complete processing of the memory after an I/O transfer finishes. 
@@ -116,7 +116,7 @@ class IOInterleavedMemoryDescriptor : public IOMemoryDescriptor @param forDirection The direction of the I/O just completed, or kIODirectionNone for the direction specified by the memory descriptor. @result An IOReturn code. */ - virtual IOReturn complete(IODirection forDirection = kIODirectionNone); + virtual IOReturn complete(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE; }; #endif /* !_IOINTERLEAVEDMEMORYDESCRIPTOR_H */ diff --git a/iokit/IOKit/IOInterruptAccounting.h b/iokit/IOKit/IOInterruptAccounting.h index fdea6295c..7e03f6bd5 100644 --- a/iokit/IOKit/IOInterruptAccounting.h +++ b/iokit/IOKit/IOInterruptAccounting.h @@ -144,5 +144,12 @@ enum { kInterruptAccountingInvalidStatisticIndex /* Sentinel value for checking for a nonsensical index */ }; +/* + * IOReporting group name; exposed publicly for the purpose of getting channels by group + * name; other strings (subgroup names, statistic names) are not exposed, as we may want + * to change them in the future. + */ +#define kInterruptAccountingGroupName "Interrupt Statistics (by index)" + #endif /* __IOKIT_IOINTERRUPTACCOUNTING_PRIVATE_H */ diff --git a/iokit/IOKit/IOInterruptAccountingPrivate.h b/iokit/IOKit/IOInterruptAccountingPrivate.h index ef50297a7..5f37136e1 100644 --- a/iokit/IOKit/IOInterruptAccountingPrivate.h +++ b/iokit/IOKit/IOInterruptAccountingPrivate.h @@ -118,28 +118,6 @@ static const char * const kInterruptAccountingStatisticNameArray[IA_NUM_INTERRUP [kInterruptAccountingIdleExitsIndex] = kInterruptAccountingChannelNameIdleExits, }; -/* - * IOReporting group names. - */ -static const char * const kInterruptAccountingGroupName = "Interrupt Statistics (by index)"; - -/* - * TODO: Generate the subgroup name strings? 
- */ -#define IA_MAX_SUBGROUP_NAME (32) - -static const char * const kInterruptAccountingSubgroupNames[IA_MAX_SUBGROUP_NAME] = { - "0", "1", "2" , "3", "4", "5", "6", "7", - "8", "9", "10", "11", "12", "13", "14", "15", - "16", "17", "18", "19", "20", "21", "22", "23", - "24", "25", "26", "27", "28", "29", "30", "31"}; - -/* - * As long as we use a lookup table, we may be out of bounds for a valid index. In this case, fall - * back on a generic subgroup name that indicates we have screwed up. - */ -static const char * const kInterruptAccountingGenericSubgroupName = "(Index > 31)"; - /* * For updating the statistics in the data structure. We cannot guarantee all of our platforms will be * able to do a 64-bit store in a single transaction. So, for new platforms, call out to the hardware diff --git a/iokit/IOKit/IOInterruptController.h b/iokit/IOKit/IOInterruptController.h index 71f55e549..d389a79e3 100644 --- a/iokit/IOKit/IOInterruptController.h +++ b/iokit/IOKit/IOInterruptController.h @@ -133,17 +133,17 @@ class IOSharedInterruptController : public IOInterruptController virtual IOReturn registerInterrupt(IOService *nub, int source, void *target, IOInterruptHandler handler, - void *refCon); - virtual IOReturn unregisterInterrupt(IOService *nub, int source); + void *refCon) APPLE_KEXT_OVERRIDE; + virtual IOReturn unregisterInterrupt(IOService *nub, int source) APPLE_KEXT_OVERRIDE; virtual IOReturn getInterruptType(IOService *nub, int source, - int *interruptType); + int *interruptType) APPLE_KEXT_OVERRIDE; - virtual IOReturn enableInterrupt(IOService *nub, int source); - virtual IOReturn disableInterrupt(IOService *nub, int source); + virtual IOReturn enableInterrupt(IOService *nub, int source) APPLE_KEXT_OVERRIDE; + virtual IOReturn disableInterrupt(IOService *nub, int source) APPLE_KEXT_OVERRIDE; - virtual IOInterruptAction getInterruptHandlerAddress(void); - virtual IOReturn handleInterrupt(void *refCon, IOService *nub, int source); + virtual IOInterruptAction 
getInterruptHandlerAddress(void) APPLE_KEXT_OVERRIDE; + virtual IOReturn handleInterrupt(void *refCon, IOService *nub, int source) APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(IOSharedInterruptController, 0); OSMetaClassDeclareReservedUnused(IOSharedInterruptController, 1); diff --git a/iokit/IOKit/IOInterruptEventSource.h b/iokit/IOKit/IOInterruptEventSource.h index 553eb4104..074af7930 100644 --- a/iokit/IOKit/IOInterruptEventSource.h +++ b/iokit/IOKit/IOInterruptEventSource.h @@ -108,17 +108,17 @@ class IOInterruptEventSource : public IOEventSource /*! @function free @abstract Sub-class implementation of free method, disconnects from the interrupt source. */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! @function checkForWork @abstract Pure Virtual member function used by IOWorkLoop for issueing a client calls. @discussion This function called when the work-loop is ready to check for any work to do and then to call out the owner/action. @result Return true if this function needs to be called again before all its outstanding events have been processed. */ - virtual bool checkForWork(); + virtual bool checkForWork() APPLE_KEXT_OVERRIDE; /*! @function setWorkLoop @abstract Sub-class implementation of setWorkLoop method. */ - virtual void setWorkLoop(IOWorkLoop *inWorkLoop); + virtual void setWorkLoop(IOWorkLoop *inWorkLoop) APPLE_KEXT_OVERRIDE; public: @@ -153,13 +153,13 @@ successfully. */ @discussion A subclass implementation is expected to respect the enabled state when checkForWork is called. Calling this function will cause the work-loop to be signalled so that a checkForWork is performed. */ - virtual void enable(); + virtual void enable() APPLE_KEXT_OVERRIDE; /*! @function disable @abstract Disable event source. @discussion A subclass implementation is expected to respect the enabled state when checkForWork is called. */ - virtual void disable(); + virtual void disable() APPLE_KEXT_OVERRIDE; /*! 
@function getProvider @abstract Get'ter for $link provider variable. diff --git a/iokit/IOKit/IOKernelReporters.h b/iokit/IOKit/IOKernelReporters.h index de529f405..58475afd9 100644 --- a/iokit/IOKit/IOKernelReporters.h +++ b/iokit/IOKit/IOKernelReporters.h @@ -277,7 +277,7 @@ class IOReporter : public OSObject Locking: same-instance concurrency UNSAFE */ - virtual void free(void); + virtual void free(void) APPLE_KEXT_OVERRIDE; /*********************************/ @@ -1214,7 +1214,7 @@ class IOStateReporter : public IOReporter Locking: same-instance concurrency UNSAFE */ - virtual void free(void); + virtual void free(void) APPLE_KEXT_OVERRIDE; protected: @@ -1240,10 +1240,10 @@ class IOStateReporter : public IOReporter [see IOReporter::handle*Swap* for more info] */ - virtual IOReturn handleSwapPrepare(int newNChannels); + virtual IOReturn handleSwapPrepare(int newNChannels) APPLE_KEXT_OVERRIDE; virtual IOReturn handleAddChannelSwap(uint64_t channel_id, - const OSSymbol *symChannelName); - virtual void handleSwapCleanup(int swapNChannels); + const OSSymbol *symChannelName) APPLE_KEXT_OVERRIDE; + virtual void handleSwapCleanup(int swapNChannels) APPLE_KEXT_OVERRIDE; /*! @function IOStateReporter::updateChannelValues @abstract Update accounting of time spent in current state @@ -1258,7 +1258,7 @@ class IOStateReporter : public IOReporter Locking: Caller must ensure that the reporter (data) lock is held. */ - virtual IOReturn updateChannelValues(int channel_index); + virtual IOReturn updateChannelValues(int channel_index) APPLE_KEXT_OVERRIDE; /*! @function IOStateReporter::setStateByIndices @abstract update a channel state without validating channel_id @@ -1383,7 +1383,7 @@ class IOHistogramReporter : public IOReporter /*! 
@function IOHistogramReporter::with @abstract Initializes the IOHistogramReporter instance variables and data structures - @param reportingService - IOService instanciator and data provider into the reporter object + @param reportingService - The I/O Kit service for this reporter's channels @param categories - The categories in which the report should be classified @param channelID - uint64_t channel identifier @param channelName - rich channel name as char* @@ -1443,7 +1443,7 @@ FIXME: need more explanation of the config Locking: same-instance concurrency UNSAFE */ - virtual void free(void); + virtual void free(void) APPLE_KEXT_OVERRIDE; protected: @@ -1473,7 +1473,7 @@ FIXME: need more explanation of the config Locking: same-instance concurrency SAFE, MAY BLOCK */ - IOReportLegendEntry* handleCreateLegend(void); + IOReportLegendEntry* handleCreateLegend(void) APPLE_KEXT_OVERRIDE; private: @@ -1578,11 +1578,11 @@ class IOReportLegend : public OSObject temporary reporter objects for the purpose of creating their legend entries. User-space legends are tracked by 12836893. - The static version of addReporterLegend adds the reporter's - legend directly to reportingService's kIOReportLegendKey. This - will result in serialized getProperty() and setProperty() calls - on reportingService and should be avoided when many reporters - objects are in use. + The static version of addReporterLegend adds the reporter's legend + directly to reportingService's kIOReportLegendKey. It is not + possible to safely update kIOReportLegendKey from multiple threads. + + Locking: same-reportingService and same-IORLegend concurrency UNSAFE */ IOReturn addReporterLegend(IOReporter *reporter, const char *groupName, @@ -1614,7 +1614,7 @@ class IOReportLegend : public OSObject in the I/O Kit registry, its ownership will now be with the registry. 
*/ - void free(void); + void free(void) APPLE_KEXT_OVERRIDE; diff --git a/iokit/IOKit/IOKitDebug.h b/iokit/IOKit/IOKitDebug.h index 3e9c1de3b..86f34c995 100644 --- a/iokit/IOKit/IOKitDebug.h +++ b/iokit/IOKit/IOKitDebug.h @@ -44,10 +44,10 @@ class IOKitDiagnostics : public OSObject public: static OSObject * diagnostics( void ); - virtual bool serialize(OSSerialize *s) const; + virtual bool serialize(OSSerialize *s) const APPLE_KEXT_OVERRIDE; private: static void updateOffset( OSDictionary * dict, - UInt32 value, const char * name ); + UInt64 value, const char * name ); }; #endif /* __cplusplus */ @@ -77,13 +77,15 @@ enum { kIOLogHibernate = 0x00100000ULL, kIOStatistics = 0x04000000ULL, kIOSleepWakeWdogOff = 0x40000000ULL, + kIOKextSpinDump = 0x80000000ULL, // debug aids - change behaviour kIONoFreeObjects = 0x00100000ULL, kIOLogSynchronous = 0x00200000ULL, // IOLog completes synchronously - kOSTraceObjectAlloc = 0x00400000ULL, + kIOTracking = 0x00400000ULL, kIOWaitQuietPanics = 0x00800000ULL, - kIOWaitQuietBeforeRoot = 0x01000000ULL, + kIOWaitQuietBeforeRoot = 0x01000000ULL, + kIOTrackingBoot = 0x02000000ULL, _kIODebugTopFlag = 0x8000000000000000ULL // force enum to be 64 bits }; @@ -126,6 +128,102 @@ extern void OSPrintMemory( void ); #endif #define IOPrintMemory OSPrintMemory + + +#define kIOKitDiagnosticsClientClassName "IOKitDiagnosticsClient" + +enum +{ + kIOKitDiagnosticsClientType = 0x99000002 +}; + + +struct IOKitDiagnosticsParameters +{ + size_t size; + uint64_t value; + uint32_t options; + uint32_t reserved[3]; +}; +typedef struct IOKitDiagnosticsParameters IOKitDiagnosticsParameters; + +enum +{ + kIOTrackingCallSiteBTs = 16, +}; + +struct IOTrackingCallSiteInfo +{ + uint32_t count; + size_t size[2]; + uintptr_t bt[kIOTrackingCallSiteBTs]; +}; + +#define kIOMallocTrackingName "IOMalloc" +#define kIOWireTrackingName "IOWire" +#define kIOMapTrackingName "IOMap" + +#if KERNEL && IOTRACKING + +struct IOTrackingQueue; +struct IOTrackingCallSite; + +struct 
IOTracking +{ + queue_chain_t link; + IOTrackingCallSite * site; +#if !defined(__LP64__) + uint32_t flags; +#endif +}; + +struct IOTrackingAddress +{ + IOTracking tracking; + uintptr_t address; + size_t size; +#if defined(__LP64__) + uint32_t flags; +#endif +}; + +void IOTrackingInit(void); +IOTrackingQueue * IOTrackingQueueAlloc(const char * name, size_t allocSize, size_t minCaptureSize, bool isAlloc); +void IOTrackingQueueFree(IOTrackingQueue * head); +void IOTrackingAdd(IOTrackingQueue * head, IOTracking * mem, size_t size, bool address); +void IOTrackingRemove(IOTrackingQueue * head, IOTracking * mem, size_t size); +void IOTrackingAlloc(IOTrackingQueue * head, uintptr_t address, size_t size); +void IOTrackingFree(IOTrackingQueue * head, uintptr_t address, size_t size); +void IOTrackingReset(IOTrackingQueue * head); +void IOTrackingAccumSize(IOTrackingQueue * head, IOTracking * mem, size_t size); +kern_return_t IOTrackingDebug(uint32_t selector, uint32_t options, + const char * names, size_t namesLen, + size_t size, OSObject ** result); + +extern IOTrackingQueue * gIOMallocTracking; +extern IOTrackingQueue * gIOWireTracking; +extern IOTrackingQueue * gIOMapTracking; + +#endif /* KERNEL && IOTRACKING */ + +enum +{ + kIOTrackingExcludeNames = 0x00000001, +}; + +enum +{ + kIOTrackingGetTracking = 0x00000001, + kIOTrackingPrintTracking = 0x00000002, + kIOTrackingResetTracking = 0x00000003, + kIOTrackingStartCapture = 0x00000004, + kIOTrackingStopCapture = 0x00000005, + kIOTrackingSetMinCaptureSize = 0x00000006, + kIOTrackingLeaks = 0x00000007, + kIOTrackingInvalid = 0xFFFFFFFE, +}; + + #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ diff --git a/iokit/IOKit/IOKitDiagnosticsUserClient.h b/iokit/IOKit/IOKitDiagnosticsUserClient.h new file mode 100644 index 000000000..86370398b --- /dev/null +++ b/iokit/IOKit/IOKitDiagnosticsUserClient.h @@ -0,0 +1,17 @@ + +#include +#include + + +class IOKitDiagnosticsClient : public IOUserClient +{ + 
OSDeclareDefaultStructors(IOKitDiagnosticsClient) + +public: + static IOUserClient * withTask(task_t owningTask); + virtual IOReturn clientClose(void) APPLE_KEXT_OVERRIDE; + virtual IOReturn setProperties(OSObject * properties) APPLE_KEXT_OVERRIDE; + virtual IOReturn externalMethod(uint32_t selector, IOExternalMethodArguments * args, + IOExternalMethodDispatch * dispatch, OSObject * target, void * reference) APPLE_KEXT_OVERRIDE; +}; + diff --git a/iokit/IOKit/IOKitKeysPrivate.h b/iokit/IOKit/IOKitKeysPrivate.h index cae0be4f2..bea0d10f0 100644 --- a/iokit/IOKit/IOKitKeysPrivate.h +++ b/iokit/IOKit/IOKitKeysPrivate.h @@ -63,6 +63,13 @@ #define kIOClientPrivilegeSecureConsoleProcess "secureprocess" #define kIOClientPrivilegeConsoleSession "consolesession" + +// Embedded still throttles NVRAM commits via kIONVRAMSyncNowPropertyKey, but +// some clients still need a stricter NVRAM commit contract. Please use this with +// care. +#define kIONVRAMForceSyncNowPropertyKey "IONVRAM-FORCESYNCNOW-PROPERTY" + + // clientHasPrivilege security token for kIOClientPrivilegeSecureConsoleProcess typedef struct _IOUCProcessToken { void * token; @@ -71,11 +78,12 @@ typedef struct _IOUCProcessToken { #define kIOKernelHasSafeSleep 1 -#define kIOPlatformSleepActionKey "IOPlatformSleepAction" /* value is OSNumber (priority) */ -#define kIOPlatformWakeActionKey "IOPlatformWakeAction" /* value is OSNumber (priority) */ -#define kIOPlatformQuiesceActionKey "IOPlatformQuiesceAction" /* value is OSNumber (priority) */ -#define kIOPlatformActiveActionKey "IOPlatformActiveAction" /* value is OSNumber (priority) */ -#define kIOPlatformHaltRestartActionKey "IOPlatformHaltRestartAction" /* value is OSNumber (priority) */ +#define kIOPlatformSleepActionKey "IOPlatformSleepAction" /* value is OSNumber (priority) */ +#define kIOPlatformWakeActionKey "IOPlatformWakeAction" /* value is OSNumber (priority) */ +#define kIOPlatformQuiesceActionKey "IOPlatformQuiesceAction" /* value is OSNumber (priority) 
*/ +#define kIOPlatformActiveActionKey "IOPlatformActiveAction" /* value is OSNumber (priority) */ +#define kIOPlatformHaltRestartActionKey "IOPlatformHaltRestartAction" /* value is OSNumber (priority) */ +#define kIOPlatformPanicActionKey "IOPlatformPanicAction" /* value is OSNumber (priority) */ #define kIOPlatformFunctionHandlerSet "IOPlatformFunctionHandlerSet" #if defined(__i386__) || defined(__x86_64__) @@ -96,4 +104,11 @@ enum { kIOServiceTerminateNeedWillTerminate = 0x00000100, }; +#define kIOClassNameOverrideKey "IOClassNameOverride" + +enum { + kIOClassNameOverrideNone = 0x00000001, +}; + + #endif /* ! _IOKIT_IOKITKEYSPRIVATE_H */ diff --git a/iokit/IOKit/IOLib.h b/iokit/IOKit/IOLib.h index a290e4d91..7bf9ad8ab 100644 --- a/iokit/IOKit/IOLib.h +++ b/iokit/IOKit/IOLib.h @@ -150,9 +150,10 @@ void IOFreePageable(void * address, vm_size_t size); * Typed memory allocation macros. Both may block. */ #define IONew(type,number) \ -( ((number) != 0 && ((vm_size_t) ((sizeof(type) * (number) / (number))) != sizeof(type)) /* overflow check 21532969 */ \ -? 0 \ -: ((type*)IOMalloc(sizeof(type) * (number)))) ) +( ((number) != 0 && ((vm_size_t) ((sizeof(type) * (number) / (number))) != sizeof(type)) /* overflow check 20847256 */ \ + ? 0 \ + : ((type*)IOMalloc(sizeof(type) * (number)))) ) + #define IODelete(ptr,type,number) IOFree( (ptr) , sizeof(type) * (number) ) ///////////////////////////////////////////////////////////////////////////// @@ -271,6 +272,14 @@ void IOExitThread(void) __attribute__((deprecated)); void IOSleep(unsigned milliseconds); +/*! @function IOSleepWithLeeway + @abstract Sleep the calling thread for a number of milliseconds, with a specified leeway the kernel may use for timer coalescing. + @discussion This function blocks the calling thread for at least the number of specified milliseconds, giving time to other processes. The kernel may also coalesce any timers involved in the delay, using the leeway given as a guideline. 
+ @param intervalMilliseconds The integer number of milliseconds to wait. + @param leewayMilliseconds The integer number of milliseconds to use as a timer coalescing guideline. */ + +void IOSleepWithLeeway(unsigned intervalMilliseconds, unsigned leewayMilliseconds); + /*! @function IODelay @abstract Spin delay for a number of microseconds. @discussion This function spins to delay for at least the number of specified microseconds. Since the CPU is busy spinning no time is made available to other processes; this method of delay should be used only for short periods. Also, the AbsoluteTime based APIs of kern/clock.h provide finer grained and lower cost delays. @@ -407,6 +416,11 @@ extern mach_timespec_t IOZeroTvalspec; #endif /* __APPLE_API_OBSOLETE */ +#if XNU_KERNEL_PRIVATE +vm_tag_t +IOMemoryTag(vm_map_t map); +#endif + __END_DECLS #endif /* !__IOKIT_IOLIB_H */ diff --git a/iokit/IOKit/IOMapper.h b/iokit/IOKit/IOMapper.h index 784077d9c..fc4f07dbe 100644 --- a/iokit/IOKit/IOMapper.h +++ b/iokit/IOKit/IOMapper.h @@ -38,16 +38,7 @@ __BEGIN_DECLS // These are C accessors to the system mapper for non-IOKit clients ppnum_t IOMapperIOVMAlloc(unsigned pages); void IOMapperIOVMFree(ppnum_t addr, unsigned pages); - ppnum_t IOMapperInsertPage(ppnum_t addr, unsigned offset, ppnum_t page); -void IOMapperInsertPPNPages(ppnum_t addr, unsigned offset, - ppnum_t *pageList, unsigned pageCount); -void IOMapperInsertUPLPages(ppnum_t addr, unsigned offset, - upl_page_info_t *pageList, unsigned pageCount); - -mach_vm_address_t IOMallocPhysical(mach_vm_size_t size, mach_vm_address_t mask); - -void IOFreePhysical(mach_vm_address_t address, mach_vm_size_t size); __END_DECLS @@ -67,6 +58,7 @@ class IOMapper : public IOService // Give the platform expert access to setMapperRequired(); friend class IOPlatformExpert; + friend class IOMemoryDescriptor; private: enum SystemMapperState { @@ -76,74 +68,67 @@ class IOMapper : public IOService kWaitMask = 3, }; protected: - void *fTable; - ppnum_t 
fTablePhys; - IOItemCount fTableSize; - OSData *fTableHandle; +#ifdef XNU_KERNEL_PRIVATE + uint64_t __reservedA[7]; + uint32_t __reservedB; + uint32_t fPageSize; +#else + uint64_t __reserved[8]; +#endif bool fIsSystem; - static void setMapperRequired(bool hasMapper); static void waitForSystemMapper(); virtual bool initHardware(IOService *provider) = 0; - virtual bool allocTable(IOByteCount size); - public: - virtual bool start(IOService *provider); - virtual void free(); - - // Static routines capable of allocating tables that are physically - // contiguous in real memory space. - static OSData * NewARTTable(IOByteCount size, - void ** virtAddrP, ppnum_t *physAddrP); - static void FreeARTTable(OSData *handle, IOByteCount size); - + virtual bool start(IOService *provider) APPLE_KEXT_OVERRIDE; + virtual void free() APPLE_KEXT_OVERRIDE; // To get access to the system mapper IOMapper::gSystem static IOMapper *gSystem; - virtual ppnum_t iovmAlloc(IOItemCount pages) = 0; - virtual void iovmFree(ppnum_t addr, IOItemCount pages) = 0; - - virtual void iovmInsert(ppnum_t addr, IOItemCount offset, ppnum_t page) = 0; - virtual void iovmInsert(ppnum_t addr, IOItemCount offset, - ppnum_t *pageList, IOItemCount pageCount); - virtual void iovmInsert(ppnum_t addr, IOItemCount offset, - upl_page_info_t *pageList, IOItemCount pageCount); - static void checkForSystemMapper() { if ((uintptr_t) gSystem & kWaitMask) waitForSystemMapper(); }; static IOMapper * copyMapperForDevice(IOService * device); static IOMapper * copyMapperForDeviceWithIndex(IOService * device, unsigned int index); - - // Function will panic if the given address is not found in a valid - // iovm mapping. 
- virtual addr64_t mapAddr(IOPhysicalAddress addr) = 0; - - // Get the address mask to or into an address to bypass this mapper - virtual bool getBypassMask(addr64_t *maskP) const; - - virtual ppnum_t iovmAllocDMACommand(IODMACommand * command, IOItemCount pageCount); - virtual void iovmFreeDMACommand(IODMACommand * command, ppnum_t addr, IOItemCount pageCount); - - virtual ppnum_t iovmMapMemory( - OSObject * memory, // dma command or iomd - ppnum_t offsetPage, - ppnum_t pageCount, - uint32_t options, - upl_page_info_t * pageList, - const IODMAMapSpecification * mapSpecification); - - OSMetaClassDeclareReservedUsed(IOMapper, 0); - OSMetaClassDeclareReservedUsed(IOMapper, 1); - OSMetaClassDeclareReservedUsed(IOMapper, 2); - OSMetaClassDeclareReservedUsed(IOMapper, 3); + // { subclasses + + virtual uint64_t getPageSize(void) const = 0; + + virtual IOReturn iovmMapMemory(IOMemoryDescriptor * memory, + uint64_t descriptorOffset, + uint64_t length, + uint32_t mapOptions, + const IODMAMapSpecification * mapSpecification, + IODMACommand * dmaCommand, + const IODMAMapPageList * pageList, + uint64_t * mapAddress, + uint64_t * mapLength) = 0; + + virtual IOReturn iovmUnmapMemory(IOMemoryDescriptor * memory, + IODMACommand * dmaCommand, + uint64_t mapAddress, + uint64_t mapLength) = 0; + + virtual IOReturn iovmInsert(uint32_t options, + uint64_t mapAddress, + uint64_t offset, + uint64_t physicalAddress, + uint64_t length) = 0; + + virtual uint64_t mapToPhysicalAddress(uint64_t mappedAddress) = 0; + + // } private: + OSMetaClassDeclareReservedUnused(IOMapper, 0); + OSMetaClassDeclareReservedUnused(IOMapper, 1); + OSMetaClassDeclareReservedUnused(IOMapper, 2); + OSMetaClassDeclareReservedUnused(IOMapper, 3); OSMetaClassDeclareReservedUnused(IOMapper, 4); OSMetaClassDeclareReservedUnused(IOMapper, 5); OSMetaClassDeclareReservedUnused(IOMapper, 6); diff --git a/iokit/IOKit/IOMemoryDescriptor.h b/iokit/IOKit/IOMemoryDescriptor.h index 242581bcb..7b193afc0 100644 --- 
a/iokit/IOKit/IOMemoryDescriptor.h +++ b/iokit/IOKit/IOMemoryDescriptor.h @@ -33,12 +33,16 @@ #include #include #include +#ifdef XNU_KERNEL_PRIVATE +#include +#endif #include class IOMemoryMap; class IOMapper; class IOService; +class IODMACommand; /* * Direction of transfer, with respect to the described memory. @@ -162,15 +166,72 @@ struct IODMAMapSpecification uint32_t resvB[4]; }; +struct IODMAMapPageList +{ + uint32_t pageOffset; + uint32_t pageListCount; + const upl_page_info_t * pageList; +}; + +// mapOptions for iovmMapMemory enum { + kIODMAMapReadAccess = 0x00000001, kIODMAMapWriteAccess = 0x00000002, kIODMAMapPhysicallyContiguous = 0x00000010, kIODMAMapDeviceMemory = 0x00000020, kIODMAMapPagingPath = 0x00000040, kIODMAMapIdentityMap = 0x00000080, + + kIODMAMapPageListFullyOccupied = 0x00000100, + kIODMAMapFixedAddress = 0x00000200, +}; + +#ifdef KERNEL_PRIVATE + +// Used for dmaCommandOperation communications for IODMACommand and mappers + +enum { + kIOMDWalkSegments = 0x01000000, + kIOMDFirstSegment = 1 | kIOMDWalkSegments, + kIOMDGetCharacteristics = 0x02000000, + kIOMDGetCharacteristicsMapped = 1 | kIOMDGetCharacteristics, + kIOMDDMAActive = 0x03000000, + kIOMDSetDMAActive = 1 | kIOMDDMAActive, + kIOMDSetDMAInactive = kIOMDDMAActive, + kIOMDAddDMAMapSpec = 0x04000000, + kIOMDDMAMap = 0x05000000, + kIOMDDMACommandOperationMask = 0xFF000000, +}; +struct IOMDDMACharacteristics { + UInt64 fLength; + UInt32 fSGCount; + UInt32 fPages; + UInt32 fPageAlign; + ppnum_t fHighestPage; + IODirection fDirection; + UInt8 fIsPrepared; +}; + +struct IOMDDMAMapArgs { + IOMapper * fMapper; + IODMACommand * fCommand; + IODMAMapSpecification fMapSpec; + uint64_t fOffset; + uint64_t fLength; + uint64_t fAlloc; + uint64_t fAllocLength; + uint8_t fMapContig; }; +struct IOMDDMAWalkSegmentArgs { + UInt64 fOffset; // Input/Output offset + UInt64 fIOVMAddr, fLength; // Output variables + UInt8 fMapped; // Input Variable, Require mapped IOVMA +}; +typedef UInt8 
IOMDDMAWalkSegmentState[128]; + +#endif /* KERNEL_PRIVATE */ enum { @@ -191,6 +252,7 @@ struct IOMemoryReference; class IOMemoryDescriptor : public OSObject { friend class IOMemoryMap; + friend class IOMultiMemoryDescriptor; OSDeclareDefaultStructors(IOMemoryDescriptor); @@ -315,11 +377,12 @@ typedef IOOptionBits DMACommandOps; IOMemoryDescriptorReserved * getKernelReserved( void ); IOReturn dmaMap( IOMapper * mapper, + IODMACommand * command, const IODMAMapSpecification * mapSpec, uint64_t offset, uint64_t length, - uint64_t * address, - ppnum_t * mapPages); + uint64_t * mapAddress, + uint64_t * mapLength); #endif private: @@ -351,7 +414,7 @@ typedef IOOptionBits DMACommandOps; OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 15); protected: - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; public: static void initialize( void ); @@ -720,11 +783,14 @@ class IOMemoryMap : public OSObject ipc_port_t fRedirEntry; IOMemoryDescriptor * fOwner; uint8_t fUserClientUnmap; +#if IOTRACKING + IOTracking fTracking; +#endif #endif /* XNU_KERNEL_PRIVATE */ protected: - virtual void taggedRelease(const void *tag = 0) const; - virtual void free(); + virtual void taggedRelease(const void *tag = 0) const APPLE_KEXT_OVERRIDE; + virtual void free() APPLE_KEXT_OVERRIDE; public: /*! @function getVirtualAddress @@ -922,22 +988,23 @@ class IOGeneralMemoryDescriptor : public IOMemoryDescriptor bool _initialized; /* has superclass been initialized? */ public: - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; - virtual IOReturn dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const; + virtual IOReturn dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const APPLE_KEXT_OVERRIDE; - virtual uint64_t getPreparationID( void ); + virtual uint64_t getPreparationID( void ) APPLE_KEXT_OVERRIDE; #ifdef XNU_KERNEL_PRIVATE // Internal APIs may be made virtual at some time in the future. 
IOReturn wireVirtual(IODirection forDirection); IOReturn dmaMap( IOMapper * mapper, + IODMACommand * command, const IODMAMapSpecification * mapSpec, uint64_t offset, uint64_t length, - uint64_t * address, - ppnum_t * mapPages); + uint64_t * mapAddress, + uint64_t * mapLength); bool initMemoryEntries(size_t size, IOMapper * mapper); IOMemoryReference * memoryReferenceAlloc(uint32_t capacity, @@ -994,76 +1061,76 @@ class IOGeneralMemoryDescriptor : public IOMemoryDescriptor UInt32 offset, task_t task, IOOptionBits options, - IOMapper * mapper = kIOMapperSystem); + IOMapper * mapper = kIOMapperSystem) APPLE_KEXT_OVERRIDE; #ifndef __LP64__ // Secondary initialisers virtual bool initWithAddress(void * address, IOByteCount withLength, - IODirection withDirection) APPLE_KEXT_DEPRECATED; + IODirection withDirection) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED; virtual bool initWithAddress(IOVirtualAddress address, IOByteCount withLength, IODirection withDirection, - task_t withTask) APPLE_KEXT_DEPRECATED; + task_t withTask) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED; virtual bool initWithPhysicalAddress( IOPhysicalAddress address, IOByteCount withLength, - IODirection withDirection ) APPLE_KEXT_DEPRECATED; + IODirection withDirection ) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED; virtual bool initWithRanges( IOVirtualRange * ranges, UInt32 withCount, IODirection withDirection, task_t withTask, - bool asReference = false) APPLE_KEXT_DEPRECATED; + bool asReference = false) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED; virtual bool initWithPhysicalRanges(IOPhysicalRange * ranges, UInt32 withCount, IODirection withDirection, - bool asReference = false) APPLE_KEXT_DEPRECATED; + bool asReference = false) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED; virtual addr64_t getPhysicalSegment64( IOByteCount offset, - IOByteCount * length ) APPLE_KEXT_DEPRECATED; + IOByteCount * length ) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED; virtual IOPhysicalAddress getPhysicalSegment(IOByteCount offset, - 
IOByteCount * length); + IOByteCount * length) APPLE_KEXT_OVERRIDE; virtual IOPhysicalAddress getSourceSegment(IOByteCount offset, - IOByteCount * length) APPLE_KEXT_DEPRECATED; + IOByteCount * length) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED; virtual void * getVirtualSegment(IOByteCount offset, - IOByteCount * length) APPLE_KEXT_DEPRECATED; + IOByteCount * length) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED; #endif /* !__LP64__ */ virtual IOReturn setPurgeable( IOOptionBits newState, - IOOptionBits * oldState ); + IOOptionBits * oldState ) APPLE_KEXT_OVERRIDE; virtual addr64_t getPhysicalSegment( IOByteCount offset, IOByteCount * length, #ifdef __LP64__ - IOOptionBits options = 0 ); + IOOptionBits options = 0 ) APPLE_KEXT_OVERRIDE; #else /* !__LP64__ */ - IOOptionBits options ); + IOOptionBits options ) APPLE_KEXT_OVERRIDE; #endif /* !__LP64__ */ - virtual IOReturn prepare(IODirection forDirection = kIODirectionNone); + virtual IOReturn prepare(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE; - virtual IOReturn complete(IODirection forDirection = kIODirectionNone); + virtual IOReturn complete(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE; virtual IOReturn doMap( vm_map_t addressMap, IOVirtualAddress * atAddress, IOOptionBits options, IOByteCount sourceOffset = 0, - IOByteCount length = 0 ); + IOByteCount length = 0 ) APPLE_KEXT_OVERRIDE; virtual IOReturn doUnmap( vm_map_t addressMap, IOVirtualAddress logical, - IOByteCount length ); + IOByteCount length ) APPLE_KEXT_OVERRIDE; - virtual bool serialize(OSSerialize *s) const; + virtual bool serialize(OSSerialize *s) const APPLE_KEXT_OVERRIDE; // Factory method for cloning a persistent IOMD, see IOMemoryDescriptor static IOMemoryDescriptor * diff --git a/iokit/IOKit/IOMultiMemoryDescriptor.h b/iokit/IOKit/IOMultiMemoryDescriptor.h index 42b19a468..1a5883abd 100644 --- a/iokit/IOKit/IOMultiMemoryDescriptor.h +++ b/iokit/IOKit/IOMultiMemoryDescriptor.h @@ -45,7 +45,7 @@ class 
IOMultiMemoryDescriptor : public IOMemoryDescriptor UInt32 _descriptorsCount; bool _descriptorsIsAllocated; - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; public: @@ -88,7 +88,7 @@ class IOMultiMemoryDescriptor : public IOMemoryDescriptor virtual addr64_t getPhysicalSegment( IOByteCount offset, IOByteCount * length, - IOOptionBits options = 0 ); + IOOptionBits options = 0 ) APPLE_KEXT_OVERRIDE; /*! @function prepare @abstract Prepare the memory for an I/O transfer. @@ -96,7 +96,7 @@ class IOMultiMemoryDescriptor : public IOMemoryDescriptor @param forDirection The direction of the I/O just completed, or kIODirectionNone for the direction specified by the memory descriptor. @result An IOReturn code. */ - virtual IOReturn prepare(IODirection forDirection = kIODirectionNone); + virtual IOReturn prepare(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE; /*! @function complete @abstract Complete processing of the memory after an I/O transfer finishes. @@ -104,7 +104,28 @@ class IOMultiMemoryDescriptor : public IOMemoryDescriptor @param forDirection The direction of the I/O just completed, or kIODirectionNone for the direction specified by the memory descriptor. @result An IOReturn code. */ - virtual IOReturn complete(IODirection forDirection = kIODirectionNone); + virtual IOReturn complete(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE; + + virtual IOReturn setPurgeable(IOOptionBits newState, IOOptionBits * oldState) APPLE_KEXT_OVERRIDE; + +/*! @function getPageCounts + @abstract Retrieve the number of resident and/or dirty pages encompassed by an IOMemoryDescriptor. + @discussion This method returns the number of resident and/or dirty pages encompassed by an IOMemoryDescriptor. + @param residentPageCount - If non-null, a pointer to a byte count that will return the number of resident pages encompassed by this IOMemoryDescriptor. 
+ @param dirtyPageCount - If non-null, a pointer to a byte count that will return the number of dirty pages encompassed by this IOMemoryDescriptor. + @result An IOReturn code. */ + + IOReturn getPageCounts(IOByteCount * residentPageCount, + IOByteCount * dirtyPageCount); + +#define IOMULTIMEMORYDESCRIPTOR_SUPPORTS_GETPAGECOUNTS 1 + +private: + virtual IOReturn doMap(vm_map_t addressMap, + IOVirtualAddress * atAddress, + IOOptionBits options, + IOByteCount sourceOffset = 0, + IOByteCount length = 0 ) APPLE_KEXT_OVERRIDE; }; #endif /* !_IOMULTIMEMORYDESCRIPTOR_H */ diff --git a/iokit/IOKit/IONVRAM.h b/iokit/IOKit/IONVRAM.h index 37d4ef4d3..31d5e0967 100644 --- a/iokit/IOKit/IONVRAM.h +++ b/iokit/IOKit/IONVRAM.h @@ -74,11 +74,11 @@ class IODTNVRAM : public IOService IONVRAMController *_nvramController; const OSSymbol *_registryPropertiesKey; UInt8 *_nvramImage; - bool _nvramImageDirty; + __unused bool _nvramImageDirty; UInt32 _ofPartitionOffset; UInt32 _ofPartitionSize; UInt8 *_ofImage; - bool _ofImageDirty; + __unused bool _ofImageDirty; OSDictionary *_ofDict; OSDictionary *_nvramPartitionOffsets; OSDictionary *_nvramPartitionLengths; @@ -138,20 +138,20 @@ class IODTNVRAM : public IOService void initProxyData(void); public: - virtual bool init(IORegistryEntry *old, const IORegistryPlane *plane); + virtual bool init(IORegistryEntry *old, const IORegistryPlane *plane) APPLE_KEXT_OVERRIDE; virtual void registerNVRAMController(IONVRAMController *nvram); virtual void sync(void); - virtual bool serializeProperties(OSSerialize *s) const; - virtual OSObject *copyProperty(const OSSymbol *aKey) const; - virtual OSObject *copyProperty(const char *aKey) const; - virtual OSObject *getProperty(const OSSymbol *aKey) const; - virtual OSObject *getProperty(const char *aKey) const; - virtual bool setProperty(const OSSymbol *aKey, OSObject *anObject); - virtual void removeProperty(const OSSymbol *aKey); - virtual IOReturn setProperties(OSObject *properties); + virtual bool 
serializeProperties(OSSerialize *s) const APPLE_KEXT_OVERRIDE; + virtual OSObject *copyProperty(const OSSymbol *aKey) const APPLE_KEXT_OVERRIDE; + virtual OSObject *copyProperty(const char *aKey) const APPLE_KEXT_OVERRIDE; + virtual OSObject *getProperty(const OSSymbol *aKey) const APPLE_KEXT_OVERRIDE; + virtual OSObject *getProperty(const char *aKey) const APPLE_KEXT_OVERRIDE; + virtual bool setProperty(const OSSymbol *aKey, OSObject *anObject) APPLE_KEXT_OVERRIDE; + virtual void removeProperty(const OSSymbol *aKey) APPLE_KEXT_OVERRIDE; + virtual IOReturn setProperties(OSObject *properties) APPLE_KEXT_OVERRIDE; virtual IOReturn readXPRAM(IOByteCount offset, UInt8 *buffer, IOByteCount length); @@ -177,6 +177,7 @@ class IODTNVRAM : public IOService virtual IOByteCount savePanicInfo(UInt8 *buffer, IOByteCount length); virtual bool safeToSync(void); + void syncInternal(bool rateLimit); }; #endif /* __cplusplus */ diff --git a/iokit/IOKit/IOPlatformExpert.h b/iokit/IOKit/IOPlatformExpert.h index c60affed9..52e1c366b 100644 --- a/iokit/IOKit/IOPlatformExpert.h +++ b/iokit/IOKit/IOPlatformExpert.h @@ -73,6 +73,7 @@ extern void PESetUTCTimeOfDay( clock_sec_t secs, clock_usec_t usecs ); /* unless it's a "well-known" property, these will read/write out the value as raw data */ +extern boolean_t PEWriteNVRAMBooleanProperty(const char *symbol, boolean_t value); extern boolean_t PEWriteNVRAMProperty(const char *symbol, const void *value, const unsigned int len); extern boolean_t PEReadNVRAMProperty(const char *symbol, void *value, unsigned int *len); @@ -92,6 +93,7 @@ extern const OSSymbol * gIOPlatformWakeActionKey; extern const OSSymbol * gIOPlatformQuiesceActionKey; extern const OSSymbol * gIOPlatformActiveActionKey; extern const OSSymbol * gIOPlatformHaltRestartActionKey; +extern const OSSymbol * gIOPlatformPanicActionKey; class IORangeAllocator; class IONVRAMController; @@ -130,8 +132,8 @@ class IOPlatformExpert : public IOService virtual void PMInstantiatePowerDomains ( 
void ); public: - virtual bool attach( IOService * provider ); - virtual bool start( IOService * provider ); + virtual bool attach( IOService * provider ) APPLE_KEXT_OVERRIDE; + virtual bool start( IOService * provider ) APPLE_KEXT_OVERRIDE; virtual bool configure( IOService * provider ); virtual IOService * createNub( OSDictionary * from ); @@ -165,7 +167,7 @@ class IOPlatformExpert : public IOService virtual IOReturn callPlatformFunction(const OSSymbol *functionName, bool waitForFunction, void *param1, void *param2, - void *param3, void *param4); + void *param3, void *param4) APPLE_KEXT_OVERRIDE; virtual IORangeAllocator * getPhysicalRangeAllocator(void); @@ -217,8 +219,8 @@ class IODTPlatformExpert : public IOPlatformExpert public: virtual IOService * probe( IOService * provider, - SInt32 * score ); - virtual bool configure( IOService * provider ); + SInt32 * score ) APPLE_KEXT_OVERRIDE; + virtual bool configure( IOService * provider ) APPLE_KEXT_OVERRIDE; virtual void processTopLevel( IORegistryEntry * root ); virtual const char * deleteList( void ) = 0; @@ -227,16 +229,16 @@ class IODTPlatformExpert : public IOPlatformExpert virtual bool createNubs( IOService * parent, OSIterator * iter ); virtual bool compareNubName( const IOService * nub, OSString * name, - OSString ** matched = 0 ) const; + OSString ** matched = 0 ) const APPLE_KEXT_OVERRIDE; - virtual IOReturn getNubResources( IOService * nub ); + virtual IOReturn getNubResources( IOService * nub ) APPLE_KEXT_OVERRIDE; - virtual bool getModelName( char * name, int maxLength ); - virtual bool getMachineName( char * name, int maxLength ); + virtual bool getModelName( char * name, int maxLength ) APPLE_KEXT_OVERRIDE; + virtual bool getMachineName( char * name, int maxLength ) APPLE_KEXT_OVERRIDE; - virtual void registerNVRAMController( IONVRAMController * nvram ); + virtual void registerNVRAMController( IONVRAMController * nvram ) APPLE_KEXT_OVERRIDE; - virtual int haltRestart(unsigned int type); + virtual 
int haltRestart(unsigned int type) APPLE_KEXT_OVERRIDE; /* virtual */ IOReturn readXPRAM(IOByteCount offset, UInt8 * buffer, IOByteCount length); @@ -265,8 +267,8 @@ class IODTPlatformExpert : public IOPlatformExpert IOByteCount offset, UInt8 * buffer, IOByteCount length); - virtual IOByteCount savePanicInfo(UInt8 *buffer, IOByteCount length); - virtual OSString* createSystemSerialNumberString(OSData* myProperty); + virtual IOByteCount savePanicInfo(UInt8 *buffer, IOByteCount length) APPLE_KEXT_OVERRIDE; + virtual OSString* createSystemSerialNumberString(OSData* myProperty) APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(IODTPlatformExpert, 0); OSMetaClassDeclareReservedUnused(IODTPlatformExpert, 1); @@ -295,12 +297,17 @@ class IOPlatformExpertDevice : public IOService public: virtual bool initWithArgs( void * p1, void * p2, void * p3, void *p4 ); - virtual bool compareName( OSString * name, OSString ** matched = 0 ) const; + virtual bool compareName( OSString * name, OSString ** matched = 0 ) const APPLE_KEXT_OVERRIDE; + + virtual IOWorkLoop *getWorkLoop() const APPLE_KEXT_OVERRIDE; + virtual IOReturn setProperties( OSObject * properties ) APPLE_KEXT_OVERRIDE; + + virtual void free() APPLE_KEXT_OVERRIDE; - virtual IOWorkLoop *getWorkLoop() const; - virtual IOReturn setProperties( OSObject * properties ); + virtual IOReturn newUserClient( task_t owningTask, void * securityID, + UInt32 type, OSDictionary * properties, + IOUserClient ** handler) APPLE_KEXT_OVERRIDE; - virtual void free(); OSMetaClassDeclareReservedUnused(IOPlatformExpertDevice, 0); OSMetaClassDeclareReservedUnused(IOPlatformExpertDevice, 1); @@ -320,9 +327,9 @@ class IOPlatformDevice : public IOService ExpansionData *reserved; public: - virtual bool compareName( OSString * name, OSString ** matched = 0 ) const; - virtual IOService * matchLocation( IOService * client ); - virtual IOReturn getResources( void ); + virtual bool compareName( OSString * name, OSString ** matched = 0 ) const 
APPLE_KEXT_OVERRIDE; + virtual IOService * matchLocation( IOService * client ) APPLE_KEXT_OVERRIDE; + virtual IOReturn getResources( void ) APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(IOPlatformDevice, 0); OSMetaClassDeclareReservedUnused(IOPlatformDevice, 1); diff --git a/iokit/IOKit/IOPolledInterface.h b/iokit/IOKit/IOPolledInterface.h index 740ab11c5..584484eab 100644 --- a/iokit/IOKit/IOPolledInterface.h +++ b/iokit/IOKit/IOPolledInterface.h @@ -29,20 +29,25 @@ #ifndef _IOPOLLEDINTERFACE_H_ #define _IOPOLLEDINTERFACE_H_ -#include -#include - -#define kIOPolledInterfaceSupportKey "IOPolledInterface" -#define kIOPolledInterfaceActiveKey "IOPolledInterfaceActive" - enum { kIOPolledPreflightState = 1, kIOPolledBeforeSleepState = 2, kIOPolledAfterSleepState = 3, - kIOPolledPostflightState = 4 + kIOPolledPostflightState = 4, + + kIOPolledPreflightCoreDumpState = 5, }; +#if defined(__cplusplus) + +#include +#include + +#define kIOPolledInterfaceSupportKey "IOPolledInterface" +#define kIOPolledInterfaceActiveKey "IOPolledInterfaceActive" +#define kIOPolledInterfaceStackKey "IOPolledInterfaceStack" + enum { kIOPolledWrite = 1, @@ -82,8 +87,6 @@ class IOPolledInterface : public OSObject virtual IOReturn checkForWork(void) = 0; - static IOReturn checkAllForWork(void); - OSMetaClassDeclareReservedUnused(IOPolledInterface, 0); OSMetaClassDeclareReservedUnused(IOPolledInterface, 1); OSMetaClassDeclareReservedUnused(IOPolledInterface, 2); @@ -102,4 +105,152 @@ class IOPolledInterface : public OSObject OSMetaClassDeclareReservedUnused(IOPolledInterface, 15); }; +#endif /* defined(__cplusplus) */ + +#ifdef XNU_KERNEL_PRIVATE + +#include +#include +#include + +enum +{ + kIOPolledFileSSD = 0x00000001 +}; + +#if !defined(__cplusplus) +typedef struct IORegistryEntry IORegistryEntry; +typedef struct OSData OSData; +typedef struct OSArray OSArray; +typedef struct IOMemoryDescriptor IOMemoryDescriptor; +typedef struct IOPolledFilePollers IOPolledFilePollers; +#else +class 
IOPolledFilePollers; +#endif + +struct IOPolledFileIOVars +{ + IOPolledFilePollers * pollers; + struct kern_direct_file_io_ref_t * fileRef; + OSData * fileExtents; + uint64_t block0; + IOByteCount blockSize; + uint64_t maxiobytes; + IOByteCount bufferLimit; + uint8_t * buffer; + IOByteCount bufferSize; + IOByteCount bufferOffset; + IOByteCount bufferHalf; + IOByteCount extentRemaining; + IOByteCount lastRead; + IOByteCount readEnd; + uint32_t flags; + uint64_t fileSize; + uint64_t position; + uint64_t extentPosition; + uint64_t encryptStart; + uint64_t encryptEnd; + uint64_t cryptBytes; + AbsoluteTime cryptTime; + IOPolledFileExtent * extentMap; + IOPolledFileExtent * currentExtent; + bool allocated; +}; + +typedef struct IOPolledFileIOVars IOPolledFileIOVars; + +struct IOPolledFileCryptVars +{ + uint8_t aes_iv[AES_BLOCK_SIZE]; + aes_ctx ctx; +}; +typedef struct IOPolledFileCryptVars IOPolledFileCryptVars; + +#if defined(__cplusplus) + +IOReturn IOPolledFileOpen(const char * filename, + uint64_t setFileSize, uint64_t fsFreeSize, + void * write_file_addr, size_t write_file_len, + IOPolledFileIOVars ** fileVars, + OSData ** imagePath, + uint8_t * volumeCryptKey, size_t keySize); + +IOReturn IOPolledFileClose(IOPolledFileIOVars ** pVars, + off_t write_offset, void * addr, size_t write_length, + off_t discard_offset, off_t discard_end); + +IOReturn IOPolledFilePollersSetup(IOPolledFileIOVars * vars, uint32_t openState); + +IOMemoryDescriptor * IOPolledFileGetIOBuffer(IOPolledFileIOVars * vars); + +#endif /* defined(__cplusplus) */ + +#if defined(__cplusplus) +#define __C "C" +#else +#define __C +#endif + +extern __C IOReturn IOPolledFileSeek(IOPolledFileIOVars * vars, uint64_t position); + +extern __C IOReturn IOPolledFileWrite(IOPolledFileIOVars * vars, + const uint8_t * bytes, IOByteCount size, + IOPolledFileCryptVars * cryptvars); +extern __C IOReturn IOPolledFileRead(IOPolledFileIOVars * vars, + uint8_t * bytes, IOByteCount size, + IOPolledFileCryptVars * 
cryptvars); + +extern __C IOReturn IOPolledFilePollersOpen(IOPolledFileIOVars * vars, uint32_t state, bool abortable); + +extern __C IOReturn IOPolledFilePollersClose(IOPolledFileIOVars * vars, uint32_t state); + +extern __C IOPolledFileIOVars * gCoreFileVars; + +#ifdef _SYS_CONF_H_ + +__BEGIN_DECLS + +typedef void (*kern_get_file_extents_callback_t)(void * ref, uint64_t start, uint64_t size); + +struct kern_direct_file_io_ref_t * +kern_open_file_for_direct_io(const char * name, boolean_t create_file, + kern_get_file_extents_callback_t callback, + void * callback_ref, + off_t set_file_size, + off_t fs_free_size, + off_t write_file_offset, + void * write_file_addr, + size_t write_file_len, + dev_t * partition_device_result, + dev_t * image_device_result, + uint64_t * partitionbase_result, + uint64_t * maxiocount_result, + uint32_t * oflags); +void +kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, + off_t write_offset, void * addr, size_t write_length, + off_t discard_offset, off_t discard_end); +int +kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag); +int +kern_read_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag); + +struct mount * +kern_file_mount(struct kern_direct_file_io_ref_t * ref); + +enum +{ + kIOPolledFileMountChangeMount = 0x00000101, + kIOPolledFileMountChangeUnmount = 0x00000102, + kIOPolledFileMountChangeWillResize = 0x00000201, + kIOPolledFileMountChangeDidResize = 0x00000202, +}; +extern void IOPolledFileMountChange(struct mount * mp, uint32_t op); + +__END_DECLS + +#endif /* _SYS_CONF_H_ */ + +#endif /* XNU_KERNEL_PRIVATE */ + #endif /* _IOPOLLEDINTERFACE_H_ */ diff --git a/iokit/IOKit/IORangeAllocator.h b/iokit/IOKit/IORangeAllocator.h index d81bbefc2..290194bb4 100644 --- a/iokit/IOKit/IORangeAllocator.h +++ b/iokit/IOKit/IORangeAllocator.h @@ -98,8 +98,8 @@ class IORangeAllocator : public OSObject { UInt32 capacity = 0, 
IOOptionBits options = 0 ); - virtual void free(); - virtual bool serialize(OSSerialize *s) const; + virtual void free() APPLE_KEXT_OVERRIDE; + virtual bool serialize(OSSerialize *s) const APPLE_KEXT_OVERRIDE; /*! @function getFragmentCount @abstract Accessor to return the number of free fragments in the range. diff --git a/iokit/IOKit/IORegistryEntry.h b/iokit/IOKit/IORegistryEntry.h index dd34e1764..906baaa9f 100644 --- a/iokit/IOKit/IORegistryEntry.h +++ b/iokit/IOKit/IORegistryEntry.h @@ -261,7 +261,7 @@ member function's parameter list. @abstract Standard free method for all IORegistryEntry subclasses. @discussion This method will release any resources of the entry, in particular its property table. Note that the registry entry must always be detached from the registry before free may be called, and subclasses (namely IOService) will have additional protocols for removing registry entries. free should never need be called directly. */ - virtual void free( void ); + virtual void free( void ) APPLE_KEXT_OVERRIDE; /*! @function setPropertyTable @abstract Replace a registry entry's property table. @@ -854,7 +854,7 @@ class IORegistryIterator : public OSIterator const IORegistryPlane * plane; IOOptionBits options; - virtual void free( void ); + virtual void free( void ) APPLE_KEXT_OVERRIDE; public: /*! @function iterateOver @@ -884,7 +884,7 @@ class IORegistryIterator : public OSIterator @discussion This method calls either getNextObjectFlat or getNextObjectRecursive depending on the options the iterator was created with. This implements the OSIterator defined getNextObject method. The object returned is retained while the iterator is pointing at it (its the current entry), or recursing into it. The caller should not release it. @result The next registry entry in the iteration (the current entry), or zero if the iteration has finished at this level of recursion. 
The entry returned is retained while the iterator is pointing at it (its the current entry), or recursing into it. The caller should not release it. */ - virtual IORegistryEntry * getNextObject( void ); + virtual IORegistryEntry * getNextObject( void ) APPLE_KEXT_OVERRIDE; /*! @function getNextObjectFlat @abstract Return the next object in the registry iteration, ignoring the kIORegistryIterateRecursively option. @@ -931,18 +931,18 @@ class IORegistryIterator : public OSIterator @abstract Exits all levels of recursion, restoring the iterator to its state at creation. @discussion This method exits all levels of recursion, and restores the iterator to its state at creation. */ - virtual void reset( void ); + virtual void reset( void ) APPLE_KEXT_OVERRIDE; /*! @function isValid @abstract Checks that no registry changes have invalidated the iteration. @discussion If a registry iteration is invalidated by changes to the registry, it will be made invalid, the currentEntry will be considered zero, and further calls to getNextObject et al. will return zero. The iterator should be reset to restart the iteration when this happens. @result false if the iterator has been invalidated by changes to the registry, true otherwise. */ - virtual bool isValid( void ); + virtual bool isValid( void ) APPLE_KEXT_OVERRIDE; /*! @function iterateAll @abstract Iterates all entries (with getNextObject) and returns a set of all returned entries. - @discussion This method will reset, then iterate all entries in the iteration (with getNextObject) until successful (ie. the iterator is valid at the end of the iteration). + @discussion This method will reset, then iterate all entries in the iteration (with getNextObject). @result A set of entries returned by the iteration. The caller should release the set when it has finished with it. Zero is returned on a resource failure. 
*/ virtual OSOrderedSet * iterateAll( void ); diff --git a/iokit/IOKit/IOReportMacros.h b/iokit/IOKit/IOReportMacros.h index af1a1c27a..b8c6a4239 100644 --- a/iokit/IOKit/IOReportMacros.h +++ b/iokit/IOKit/IOReportMacros.h @@ -556,6 +556,148 @@ do { \ ->simple_values[(idx) % IOR_VALUES_PER_ELEMENT]) +/* ----- Histogram Reporting (HistogramReport) ----- */ + +// Internal struct for HistogramReport +typedef struct { + int bucketWidth; + IOReportElement elem[]; // Array of elements +} IOHistReportInfo; + +/* + * Determine the size required for a HistogramReport buffer. + * + * int nbuckets - number of buckets in the histogram + */ +#define HISTREPORT_BUFSIZE(nbuckets) \ + (sizeof(IOHistReportInfo) + ((nbuckets) * sizeof(IOReportElement))) + +/* + * Initialize a HistogramReport buffer. Supports only linear scale histogram. + * + * int nbuckets - number of buckets data is combined into + * uint32_t bucketWidth - size of each bucket + * void* buffer - ptr to HISTREPORT_BUFSIZE(nbuckets) bytes + * size_t bufSize - sanity check of buffer's size + * uint64_t providerID - registry Entry ID of the reporting service + * uint64_t channelID - ID of this channel, see IOREPORT_MAKEID() + * IOReportCategories categories - categories of this channel + * + * If the buffer is not of sufficient size, the macro invokes IOREPORT_ABORT. + * If that returns, the buffer is filled with 0xbadcafe. 
+ */ +#define HISTREPORT_INIT(nbuckets, bktSize, buf, bufSize, providerID, channelID, cats) \ +do { \ + IOHistReportInfo *__info = (IOHistReportInfo *)(buf); \ + IOReportElement *__elem; \ + IOHistogramReportValues *__rep; \ + if ((bufSize) >= HISTREPORT_BUFSIZE(nbuckets)) { \ + __info->bucketWidth = (bktSize); \ + for (unsigned __no = 0; __no < (nbuckets); __no++) { \ + __elem = &(__info->elem[__no]); \ + __rep = (IOHistogramReportValues *) &(__elem->values); \ + __elem->channel_id = (channelID); \ + __elem->provider_id = (providerID); \ + __elem->channel_type.report_format = kIOReportFormatHistogram; \ + __elem->channel_type.reserved = 0; \ + __elem->channel_type.categories = (cats); \ + __elem->channel_type.nelements = (nbuckets); \ + __elem->channel_type.element_idx = __no; \ + __elem->timestamp = 0; \ + bzero(__rep, sizeof(IOHistogramReportValues)); \ + } \ + } \ + else { \ + IOREPORT_ABORT("bufSize is smaller than the required size\n"); \ + __POLLUTE_BUF((buf), (bufSize)); \ + } \ +} while (0) + +/* + * Update histogram with a new value. 
+ * + * + * void* hist_buf - pointer to memory initialized by HISTREPORT_INIT() + * int64_t value - new value to add to the histogram + */ +#define HISTREPORT_TALLYVALUE(hist_buf, value) \ +do { \ + IOHistReportInfo *__info = (IOHistReportInfo *)(hist_buf); \ + IOReportElement *__elem; \ + IOHistogramReportValues *__rep; \ + for (unsigned __no = 0; __no < __info->elem[0].channel_type.nelements; __no++) { \ + if ((value) <= __info->bucketWidth * (__no+1)) { \ + __elem = &(__info->elem[__no]); \ + __rep = (IOHistogramReportValues *) &(__elem->values); \ + if (__rep->bucket_hits == 0) { \ + __rep->bucket_min = __rep->bucket_max = (value); \ + } \ + else if ((value) < __rep->bucket_min) { \ + __rep->bucket_min = (value); \ + } \ + else if ((value) > __rep->bucket_max) { \ + __rep->bucket_max = (value); \ + } \ + __rep->bucket_sum += (value); \ + __rep->bucket_hits++; \ + break; \ + } \ + } \ +} while (0) + +/* + * Prepare a HistogramReport for + * IOService::updateReport(kIOReportCopyChannelData...) + * + * void* array_buf - ptr to memory initialized by HISTREPORT_INIT() + * void* ptr2cpy - filled in with pointer to buffer to be copied out + * size_t size2cpy - filled in with the size of the buffer to copy out + */ + +#define HISTREPORT_UPDATEPREP(hist_buf, ptr2cpy, size2cpy) \ +do { \ + IOHistReportInfo *__info = (IOHistReportInfo *)(hist_buf); \ + (size2cpy) = __info->elem[0].channel_type.nelements * sizeof(IOReportElement); \ + (ptr2cpy) = (void *) &__info->elem[0]; \ +} while(0) + + +/* + * Update the result field received as a parameter for kIOReportGetDimensions & + * kIOReportCopyChannelData actions. 
+ * + * void* array_buf - memory initialized by HISTREPORT_INIT() + * IOReportConfigureAction action - configure/updateReport() 'action' + * void* result - configure/updateReport() 'result' + */ + +#define HISTREPORT_UPDATERES(hist_buf, action, result) \ +do { \ + IOHistReportInfo *__info = (IOHistReportInfo *)(hist_buf); \ + int *__nElements = (int *)(result); \ + if (((action) == kIOReportGetDimensions) || ((action) == kIOReportCopyChannelData)) { \ + *__nElements += __info->elem[0].channel_type.nelements; \ + } \ +} while (0) + +/* + * Get the 64-bit channel ID of a HistogramReport. + * + * void* hist_buf - ptr to memory initialized by HISTREPORT_INIT() + */ +#define HISTREPORT_GETCHID(hist_buf) \ + (((IOHistReportInfo *)(hist_buf))->elem[0].channel_id) + +/* + * Get the IOReportChannelType of a HistogramReport. + * + * void* hist_buf - ptr to memory initialized by HISTREPORT_INIT() + */ +#define HISTREPORT_GETCHTYPE(hist_buf) \ + (*(uint64_t*)&(((IOHistReportInfo *)(hist_buf))->elem[0].channel_type)) + + + /* generic utilities */ #define __POLLUTE_BUF(buf, bufSize) \ diff --git a/iokit/IOKit/IOReturn.h b/iokit/IOKit/IOReturn.h index d4071b9aa..83d05ce0a 100644 --- a/iokit/IOKit/IOReturn.h +++ b/iokit/IOKit/IOReturn.h @@ -61,13 +61,16 @@ typedef kern_return_t IOReturn; #define sub_iokit_powermanagement err_sub(13) #define sub_iokit_hidsystem err_sub(14) #define sub_iokit_scsi err_sub(16) +#define sub_iokit_usbaudio err_sub(17) //#define sub_iokit_pccard err_sub(21) #ifdef PRIVATE #define sub_iokit_nvme err_sub(28) #endif #define sub_iokit_thunderbolt err_sub(29) - +#define sub_iokit_platform err_sub(0x2A) #define sub_iokit_audio_video err_sub(0x45) +#define sub_iokit_baseband err_sub(0x80) +#define sub_iokit_HDA err_sub(254) #define sub_iokit_hsic err_sub(0x147) #define sub_iokit_sdio err_sub(0x174) #define sub_iokit_wlan err_sub(0x208) diff --git a/iokit/IOKit/IOService.h b/iokit/IOKit/IOService.h index 5d5c093bc..e369da9b4 100644 --- a/iokit/IOKit/IOService.h 
+++ b/iokit/IOKit/IOService.h @@ -607,17 +607,17 @@ virtual IOReturn updateReport(IOReportChannelList *channels, /*! @function init @abstract Initializes generic IOService data structures (expansion data, etc). */ - virtual bool init( OSDictionary * dictionary = 0 ); + virtual bool init( OSDictionary * dictionary = 0 ) APPLE_KEXT_OVERRIDE; /*! @function init @abstract Initializes generic IOService data structures (expansion data, etc). */ virtual bool init( IORegistryEntry * from, - const IORegistryPlane * inPlane ); + const IORegistryPlane * inPlane ) APPLE_KEXT_OVERRIDE; /*! @function free @abstract Frees data structures that were allocated when power management was initialized on this service. */ - virtual void free( void ); + virtual void free( void ) APPLE_KEXT_OVERRIDE; /*! @function lockForArbitration @abstract Locks an IOService object against changes in state or ownership. @@ -1258,7 +1258,7 @@ virtual IOReturn updateReport(IOReportChannelList *channels, IOInterruptSource *_interruptSources; /* overrides */ - virtual bool serializeProperties( OSSerialize * s ) const; + virtual bool serializeProperties( OSSerialize * s ) const APPLE_KEXT_OVERRIDE; #ifdef KERNEL_PRIVATE /* Apple only SPI to control CPU low power modes */ @@ -1285,6 +1285,7 @@ virtual IOReturn updateReport(IOReportChannelList *channels, void setTerminateDefer(IOService * provider, bool defer); uint64_t getAuthorizationID( void ); IOReturn setAuthorizationID( uint64_t authorizationID ); + void cpusRunning(void); private: static IOReturn waitMatchIdle( UInt32 ms ); @@ -1813,6 +1814,7 @@ virtual IOReturn updateReport(IOReportChannelList *channels, IOReturn changePowerStateWithOverrideTo( IOPMPowerStateIndex ordinal, IOPMRequestTag tag ); IOReturn changePowerStateForRootDomain( IOPMPowerStateIndex ordinal ); IOReturn setIgnoreIdleTimer( bool ignore ); + IOReturn quiescePowerTree( void * target, IOPMCompletionAction action, void * param ); uint32_t getPowerStateForClient( const OSSymbol * client 
); static const char * getIOMessageString( uint32_t msg ); static void setAdvisoryTickleEnable( bool enable ); @@ -1879,6 +1881,8 @@ virtual IOReturn updateReport(IOReportChannelList *channels, void stop_ack_timer ( void ); void start_ack_timer( UInt32 value, UInt32 scale ); void startSettleTimer( void ); + void start_spindump_timer( const char * delay_type ); + void stop_spindump_timer( void ); bool checkForDone ( void ); bool responseValid ( uint32_t x, int pid ); void computeDesiredState( unsigned long tempDesire, bool computeOnly ); @@ -1888,8 +1892,10 @@ virtual IOReturn updateReport(IOReportChannelList *channels, static void ack_timer_expired( thread_call_param_t, thread_call_param_t ); static void watchdog_timer_expired ( thread_call_param_t arg0, thread_call_param_t arg1 ); + static void spindump_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 ); static IOReturn actionAckTimerExpired(OSObject *, void *, void *, void *, void * ); static IOReturn watchdog_timer_expired ( OSObject *, void *, void *, void *, void * ); + static IOReturn actionSpinDumpTimerExpired(OSObject *, void *, void *, void *, void * ); static IOReturn actionDriverCalloutDone(OSObject *, void *, void *, void *, void * ); static IOPMRequest * acquirePMRequest( IOService * target, IOOptionBits type, IOPMRequest * active = 0 ); @@ -1899,6 +1905,8 @@ virtual IOReturn updateReport(IOReportChannelList *channels, static void pmTellClientWithResponse( OSObject * object, void * context ); static void pmTellCapabilityAppWithResponse ( OSObject * object, void * arg ); static void pmTellCapabilityClientWithResponse( OSObject * object, void * arg ); + static void submitPMRequest( IOPMRequest * request ); + static void submitPMRequests( IOPMRequest ** request, IOItemCount count ); bool ackTimerTick( void ); void addPowerChild1( IOPMRequest * request ); void addPowerChild2( IOPMRequest * request ); @@ -1914,14 +1922,12 @@ virtual IOReturn updateReport(IOReportChannelList *channels, void 
handleActivityTickle( IOPMRequest * request ); void handleInterestChanged( IOPMRequest * request ); void handleSynchronizePowerTree( IOPMRequest * request ); - void submitPMRequest( IOPMRequest * request ); - void submitPMRequest( IOPMRequest ** request, IOItemCount count ); void executePMRequest( IOPMRequest * request ); - bool servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ); - bool retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ); - bool servicePMRequestQueue( IOPMRequest * request, IOPMRequestQueue * queue ); - bool servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * queue ); - bool servicePMFreeQueue( IOPMRequest * request, IOPMCompletionQueue * queue ); + bool actionPMWorkQueueInvoke( IOPMRequest * request, IOPMWorkQueue * queue ); + bool actionPMWorkQueueRetire( IOPMRequest * request, IOPMWorkQueue * queue ); + bool actionPMRequestQueue( IOPMRequest * request, IOPMRequestQueue * queue ); + bool actionPMReplyQueue( IOPMRequest * request, IOPMRequestQueue * queue ); + bool actionPMCompletionQueue( IOPMRequest * request, IOPMCompletionQueue * queue ); bool notifyInterestedDrivers( void ); void notifyInterestedDriversDone( void ); bool notifyControllingDriver( void ); diff --git a/iokit/IOKit/IOServicePM.h b/iokit/IOKit/IOServicePM.h index 27d871234..17662cb4e 100644 --- a/iokit/IOKit/IOServicePM.h +++ b/iokit/IOKit/IOServicePM.h @@ -47,6 +47,8 @@ class IOPMRequest; class IOPMRequestQueue; class IOPMCompletionQueue; +typedef void (*IOPMCompletionAction)(void * target, void * param); + // PM channels for IOReporting #ifndef kPMPowerStatesChID #define kPMPowerStatesChID IOREPORT_MAKEID('P','M','S','t','H','i','s','t') diff --git a/iokit/IOKit/IOSharedDataQueue.h b/iokit/IOKit/IOSharedDataQueue.h index 09c5dc17e..16e7cdde2 100644 --- a/iokit/IOKit/IOSharedDataQueue.h +++ b/iokit/IOKit/IOSharedDataQueue.h @@ -64,7 +64,7 @@ class IOSharedDataQueue : public IODataQueue ExpansionData * _reserved; protected: - virtual void 
free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! * @function getQueueSize @@ -110,7 +110,7 @@ class IOSharedDataQueue : public IODataQueue * @param size The size of the data queue memory region. * @result Returns true on success and false on failure. */ - virtual Boolean initWithCapacity(UInt32 size); + virtual Boolean initWithCapacity(UInt32 size) APPLE_KEXT_OVERRIDE; /*! * @function getMemoryDescriptor @@ -118,7 +118,7 @@ class IOSharedDataQueue : public IODataQueue * @discussion The IOMemoryDescriptor instance returned by this method is intended to be mapped into a user process. This is the memory region that the IODataQueueClient code operates on. * @result Returns a newly allocated IOMemoryDescriptor for the IODataQueueMemory region. Returns zero on failure. */ - virtual IOMemoryDescriptor *getMemoryDescriptor(); + virtual IOMemoryDescriptor *getMemoryDescriptor() APPLE_KEXT_OVERRIDE; /*! * @function peek @@ -146,7 +146,7 @@ class IOSharedDataQueue : public IODataQueue * @param dataSize Size of the data pointed to by data. * @result Returns true on success and false on failure. Typically failure means that the queue is full. */ - virtual Boolean enqueue(void *data, UInt32 dataSize); + virtual Boolean enqueue(void *data, UInt32 dataSize) APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(IOSharedDataQueue, 0); OSMetaClassDeclareReservedUnused(IOSharedDataQueue, 1); diff --git a/iokit/IOKit/IOSubMemoryDescriptor.h b/iokit/IOKit/IOSubMemoryDescriptor.h index 0093ea32f..03f1850af 100644 --- a/iokit/IOKit/IOSubMemoryDescriptor.h +++ b/iokit/IOKit/IOSubMemoryDescriptor.h @@ -43,7 +43,7 @@ class IOSubMemoryDescriptor : public IOMemoryDescriptor IOMemoryDescriptor * _parent; IOByteCount _start; - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; public: /*! 
@function withSubRange @@ -79,19 +79,20 @@ class IOSubMemoryDescriptor : public IOMemoryDescriptor virtual addr64_t getPhysicalSegment( IOByteCount offset, IOByteCount * length, - IOOptionBits options = 0 ); + IOOptionBits options = 0 ) APPLE_KEXT_OVERRIDE; - virtual IOReturn prepare(IODirection forDirection = kIODirectionNone); + virtual IOReturn prepare(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE; - virtual IOReturn complete(IODirection forDirection = kIODirectionNone); + virtual IOReturn complete(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE; #ifdef __LP64__ - virtual -#endif /* __LP64__ */ + virtual IOReturn redirect( task_t safeTask, bool redirect ) APPLE_KEXT_OVERRIDE; +#else IOReturn redirect( task_t safeTask, bool redirect ); +#endif /* __LP64__ */ virtual IOReturn setPurgeable( IOOptionBits newState, - IOOptionBits * oldState ); + IOOptionBits * oldState ) APPLE_KEXT_OVERRIDE; // support map() on kIOMemoryTypeVirtual without prepare() virtual IOMemoryMap * makeMapping( @@ -100,10 +101,19 @@ class IOSubMemoryDescriptor : public IOMemoryDescriptor IOVirtualAddress atAddress, IOOptionBits options, IOByteCount offset, - IOByteCount length ); + IOByteCount length ) APPLE_KEXT_OVERRIDE; + + virtual uint64_t getPreparationID( void ) APPLE_KEXT_OVERRIDE; - virtual uint64_t getPreparationID( void ); +/*! @function getPageCounts + @abstract Retrieve the number of resident and/or dirty pages encompassed by an IOMemoryDescriptor. + @discussion This method returns the number of resident and/or dirty pages encompassed by an IOMemoryDescriptor. + @param residentPageCount - If non-null, a pointer to a byte count that will return the number of resident pages encompassed by this IOMemoryDescriptor. + @param dirtyPageCount - If non-null, a pointer to a byte count that will return the number of dirty pages encompassed by this IOMemoryDescriptor. + @result An IOReturn code. 
*/ + IOReturn getPageCounts(IOByteCount * residentPageCount, + IOByteCount * dirtyPageCount); }; #endif /* !_IOSUBMEMORYDESCRIPTOR_H */ diff --git a/iokit/IOKit/IOSyncer.h b/iokit/IOKit/IOSyncer.h index f6dfce383..dbdb443c1 100644 --- a/iokit/IOKit/IOSyncer.h +++ b/iokit/IOKit/IOSyncer.h @@ -41,7 +41,7 @@ class IOSyncer : public OSObject IOSimpleLock *guardLock; volatile bool threadMustStop; IOReturn fResult; - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; virtual void privateSignal(); public: diff --git a/iokit/IOKit/IOTimeStamp.h b/iokit/IOKit/IOTimeStamp.h index b063778a3..ab589f0d0 100644 --- a/iokit/IOKit/IOTimeStamp.h +++ b/iokit/IOKit/IOTimeStamp.h @@ -190,8 +190,16 @@ IOTimeStamp(uintptr_t csc, #define IOSERVICE_TERMINATE_STOP_DEFER 16 /* 0x05080040 */ #define IOSERVICE_TERMINATE_DONE 17 /* 0x05080044 */ -#define IOSERVICE_KEXTD_ALIVE 18 /* 0x05080048 */ -#define IOSERVICE_KEXTD_READY 19 /* 0x0508004C */ +#define IOSERVICE_KEXTD_ALIVE 18 /* 0x05080048 */ +#define IOSERVICE_KEXTD_READY 19 /* 0x0508004C */ #define IOSERVICE_REGISTRY_QUIET 20 /* 0x05080050 */ +#define IOSERVICE_TERM_SET_INACTIVE 21 /* 0x05080054 */ +#define IOSERVICE_TERM_SCHED_PHASE2 22 /* 0x05080058 */ +#define IOSERVICE_TERM_START_PHASE2 23 /* 0x0508005C */ +#define IOSERVICE_TERM_TRY_PHASE2 24 /* 0x05080060 */ +#define IOSERVICE_TERM_UC_DEFER 25 /* 0x05080064 */ +#define IOSERVICE_DETACH 26 /* 0x05080068 */ + + #endif /* ! IOKIT_IOTIMESTAMP_H */ diff --git a/iokit/IOKit/IOTimerEventSource.h b/iokit/IOKit/IOTimerEventSource.h index bbbeaf964..f5accffa3 100644 --- a/iokit/IOKit/IOTimerEventSource.h +++ b/iokit/IOKit/IOTimerEventSource.h @@ -93,9 +93,9 @@ class IOTimerEventSource : public IOEventSource /*! 
@function free @abstract Sub-class implementation of free method, frees calloutEntry */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; - virtual void setWorkLoop(IOWorkLoop *workLoop); + virtual void setWorkLoop(IOWorkLoop *workLoop) APPLE_KEXT_OVERRIDE; public: @@ -121,12 +121,12 @@ class IOTimerEventSource : public IOEventSource /*! @function enable @abstract Enables a call to the action. @discussion Allows the action function to be called. If the timer event source was disabled while a call was outstanding and the call wasn't cancelled then it will be rescheduled. So a disable/enable pair will disable calls from this event source. */ - virtual void enable(); + virtual void enable() APPLE_KEXT_OVERRIDE; /*! @function disable @abstract Disable a timed callout. @discussion When disable returns the action will not be called until the next time enable(qv) is called. */ - virtual void disable(); + virtual void disable() APPLE_KEXT_OVERRIDE; /*! @function setTimeoutTicks diff --git a/iokit/IOKit/IOTypes.h b/iokit/IOKit/IOTypes.h index aa8afd057..6d21a1294 100644 --- a/iokit/IOKit/IOTypes.h +++ b/iokit/IOKit/IOTypes.h @@ -219,6 +219,7 @@ enum { kIOMap64Bit = 0x08000000, #endif kIOMapPrefault = 0x10000000, + kIOMapOverwrite = 0x20000000 }; /*! 
@enum Scale Factors diff --git a/iokit/IOKit/IOUserClient.h b/iokit/IOKit/IOUserClient.h index c50e3ead9..6286b8535 100644 --- a/iokit/IOKit/IOUserClient.h +++ b/iokit/IOKit/IOUserClient.h @@ -309,8 +309,8 @@ class IOUserClient : public IOService */ static IOReturn releaseNotificationPort(mach_port_t port); - virtual bool init(); - virtual bool init( OSDictionary * dictionary ); + virtual bool init() APPLE_KEXT_OVERRIDE; + virtual bool init( OSDictionary * dictionary ) APPLE_KEXT_OVERRIDE; // Currently ignores the all args, just passes up to IOService::init() virtual bool initWithTask( task_t owningTask, void * securityToken, UInt32 type, @@ -319,7 +319,7 @@ class IOUserClient : public IOService virtual bool initWithTask( task_t owningTask, void * securityToken, UInt32 type); - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; virtual IOReturn clientClose( void ); virtual IOReturn clientDied( void ); diff --git a/iokit/IOKit/IOWorkLoop.h b/iokit/IOKit/IOWorkLoop.h index e248a9b3b..2db7b17ef 100644 --- a/iokit/IOKit/IOWorkLoop.h +++ b/iokit/IOKit/IOWorkLoop.h @@ -169,7 +169,7 @@ IOWorkLoop uses this to determine if the event source should be polled in runEve

If the client has some outstanding requests on an event they will never be informed of completion. If an external thread is blocked on any of the event sources they will be awakened with a KERN_INTERUPTED status. */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! @function threadMain @discussion Work loop threads main function. This function consists of 3 @@ -201,7 +201,7 @@ IOWorkLoop uses this to determine if the event source should be polled in runEve @discussion Initializes an instance of the workloop. This method creates and initializes the signaling semaphore, the controller gate lock, and spawns the thread that will continue executing. @result Returns true if initialized successfully, false otherwise. */ - virtual bool init(); + virtual bool init() APPLE_KEXT_OVERRIDE; /*! @function getThread @abstract Gets the workThread. @@ -264,6 +264,7 @@ IOWorkLoop uses this to determine if the event source should be polled in runEve // Internal APIs used by event sources to control the thread friend class IOEventSource; friend class IOTimerEventSource; + friend class IOCommandGate; #if IOKITSTATS friend class IOStatistics; #endif diff --git a/iokit/IOKit/nvram/IONVRAMController.h b/iokit/IOKit/nvram/IONVRAMController.h index 307d666a6..29b2b722e 100644 --- a/iokit/IOKit/nvram/IONVRAMController.h +++ b/iokit/IOKit/nvram/IONVRAMController.h @@ -36,7 +36,7 @@ class IONVRAMController: public IOService OSDeclareAbstractStructors(IONVRAMController); public: - virtual bool start(IOService *provider); + virtual void registerService(IOOptionBits options = 0) APPLE_KEXT_OVERRIDE; virtual void sync(void); diff --git a/iokit/IOKit/platform/AppleMacIO.h b/iokit/IOKit/platform/AppleMacIO.h index ae12eca13..2c72d58c5 100644 --- a/iokit/IOKit/platform/AppleMacIO.h +++ b/iokit/IOKit/platform/AppleMacIO.h @@ -54,7 +54,7 @@ class AppleMacIO : public IOService virtual bool selfTest( void ); public: - virtual bool start( IOService * provider ); + virtual bool start( 
IOService * provider ) APPLE_KEXT_OVERRIDE; virtual IOService * createNub( IORegistryEntry * from ); diff --git a/iokit/IOKit/platform/AppleMacIODevice.h b/iokit/IOKit/platform/AppleMacIODevice.h index 8d033fd25..7a676e116 100644 --- a/iokit/IOKit/platform/AppleMacIODevice.h +++ b/iokit/IOKit/platform/AppleMacIODevice.h @@ -47,9 +47,9 @@ class AppleMacIODevice : public IOService ExpansionData *reserved; public: - virtual bool compareName( OSString * name, OSString ** matched = 0 ) const; - virtual IOService *matchLocation(IOService *client); - virtual IOReturn getResources( void ); + virtual bool compareName( OSString * name, OSString ** matched = 0 ) const APPLE_KEXT_OVERRIDE; + virtual IOService *matchLocation(IOService *client) APPLE_KEXT_OVERRIDE; + virtual IOReturn getResources( void ) APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(AppleMacIODevice, 0); OSMetaClassDeclareReservedUnused(AppleMacIODevice, 1); diff --git a/iokit/IOKit/platform/AppleNMI.h b/iokit/IOKit/platform/AppleNMI.h index 14cd1ed10..216f22074 100644 --- a/iokit/IOKit/platform/AppleNMI.h +++ b/iokit/IOKit/platform/AppleNMI.h @@ -60,12 +60,12 @@ class AppleNMI : public IOService public: IOService *rootDomain; - virtual bool start(IOService *provider); + virtual bool start(IOService *provider) APPLE_KEXT_OVERRIDE; virtual IOReturn initNMI(IOInterruptController *parentController, OSData *parentSource); virtual IOReturn handleInterrupt(void *refCon, IOService *nub, int source); // Power handling methods: - virtual IOReturn powerStateWillChangeTo(IOPMPowerFlags, unsigned long, IOService*); + virtual IOReturn powerStateWillChangeTo(IOPMPowerFlags, unsigned long, IOService*) APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(AppleNMI, 0); OSMetaClassDeclareReservedUnused(AppleNMI, 1); diff --git a/iokit/IOKit/platform/ApplePlatformExpert.h b/iokit/IOKit/platform/ApplePlatformExpert.h index 59728d23d..0f75950d3 100644 --- a/iokit/IOKit/platform/ApplePlatformExpert.h +++ 
b/iokit/IOKit/platform/ApplePlatformExpert.h @@ -69,17 +69,17 @@ class ApplePlatformExpert : public IODTPlatformExpert ExpansionData *reserved; public: - virtual bool start( IOService * provider ); - virtual bool configure( IOService * provider ); - virtual const char * deleteList( void ); - virtual const char * excludeList( void ); + virtual bool start( IOService * provider ) APPLE_KEXT_OVERRIDE; + virtual bool configure( IOService * provider ) APPLE_KEXT_OVERRIDE; + virtual const char * deleteList( void ) APPLE_KEXT_OVERRIDE; + virtual const char * excludeList( void ) APPLE_KEXT_OVERRIDE; - virtual void registerNVRAMController( IONVRAMController * nvram ); + virtual void registerNVRAMController( IONVRAMController * nvram ) APPLE_KEXT_OVERRIDE; - virtual long getGMTTimeOfDay(void); - virtual void setGMTTimeOfDay(long secs); + virtual long getGMTTimeOfDay(void) APPLE_KEXT_OVERRIDE; + virtual void setGMTTimeOfDay(long secs) APPLE_KEXT_OVERRIDE; - virtual bool getMachineName(char *name, int maxLength); + virtual bool getMachineName(char *name, int maxLength) APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(ApplePlatformExpert, 0); OSMetaClassDeclareReservedUnused(ApplePlatformExpert, 1); diff --git a/iokit/IOKit/pwr_mgt/IOPMPowerSource.h b/iokit/IOKit/pwr_mgt/IOPMPowerSource.h index cb1b62744..cd0db25bf 100644 --- a/iokit/IOKit/pwr_mgt/IOPMPowerSource.h +++ b/iokit/IOKit/pwr_mgt/IOPMPowerSource.h @@ -218,9 +218,9 @@ class IOPMPowerSource : public IOService */ static IOPMPowerSource *powerSource(void); - virtual bool init(void); + virtual bool init(void) APPLE_KEXT_OVERRIDE; - virtual void free(void); + virtual void free(void) APPLE_KEXT_OVERRIDE; /*! 
@function updateStatus @abstract Must be called by physical battery controller when battery state diff --git a/iokit/IOKit/pwr_mgt/IOPMPowerSourceList.h b/iokit/IOKit/pwr_mgt/IOPMPowerSourceList.h index cb1c8ea37..cede5e137 100644 --- a/iokit/IOKit/pwr_mgt/IOPMPowerSourceList.h +++ b/iokit/IOKit/pwr_mgt/IOPMPowerSourceList.h @@ -42,7 +42,7 @@ class IOPMPowerSourceList : public OSObject public: void initialize(void); - void free(void); + void free(void) APPLE_KEXT_OVERRIDE; unsigned long numberOfItems(void); IOReturn addToList(IOPMPowerSource *newPowerSource); diff --git a/iokit/IOKit/pwr_mgt/IOPMPrivate.h b/iokit/IOKit/pwr_mgt/IOPMPrivate.h index 0163072dd..25226da74 100644 --- a/iokit/IOKit/pwr_mgt/IOPMPrivate.h +++ b/iokit/IOKit/pwr_mgt/IOPMPrivate.h @@ -97,6 +97,12 @@ enum { #define kIOPMMessageLastCallBeforeSleep \ iokit_family_msg(sub_iokit_powermanagement, 0x410) +#define kIOPMMessageIdleSleepPreventers \ + iokit_family_msg(sub_iokit_powermanagement, 0x420) + +#define kIOPMMessageSystemSleepPreventers \ + iokit_family_msg(sub_iokit_powermanagement, 0x430) + /* @enum SystemSleepReasons * @abstract The potential causes for system sleep as logged in the system event record. 
*/ @@ -258,6 +264,8 @@ enum { // Keys for IOPMrootDomain registry properties #define kIOPMSleepStatisticsKey "SleepStatistics" #define kIOPMSleepStatisticsAppsKey "AppStatistics" +#define kIOPMIdleSleepPreventersKey "IdleSleepPreventers" +#define kIOPMSystemSleepPreventersKey "SystemSleepPreventers" // Application response statistics #define kIOPMStatsNameKey "Name" @@ -800,6 +808,9 @@ typedef struct { #define SWD_BUF_SIZE (40*PAGE_SIZE) #define SWD_INITIAL_STACK_SIZE ((SWD_BUF_SIZE/2)-sizeof(swd_hdr)) +#define SWD_SPINDUMP_SIZE (256*1024) +#define SWD_INITIAL_SPINDUMP_SIZE ((SWD_SPINDUMP_SIZE/2)-sizeof(swd_hdr)) + /* Bits in swd_flags */ #define SWD_WDOG_ENABLED 0x01 #define SWD_BOOT_BY_SW_WDOG 0x02 @@ -809,6 +820,7 @@ typedef struct { #define SWD_LOGS_IN_MEM 0x20 /* Filenames associated with the stackshots/logs generated by the SWD */ +#define kSleepWakeStackBinFilename "/var/log/SleepWakeStacks.bin" #define kSleepWakeStackFilename "/var/log/SleepWakeStacks.dump" #define kSleepWakeLogFilename "/var/log/SleepWakeLog.dump" #define kAppleOSXWatchdogStackFilename "/var/log/AppleOSXWatchdogStacks.dump" @@ -833,6 +845,19 @@ inline char const* getDumpLogFilename(swd_hdr *hdr) #define kDarkWkCntChID IOREPORT_MAKEID('G','U','I','W','k','C','n','t') #define kUserWkCntChID IOREPORT_MAKEID('D','r','k','W','k','C','n','t') +/* + * kAssertDelayChID - Histogram of time elapsed before assertion after wake. 
+ */ +#define kAssertDelayBcktCnt 11 +#define kAssertDelayBcktSize 3 +#define kAssertDelayChID IOREPORT_MAKEID('r','d','A','s','r','t','D','l') + +/* + * kSleepDelaysChID - Histogram of time taken to put system to sleep + */ +#define kSleepDelaysBcktCnt 13 +#define kSleepDelaysBcktSize 10 +#define kSleepDelaysChID IOREPORT_MAKEID('r','d','S','l','p','D','l','y') /* Sleep Options/settings */ #define kSleepOptionDisplayCapturedModeKey "DisplayCapturedMode" diff --git a/iokit/IOKit/pwr_mgt/IOPMinformee.h b/iokit/IOKit/pwr_mgt/IOPMinformee.h index a06eb788c..9d4e0c3b9 100644 --- a/iokit/IOKit/pwr_mgt/IOPMinformee.h +++ b/iokit/IOKit/pwr_mgt/IOPMinformee.h @@ -41,7 +41,7 @@ class IOPMinformee : public OSObject void initialize( IOService * theObject ); - void free( void ); + void free( void ) APPLE_KEXT_OVERRIDE; public: IOService * whatObject; // interested driver diff --git a/iokit/IOKit/pwr_mgt/IOPMinformeeList.h b/iokit/IOKit/pwr_mgt/IOPMinformeeList.h index 8efd4a654..f06689def 100644 --- a/iokit/IOKit/pwr_mgt/IOPMinformeeList.h +++ b/iokit/IOKit/pwr_mgt/IOPMinformeeList.h @@ -31,6 +31,7 @@ class IOPMinformee; class IOService; +extern uint32_t gCanSleepTimeout; class IOPMinformeeList : public OSObject { @@ -45,7 +46,7 @@ OSDeclareDefaultStructors(IOPMinformeeList) public: void initialize ( void ); - void free ( void ); + void free ( void ) APPLE_KEXT_OVERRIDE; unsigned long numberOfItems ( void ); diff --git a/iokit/IOKit/pwr_mgt/IOPMlog.h b/iokit/IOKit/pwr_mgt/IOPMlog.h index 41727cdcb..b950816e0 100644 --- a/iokit/IOKit/pwr_mgt/IOPMlog.h +++ b/iokit/IOKit/pwr_mgt/IOPMlog.h @@ -80,5 +80,6 @@ enum PMLogEnum { kPMLogSetPinGroup, // 52 0x050700d0 - NOT USED kPMLogIdleCancel, // 53 0x050700d4 - device unidle during change kPMLogSleepWakeTracePoint, // 54 0x050700d8 - kIOPMTracePoint markers + kPMLogQuiescePowerTree, // 55 0x050700dc kIOPMlogLastEvent }; diff --git a/iokit/IOKit/pwr_mgt/IOPMpowerState.h b/iokit/IOKit/pwr_mgt/IOPMpowerState.h index dd945a43f..f4f8dd0bf 
100644 --- a/iokit/IOKit/pwr_mgt/IOPMpowerState.h +++ b/iokit/IOKit/pwr_mgt/IOPMpowerState.h @@ -25,6 +25,10 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ + +#ifndef _IOKIT_IOPMPOWERSTATE_H +#define _IOKIT_IOPMPOWERSTATE_H + #include /*! @header IOPMpowerState.h @@ -71,3 +75,5 @@ enum { kIOPMPowerStateVersion1 = 1, kIOPMPowerStateVersion2 = 2 }; + +#endif /* _IOKIT_IOPMPOWERSTATE_H */ diff --git a/iokit/IOKit/pwr_mgt/RootDomain.h b/iokit/IOKit/pwr_mgt/RootDomain.h index 071575abf..21ca1a97a 100644 --- a/iokit/IOKit/pwr_mgt/RootDomain.h +++ b/iokit/IOKit/pwr_mgt/RootDomain.h @@ -152,16 +152,16 @@ class IOPMrootDomain: public IOService public: static IOPMrootDomain * construct( void ); - virtual bool start( IOService * provider ); - virtual IOReturn setAggressiveness( unsigned long, unsigned long ); - virtual IOReturn getAggressiveness( unsigned long, unsigned long * ); + virtual bool start( IOService * provider ) APPLE_KEXT_OVERRIDE; + virtual IOReturn setAggressiveness( unsigned long, unsigned long ) APPLE_KEXT_OVERRIDE; + virtual IOReturn getAggressiveness( unsigned long, unsigned long * ) APPLE_KEXT_OVERRIDE; virtual IOReturn sleepSystem( void ); IOReturn sleepSystemOptions( OSDictionary *options ); - virtual IOReturn setProperties( OSObject * ); - virtual bool serializeProperties( OSSerialize * s ) const; - virtual OSObject * copyProperty( const char * aKey ) const; + virtual IOReturn setProperties( OSObject * ) APPLE_KEXT_OVERRIDE; + virtual bool serializeProperties( OSSerialize * s ) const APPLE_KEXT_OVERRIDE; + virtual OSObject * copyProperty( const char * aKey ) const APPLE_KEXT_OVERRIDE; /*! 
@function systemPowerEventOccurred @abstract Other drivers may inform IOPMrootDomain of system PM events @@ -329,13 +329,13 @@ class IOPMrootDomain: public IOService virtual IONotifier * registerInterest( const OSSymbol * typeOfInterest, IOServiceInterestHandler handler, - void * target, void * ref = 0 ); + void * target, void * ref = 0 ) APPLE_KEXT_OVERRIDE; virtual IOReturn callPlatformFunction( const OSSymbol *functionName, bool waitForFunction, void *param1, void *param2, - void *param3, void *param4 ); + void *param3, void *param4 ) APPLE_KEXT_OVERRIDE; /*! @function createPMAssertion @abstract Creates an assertion to influence system power behavior. @@ -392,22 +392,29 @@ class IOPMrootDomain: public IOService IOReturn restartWithStackshot(); private: - virtual IOReturn changePowerStateTo( unsigned long ordinal ); + virtual IOReturn changePowerStateTo( unsigned long ordinal ) APPLE_KEXT_COMPATIBILITY_OVERRIDE; virtual IOReturn changePowerStateToPriv( unsigned long ordinal ); - virtual IOReturn requestPowerDomainState( IOPMPowerFlags, IOPowerConnection *, unsigned long ); - virtual void powerChangeDone( unsigned long ); - virtual bool tellChangeDown( unsigned long ); - virtual bool askChangeDown( unsigned long ); - virtual void tellChangeUp( unsigned long ); - virtual void tellNoChangeDown( unsigned long ); + virtual IOReturn requestPowerDomainState( IOPMPowerFlags, IOPowerConnection *, unsigned long ) APPLE_KEXT_OVERRIDE; + virtual void powerChangeDone( unsigned long ) APPLE_KEXT_OVERRIDE; + virtual bool tellChangeDown( unsigned long ) APPLE_KEXT_OVERRIDE; + virtual bool askChangeDown( unsigned long ) APPLE_KEXT_OVERRIDE; + virtual void tellChangeUp( unsigned long ) APPLE_KEXT_OVERRIDE; + virtual void tellNoChangeDown( unsigned long ) APPLE_KEXT_OVERRIDE; virtual IOReturn configureReport(IOReportChannelList *channels, IOReportConfigureAction action, void *result, - void *destination); + void *destination) APPLE_KEXT_OVERRIDE; virtual IOReturn 
updateReport(IOReportChannelList *channels, IOReportUpdateAction action, void *result, - void *destination); + void *destination) APPLE_KEXT_OVERRIDE; + + void configureReportGated(uint64_t channel_id, + uint64_t action, + void *result); + IOReturn updateReportGated(uint64_t ch_id, + void *result, + IOBufferMemoryDescriptor *dest); #ifdef XNU_KERNEL_PRIVATE /* Root Domain internals */ @@ -479,6 +486,8 @@ class IOPMrootDomain: public IOService void handleQueueSleepWakeUUID( OSObject *obj); + void handleDisplayPowerOn( ); + void willNotifyPowerChildren( IOPMPowerStateIndex newPowerState ); IOReturn setMaintenanceWakeCalendar( @@ -538,11 +547,12 @@ class IOPMrootDomain: public IOService uint32_t * hibernateFreeRatio, uint32_t * hibernateFreeTime ); #endif - void takeStackshot(bool restart, bool isOSXWatchdog); + void takeStackshot(bool restart, bool isOSXWatchdog, bool isSpinDump); void sleepWakeDebugTrig(bool restart); void sleepWakeDebugEnableWdog(); bool sleepWakeDebugIsWdogEnabled(); static void saveTimeoutAppStackShot(void *p0, void *p1); + void sleepWakeDebugSaveSpinDumpFile(); private: friend class PMSettingObject; @@ -612,6 +622,16 @@ class IOPMrootDomain: public IOService OSArray *pmStatsAppResponses; IOLock *pmStatsLock; // guards pmStatsAppResponses + void *sleepDelaysReport; // report to track time taken to go to sleep + uint32_t sleepDelaysClientCnt; // Number of interested clients in sleepDelaysReport + uint64_t ts_sleepStart; + uint64_t wake2DarkwakeDelay; // Time taken to change from full wake -> Dark wake + + + void *assertOnWakeReport; // report to track time spent without any assertions held after wake + uint32_t assertOnWakeClientCnt; // Number of clients interested in assertOnWakeReport + clock_sec_t assertOnWakeSecs; // Num of secs after wake for first assertion + bool uuidPublished; // Pref: idle time before idle sleep @@ -628,6 +648,7 @@ class IOPMrootDomain: public IOService thread_call_t diskSyncCalloutEntry; thread_call_t fullWakeThreadCall; 
thread_call_t hibDebugSetupEntry; + thread_call_t updateConsoleUsersEntry; // Track system capabilities. uint32_t _desiredCapability; @@ -694,6 +715,8 @@ class IOPMrootDomain: public IOService unsigned int displayIdleForDemandSleep :1; unsigned int darkWakeHibernateError :1; unsigned int thermalWarningState:1; + unsigned int toldPowerdCapWillChange :1; + unsigned int displayPowerOnRequested:1; uint32_t hibernateMode; AbsoluteTime userActivityTime; @@ -754,6 +777,7 @@ class IOPMrootDomain: public IOService volatile uint32_t swd_lock; /* Lock to access swd_buffer & and its header */ void * swd_buffer; /* Memory allocated for dumping sleep/wake logs */ uint8_t swd_flags; /* Flags defined in IOPMPrivate.h */ + void * swd_spindump_buffer; IOMemoryMap * swd_logBufMap; /* Memory with sleep/wake logs from previous boot */ @@ -819,12 +843,15 @@ class IOPMrootDomain: public IOService void deregisterPMSettingObject( PMSettingObject * pmso ); + void checkForValidDebugData(const char *fname, vfs_context_t *ctx, + void *tmpBuf, struct vnode **vp); void sleepWakeDebugMemAlloc( ); + void sleepWakeDebugSpinDumpMemAlloc( ); void sleepWakeDebugDumpFromMem(IOMemoryMap *logBufMap); void sleepWakeDebugDumpFromFile( ); IOMemoryMap *sleepWakeDebugRetrieve(); errno_t sleepWakeDebugSaveFile(const char *name, char *buf, int len); - errno_t sleepWakeDebugCopyFile( struct vnode *srcVp, + errno_t sleepWakeDebugCopyFile( struct vnode *srcVp, vfs_context_t srcCtx, char *tmpBuf, uint64_t tmpBufSize, uint64_t srcOffset, @@ -848,6 +875,7 @@ class IOPMrootDomain: public IOService void systemDidNotSleep( void ); void preventTransitionToUserActive( bool prevent ); void setThermalState(OSObject *value); + void copySleepPreventersList(OSArray **idleSleepList, OSArray **systemSleepList); #endif /* XNU_KERNEL_PRIVATE */ }; @@ -858,8 +886,8 @@ class IORootParent: public IOService public: static void initialize( void ); - virtual OSObject * copyProperty( const char * aKey ) const; - bool start( IOService * 
nub ); + virtual OSObject * copyProperty( const char * aKey ) const APPLE_KEXT_OVERRIDE; + bool start( IOService * nub ) APPLE_KEXT_OVERRIDE; void shutDownSystem( void ); void restartSystem( void ); void sleepSystem( void ); diff --git a/iokit/IOKit/system_management/IOWatchDogTimer.h b/iokit/IOKit/system_management/IOWatchDogTimer.h index a89b95bd1..ef5db4353 100644 --- a/iokit/IOKit/system_management/IOWatchDogTimer.h +++ b/iokit/IOKit/system_management/IOWatchDogTimer.h @@ -41,9 +41,9 @@ class IOWatchDogTimer : public IOService ExpansionData *reserved; public: - virtual bool start(IOService *provider); - virtual void stop(IOService *provider); - virtual IOReturn setProperties(OSObject *properties); + virtual bool start(IOService *provider) APPLE_KEXT_OVERRIDE; + virtual void stop(IOService *provider) APPLE_KEXT_OVERRIDE; + virtual IOReturn setProperties(OSObject *properties) APPLE_KEXT_OVERRIDE; virtual void setWatchDogTimer(UInt32 timeOut) = 0; OSMetaClassDeclareReservedUnused(IOWatchDogTimer, 0); diff --git a/iokit/Kernel/IOBufferMemoryDescriptor.cpp b/iokit/Kernel/IOBufferMemoryDescriptor.cpp index 1c486fa0f..9f1902605 100644 --- a/iokit/Kernel/IOBufferMemoryDescriptor.cpp +++ b/iokit/Kernel/IOBufferMemoryDescriptor.cpp @@ -84,7 +84,7 @@ static uintptr_t IOBMDPageProc(iopa_t * a) int options = 0; // KMA_LOMEM; kr = kernel_memory_allocate(kernel_map, &vmaddr, - page_size, 0, options); + page_size, 0, options, VM_KERN_MEMORY_IOKIT); if (KERN_SUCCESS != kr) vmaddr = 0; else bzero((void *) vmaddr, page_size); @@ -167,6 +167,8 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( _alignment = alignment; + if ((capacity + alignment) < _capacity) return (false); + if ((inTask != kernel_task) && !(options & kIOMemoryPageable)) return false; @@ -232,7 +234,7 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( { IOStatisticsAlloc(kIOStatisticsMallocAligned, capacity); #if IOALLOCDEBUG - debug_iomalloc_size += capacity; + OSAddAtomic(capacity, 
&debug_iomalloc_size); #endif } } @@ -259,7 +261,7 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( if( options & kIOMemoryPageable) { #if IOALLOCDEBUG - debug_iomallocpageable_size += size; + OSAddAtomicLong(size, &debug_iomallocpageable_size); #endif mapTask = inTask; if (NULL == inTask) @@ -492,7 +494,7 @@ void IOBufferMemoryDescriptor::free() if (options & kIOMemoryPageable) { #if IOALLOCDEBUG - debug_iomallocpageable_size -= round_page(size); + OSAddAtomicLong(-(round_page(size)), &debug_iomallocpageable_size); #endif } else if (buffer) @@ -512,7 +514,7 @@ void IOBufferMemoryDescriptor::free() kmem_free(kernel_map, page, page_size); } #if IOALLOCDEBUG - debug_iomalloc_size -= size; + OSAddAtomic(-size, &debug_iomalloc_size); #endif IOStatisticsAlloc(kIOStatisticsFreeAligned, size); } diff --git a/iokit/Kernel/IOCPU.cpp b/iokit/Kernel/IOCPU.cpp index 00504e9eb..47a17b5a1 100644 --- a/iokit/Kernel/IOCPU.cpp +++ b/iokit/Kernel/IOCPU.cpp @@ -66,80 +66,35 @@ struct iocpu_platform_action_entry }; typedef struct iocpu_platform_action_entry iocpu_platform_action_entry_t; -queue_head_t * -iocpu_get_platform_quiesce_queue(void); - -queue_head_t * -iocpu_get_platform_active_queue(void); - -void -iocpu_platform_cpu_action_init(queue_head_t * quiesce_queue, queue_head_t * init_queue); - -void -iocpu_add_platform_action(queue_head_t * queue, iocpu_platform_action_entry_t * entry); - -void -iocpu_remove_platform_action(iocpu_platform_action_entry_t * entry); - -kern_return_t -iocpu_run_platform_actions(queue_head_t * queue, uint32_t first_priority, uint32_t last_priority, - void * param1, void * param2, void * param3); - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #define kBootCPUNumber 0 -static iocpu_platform_action_entry_t * gIOAllActionsQueue; -static queue_head_t gIOSleepActionQueue; -static queue_head_t gIOWakeActionQueue; - -static queue_head_t iocpu_quiesce_queue; -static queue_head_t iocpu_active_queue; - -static queue_head_t 
gIOHaltRestartActionQueue; - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -void -iocpu_platform_cpu_action_init(queue_head_t * quiesce_queue, __unused queue_head_t * init_queue) +enum { -#if 0 - enum { kNumQuiesceActions = 2 }; - static iocpu_platform_action_entry_t quiesce_actions[kNumQuiesceActions] = - { - { { NULL, NULL }, (iocpu_platform_action_t) &clean_mmu_dcache, 97000, 0, 0, NULL }, - { { NULL, NULL }, (iocpu_platform_action_t) &arm_sleep, 99000, 0, 0, NULL }, - }; - unsigned int idx; + kQueueSleep = 0, + kQueueWake = 1, + kQueueQuiesce = 2, + kQueueActive = 3, + kQueueHaltRestart = 4, + kQueuePanic = 5, + kQueueCount = 6 +}; - for (idx = 0; idx < kNumQuiesceActions; idx++) - iocpu_add_platform_action(quiesce_queue, &quiesce_actions[idx]); -#endif -} +const OSSymbol * gIOPlatformSleepActionKey; +const OSSymbol * gIOPlatformWakeActionKey; +const OSSymbol * gIOPlatformQuiesceActionKey; +const OSSymbol * gIOPlatformActiveActionKey; +const OSSymbol * gIOPlatformHaltRestartActionKey; +const OSSymbol * gIOPlatformPanicActionKey; -queue_head_t * iocpu_get_platform_quiesce_queue(void) -{ - if (!iocpu_quiesce_queue.next) - { - queue_init(&iocpu_quiesce_queue); - queue_init(&iocpu_active_queue); - iocpu_platform_cpu_action_init(&iocpu_quiesce_queue, &iocpu_active_queue); - } - return (&iocpu_quiesce_queue); -} +static queue_head_t gActionQueues[kQueueCount]; +static const OSSymbol * gActionSymbols[kQueueCount]; -queue_head_t * iocpu_get_platform_active_queue(void) -{ - if (!iocpu_active_queue.next) - { - queue_init(&iocpu_quiesce_queue); - queue_init(&iocpu_active_queue); - iocpu_platform_cpu_action_init(&iocpu_quiesce_queue, &iocpu_active_queue); - } - return (&iocpu_active_queue); -} +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -void iocpu_add_platform_action(queue_head_t * queue, iocpu_platform_action_entry_t * entry) +static void +iocpu_add_platform_action(queue_head_t * 
queue, iocpu_platform_action_entry_t * entry) { iocpu_platform_action_entry_t * next; @@ -154,12 +109,13 @@ void iocpu_add_platform_action(queue_head_t * queue, iocpu_platform_action_entry queue_enter(queue, entry, iocpu_platform_action_entry_t *, link); // at tail } -void iocpu_remove_platform_action(iocpu_platform_action_entry_t * entry) +static void +iocpu_remove_platform_action(iocpu_platform_action_entry_t * entry) { remque(&entry->link); } -kern_return_t +static kern_return_t iocpu_run_platform_actions(queue_head_t * queue, uint32_t first_priority, uint32_t last_priority, void * param1, void * param2, void * param3) { @@ -186,17 +142,33 @@ iocpu_run_platform_actions(queue_head_t * queue, uint32_t first_priority, uint32 extern "C" kern_return_t IOCPURunPlatformQuiesceActions(void) { - return (iocpu_run_platform_actions(iocpu_get_platform_quiesce_queue(), 0, 0U-1, + return (iocpu_run_platform_actions(&gActionQueues[kQueueQuiesce], 0, 0U-1, NULL, NULL, NULL)); } extern "C" kern_return_t IOCPURunPlatformActiveActions(void) { - return (iocpu_run_platform_actions(iocpu_get_platform_active_queue(), 0, 0U-1, + return (iocpu_run_platform_actions(&gActionQueues[kQueueActive], 0, 0U-1, NULL, NULL, NULL)); } +extern "C" kern_return_t +IOCPURunPlatformHaltRestartActions(uint32_t message) +{ + return (iocpu_run_platform_actions(&gActionQueues[kQueueHaltRestart], 0, 0U-1, + (void *)(uintptr_t) message, NULL, NULL)); +} + +extern "C" kern_return_t +IOCPURunPlatformPanicActions(uint32_t message) +{ + return (iocpu_run_platform_actions(&gActionQueues[kQueuePanic], 0, 0U-1, + (void *)(uintptr_t) message, NULL, NULL)); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + static kern_return_t IOServicePlatformAction(void * refcon0, void * refcon1, uint32_t priority, void * param1, void * param2, void * param3, @@ -215,17 +187,39 @@ IOServicePlatformAction(void * refcon0, void * refcon1, uint32_t priority, } static void 
-IOInstallServicePlatformAction(IOService * service, - const OSSymbol * key, queue_head_t * queue, - bool reverse) +IOInstallServicePlatformAction(IOService * service, uint32_t qidx) { - OSNumber * num; iocpu_platform_action_entry_t * entry; - uint32_t priority; + OSNumber * num; + uint32_t priority; + const OSSymbol * key = gActionSymbols[qidx]; + queue_head_t * queue = &gActionQueues[qidx]; + bool reverse; + bool uniq; num = OSDynamicCast(OSNumber, service->getProperty(key)); - if (!num) - return; + if (!num) return; + + reverse = false; + uniq = false; + switch (qidx) + { + case kQueueWake: + case kQueueActive: + reverse = true; + break; + case kQueueHaltRestart: + case kQueuePanic: + uniq = true; + break; + } + if (uniq) + { + queue_iterate(queue, entry, iocpu_platform_action_entry_t *, link) + { + if (service == entry->refcon0) return; + } + } entry = IONew(iocpu_platform_action_entry_t, 1); entry->action = &IOServicePlatformAction; @@ -239,49 +233,66 @@ IOInstallServicePlatformAction(IOService * service, entry->refcon1 = (void *) key; iocpu_add_platform_action(queue, entry); - entry->alloc_list = gIOAllActionsQueue; - gIOAllActionsQueue = entry; } -extern "C" kern_return_t -IOCPURunPlatformHaltRestartActions(uint32_t message) +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +void +IOCPUInitialize(void) { - kern_return_t ret; - IORegistryIterator * iter; - OSOrderedSet * all; - IOService * service; + for (uint32_t qidx = kQueueSleep; qidx < kQueueCount; qidx++) + { + queue_init(&gActionQueues[qidx]); + } + + gIOPlatformSleepActionKey = gActionSymbols[kQueueSleep] + = OSSymbol::withCStringNoCopy(kIOPlatformSleepActionKey); + gIOPlatformWakeActionKey = gActionSymbols[kQueueWake] + = OSSymbol::withCStringNoCopy(kIOPlatformWakeActionKey); + gIOPlatformQuiesceActionKey = gActionSymbols[kQueueQuiesce] + = OSSymbol::withCStringNoCopy(kIOPlatformQuiesceActionKey); + gIOPlatformActiveActionKey = gActionSymbols[kQueueActive] + = 
OSSymbol::withCStringNoCopy(kIOPlatformActiveActionKey); + gIOPlatformHaltRestartActionKey = gActionSymbols[kQueueHaltRestart] + = OSSymbol::withCStringNoCopy(kIOPlatformHaltRestartActionKey); + gIOPlatformPanicActionKey = gActionSymbols[kQueuePanic] + = OSSymbol::withCStringNoCopy(kIOPlatformPanicActionKey); +} + +IOReturn +IOInstallServicePlatformActions(IOService * service) +{ + IOInstallServicePlatformAction(service, kQueueHaltRestart); + IOInstallServicePlatformAction(service, kQueuePanic); + + return (kIOReturnSuccess); +} - if (!gIOHaltRestartActionQueue.next) +IOReturn +IORemoveServicePlatformActions(IOService * service) +{ + iocpu_platform_action_entry_t * entry; + iocpu_platform_action_entry_t * next; + + for (uint32_t qidx = kQueueSleep; qidx < kQueueCount; qidx++) { - queue_init(&gIOHaltRestartActionQueue); - iter = IORegistryIterator::iterateOver(gIOServicePlane, - kIORegistryIterateRecursively); - if (iter) + next = (typeof(entry)) queue_first(&gActionQueues[qidx]); + while (!queue_end(&gActionQueues[qidx], &next->link)) { - all = 0; - do + entry = next; + next = (typeof(entry)) queue_next(&entry->link); + if (service == entry->refcon0) { - if (all) all->release(); - all = iter->iterateAll(); + iocpu_remove_platform_action(entry); + IODelete(entry, iocpu_platform_action_entry_t, 1); } - while (!iter->isValid()); - iter->release(); - if (all) - { - while((service = (IOService *) all->getFirstObject())) - { - IOInstallServicePlatformAction(service, gIOPlatformHaltRestartActionKey, &gIOHaltRestartActionQueue, false); - all->removeObject(service); - } - all->release(); - } } } - ret = iocpu_run_platform_actions(&gIOHaltRestartActionQueue, 0, 0U-1, - (void *)(uintptr_t) message, NULL, NULL); - return (ret); + + return (kIOReturnSuccess); } + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ kern_return_t PE_cpu_start(cpu_id_t target, @@ -308,6 +319,22 @@ void PE_cpu_signal(cpu_id_t source, cpu_id_t target) if (sourceCPU && 
targetCPU) sourceCPU->signalCPU(targetCPU); } +void PE_cpu_signal_deferred(cpu_id_t source, cpu_id_t target) +{ + IOCPU *sourceCPU = OSDynamicCast(IOCPU, (OSObject *)source); + IOCPU *targetCPU = OSDynamicCast(IOCPU, (OSObject *)target); + + if (sourceCPU && targetCPU) sourceCPU->signalCPUDeferred(targetCPU); +} + +void PE_cpu_signal_cancel(cpu_id_t source, cpu_id_t target) +{ + IOCPU *sourceCPU = OSDynamicCast(IOCPU, (OSObject *)source); + IOCPU *targetCPU = OSDynamicCast(IOCPU, (OSObject *)target); + + if (sourceCPU && targetCPU) sourceCPU->signalCPUCancel(targetCPU); +} + void PE_cpu_machine_init(cpu_id_t target, boolean_t bootb) { IOCPU *targetCPU = OSDynamicCast(IOCPU, (OSObject *)target); @@ -358,10 +385,6 @@ void IOCPUSleepKernel(void) rootDomain->tracePoint( kIOPMTracePointSleepPlatformActions ); - queue_init(&gIOSleepActionQueue); - queue_init(&gIOWakeActionQueue); - queue_init(&gIOHaltRestartActionQueue); - iter = IORegistryIterator::iterateOver( gIOServicePlane, kIORegistryIterateRecursively ); if( iter) @@ -380,18 +403,17 @@ void IOCPUSleepKernel(void) { while((service = (IOService *) all->getFirstObject())) { - IOInstallServicePlatformAction(service, gIOPlatformSleepActionKey, &gIOSleepActionQueue, false); - IOInstallServicePlatformAction(service, gIOPlatformWakeActionKey, &gIOWakeActionQueue, true); - IOInstallServicePlatformAction(service, gIOPlatformQuiesceActionKey, iocpu_get_platform_quiesce_queue(), false); - IOInstallServicePlatformAction(service, gIOPlatformActiveActionKey, iocpu_get_platform_active_queue(), true); - IOInstallServicePlatformAction(service, gIOPlatformHaltRestartActionKey, &gIOHaltRestartActionQueue, false); + for (uint32_t qidx = kQueueSleep; qidx <= kQueueActive; qidx++) + { + IOInstallServicePlatformAction(service, qidx); + } all->removeObject(service); } all->release(); } } - iocpu_run_platform_actions(&gIOSleepActionQueue, 0, 0U-1, + iocpu_run_platform_actions(&gActionQueues[kQueueSleep], 0, 0U-1, NULL, NULL, NULL); 
rootDomain->tracePoint( kIOPMTracePointSleepCPUs ); @@ -423,22 +445,20 @@ void IOCPUSleepKernel(void) rootDomain->tracePoint( kIOPMTracePointWakePlatformActions ); - iocpu_run_platform_actions(&gIOWakeActionQueue, 0, 0U-1, + iocpu_run_platform_actions(&gActionQueues[kQueueWake], 0, 0U-1, NULL, NULL, NULL); iocpu_platform_action_entry_t * entry; - while ((entry = gIOAllActionsQueue)) + for (uint32_t qidx = kQueueSleep; qidx <= kQueueActive; qidx++) { - gIOAllActionsQueue = entry->alloc_list; - iocpu_remove_platform_action(entry); - IODelete(entry, iocpu_platform_action_entry_t, 1); + while (!(queue_empty(&gActionQueues[qidx]))) + { + entry = (typeof(entry)) queue_first(&gActionQueues[qidx]); + iocpu_remove_platform_action(entry); + IODelete(entry, iocpu_platform_action_entry_t, 1); + } } - if (!queue_empty(&gIOSleepActionQueue)) panic("gIOSleepActionQueue"); - if (!queue_empty(&gIOWakeActionQueue)) panic("gIOWakeActionQueue"); - if (!queue_empty(&gIOHaltRestartActionQueue)) panic("gIOHaltRestartActionQueue"); - gIOHaltRestartActionQueue.next = 0; - rootDomain->tracePoint( kIOPMTracePointWakeCPUs ); // Wake the other CPUs. 
@@ -556,6 +576,7 @@ bool IOCPU::serializeProperties(OSSerialize *serialize) const { bool result; OSDictionary *dict = dictionaryWithProperties(); + if (!dict) return false; dict->setObject(gIOCPUStateKey, gIOCPUStateNames[_cpuState]); result = dict->serialize(serialize); dict->release(); @@ -587,6 +608,20 @@ void IOCPU::signalCPU(IOCPU */*target*/) { } +void IOCPU::signalCPUDeferred(IOCPU *target) +{ + // Our CPU may not support deferred IPIs, + // so send a regular IPI by default + signalCPU(target); +} + +void IOCPU::signalCPUCancel(IOCPU */*target*/) +{ + // Meant to cancel signals sent by + // signalCPUDeferred; unsupported + // by default +} + void IOCPU::enableCPUTimeBase(bool /*enable*/) { } @@ -734,7 +769,10 @@ void IOCPUInterruptController::enableCPUInterrupt(IOCPU *cpu) // Ensure that the increment is seen by all processors OSIncrementAtomic(&enabledCPUs); - if (enabledCPUs == numCPUs) thread_wakeup(this); + if (enabledCPUs == numCPUs) { + IOService::cpusRunning(); + thread_wakeup(this); + } } IOReturn IOCPUInterruptController::registerInterrupt(IOService *nub, diff --git a/iokit/Kernel/IOCommandGate.cpp b/iokit/Kernel/IOCommandGate.cpp index 9b19d70ee..ae767744e 100644 --- a/iokit/Kernel/IOCommandGate.cpp +++ b/iokit/Kernel/IOCommandGate.cpp @@ -111,24 +111,41 @@ IOCommandGate::commandGate(OSObject *inOwner, Action inAction) /* virtual */ void IOCommandGate::free() { - setWorkLoop(0); + if (workLoop) setWorkLoop(0); super::free(); } +enum +{ + kSleepersRemoved = 0x00000001, + kSleepersWaitEnabled = 0x00000002, + kSleepersActions = 0x00000100, + kSleepersActionsMask = 0xffffff00, +}; + /* virtual */ void IOCommandGate::setWorkLoop(IOWorkLoop *inWorkLoop) { - uintptr_t *sleepersP = (uintptr_t *) &reserved; - if (!inWorkLoop && workLoop) { // tearing down - closeGate(); - *sleepersP |= 1; - while (*sleepersP >> 1) { + IOWorkLoop * wl; + uintptr_t * sleepersP = (uintptr_t *) &reserved; + bool defer; + + if (!inWorkLoop && (wl = workLoop)) { // tearing down + 
wl->closeGate(); + *sleepersP |= kSleepersRemoved; + while (*sleepersP & kSleepersWaitEnabled) { thread_wakeup_with_result(&enabled, THREAD_INTERRUPTED); sleepGate(sleepersP, THREAD_UNINT); } - *sleepersP = 0; - openGate(); + *sleepersP &= ~kSleepersWaitEnabled; + defer = (0 != (kSleepersActionsMask & *sleepersP)); + if (!defer) + { + super::setWorkLoop(0); + *sleepersP &= ~kSleepersRemoved; + } + wl->openGate(); + return; } - else super::setWorkLoop(inWorkLoop); } @@ -149,29 +166,38 @@ IOReturn IOCommandGate::runAction(Action inAction, void *arg0, void *arg1, void *arg2, void *arg3) { + IOWorkLoop * wl; + uintptr_t * sleepersP; + if (!inAction) return kIOReturnBadArgument; + if (!(wl = workLoop)) + return kIOReturnNotReady; // closeGate is recursive needn't worry if we already hold the lock. - closeGate(); + wl->closeGate(); + sleepersP = (uintptr_t *) &reserved; // If the command gate is disabled and we aren't on the workloop thread // itself then sleep until we get enabled. IOReturn res; - if (!workLoop->onThread()) { - while (!enabled) { - uintptr_t *sleepersP = (uintptr_t *) &reserved; - - *sleepersP += 2; - IOReturn res = sleepGate(&enabled, THREAD_ABORTSAFE); - *sleepersP -= 2; - - bool wakeupTearDown = (*sleepersP & 1); - if (res || wakeupTearDown) { - openGate(); + if (!wl->onThread()) + { + while (!enabled) + { + IOReturn sleepResult = kIOReturnSuccess; + if (workLoop) + { + *sleepersP |= kSleepersWaitEnabled; + sleepResult = wl->sleepGate(&enabled, THREAD_ABORTSAFE); + *sleepersP &= ~kSleepersWaitEnabled; + } + bool wakeupTearDown = (!workLoop || (0 != (*sleepersP & kSleepersRemoved))); + if ((kIOReturnSuccess != sleepResult) || wakeupTearDown) { + wl->openGate(); if (wakeupTearDown) - commandWakeup(sleepersP); // No further resources used + wl->wakeupGate(sleepersP, false); // No further resources used return kIOReturnAborted; } @@ -180,20 +206,28 @@ IOReturn IOCommandGate::runAction(Action inAction, bool trace = ( gIOKitTrace & kIOTraceCommandGates ) ? 
true : false; - if (trace) - IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION), + if (trace) IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION), VM_KERNEL_UNSLIDE(inAction), (uintptr_t) owner); IOStatisticsActionCall(); // Must be gated and on the work loop or enabled + + *sleepersP += kSleepersActions; res = (*inAction)(owner, arg0, arg1, arg2, arg3); - - if (trace) - IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION), + *sleepersP -= kSleepersActions; + + if (trace) IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION), VM_KERNEL_UNSLIDE(inAction), (uintptr_t) owner); + + if (kSleepersRemoved == ((kSleepersActionsMask|kSleepersRemoved) & *sleepersP)) + { + // no actions outstanding + *sleepersP &= ~kSleepersRemoved; + super::setWorkLoop(0); + } - openGate(); + wl->openGate(); return res; } @@ -203,16 +237,19 @@ IOReturn IOCommandGate::attemptAction(Action inAction, void *arg2, void *arg3) { IOReturn res; + IOWorkLoop * wl; if (!inAction) return kIOReturnBadArgument; + if (!(wl = workLoop)) + return kIOReturnNotReady; // Try to close the gate if can't get return immediately. 
- if (!tryCloseGate()) + if (!wl->tryCloseGate()) return kIOReturnCannotLock; // If the command gate is disabled then sleep until we get a wakeup - if (!workLoop->onThread() && !enabled) + if (!wl->onThread() && !enabled) res = kIOReturnNotPermitted; else { @@ -231,7 +268,7 @@ IOReturn IOCommandGate::attemptAction(Action inAction, VM_KERNEL_UNSLIDE(inAction), (uintptr_t) owner); } - openGate(); + wl->openGate(); return res; } diff --git a/iokit/Kernel/IODMACommand.cpp b/iokit/Kernel/IODMACommand.cpp index 3b3c0ee3a..c8477aaca 100644 --- a/iokit/Kernel/IODMACommand.cpp +++ b/iokit/Kernel/IODMACommand.cpp @@ -42,8 +42,6 @@ #include "IOKitKernelInternal.h" #define MAPTYPE(type) ((UInt) (type) & kTypeMask) -#define IS_MAPPED(type) (MAPTYPE(type) != kBypassed) -#define IS_BYPASSED(type) (MAPTYPE(type) == kBypassed) #define IS_NONCOHERENT(type) (MAPTYPE(type) == kNonCoherent) enum @@ -91,10 +89,10 @@ OSDefineMetaClassAndStructors(IODMACommand, IOCommand); OSMetaClassDefineReservedUsed(IODMACommand, 0); OSMetaClassDefineReservedUsed(IODMACommand, 1); OSMetaClassDefineReservedUsed(IODMACommand, 2); -OSMetaClassDefineReservedUnused(IODMACommand, 3); -OSMetaClassDefineReservedUnused(IODMACommand, 4); -OSMetaClassDefineReservedUnused(IODMACommand, 5); -OSMetaClassDefineReservedUnused(IODMACommand, 6); +OSMetaClassDefineReservedUsed(IODMACommand, 3); +OSMetaClassDefineReservedUsed(IODMACommand, 4); +OSMetaClassDefineReservedUsed(IODMACommand, 5); +OSMetaClassDefineReservedUsed(IODMACommand, 6); OSMetaClassDefineReservedUnused(IODMACommand, 7); OSMetaClassDefineReservedUnused(IODMACommand, 8); OSMetaClassDefineReservedUnused(IODMACommand, 9); @@ -105,6 +103,39 @@ OSMetaClassDefineReservedUnused(IODMACommand, 13); OSMetaClassDefineReservedUnused(IODMACommand, 14); OSMetaClassDefineReservedUnused(IODMACommand, 15); +IODMACommand * +IODMACommand::withRefCon(void * refCon) +{ + IODMACommand * me = new IODMACommand; + + if (me && !me->initWithRefCon(refCon)) + { + me->release(); + 
return 0; + } + + return me; +} + +IODMACommand * +IODMACommand::withSpecification(SegmentFunction outSegFunc, + const SegmentOptions * segmentOptions, + uint32_t mappingOptions, + IOMapper * mapper, + void * refCon) +{ + IODMACommand * me = new IODMACommand; + + if (me && !me->initWithSpecification(outSegFunc, segmentOptions, mappingOptions, + mapper, refCon)) + { + me->release(); + return 0; + } + + return me; +} + IODMACommand * IODMACommand::withSpecification(SegmentFunction outSegFunc, UInt8 numAddressBits, @@ -124,7 +155,7 @@ IODMACommand::withSpecification(SegmentFunction outSegFunc, { me->release(); return 0; - }; + } return me; } @@ -132,12 +163,54 @@ IODMACommand::withSpecification(SegmentFunction outSegFunc, IODMACommand * IODMACommand::cloneCommand(void *refCon) { - return withSpecification(fOutSeg, fNumAddressBits, fMaxSegmentSize, - fMappingOptions, fMaxTransferSize, fAlignMask + 1, fMapper, refCon); + SegmentOptions segmentOptions = + { + .fStructSize = sizeof(segmentOptions), + .fNumAddressBits = fNumAddressBits, + .fMaxSegmentSize = fMaxSegmentSize, + .fMaxTransferSize = fMaxTransferSize, + .fAlignment = fAlignMask + 1, + .fAlignmentLength = fAlignMaskInternalSegments + 1, + .fAlignmentInternalSegments = fAlignMaskLength + 1 + }; + + return (IODMACommand::withSpecification(fOutSeg, &segmentOptions, + fMappingOptions, fMapper, refCon)); } #define kLastOutputFunction ((SegmentFunction) kLastOutputFunction) +bool +IODMACommand::initWithRefCon(void * refCon) +{ + if (!super::init()) return (false); + + if (!reserved) + { + reserved = IONew(IODMACommandInternal, 1); + if (!reserved) return false; + } + bzero(reserved, sizeof(IODMACommandInternal)); + fRefCon = refCon; + + return (true); +} + +bool +IODMACommand::initWithSpecification(SegmentFunction outSegFunc, + const SegmentOptions * segmentOptions, + uint32_t mappingOptions, + IOMapper * mapper, + void * refCon) +{ + if (!initWithRefCon(refCon)) return false; + + if (kIOReturnSuccess != 
setSpecification(outSegFunc, segmentOptions, + mappingOptions, mapper)) return false; + + return (true); +} + bool IODMACommand::initWithSpecification(SegmentFunction outSegFunc, UInt8 numAddressBits, @@ -147,89 +220,119 @@ IODMACommand::initWithSpecification(SegmentFunction outSegFunc, UInt32 alignment, IOMapper *mapper, void *refCon) +{ + SegmentOptions segmentOptions = + { + .fStructSize = sizeof(segmentOptions), + .fNumAddressBits = numAddressBits, + .fMaxSegmentSize = maxSegmentSize, + .fMaxTransferSize = maxTransferSize, + .fAlignment = alignment, + .fAlignmentLength = 1, + .fAlignmentInternalSegments = alignment + }; + + return (initWithSpecification(outSegFunc, &segmentOptions, mappingOptions, mapper, refCon)); +} + +IOReturn +IODMACommand::setSpecification(SegmentFunction outSegFunc, + const SegmentOptions * segmentOptions, + uint32_t mappingOptions, + IOMapper * mapper) { IOService * device = 0; + UInt8 numAddressBits; + UInt64 maxSegmentSize; + UInt64 maxTransferSize; + UInt32 alignment; + + bool is32Bit; - if (!super::init() || !outSegFunc) - return false; + if (!outSegFunc || !segmentOptions) return (kIOReturnBadArgument); - bool is32Bit = (OutputHost32 == outSegFunc || OutputBig32 == outSegFunc - || OutputLittle32 == outSegFunc); + is32Bit = ((OutputHost32 == outSegFunc) + || (OutputBig32 == outSegFunc) + || (OutputLittle32 == outSegFunc)); + + numAddressBits = segmentOptions->fNumAddressBits; + maxSegmentSize = segmentOptions->fMaxSegmentSize; + maxTransferSize = segmentOptions->fMaxTransferSize; + alignment = segmentOptions->fAlignment; if (is32Bit) { if (!numAddressBits) numAddressBits = 32; else if (numAddressBits > 32) - return false; // Wrong output function for bits + return (kIOReturnBadArgument); // Wrong output function for bits } - if (numAddressBits && (numAddressBits < PAGE_SHIFT)) - return false; - - if (!maxSegmentSize) - maxSegmentSize--; // Set Max segment to -1 - if (!maxTransferSize) - maxTransferSize--; // Set Max transfer to -1 + 
if (numAddressBits && (numAddressBits < PAGE_SHIFT)) return (kIOReturnBadArgument); + if (!maxSegmentSize) maxSegmentSize--; // Set Max segment to -1 + if (!maxTransferSize) maxTransferSize--; // Set Max transfer to -1 if (mapper && !OSDynamicCast(IOMapper, mapper)) { device = mapper; mapper = 0; } - if (!mapper) + if (!mapper && (kUnmapped != MAPTYPE(mappingOptions))) { IOMapper::checkForSystemMapper(); mapper = IOMapper::gSystem; } fNumSegments = 0; - fBypassMask = 0; fOutSeg = outSegFunc; fNumAddressBits = numAddressBits; fMaxSegmentSize = maxSegmentSize; fMappingOptions = mappingOptions; fMaxTransferSize = maxTransferSize; - if (!alignment) - alignment = 1; + if (!alignment) alignment = 1; fAlignMask = alignment - 1; - fMapper = mapper; - fRefCon = refCon; + + alignment = segmentOptions->fAlignmentLength; + if (!alignment) alignment = 1; + fAlignMaskLength = alignment - 1; + + alignment = segmentOptions->fAlignmentInternalSegments; + if (!alignment) alignment = (fAlignMask + 1); + fAlignMaskInternalSegments = alignment - 1; switch (MAPTYPE(mappingOptions)) { - case kMapped: break; - case kNonCoherent: /*fMapper = 0;*/ break; + case kMapped: break; + case kUnmapped: break; + case kNonCoherent: break; + case kBypassed: - if (mapper && !mapper->getBypassMask(&fBypassMask)) - return false; - break; + if (!mapper) break; + return (kIOReturnBadArgument); + default: - return false; + return (kIOReturnBadArgument); }; - if (fMapper) - fMapper->retain(); - - reserved = IONew(IODMACommandInternal, 1); - if (!reserved) - return false; - bzero(reserved, sizeof(IODMACommandInternal)); + if (mapper != fMapper) + { + if (mapper) mapper->retain(); + if (fMapper) fMapper->release(); + fMapper = mapper; + } fInternalState->fIterateOnly = (0 != (kIterateOnly & mappingOptions)); fInternalState->fDevice = device; - return true; + return (kIOReturnSuccess); } void IODMACommand::free() { - if (reserved) - IODelete(reserved, IODMACommandInternal, 1); + if (reserved) IODelete(reserved, 
IODMACommandInternal, 1); - if (fMapper) - fMapper->release(); + if (fMapper) fMapper->release(); super::free(); } @@ -237,7 +340,7 @@ IODMACommand::free() IOReturn IODMACommand::setMemoryDescriptor(const IOMemoryDescriptor *mem, bool autoPrepare) { - IOReturn err = kIOReturnSuccess; + IOReturn err = kIOReturnSuccess; if (mem == fMemory) { @@ -312,6 +415,16 @@ IODMACommand::getMemoryDescriptor() const return fMemory; } +IOMemoryDescriptor * +IODMACommand::getIOMemoryDescriptor() const +{ + IOMemoryDescriptor * mem; + + mem = reserved->fCopyMD; + if (!mem) mem = __IODEQUALIFY(IOMemoryDescriptor *, fMemory); + + return (mem); +} IOReturn IODMACommand::segmentOp( @@ -325,10 +438,11 @@ IODMACommand::segmentOp( addr64_t maxPhys, address; uint64_t length; uint32_t numPages; + uint32_t mask; IODMACommandInternal * state = target->reserved; - if (target->fNumAddressBits && (target->fNumAddressBits < 64) && (state->fLocalMapperPageAlloc || !target->fMapper)) + if (target->fNumAddressBits && (target->fNumAddressBits < 64) && (state->fLocalMapperAlloc || !target->fMapper)) maxPhys = (1ULL << target->fNumAddressBits); else maxPhys = 0; @@ -342,8 +456,15 @@ IODMACommand::segmentOp( if (!state->fMisaligned) { - state->fMisaligned |= (0 != (state->fSourceAlignMask & address)); - if (state->fMisaligned) DEBG("misaligned %qx:%qx, %lx\n", address, length, state->fSourceAlignMask); + mask = (segmentIndex ? 
target->fAlignMaskInternalSegments : state->fSourceAlignMask); + state->fMisaligned |= (0 != (mask & address)); + if (state->fMisaligned) DEBG("misaligned address %qx:%qx, %x\n", address, length, mask); + } + if (!state->fMisaligned) + { + mask = target->fAlignMaskLength; + state->fMisaligned |= (0 != (mask & length)); + if (state->fMisaligned) DEBG("misaligned length %qx:%qx, %x\n", address, length, mask); } if (state->fMisaligned && (kWalkPreflight & op)) @@ -401,7 +522,7 @@ IODMACommand::segmentOp( if ((kMapped == MAPTYPE(target->fMappingOptions)) && target->fMapper) { - cpuAddr = target->fMapper->mapAddr(address); + cpuAddr = target->fMapper->mapToPhysicalAddress(address); } remapAddr = ptoa_64(vm_page_get_phys_page(lastPage)); @@ -439,6 +560,14 @@ IODMACommand::segmentOp( return kIOReturnSuccess; } +IOBufferMemoryDescriptor * +IODMACommand::createCopyBuffer(IODirection direction, UInt64 length) +{ + mach_vm_address_t mask = 0xFFFFF000; //state->fSourceAlignMask + return (IOBufferMemoryDescriptor::inTaskWithPhysicalMask(kernel_task, + direction, length, mask)); +} + IOReturn IODMACommand::walkAll(UInt8 op) { @@ -478,7 +607,7 @@ IODMACommand::walkAll(UInt8 op) DEBG("preflight fCopyPageCount %d\n", state->fCopyPageCount); - if (!state->fDoubleBuffer) + if (!fMapper && !state->fDoubleBuffer) { kern_return_t kr; @@ -506,9 +635,7 @@ IODMACommand::walkAll(UInt8 op) else { DEBG("alloc IOBMD\n"); - mach_vm_address_t mask = 0xFFFFF000; //state->fSourceAlignMask - state->fCopyMD = IOBufferMemoryDescriptor::inTaskWithPhysicalMask(kernel_task, - fMDSummary.fDirection, state->fPreparedLength, mask); + state->fCopyMD = createCopyBuffer(fMDSummary.fDirection, state->fPreparedLength); if (state->fCopyMD) { @@ -595,6 +722,40 @@ IODMACommand::getAlignment(void) return (fAlignMask + 1); } +uint32_t +IODMACommand::getAlignmentLength(void) +{ + return (fAlignMaskLength + 1); +} + +uint32_t +IODMACommand::getAlignmentInternalSegments(void) +{ + return (fAlignMaskInternalSegments + 
1); +} + +IOReturn +IODMACommand::prepareWithSpecification(SegmentFunction outSegFunc, + const SegmentOptions * segmentOptions, + uint32_t mappingOptions, + IOMapper * mapper, + UInt64 offset, + UInt64 length, + bool flushCache, + bool synchronize) +{ + IOReturn ret; + + if (fActive) return kIOReturnNotPermitted; + + ret = setSpecification(outSegFunc, segmentOptions, mappingOptions, mapper); + if (kIOReturnSuccess != ret) return (ret); + + ret = prepare(offset, length, flushCache, synchronize); + + return (ret); +} + IOReturn IODMACommand::prepareWithSpecification(SegmentFunction outSegFunc, UInt8 numAddressBits, @@ -608,94 +769,36 @@ IODMACommand::prepareWithSpecification(SegmentFunction outSegFunc, bool flushCache, bool synchronize) { - if (fActive) - return kIOReturnNotPermitted; - - if (!outSegFunc) - return kIOReturnBadArgument; - - bool is32Bit = (OutputHost32 == outSegFunc || OutputBig32 == outSegFunc - || OutputLittle32 == outSegFunc); - if (is32Bit) + SegmentOptions segmentOptions = { - if (!numAddressBits) - numAddressBits = 32; - else if (numAddressBits > 32) - return kIOReturnBadArgument; // Wrong output function for bits - } - - if (numAddressBits && (numAddressBits < PAGE_SHIFT)) - return kIOReturnBadArgument; - - if (!maxSegmentSize) - maxSegmentSize--; // Set Max segment to -1 - if (!maxTransferSize) - maxTransferSize--; // Set Max transfer to -1 - - if (mapper && !OSDynamicCast(IOMapper, mapper)) - { - fInternalState->fDevice = mapper; - mapper = 0; - } - if (!mapper) - { - IOMapper::checkForSystemMapper(); - mapper = IOMapper::gSystem; - } - - switch (MAPTYPE(mappingOptions)) - { - case kMapped: break; - case kNonCoherent: break; - case kBypassed: - if (mapper && !mapper->getBypassMask(&fBypassMask)) - return kIOReturnBadArgument; - break; - default: - return kIOReturnBadArgument; + .fStructSize = sizeof(segmentOptions), + .fNumAddressBits = numAddressBits, + .fMaxSegmentSize = maxSegmentSize, + .fMaxTransferSize = maxTransferSize, + .fAlignment = 
alignment, + .fAlignmentLength = 1, + .fAlignmentInternalSegments = alignment }; - fNumSegments = 0; - fBypassMask = 0; - fOutSeg = outSegFunc; - fNumAddressBits = numAddressBits; - fMaxSegmentSize = maxSegmentSize; - fMappingOptions = mappingOptions; - fMaxTransferSize = maxTransferSize; - if (!alignment) - alignment = 1; - fAlignMask = alignment - 1; - if (mapper != fMapper) - { - mapper->retain(); - fMapper->release(); - fMapper = mapper; - } - - fInternalState->fIterateOnly = (0 != (kIterateOnly & mappingOptions)); - - return prepare(offset, length, flushCache, synchronize); + return (prepareWithSpecification(outSegFunc, &segmentOptions, mappingOptions, mapper, + offset, length, flushCache, synchronize)); } IOReturn IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchronize) { - IODMACommandInternal * state = fInternalState; - IOReturn ret = kIOReturnSuccess; - MappingOptions mappingOptions = fMappingOptions; + IODMACommandInternal * state = fInternalState; + IOReturn ret = kIOReturnSuccess; + uint32_t mappingOptions = fMappingOptions; - if (!length) - length = fMDSummary.fLength; + // check specification has been set + if (!fOutSeg) return (kIOReturnNotReady); - if (length > fMaxTransferSize) - return kIOReturnNoSpace; + if (!length) length = fMDSummary.fLength; - if (IS_NONCOHERENT(mappingOptions) && flushCache) { - IOMemoryDescriptor *poMD = const_cast(fMemory); + if (length > fMaxTransferSize) return kIOReturnNoSpace; - poMD->performOperation(kIOMemoryIncoherentIOStore, offset, length); - } if (fActive++) { if ((state->fPreparedOffset != offset) @@ -704,6 +807,8 @@ IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchr } else { + if (fAlignMaskLength & length) return (kIOReturnNotAligned); + state->fPreparedOffset = offset; state->fPreparedLength = length; @@ -716,8 +821,8 @@ IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchr state->fCopyPageCount = 0; state->fNextRemapPage = 
NULL; state->fCopyMD = 0; - state->fLocalMapperPageAlloc = 0; - state->fLocalMapperPageCount = 0; + state->fLocalMapperAlloc = 0; + state->fLocalMapperAllocLength = 0; state->fLocalMapper = (fMapper && (fMapper != IOMapper::gSystem)); @@ -738,46 +843,48 @@ IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchr ret = walkAll(op); } - if (fMapper) + if (IS_NONCOHERENT(mappingOptions) && flushCache) { - if (state->fLocalMapper) + if (state->fCopyMD) { - state->fLocalMapperPageCount = atop_64(round_page( - state->fPreparedLength + ((state->fPreparedOffset + fMDSummary.fPageAlign) & page_mask))); - state->fLocalMapperPageAlloc = ptoa_64(fMapper->iovmAllocDMACommand(this, state->fLocalMapperPageCount)); - if (!state->fLocalMapperPageAlloc) - { - DEBG("IODMACommand !iovmAlloc"); - return (kIOReturnNoResources); - } - state->fMapContig = true; + state->fCopyMD->performOperation(kIOMemoryIncoherentIOStore, 0, length); } else { - IOMDDMAMapArgs mapArgs; - bzero(&mapArgs, sizeof(mapArgs)); - mapArgs.fMapper = fMapper; - mapArgs.fMapSpec.device = state->fDevice; - mapArgs.fMapSpec.alignment = fAlignMask + 1; - mapArgs.fMapSpec.numAddressBits = fNumAddressBits ? 
fNumAddressBits : 64; - mapArgs.fOffset = state->fPreparedOffset; - mapArgs.fLength = state->fPreparedLength; - const IOMemoryDescriptor * md = state->fCopyMD; - if (!md) md = fMemory; - ret = md->dmaCommandOperation(kIOMDDMAMap | state->fIterateOnly, &mapArgs, sizeof(mapArgs)); - if (kIOReturnSuccess == ret) - { - state->fLocalMapperPageAlloc = mapArgs.fAlloc; - state->fLocalMapperPageCount = mapArgs.fAllocCount; - state->fMapContig = mapArgs.fMapContig; - } - ret = kIOReturnSuccess; + IOMemoryDescriptor * md = const_cast(fMemory); + md->performOperation(kIOMemoryIncoherentIOStore, offset, length); } } + if (fMapper) + { + IOMDDMAMapArgs mapArgs; + bzero(&mapArgs, sizeof(mapArgs)); + mapArgs.fMapper = fMapper; + mapArgs.fCommand = this; + mapArgs.fMapSpec.device = state->fDevice; + mapArgs.fMapSpec.alignment = fAlignMask + 1; + mapArgs.fMapSpec.numAddressBits = fNumAddressBits ? fNumAddressBits : 64; + mapArgs.fLength = state->fPreparedLength; + const IOMemoryDescriptor * md = state->fCopyMD; + if (md) { mapArgs.fOffset = 0; } + else + { + md = fMemory; + mapArgs.fOffset = state->fPreparedOffset; + } + ret = md->dmaCommandOperation(kIOMDDMAMap | state->fIterateOnly, &mapArgs, sizeof(mapArgs)); +//IOLog("dma %p 0x%x 0x%qx-0x%qx 0x%qx-0x%qx\n", this, ret, state->fPreparedOffset, state->fPreparedLength, mapArgs.fAlloc, mapArgs.fAllocLength); - if (kIOReturnSuccess == ret) - state->fPrepared = true; + if (kIOReturnSuccess == ret) + { + state->fLocalMapperAlloc = mapArgs.fAlloc; + state->fLocalMapperAllocLength = mapArgs.fAllocLength; + state->fMapContig = mapArgs.fMapContig; + } + if (NULL != IOMapper::gSystem) ret = kIOReturnSuccess; + } + if (kIOReturnSuccess == ret) state->fPrepared = true; } return ret; } @@ -793,6 +900,19 @@ IODMACommand::complete(bool invalidateCache, bool synchronize) if (!--fActive) { + if (IS_NONCOHERENT(fMappingOptions) && invalidateCache) + { + if (state->fCopyMD) + { + state->fCopyMD->performOperation(kIOMemoryIncoherentIOFlush, 0, 
state->fPreparedLength); + } + else + { + IOMemoryDescriptor * md = const_cast(fMemory); + md->performOperation(kIOMemoryIncoherentIOFlush, state->fPreparedOffset, state->fPreparedLength); + } + } + if (!state->fCursor) { IOOptionBits op = kWalkComplete; @@ -800,28 +920,18 @@ IODMACommand::complete(bool invalidateCache, bool synchronize) op |= kWalkSyncIn; ret = walkAll(op); } - if (state->fLocalMapperPageAlloc) + if (state->fLocalMapperAlloc) { - if (state->fLocalMapper) - { - fMapper->iovmFreeDMACommand(this, atop_64(state->fLocalMapperPageAlloc), state->fLocalMapperPageCount); - } - else if (state->fLocalMapperPageCount) + if (state->fLocalMapperAllocLength) { - fMapper->iovmFree(atop_64(state->fLocalMapperPageAlloc), state->fLocalMapperPageCount); + fMapper->iovmUnmapMemory(getIOMemoryDescriptor(), this, + state->fLocalMapperAlloc, state->fLocalMapperAllocLength); } - state->fLocalMapperPageAlloc = 0; - state->fLocalMapperPageCount = 0; + state->fLocalMapperAlloc = 0; + state->fLocalMapperAllocLength = 0; } state->fPrepared = false; - - if (IS_NONCOHERENT(fMappingOptions) && invalidateCache) - { - IOMemoryDescriptor *poMD = const_cast(fMemory); - - poMD->performOperation(kIOMemoryIncoherentIOFlush, state->fPreparedOffset, state->fPreparedLength); - } } return ret; @@ -913,7 +1023,7 @@ IODMACommand::transferSegment(void *reference, if ((kMapped == MAPTYPE(target->fMappingOptions)) && target->fMapper) { - cpuAddr = target->fMapper->mapAddr(ioAddr); + cpuAddr = target->fMapper->mapToPhysicalAddress(ioAddr); copyLen = min(copyLen, page_size - (ioAddr & (page_size - 1))); ioAddr += copyLen; } @@ -1013,11 +1123,10 @@ IODMACommand::genIOVMSegments(uint32_t op, state->fIOVMAddr = 0; internalState->fNextRemapPage = NULL; internalState->fNewMD = false; - state->fMapped = (IS_MAPPED(fMappingOptions) && fMapper); + state->fMapped = (0 != fMapper); mdOp = kIOMDFirstSegment; }; - UInt64 bypassMask = fBypassMask; UInt32 segIndex = 0; UInt32 numSegments = *numSegmentsP; 
Segment64 curSeg = { 0, 0 }; @@ -1039,9 +1148,9 @@ IODMACommand::genIOVMSegments(uint32_t op, state->fOffset = offset; state->fLength = memLength - offset; - if (internalState->fMapContig && internalState->fLocalMapperPageAlloc) + if (internalState->fMapContig && internalState->fLocalMapperAlloc) { - state->fIOVMAddr = internalState->fLocalMapperPageAlloc + offset; + state->fIOVMAddr = internalState->fLocalMapperAlloc + offset; rtn = kIOReturnSuccess; #if 0 { @@ -1091,13 +1200,14 @@ IODMACommand::genIOVMSegments(uint32_t op, { UInt64 length = state->fLength; offset += length; - curSeg.fIOVMAddr = state->fIOVMAddr | bypassMask; + curSeg.fIOVMAddr = state->fIOVMAddr; curSeg.fLength = length; state->fIOVMAddr = 0; } if (!state->fIOVMAddr) { + // maxPhys if ((kWalkClient & op) && (curSeg.fIOVMAddr + curSeg.fLength - 1) > maxPhys) { if (internalState->fCursor) @@ -1156,23 +1266,67 @@ IODMACommand::genIOVMSegments(uint32_t op, } } + // reduce size of output segment + uint64_t reduce, leftover = 0; + + // fMaxSegmentSize if (curSeg.fLength > fMaxSegmentSize) { - UInt64 remain = curSeg.fLength - fMaxSegmentSize; + leftover += curSeg.fLength - fMaxSegmentSize; + curSeg.fLength = fMaxSegmentSize; + state->fIOVMAddr = curSeg.fLength + curSeg.fIOVMAddr; + } + + // alignment current length + + reduce = (curSeg.fLength & fAlignMaskLength); + if (reduce && (curSeg.fLength > reduce)) + { + leftover += reduce; + curSeg.fLength -= reduce; + state->fIOVMAddr = curSeg.fLength + curSeg.fIOVMAddr; + } - state->fIOVMAddr = fMaxSegmentSize + curSeg.fIOVMAddr; - curSeg.fLength = fMaxSegmentSize; + // alignment next address - state->fLength = remain; - offset -= remain; + reduce = (state->fIOVMAddr & fAlignMaskInternalSegments); + if (reduce && (curSeg.fLength > reduce)) + { + leftover += reduce; + curSeg.fLength -= reduce; + state->fIOVMAddr = curSeg.fLength + curSeg.fIOVMAddr; } - if (internalState->fCursor - && (0 != (internalState->fSourceAlignMask & curSeg.fIOVMAddr))) + if (leftover) 
{ - curSeg.fIOVMAddr = 0; - ret = kIOReturnNotAligned; - break; + DEBG("reduce seg by 0x%llx @ 0x%llx [0x%llx, 0x%llx]\n", + leftover, offset, + curSeg.fIOVMAddr, curSeg.fLength); + state->fLength = leftover; + offset -= leftover; + } + + // + + if (internalState->fCursor) + { + bool misaligned; + uint32_t mask; + + mask = (segIndex ? fAlignMaskInternalSegments : internalState->fSourceAlignMask); + misaligned = (0 != (mask & curSeg.fIOVMAddr)); + if (!misaligned) + { + mask = fAlignMaskLength; + misaligned |= (0 != (mask & curSeg.fLength)); + } + if (misaligned) + { + if (misaligned) DEBG("cursor misaligned %qx:%qx\n", curSeg.fIOVMAddr, curSeg.fLength); + curSeg.fIOVMAddr = 0; + ret = kIOReturnNotAligned; + break; + } } if (offset >= memLength) @@ -1218,7 +1372,7 @@ IODMACommand::clientOutputSegment( if (target->fNumAddressBits && (target->fNumAddressBits < 64) && ((segment.fIOVMAddr + segment.fLength - 1) >> target->fNumAddressBits) - && (target->reserved->fLocalMapperPageAlloc || !target->fMapper)) + && (target->reserved->fLocalMapperAlloc || !target->fMapper)) { DEBG("kIOReturnMessageTooLarge(fNumAddressBits) %qx, %qx\n", segment.fIOVMAddr, segment.fLength); ret = kIOReturnMessageTooLarge; diff --git a/iokit/Kernel/IODMAEventSource.cpp b/iokit/Kernel/IODMAEventSource.cpp index 1b53b0b0b..6875e0c42 100644 --- a/iokit/Kernel/IODMAEventSource.cpp +++ b/iokit/Kernel/IODMAEventSource.cpp @@ -149,6 +149,13 @@ IOByteCount IODMAEventSource::validFIFODepth(IOByteCount depth, IODirection dire } +IOReturn IODMAEventSource::setFrameSize(UInt8 byteCount) +{ + if ((dmaController == 0) || (dmaIndex == 0xFFFFFFFF)) return kIOReturnError; + + return dmaController->setFrameSize(dmaIndex, byteCount); +} + // protected bool IODMAEventSource::checkForWork(void) diff --git a/iokit/Kernel/IODataQueue.cpp b/iokit/Kernel/IODataQueue.cpp index 79c97e1af..e3afbdcf3 100644 --- a/iokit/Kernel/IODataQueue.cpp +++ b/iokit/Kernel/IODataQueue.cpp @@ -37,6 +37,12 @@ #include #include +struct 
IODataQueueInternal +{ + mach_msg_header_t msg; + UInt32 queueSize; +}; + #ifdef enqueue #undef enqueue #endif @@ -95,6 +101,14 @@ Boolean IODataQueue::initWithCapacity(UInt32 size) return false; } + assert(!notifyMsg); + notifyMsg = IONew(IODataQueueInternal, 1); + if (!notifyMsg) { + return false; + } + bzero(notifyMsg, sizeof(IODataQueueInternal)); + ((IODataQueueInternal *)notifyMsg)->queueSize = size; + dataQueue = (IODataQueueMemory *)IOMallocAligned(allocSize, PAGE_SIZE); if (dataQueue == 0) { return false; @@ -105,13 +119,6 @@ Boolean IODataQueue::initWithCapacity(UInt32 size) // dataQueue->head = 0; // dataQueue->tail = 0; - if (!notifyMsg) { - notifyMsg = IOMalloc(sizeof(mach_msg_header_t)); - if (!notifyMsg) - return false; - } - bzero(notifyMsg, sizeof(mach_msg_header_t)); - return true; } @@ -132,14 +139,14 @@ Boolean IODataQueue::initWithEntries(UInt32 numEntries, UInt32 entrySize) void IODataQueue::free() { - if (dataQueue) { - IOFreeAligned(dataQueue, round_page(dataQueue->queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE)); - dataQueue = NULL; - - if (notifyMsg) { - IOFree(notifyMsg, sizeof(mach_msg_header_t)); - notifyMsg = NULL; - } + if (notifyMsg) { + if (dataQueue) { + IOFreeAligned(dataQueue, round_page(((IODataQueueInternal *)notifyMsg)->queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE)); + dataQueue = NULL; + } + + IODelete(notifyMsg, IODataQueueInternal, 1); + notifyMsg = NULL; } super::free(); @@ -152,14 +159,17 @@ Boolean IODataQueue::enqueue(void * data, UInt32 dataSize) const UInt32 head = dataQueue->head; // volatile const UInt32 tail = dataQueue->tail; const UInt32 entrySize = dataSize + DATA_QUEUE_ENTRY_HEADER_SIZE; + UInt32 queueSize; IODataQueueEntry * entry; // Check for overflow of entrySize if (dataSize > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) { return false; } + // Check for underflow of (dataQueue->queueSize - tail) - if (dataQueue->queueSize < tail) { + queueSize = ((IODataQueueInternal *) notifyMsg)->queueSize; + if ((queueSize < 
tail) || (queueSize < head)) { return false; } @@ -167,7 +177,7 @@ Boolean IODataQueue::enqueue(void * data, UInt32 dataSize) { // Is there enough room at the end for the entry? if ((entrySize <= UINT32_MAX - tail) && - ((tail + entrySize) <= dataQueue->queueSize) ) + ((tail + entrySize) <= queueSize) ) { entry = (IODataQueueEntry *)((UInt8 *)dataQueue->queue + tail); @@ -191,7 +201,7 @@ Boolean IODataQueue::enqueue(void * data, UInt32 dataSize) // doing this. The user client checks for this and will look for the size // at the beginning if there isn't room for it at the end. - if ( ( dataQueue->queueSize - tail ) >= DATA_QUEUE_ENTRY_HEADER_SIZE ) + if ( ( queueSize - tail ) >= DATA_QUEUE_ENTRY_HEADER_SIZE ) { ((IODataQueueEntry *)((UInt8 *)dataQueue->queue + tail))->size = dataSize; } @@ -236,14 +246,13 @@ Boolean IODataQueue::enqueue(void * data, UInt32 dataSize) void IODataQueue::setNotificationPort(mach_port_t port) { - mach_msg_header_t * msgh = (mach_msg_header_t *) notifyMsg; + mach_msg_header_t * msgh; - if (msgh) { - bzero(msgh, sizeof(mach_msg_header_t)); - msgh->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0); - msgh->msgh_size = sizeof(mach_msg_header_t); - msgh->msgh_remote_port = port; - } + msgh = &((IODataQueueInternal *) notifyMsg)->msg; + bzero(msgh, sizeof(mach_msg_header_t)); + msgh->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0); + msgh->msgh_size = sizeof(mach_msg_header_t); + msgh->msgh_remote_port = port; } void IODataQueue::sendDataAvailableNotification() @@ -251,8 +260,8 @@ void IODataQueue::sendDataAvailableNotification() kern_return_t kr; mach_msg_header_t * msgh; - msgh = (mach_msg_header_t *) notifyMsg; - if (msgh && msgh->msgh_remote_port) { + msgh = &((IODataQueueInternal *) notifyMsg)->msg; + if (msgh->msgh_remote_port) { kr = mach_msg_send_from_kernel_with_options(msgh, msgh->msgh_size, MACH_SEND_TIMEOUT, MACH_MSG_TIMEOUT_NONE); switch(kr) { case MACH_SEND_TIMED_OUT: // Notification already sent @@ -269,9 +278,11 @@ 
void IODataQueue::sendDataAvailableNotification() IOMemoryDescriptor *IODataQueue::getMemoryDescriptor() { IOMemoryDescriptor *descriptor = 0; + UInt32 queueSize; + queueSize = ((IODataQueueInternal *) notifyMsg)->queueSize; if (dataQueue != 0) { - descriptor = IOMemoryDescriptor::withAddress(dataQueue, dataQueue->queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE, kIODirectionOutIn); + descriptor = IOMemoryDescriptor::withAddress(dataQueue, queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE, kIODirectionOutIn); } return descriptor; diff --git a/iokit/Kernel/IODeviceTreeSupport.cpp b/iokit/Kernel/IODeviceTreeSupport.cpp index 6533ed937..965670f37 100644 --- a/iokit/Kernel/IODeviceTreeSupport.cpp +++ b/iokit/Kernel/IODeviceTreeSupport.cpp @@ -61,6 +61,7 @@ const OSSymbol * gIODTUnitKey; const OSSymbol * gIODTCompatibleKey; const OSSymbol * gIODTTypeKey; const OSSymbol * gIODTModelKey; +const OSSymbol * gIODTTargetTypeKey; const OSSymbol * gIODTSizeCellKey; const OSSymbol * gIODTAddressCellKey; @@ -106,6 +107,7 @@ IODeviceTreeAlloc( void * dtTop ) gIODTCompatibleKey = OSSymbol::withCStringNoCopy( "compatible" ); gIODTTypeKey = OSSymbol::withCStringNoCopy( "device_type" ); gIODTModelKey = OSSymbol::withCStringNoCopy( "model" ); + gIODTTargetTypeKey = OSSymbol::withCStringNoCopy( "target-type" ); gIODTSizeCellKey = OSSymbol::withCStringNoCopy( "#size-cells" ); gIODTAddressCellKey = OSSymbol::withCStringNoCopy( "#address-cells" ); gIODTRangeKey = OSSymbol::withCStringNoCopy( "ranges" ); @@ -898,7 +900,7 @@ OSCollectionIterator * IODTFindMatchingEntries( IORegistryEntry * from, } cIter = OSCollectionIterator::withCollection( result); - result->release(); + if (result) result->release(); return( cIter); } diff --git a/iokit/Kernel/IOEventSource.cpp b/iokit/Kernel/IOEventSource.cpp index 95046dacd..3393993e0 100644 --- a/iokit/Kernel/IOEventSource.cpp +++ b/iokit/Kernel/IOEventSource.cpp @@ -114,7 +114,7 @@ bool IOEventSource::tryCloseGate() int IOEventSource::sleepGate(void *event, UInt32 
type) { - bool res; + int res; IOStatisticsOpenGate(); res = workLoop->sleepGate(event, type); IOStatisticsCloseGate(); @@ -123,7 +123,7 @@ int IOEventSource::sleepGate(void *event, UInt32 type) int IOEventSource::sleepGate(void *event, AbsoluteTime deadline, UInt32 type) { - bool res; + int res; IOStatisticsOpenGate(); res = workLoop->sleepGate(event, deadline, type); IOStatisticsCloseGate(); diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp index f85f2ab0e..867251b27 100644 --- a/iokit/Kernel/IOHibernateIO.cpp +++ b/iokit/Kernel/IOHibernateIO.cpp @@ -182,6 +182,7 @@ extern "C" ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); #define DISABLE_TRIM 0 #define TRIM_DELAY 5000 +extern boolean_t root_is_CF_drive; extern unsigned int save_kdebug_enable; extern uint32_t gIOHibernateState; uint32_t gIOHibernateMode; @@ -193,23 +194,28 @@ static uint64_t gIOHibernateCompression = 0x80; // default compression 50% static IODTNVRAM * gIOOptionsEntry; static IORegistryEntry * gIOChosenEntry; + +static const OSSymbol * gIOHibernateBootImageKey; + #if defined(__i386__) || defined(__x86_64__) -static const OSSymbol * gIOCreateEFIDevicePathSymbol; + static const OSSymbol * gIOHibernateRTCVariablesKey; static const OSSymbol * gIOHibernateBoot0082Key; static const OSSymbol * gIOHibernateBootNextKey; static OSData * gIOHibernateBoot0082Data; static OSData * gIOHibernateBootNextData; static OSObject * gIOHibernateBootNextSave; -static struct kern_direct_file_io_ref_t * gDebugImageFileRef; -#endif + +static IOPolledFileIOVars * gDebugImageFileVars; +static IOLock * gDebugImageLock; + +#endif /* defined(__i386__) || defined(__x86_64__) */ static IOLock * gFSLock; static uint32_t gFSState; static IOPolledFileIOVars gFileVars; static IOHibernateVars gIOHibernateVars; -static struct kern_direct_file_io_ref_t * gIOHibernateFileRef; -static hibernate_cryptvars_t gIOHibernateCryptWakeContext; +static IOPolledFileCryptVars gIOHibernateCryptWakeContext; static 
hibernate_graphics_t _hibernateGraphics; static hibernate_graphics_t * gIOHibernateGraphicsInfo = &_hibernateGraphics; static hibernate_statistics_t _hibernateStats; @@ -224,24 +230,14 @@ enum }; static IOReturn IOHibernateDone(IOHibernateVars * vars); +static IOReturn IOWriteExtentsToFile(IOPolledFileIOVars * vars, uint32_t signature); +static void IOSetBootImageNVRAM(OSData * data); /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -enum { kXPRamAudioVolume = 8 }; enum { kDefaultIOSize = 128 * 1024 }; enum { kVideoMapSize = 80 * 1024 * 1024 }; -#ifndef kIOMediaPreferredBlockSizeKey -#define kIOMediaPreferredBlockSizeKey "Preferred Block Size" -#endif - -#ifndef kIOBootPathKey -#define kIOBootPathKey "bootpath" -#endif -#ifndef kIOSelectedBootDeviceKey -#define kIOSelectedBootDeviceKey "boot-device" -#endif - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ // copy from phys addr to MD @@ -358,1020 +354,31 @@ hibernate_set_page_state(hibernate_page_list_t * page_list, hibernate_page_list_ } } -static vm_offset_t -hibernate_page_list_iterate(hibernate_page_list_t * list, vm_offset_t * pPage) -{ - uint32_t page = *pPage; - uint32_t count; - hibernate_bitmap_t * bitmap; - - while ((bitmap = hibernate_page_bitmap_pin(list, &page))) - { - count = hibernate_page_bitmap_count(bitmap, TRUE, page); - if (!count) - break; - page += count; - if (page <= bitmap->last_page) - break; - } - - *pPage = page; - if (bitmap) - count = hibernate_page_bitmap_count(bitmap, FALSE, page); - else - count = 0; - - return (count); -} - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -static IOReturn -IOHibernatePollerProbe(IOPolledFileIOVars * vars, IOService * target) -{ - IOReturn err = kIOReturnError; - int32_t idx; - IOPolledInterface * poller; - - for (idx = vars->pollers->getCount() - 1; idx >= 0; idx--) - { - poller = (IOPolledInterface *) vars->pollers->getObject(idx); - err = poller->probe(target); 
- if (err) - { - HIBLOG("IOPolledInterface::probe[%d] 0x%x\n", idx, err); - break; - } - } - - return (err); -} - -static IOReturn -IOHibernatePollerOpen(IOPolledFileIOVars * vars, uint32_t state, IOMemoryDescriptor * md) -{ - IOReturn err = kIOReturnError; - int32_t idx; - IOPolledInterface * poller; - - for (idx = vars->pollers->getCount() - 1; idx >= 0; idx--) - { - poller = (IOPolledInterface *) vars->pollers->getObject(idx); - err = poller->open(state, md); - if (err) - { - HIBLOG("IOPolledInterface::open[%d] 0x%x\n", idx, err); - break; - } - } - - return (err); -} - -static IOReturn -IOHibernatePollerClose(IOPolledFileIOVars * vars, uint32_t state) -{ - IOReturn err = kIOReturnError; - int32_t idx; - IOPolledInterface * poller; - - for (idx = 0; - (poller = (IOPolledInterface *) vars->pollers->getObject(idx)); - idx++) - { - err = poller->close(state); - if (err) - HIBLOG("IOPolledInterface::close[%d] 0x%x\n", idx, err); - } - - return (err); -} - -static void -IOHibernatePollerIOComplete(void * target, - void * parameter, - IOReturn status, - UInt64 actualByteCount) -{ - IOPolledFileIOVars * vars = (IOPolledFileIOVars *) parameter; - - vars->ioStatus = status; -} - -static IOReturn -IOHibernatePollerIO(IOPolledFileIOVars * vars, - uint32_t operation, uint32_t bufferOffset, - uint64_t deviceOffset, uint64_t length) -{ - - IOReturn err = kIOReturnError; - IOPolledInterface * poller; - IOPolledCompletion completion; - - completion.target = 0; - completion.action = &IOHibernatePollerIOComplete; - completion.parameter = vars; - - vars->ioStatus = -1; - - poller = (IOPolledInterface *) vars->pollers->getObject(0); - err = poller->startIO(operation, bufferOffset, deviceOffset + vars->block0, length, completion); - if (err) - HIBLOG("IOPolledInterface::startIO[%d] 0x%x\n", 0, err); - - return (err); -} - -static IOReturn -IOHibernatePollerIODone(IOPolledFileIOVars * vars, bool abortable) -{ - IOReturn err = kIOReturnSuccess; - int32_t idx = 0; - IOPolledInterface * 
poller; - - while (-1 == vars->ioStatus) - { - for (idx = 0; - (poller = (IOPolledInterface *) vars->pollers->getObject(idx)); - idx++) - { - IOReturn newErr; - newErr = poller->checkForWork(); - if ((newErr == kIOReturnAborted) && !abortable) - newErr = kIOReturnSuccess; - if (kIOReturnSuccess == err) - err = newErr; - } - } - - if ((kIOReturnSuccess == err) && abortable && hibernate_should_abort()) - { - err = kIOReturnAborted; - HIBLOG("IOPolledInterface::checkForWork sw abort\n"); - } - - if (err) - { - HIBLOG("IOPolledInterface::checkForWork[%d] 0x%x\n", idx, err); - } - else - { - err = vars->ioStatus; - if (kIOReturnSuccess != err) - HIBLOG("IOPolledInterface::ioStatus 0x%x\n", err); - } - - return (err); -} - -IOReturn -IOPolledInterface::checkAllForWork(void) -{ - IOReturn err = kIOReturnNotReady; - int32_t idx; - IOPolledInterface * poller; - - IOHibernateVars * vars = &gIOHibernateVars; - - if (!vars->fileVars || !vars->fileVars->pollers) - return (err); - - for (idx = 0; - (poller = (IOPolledInterface *) vars->fileVars->pollers->getObject(idx)); - idx++) - { - err = poller->checkForWork(); - if (err) - HIBLOG("IOPolledInterface::checkAllForWork[%d] 0x%x\n", idx, err); - } - - return (err); -} - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -struct _OpenFileContext -{ - OSData * extents; - uint64_t size; -}; - -static void -file_extent_callback(void * ref, uint64_t start, uint64_t length) -{ - _OpenFileContext * ctx = (_OpenFileContext *) ref; - IOPolledFileExtent extent; - - extent.start = start; - extent.length = length; - - HIBLOG("[0x%qx, 0x%qx]\n", start, length); - - ctx->extents->appendBytes(&extent, sizeof(extent)); - ctx->size += length; -} - -static IOService * -IOCopyMediaForDev(dev_t device) -{ - OSDictionary * matching; - OSNumber * num; - OSIterator * iter; - IOService * result = 0; - - matching = IOService::serviceMatching("IOMedia"); - if (!matching) - return (0); - do - { - num = 
OSNumber::withNumber(major(device), 32); - if (!num) - break; - matching->setObject(kIOBSDMajorKey, num); - num->release(); - num = OSNumber::withNumber(minor(device), 32); - if (!num) - break; - matching->setObject(kIOBSDMinorKey, num); - num->release(); - if (!num) - break; - iter = IOService::getMatchingServices(matching); - if (iter) - { - result = (IOService *) iter->getNextObject(); - result->retain(); - iter->release(); - } - } - while (false); - matching->release(); - - return (result); -} - -/* - * Writes header to disk with signature, block size and file extents data. - * If there are more than 2 extents, then they are written on second block. - */ -static IOReturn -WriteExtentsToFile(struct kern_direct_file_io_ref_t * fileRef, - uint32_t signature, uint32_t blockSize, - IOPolledFileExtent *fileExtents, - IOByteCount size) -{ - IOHibernateImageHeader hdr; - IOItemCount count; - IOReturn err = kIOReturnSuccess; - int rc; - - memset(&hdr, 0, sizeof(IOHibernateImageHeader)); - count = size; - if (count > sizeof(hdr.fileExtentMap)) - { - hdr.fileExtentMapSize = count; - count = sizeof(hdr.fileExtentMap); - } - else - hdr.fileExtentMapSize = sizeof(hdr.fileExtentMap); - - bcopy(fileExtents, &hdr.fileExtentMap[0], count); - - // copy file block extent list if larger than header - if (hdr.fileExtentMapSize > sizeof(hdr.fileExtentMap)) - { - count = hdr.fileExtentMapSize - sizeof(hdr.fileExtentMap); - rc = kern_write_file(fileRef, blockSize, - (caddr_t)(((uint8_t *)fileExtents) + sizeof(hdr.fileExtentMap)), - count, IO_SKIP_ENCRYPTION); - if (rc != 0) { - HIBLOG("kern_write_file returned %d\n", rc); - err = kIOReturnIOError; - goto exit; - } - } - hdr.signature = signature; - hdr.deviceBlockSize = blockSize; - - rc = kern_write_file(fileRef, 0, (char *)&hdr, sizeof(hdr), IO_SKIP_ENCRYPTION); - if (rc != 0) { - HIBLOG("kern_write_file returned %d\n", rc); - err = kIOReturnIOError; - goto exit; - } - -exit: - return err; -} - -static IOReturn 
-GetImageBlockSize(IOService *part, OSArray *pollers, IOByteCount *blockSize) -{ - IOService * service; - IORegistryEntry * next; - IORegistryEntry * child; - - IOReturn err = kIOReturnSuccess; - - - next = part; - do - { - IOPolledInterface * poller; - OSObject * obj; - OSNumber * num; - - obj = next->getProperty(kIOPolledInterfaceSupportKey); - if (kOSBooleanFalse == obj) - { - pollers->flushCollection(); - break; - } - else if ((poller = OSDynamicCast(IOPolledInterface, obj))) - pollers->setObject(poller); - - if ((service = OSDynamicCast(IOService, next)) - && service->getDeviceMemory() - && !pollers->getCount()) break; - - if ((num = OSDynamicCast(OSNumber, next->getProperty(kIOMediaPreferredBlockSizeKey)))) - *blockSize = num->unsigned32BitValue(); - child = next; - } - while ((next = child->getParentEntry(gIOServicePlane)) - && child->isParent(next, gIOServicePlane, true)); - - if (*blockSize < 4096) *blockSize = 4096; - - if (!pollers->getCount()) - err = kIOReturnUnsupported; - - return err; -} - -IOReturn -IOPolledFileOpen( const char * filename, uint64_t setFileSize, - IOBufferMemoryDescriptor * ioBuffer, - IOPolledFileIOVars ** fileVars, OSData ** fileExtents, - OSData ** imagePath, uint8_t * volumeCryptKey) -{ - IOReturn err = kIOReturnSuccess; - IOPolledFileIOVars * vars; - _OpenFileContext ctx; - OSData * extentsData; - IOService * part = 0; - OSString * keyUUID = 0; - OSString * keyStoreUUID = 0; - dev_t block_dev; - dev_t hibernate_image_dev; - uint64_t maxiobytes; - AbsoluteTime startTime, endTime; - uint64_t nsec; - caddr_t write_file_addr = NULL; - vm_size_t write_file_len = 0; - - vars = IONew(IOPolledFileIOVars, 1); - if (!vars) return (kIOReturnNoMemory); - bzero(vars, sizeof(*vars)); - - do - { - vars->io = false; - vars->buffer = (uint8_t *) ioBuffer->getBytesNoCopy(); - vars->bufferHalf = 0; - vars->bufferOffset = 0; - vars->bufferSize = ioBuffer->getLength() >> 1; - - extentsData = OSData::withCapacity(32); - ctx.extents = extentsData; - 
ctx.size = 0; - clock_get_uptime(&startTime); - if (!gDebugImageFileRef) - { - // Avoid writing the header if it is written when file is prep'd for debug data - // Image is locked during prep for debug data. So, write may fail. - write_file_addr = (caddr_t)gIOHibernateCurrentHeader; - write_file_len = sizeof(IOHibernateImageHeader); - } - vars->fileRef = kern_open_file_for_direct_io(filename, - true, - &file_extent_callback, &ctx, - setFileSize, - // write file: - 0, write_file_addr, - write_file_len, - // results - &block_dev, - &hibernate_image_dev, - &vars->block0, - &maxiobytes, - &vars->flags); -#if 0 - uint32_t msDelay = (131071 & random()); - HIBLOG("sleep %d\n", msDelay); - IOSleep(msDelay); -#endif - clock_get_uptime(&endTime); - SUB_ABSOLUTETIME(&endTime, &startTime); - absolutetime_to_nanoseconds(endTime, &nsec); - - if (!vars->fileRef) err = kIOReturnNoSpace; - - IOLockLock(gFSLock); - if (kFSOpening != gFSState) err = kIOReturnTimeout; - IOLockUnlock(gFSLock); - - HIBLOG("kern_open_file_for_direct_io(%d) took %qd ms\n", err, nsec / 1000000ULL); - if (kIOReturnSuccess != err) break; - - if (kIOHibernateModeSSDInvert & gIOHibernateMode) - vars->flags ^= kIOHibernateOptionSSD; - - HIBLOG("Opened file %s, size %qd, partition base 0x%qx, maxio %qx ssd %d\n", filename, ctx.size, - vars->block0, maxiobytes, kIOHibernateOptionSSD & vars->flags); - if (ctx.size < 1*1024*1024) // check against image size estimate! 
- { - err = kIOReturnNoSpace; - break; - } - - vars->fileSize = ctx.size; - if (maxiobytes < vars->bufferSize) vars->bufferSize = maxiobytes; - - vars->extentMap = (IOPolledFileExtent *) extentsData->getBytesNoCopy(); - - part = IOCopyMediaForDev(block_dev); - if (!part) - { - err = kIOReturnNotFound; - break; - } - err = part->callPlatformFunction(PLATFORM_FUNCTION_GET_MEDIA_ENCRYPTION_KEY_UUID, false, - (void *) &keyUUID, (void *) &keyStoreUUID, NULL, NULL); - if ((kIOReturnSuccess == err) && keyUUID && keyStoreUUID) - { -// IOLog("got volume key %s\n", keyStoreUUID->getCStringNoCopy()); - uuid_t volumeKeyUUID; - aks_volume_key_t vek; - static IOService * sKeyStore; - static const OSSymbol * sAKSGetKey; - - if (!sAKSGetKey) - sAKSGetKey = OSSymbol::withCStringNoCopy(AKS_PLATFORM_FUNCTION_GETKEY); - if (!sKeyStore) - sKeyStore = (IOService *) IORegistryEntry::fromPath(AKS_SERVICE_PATH, gIOServicePlane); - if (sKeyStore) - err = uuid_parse(keyStoreUUID->getCStringNoCopy(), volumeKeyUUID); - else - err = kIOReturnNoResources; - if (kIOReturnSuccess == err) - err = sKeyStore->callPlatformFunction(sAKSGetKey, true, volumeKeyUUID, &vek, NULL, NULL); - if (kIOReturnSuccess != err) - IOLog("volume key err 0x%x\n", err); - else - { - size_t bytes = (kIOHibernateAESKeySize / 8); - if (vek.key.keybytecount < bytes) - bytes = vek.key.keybytecount; - bcopy(&vek.key.keybytes[0], volumeCryptKey, bytes); - } - bzero(&vek, sizeof(vek)); - } - part->release(); - - part = IOCopyMediaForDev(hibernate_image_dev); - if (!part) - { - err = kIOReturnNotFound; - break; - } - - vars->pollers = OSArray::withCapacity(4); - if (!vars->pollers) - { - err = kIOReturnNoMemory; - break; - } - - err = GetImageBlockSize(part, vars->pollers, &vars->blockSize); - - HIBLOG("hibernate image major %d, minor %d, blocksize %ld, pollers %d\n", - major(hibernate_image_dev), minor(hibernate_image_dev), (long)vars->blockSize, - vars->pollers->getCount()); - - if (err != kIOReturnSuccess) - break; - - 
IORegistryEntry * next; - OSData * data; - if (vars->blockSize < sizeof(IOHibernateImageHeader)) - { - err = kIOReturnError; - continue; - } - - err = IOHibernatePollerProbe(vars, (IOService *) part); - if (kIOReturnSuccess != err) break; - - err = IOHibernatePollerOpen(vars, kIOPolledPreflightState, ioBuffer); - if (kIOReturnSuccess != err) break; - - vars->media = part; - next = part; - while (next) - { - next->setProperty(kIOPolledInterfaceActiveKey, kOSBooleanTrue); - next = next->getParentEntry(gIOServicePlane); - } - - *fileVars = vars; - *fileExtents = extentsData; - - // make imagePath - - if ((extentsData->getLength() >= sizeof(IOPolledFileExtent))) - { - char str2[24 + sizeof(uuid_string_t) + 2]; - -#if defined(__i386__) || defined(__x86_64__) - if (!gIOCreateEFIDevicePathSymbol) - gIOCreateEFIDevicePathSymbol = OSSymbol::withCString("CreateEFIDevicePath"); - - if (keyUUID) - snprintf(str2, sizeof(str2), "%qx:%s", - vars->extentMap[0].start, keyUUID->getCStringNoCopy()); - else - snprintf(str2, sizeof(str2), "%qx", vars->extentMap[0].start); - - err = IOService::getPlatform()->callPlatformFunction( - gIOCreateEFIDevicePathSymbol, false, - (void *) part, (void *) str2, - (void *) (uintptr_t) true, (void *) &data); -#else - char str1[256]; - int len = sizeof(str1); - - if (!part->getPath(str1, &len, gIODTPlane)) - err = kIOReturnNotFound; - else - { - snprintf(str2, sizeof(str2), ",%qx", vars->extentMap[0].start); - // (strip the plane name) - char * tail = strchr(str1, ':'); - if (!tail) - tail = str1 - 1; - data = OSData::withBytes(tail + 1, strlen(tail + 1)); - data->appendBytes(str2, strlen(str2)); - } -#endif - if (kIOReturnSuccess == err) - *imagePath = data; - else - HIBLOG("error 0x%x getting path\n", err); - } - } - while (false); - - if (kIOReturnSuccess != err) - { - HIBLOG("error 0x%x opening hibernation file\n", err); - if (vars->fileRef) - { - kern_close_file_for_direct_io(vars->fileRef, 0, 0, 0, 0, 0); - vars->fileRef = NULL; - } - } - else - 
{ - WriteExtentsToFile(vars->fileRef, kIOHibernateHeaderOpenSignature, vars->blockSize, - (IOPolledFileExtent *)extentsData->getBytesNoCopy(), - extentsData->getLength()); - } - - if (part) - part->release(); - - return (err); -} - -IOReturn -IOPolledFileClose( IOPolledFileIOVars * vars ) -{ - if (vars->pollers) - { - IOHibernatePollerClose(vars, kIOPolledPostflightState); - vars->pollers->release(); - } - - bzero(vars, sizeof(IOPolledFileIOVars)); - - return (kIOReturnSuccess); -} - -static IOReturn -IOPolledFileSeek(IOPolledFileIOVars * vars, uint64_t position) -{ - IOPolledFileExtent * extentMap; - - extentMap = vars->extentMap; - - vars->position = position; - - while (position >= extentMap->length) - { - position -= extentMap->length; - extentMap++; - } - - vars->currentExtent = extentMap; - vars->extentRemaining = extentMap->length - position; - vars->extentPosition = vars->position - position; - - if (vars->bufferSize <= vars->extentRemaining) - vars->bufferLimit = vars->bufferSize; - else - vars->bufferLimit = vars->extentRemaining; - - return (kIOReturnSuccess); -} - -static IOReturn -IOPolledFileWrite(IOPolledFileIOVars * vars, - const uint8_t * bytes, IOByteCount size, - hibernate_cryptvars_t * cryptvars) -{ - IOReturn err = kIOReturnSuccess; - IOByteCount copy; - bool flush = false; - - do - { - if (!bytes && !size) - { - // seek to end of block & flush - size = vars->position & (vars->blockSize - 1); - if (size) - size = vars->blockSize - size; - flush = true; - // use some garbage for the fill - bytes = vars->buffer + vars->bufferOffset; - } - - copy = vars->bufferLimit - vars->bufferOffset; - if (copy > size) - copy = size; - else - flush = true; - - if (bytes) - { - bcopy(bytes, vars->buffer + vars->bufferHalf + vars->bufferOffset, copy); - bytes += copy; - } - else - bzero(vars->buffer + vars->bufferHalf + vars->bufferOffset, copy); - - size -= copy; - vars->bufferOffset += copy; - vars->position += copy; - - if (flush && vars->bufferOffset) - { - 
uint64_t offset = (vars->position - vars->bufferOffset - - vars->extentPosition + vars->currentExtent->start); - uint32_t length = (vars->bufferOffset); - -#if CRYPTO - if (cryptvars && vars->encryptStart - && (vars->position > vars->encryptStart) - && ((vars->position - length) < vars->encryptEnd)) - { - AbsoluteTime startTime, endTime; - - uint64_t encryptLen, encryptStart; - encryptLen = vars->position - vars->encryptStart; - if (encryptLen > length) - encryptLen = length; - encryptStart = length - encryptLen; - if (vars->position > vars->encryptEnd) - encryptLen -= (vars->position - vars->encryptEnd); - - clock_get_uptime(&startTime); - - // encrypt the buffer - aes_encrypt_cbc(vars->buffer + vars->bufferHalf + encryptStart, - &cryptvars->aes_iv[0], - encryptLen / AES_BLOCK_SIZE, - vars->buffer + vars->bufferHalf + encryptStart, - &cryptvars->ctx.encrypt); - - clock_get_uptime(&endTime); - ADD_ABSOLUTETIME(&vars->cryptTime, &endTime); - SUB_ABSOLUTETIME(&vars->cryptTime, &startTime); - vars->cryptBytes += encryptLen; - - // save initial vector for following encrypts - bcopy(vars->buffer + vars->bufferHalf + encryptStart + encryptLen - AES_BLOCK_SIZE, - &cryptvars->aes_iv[0], - AES_BLOCK_SIZE); - } -#endif /* CRYPTO */ - - if (vars->io) - { - err = IOHibernatePollerIODone(vars, true); - if (kIOReturnSuccess != err) - break; - } - -if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position); -//if (length != vars->bufferSize) HIBLOG("short write of %qx ends@ %qx\n", length, offset + length); - - err = IOHibernatePollerIO(vars, kIOPolledWrite, vars->bufferHalf, offset, length); - if (kIOReturnSuccess != err) - break; - vars->io = true; - - vars->extentRemaining -= vars->bufferOffset; - if (!vars->extentRemaining) - { - vars->currentExtent++; - vars->extentRemaining = vars->currentExtent->length; - vars->extentPosition = vars->position; - if (!vars->extentRemaining) - { - err = kIOReturnOverrun; - break; - } - } - - 
vars->bufferHalf = vars->bufferHalf ? 0 : vars->bufferSize; - vars->bufferOffset = 0; - if (vars->bufferSize <= vars->extentRemaining) - vars->bufferLimit = vars->bufferSize; - else - vars->bufferLimit = vars->extentRemaining; - - flush = false; - } - } - while (size); - - return (err); -} - -static IOReturn -IOPolledFileRead(IOPolledFileIOVars * vars, - uint8_t * bytes, IOByteCount size, - hibernate_cryptvars_t * cryptvars) -{ - IOReturn err = kIOReturnSuccess; - IOByteCount copy; - -// bytesWritten += size; - - do - { - copy = vars->bufferLimit - vars->bufferOffset; - if (copy > size) - copy = size; - - if (bytes) - { - bcopy(vars->buffer + vars->bufferHalf + vars->bufferOffset, bytes, copy); - bytes += copy; - } - size -= copy; - vars->bufferOffset += copy; -// vars->position += copy; - - if ((vars->bufferOffset == vars->bufferLimit) && (vars->position < vars->readEnd)) - { - if (vars->io) - { - err = IOHibernatePollerIODone(vars, false); - if (kIOReturnSuccess != err) - break; - } - else - cryptvars = 0; - -if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position); - - vars->position += vars->lastRead; - vars->extentRemaining -= vars->lastRead; - vars->bufferLimit = vars->lastRead; - - if (!vars->extentRemaining) - { - vars->currentExtent++; - vars->extentRemaining = vars->currentExtent->length; - vars->extentPosition = vars->position; - if (!vars->extentRemaining) - { - err = kIOReturnOverrun; - break; - } - } - - uint64_t length; - uint64_t lastReadLength = vars->lastRead; - uint64_t offset = (vars->position - - vars->extentPosition + vars->currentExtent->start); - if (vars->extentRemaining <= vars->bufferSize) - length = vars->extentRemaining; - else - length = vars->bufferSize; - if ((length + vars->position) > vars->readEnd) - length = vars->readEnd - vars->position; - - vars->lastRead = length; - if (length) - { -//if (length != vars->bufferSize) HIBLOG("short read of %qx ends@ %qx\n", length, offset + length); - err 
= IOHibernatePollerIO(vars, kIOPolledRead, vars->bufferHalf, offset, length); - if (kIOReturnSuccess != err) - break; - vars->io = true; - } - - vars->bufferHalf = vars->bufferHalf ? 0 : vars->bufferSize; - vars->bufferOffset = 0; - -#if CRYPTO - if (cryptvars) - { - uint8_t thisVector[AES_BLOCK_SIZE]; - AbsoluteTime startTime, endTime; - - // save initial vector for following decrypts - bcopy(&cryptvars->aes_iv[0], &thisVector[0], AES_BLOCK_SIZE); - bcopy(vars->buffer + vars->bufferHalf + lastReadLength - AES_BLOCK_SIZE, - &cryptvars->aes_iv[0], AES_BLOCK_SIZE); - - // decrypt the buffer - clock_get_uptime(&startTime); - - aes_decrypt_cbc(vars->buffer + vars->bufferHalf, - &thisVector[0], - lastReadLength / AES_BLOCK_SIZE, - vars->buffer + vars->bufferHalf, - &cryptvars->ctx.decrypt); - - clock_get_uptime(&endTime); - ADD_ABSOLUTETIME(&vars->cryptTime, &endTime); - SUB_ABSOLUTETIME(&vars->cryptTime, &startTime); - vars->cryptBytes += lastReadLength; - } -#endif /* CRYPTO */ - } - } - while (size); - - return (err); -} - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#if HIBERNATION -IOReturn -IOHibernateOpenForDebugData( ) -{ - dev_t image_dev; - OSData *extentsData = NULL; - OSObject *obj; - OSString *str; - IOByteCount blockSize = 0; - IOByteCount size; - IOService * part = 0; - OSData * data = NULL; - - IOPolledFileExtent * fileExtents; - IOReturn err = kIOReturnSuccess; - IORegistryEntry * regEntry; - OSArray * pollers = NULL; - - _OpenFileContext ctx; - - if (gDebugImageFileRef != NULL) - return kIOReturnError; - - if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFileKey))) - { - if ((str = OSDynamicCast(OSString, obj))) - strlcpy(gIOHibernateFilename, str->getCStringNoCopy(), - sizeof(gIOHibernateFilename)); - obj->release(); - } - - if (!gIOHibernateFilename[0]) { - HIBLOG("Failed to get hibernate image filename\n"); - return (kIOReturnUnsupported); - } - - extentsData = OSData::withCapacity(32); - 
ctx.extents = extentsData; - ctx.size = 0; - - bzero(gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader)); - gIOHibernateCurrentHeader->debugFlags = gIOHibernateDebugFlags; - gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature; - - gDebugImageFileRef = kern_open_file_for_direct_io(gIOHibernateFilename, - false, - &file_extent_callback, &ctx, - 0, 0, - (caddr_t)gIOHibernateCurrentHeader, - sizeof(IOHibernateImageHeader), - NULL, &image_dev, NULL, NULL, NULL); - - if (gDebugImageFileRef == NULL) - { - HIBLOG("Failed to open the file \n"); - err = kIOReturnError; - goto exit; - } - fileExtents = (IOPolledFileExtent *)extentsData->getBytesNoCopy(); - size = extentsData->getLength(); - - part = IOCopyMediaForDev(image_dev); - if (!part) - { - HIBLOG("Failed to get the media device\n"); - err = kIOReturnNotFound; - goto exit; - } - - - pollers = OSArray::withCapacity(4); - if (!pollers) - { - err = kIOReturnNoMemory; - goto exit; - } - - err = GetImageBlockSize(part, pollers, &blockSize); - if (err != kIOReturnSuccess) - { - HIBLOG("Failed to get block size\n"); - goto exit; - } - if (blockSize < sizeof(IOHibernateImageHeader)) - { - HIBLOG("block size %llu is less than the size of the header\n", blockSize); - err = kIOReturnError; - goto exit; - } - - WriteExtentsToFile(gDebugImageFileRef, kIOHibernateHeaderOpenSignature, - blockSize, fileExtents, size); - - char str2[24 + sizeof(uuid_string_t) + 2]; - - if (!gIOCreateEFIDevicePathSymbol) - gIOCreateEFIDevicePathSymbol = OSSymbol::withCString("CreateEFIDevicePath"); - - snprintf(str2, sizeof(str2), "%qx", fileExtents[0].start); - - err = IOService::getPlatform()->callPlatformFunction( - gIOCreateEFIDevicePathSymbol, false, - (void *) part, (void *) str2, - (void *) (uintptr_t) true, (void *) &data); - - if (!gIOOptionsEntry) - { - regEntry = IORegistryEntry::fromPath("/options", gIODTPlane); - gIOOptionsEntry = OSDynamicCast(IODTNVRAM, regEntry); - if (regEntry && !gIOOptionsEntry) - 
regEntry->release(); - } - if (gIOOptionsEntry) - { - const OSSymbol * sym; - - sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey); - if (sym) - { - gIOOptionsEntry->setProperty(sym, data); - sym->release(); - } - } - - -exit: +static vm_offset_t +hibernate_page_list_iterate(hibernate_page_list_t * list, vm_offset_t * pPage) +{ + uint32_t page = *pPage; + uint32_t count; + hibernate_bitmap_t * bitmap; - if ( (err != kIOReturnSuccess) && gDebugImageFileRef) { - kern_close_file_for_direct_io(gDebugImageFileRef, 0, 0, 0, 0, 0); - gDebugImageFileRef = NULL; + while ((bitmap = hibernate_page_bitmap_pin(list, &page))) + { + count = hibernate_page_bitmap_count(bitmap, TRUE, page); + if (!count) + break; + page += count; + if (page <= bitmap->last_page) + break; } - if (extentsData) extentsData->release(); - if (part) part->release(); - if (pollers) pollers->release(); - if (data) data->release(); - return err; + *pPage = page; + if (bitmap) + count = hibernate_page_bitmap_count(bitmap, FALSE, page); + else + count = 0; + + return (count); } -#endif /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -1379,18 +386,16 @@ IOReturn IOHibernateSystemSleep(void) { IOReturn err; - OSData * data; + OSData * nvramData; OSObject * obj; OSString * str; OSNumber * num; bool dsSSD, vmflush; IOHibernateVars * vars; + uint64_t setFileSize = 0; gIOHibernateState = kIOHibernateStateInactive; - if (!gIOChosenEntry) - gIOChosenEntry = IORegistryEntry::fromPath("/chosen", gIODTPlane); - gIOHibernateDebugFlags = 0; if (kIOLogHibernate & gIOKitDebug) gIOHibernateDebugFlags |= kIOHibernateDebugRestoreLogs; @@ -1436,13 +441,11 @@ IOHibernateSystemSleep(void) { vars->srcBuffer = IOBufferMemoryDescriptor::withOptions(kIODirectionOutIn, 2 * page_size + WKdm_SCRATCH_BUF_SIZE, page_size); - vars->ioBuffer = IOBufferMemoryDescriptor::withOptions(kIODirectionOutIn, - 2 * kDefaultIOSize, page_size); vars->handoffBuffer = 
IOBufferMemoryDescriptor::withOptions(kIODirectionOutIn, ptoa_64(gIOHibernateHandoffPageCount), page_size); - if (!vars->srcBuffer || !vars->ioBuffer || !vars->handoffBuffer) + if (!vars->srcBuffer || !vars->handoffBuffer) { err = kIOReturnNoMemory; break; @@ -1468,8 +471,7 @@ IOHibernateSystemSleep(void) gIOHibernateCurrentHeader->debugFlags = gIOHibernateDebugFlags; gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature; - vmflush = (kOSBooleanTrue == IOService::getPMRootDomain()->getProperty(kIOPMDeepSleepEnabledKey)); - uint64_t setFileSize = 0; + vmflush = ((kOSBooleanTrue == IOService::getPMRootDomain()->getProperty(kIOPMDeepSleepEnabledKey)) && root_is_CF_drive == FALSE); err = hibernate_alloc_page_lists(&vars->page_list, &vars->page_list_wired, &vars->page_list_pal); @@ -1507,16 +509,28 @@ IOHibernateSystemSleep(void) } } - // open & invalidate the image file + // Invalidate the image file + if (gDebugImageLock) { + IOLockLock(gDebugImageLock); + if (gDebugImageFileVars != 0) { + kprintf("IOHIBSystemSleep: Closing debugdata file\n"); + IOSetBootImageNVRAM(0); + IOPolledFileClose(&gDebugImageFileVars, 0, 0, 0, 0, 0); + } + IOLockUnlock(gDebugImageLock); + } - if (gDebugImageFileRef) { - kern_close_file_for_direct_io(gDebugImageFileRef, 0, 0, 0, 0, 0); - gDebugImageFileRef = NULL; - } + err = IOPolledFileOpen(gIOHibernateFilename, setFileSize, 0, + gIOHibernateCurrentHeader, sizeof(gIOHibernateCurrentHeader), + &vars->fileVars, &nvramData, + &vars->volumeCryptKey[0], sizeof(vars->volumeCryptKey)); - err = IOPolledFileOpen(gIOHibernateFilename, setFileSize, vars->ioBuffer, - &vars->fileVars, &vars->fileExtents, &data, - &vars->volumeCryptKey[0]); + if (KERN_SUCCESS != err) + { + IOLockLock(gFSLock); + if (kFSOpening != gFSState) err = kIOReturnTimeout; + IOLockUnlock(gFSLock); + } if (KERN_SUCCESS != err) { @@ -1524,53 +538,50 @@ IOHibernateSystemSleep(void) break; } + // write extents for debug data usage in EFI + 
IOWriteExtentsToFile(vars->fileVars, kIOHibernateHeaderOpenSignature); + + err = IOPolledFilePollersSetup(vars->fileVars, kIOPolledPreflightState); + if (KERN_SUCCESS != err) break; + clock_get_uptime(&startTime); err = hibernate_setup(gIOHibernateCurrentHeader, - gIOHibernateFreeRatio, gIOHibernateFreeTime, vmflush, vars->page_list, vars->page_list_wired, vars->page_list_pal); clock_get_uptime(&endTime); SUB_ABSOLUTETIME(&endTime, &startTime); absolutetime_to_nanoseconds(endTime, &nsec); HIBLOG("hibernate_setup(%d) took %qd ms\n", err, nsec / 1000000ULL); + if (KERN_SUCCESS != err) break; - dsSSD = ((0 != (kIOHibernateOptionSSD & vars->fileVars->flags)) + dsSSD = ((0 != (kIOPolledFileSSD & vars->fileVars->flags)) && (kOSBooleanTrue == IOService::getPMRootDomain()->getProperty(kIOPMDeepSleepEnabledKey))); - if (dsSSD) - { - gIOHibernateCurrentHeader->options |= - kIOHibernateOptionSSD - | kIOHibernateOptionColor; + + if (dsSSD) gIOHibernateCurrentHeader->options |= kIOHibernateOptionSSD | kIOHibernateOptionColor; + else gIOHibernateCurrentHeader->options |= kIOHibernateOptionProgress; + #if defined(__i386__) || defined(__x86_64__) - if (!uuid_is_null(vars->volumeCryptKey) && - (kOSBooleanTrue != IOService::getPMRootDomain()->getProperty(kIOPMDestroyFVKeyOnStandbyKey))) - { - uintptr_t smcVars[2]; - smcVars[0] = sizeof(vars->volumeCryptKey); - smcVars[1] = (uintptr_t)(void *) &gIOHibernateVars.volumeCryptKey[0]; + if (!uuid_is_null(vars->volumeCryptKey) && + (kOSBooleanTrue != IOService::getPMRootDomain()->getProperty(kIOPMDestroyFVKeyOnStandbyKey))) + { + uintptr_t smcVars[2]; + smcVars[0] = sizeof(vars->volumeCryptKey); + smcVars[1] = (uintptr_t)(void *) &gIOHibernateVars.volumeCryptKey[0]; - IOService::getPMRootDomain()->setProperty(kIOHibernateSMCVariablesKey, smcVars, sizeof(smcVars)); - bzero(smcVars, sizeof(smcVars)); - } + IOService::getPMRootDomain()->setProperty(kIOHibernateSMCVariablesKey, smcVars, sizeof(smcVars)); + bzero(smcVars, sizeof(smcVars)); + } 
#endif - } - else - { - gIOHibernateCurrentHeader->options |= kIOHibernateOptionProgress; - } - if (KERN_SUCCESS != err) - break; - if (encryptedswap || !uuid_is_null(vars->volumeCryptKey)) gIOHibernateMode ^= kIOHibernateModeEncrypt; if (kIOHibernateOptionProgress & gIOHibernateCurrentHeader->options) { vars->videoAllocSize = kVideoMapSize; - if (KERN_SUCCESS != kmem_alloc_pageable(kernel_map, &vars->videoMapping, vars->videoAllocSize)) + if (KERN_SUCCESS != kmem_alloc_pageable(kernel_map, &vars->videoMapping, vars->videoAllocSize, VM_KERN_MEMORY_IOKIT)) vars->videoMapping = 0; } @@ -1582,28 +593,11 @@ IOHibernateSystemSleep(void) // set nvram - IORegistryEntry * regEntry; - if (!gIOOptionsEntry) - { - regEntry = IORegistryEntry::fromPath("/options", gIODTPlane); - gIOOptionsEntry = OSDynamicCast(IODTNVRAM, regEntry); - if (regEntry && !gIOOptionsEntry) - regEntry->release(); - } - - if (gIOOptionsEntry) - { - const OSSymbol * sym; - - sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey); - if (sym) - { - gIOOptionsEntry->setProperty(sym, data); - sym->release(); - } - data->release(); + IOSetBootImageNVRAM(nvramData); + nvramData->release(); #if defined(__i386__) || defined(__x86_64__) + { struct AppleRTCHibernateVars { uint8_t signature[4]; @@ -1612,6 +606,7 @@ IOHibernateSystemSleep(void) uint8_t wiredCryptKey[16]; }; AppleRTCHibernateVars rtcVars; + OSData * data; rtcVars.signature[0] = 'A'; rtcVars.signature[1] = 'A'; @@ -1627,167 +622,92 @@ IOHibernateSystemSleep(void) (c = gIOHibernateBootSignature[i]) && (i < (sizeof(rtcVars.booterSignature) << 1)); i++) { - if (c >= 'a') - c -= 'a' - 10; - else if (c >= 'A') - c -= 'A' - 10; - else if (c >= '0') - c -= '0'; - else - continue; + if (c >= 'a') c -= 'a' - 10; + else if (c >= 'A') c -= 'A' - 10; + else if (c >= '0') c -= '0'; + else continue; value = (value << 4) | c; - if (i & 1) - rtcVars.booterSignature[i >> 1] = value; + if (i & 1) rtcVars.booterSignature[i >> 1] = value; } } data = 
OSData::withBytes(&rtcVars, sizeof(rtcVars)); if (data) { - if (!gIOHibernateRTCVariablesKey) - gIOHibernateRTCVariablesKey = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey); if (gIOHibernateRTCVariablesKey) IOService::getPMRootDomain()->setProperty(gIOHibernateRTCVariablesKey, data); - - if( gIOOptionsEntry ) - { - if( gIOHibernateMode & kIOHibernateModeSwitch ) - { - const OSSymbol *sym; - sym = OSSymbol::withCStringNoCopy(kIOHibernateBootSwitchVarsKey); - if( sym ) - { - gIOOptionsEntry->setProperty(sym, data); /* intentional insecure backup of rtc boot vars */ - sym->release(); - } - } - } - data->release(); } if (gIOChosenEntry) { data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOHibernateMachineSignatureKey)); - if (data) - gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy()); + if (data) gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy()); + // set BootNext + if (!gIOHibernateBoot0082Data) { - // set BootNext - - if (!gIOHibernateBoot0082Data) + data = OSDynamicCast(OSData, gIOChosenEntry->getProperty("boot-device-path")); + if (data) { - data = OSDynamicCast(OSData, gIOChosenEntry->getProperty("boot-device-path")); - if (data) + // AppleNVRAM_EFI_LOAD_OPTION + struct { + uint32_t Attributes; + uint16_t FilePathLength; + uint16_t Desc; + } loadOptionHeader; + loadOptionHeader.Attributes = 1; + loadOptionHeader.FilePathLength = data->getLength(); + loadOptionHeader.Desc = 0; + gIOHibernateBoot0082Data = OSData::withCapacity(sizeof(loadOptionHeader) + loadOptionHeader.FilePathLength); + if (gIOHibernateBoot0082Data) { - // AppleNVRAM_EFI_LOAD_OPTION - struct { - uint32_t Attributes; - uint16_t FilePathLength; - uint16_t Desc; - } loadOptionHeader; - loadOptionHeader.Attributes = 1; - loadOptionHeader.FilePathLength = data->getLength(); - loadOptionHeader.Desc = 0; - gIOHibernateBoot0082Data = OSData::withCapacity(sizeof(loadOptionHeader) + loadOptionHeader.FilePathLength); - if 
(gIOHibernateBoot0082Data) - { - gIOHibernateBoot0082Data->appendBytes(&loadOptionHeader, sizeof(loadOptionHeader)); - gIOHibernateBoot0082Data->appendBytes(data); - } + gIOHibernateBoot0082Data->appendBytes(&loadOptionHeader, sizeof(loadOptionHeader)); + gIOHibernateBoot0082Data->appendBytes(data); } } - if (!gIOHibernateBoot0082Key) - gIOHibernateBoot0082Key = OSSymbol::withCString("8BE4DF61-93CA-11D2-AA0D-00E098032B8C:Boot0082"); - if (!gIOHibernateBootNextKey) - gIOHibernateBootNextKey = OSSymbol::withCString("8BE4DF61-93CA-11D2-AA0D-00E098032B8C:BootNext"); - if (!gIOHibernateBootNextData) - { - uint16_t bits = 0x0082; - gIOHibernateBootNextData = OSData::withBytes(&bits, sizeof(bits)); - } - if (gIOHibernateBoot0082Key && gIOHibernateBoot0082Data && gIOHibernateBootNextKey && gIOHibernateBootNextData) - { - gIOHibernateBootNextSave = gIOOptionsEntry->copyProperty(gIOHibernateBootNextKey); - gIOOptionsEntry->setProperty(gIOHibernateBoot0082Key, gIOHibernateBoot0082Data); - gIOOptionsEntry->setProperty(gIOHibernateBootNextKey, gIOHibernateBootNextData); - } - } - } -#else /* !i386 && !x86_64 */ - if (kIOHibernateModeEncrypt & gIOHibernateMode) - { - data = OSData::withBytes(&vars->wiredCryptKey[0], sizeof(vars->wiredCryptKey)); - sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKeyKey); - if (sym && data) - gIOOptionsEntry->setProperty(sym, data); - if (sym) - sym->release(); - if (data) - data->release(); - if (false && gIOHibernateBootSignature[0]) - { - data = OSData::withCapacity(16); - sym = OSSymbol::withCStringNoCopy(kIOHibernateBootSignatureKey); - if (sym && data) - { - char c; - uint8_t value = 0; - for (uint32_t i = 0; (c = gIOHibernateBootSignature[i]); i++) - { - if (c >= 'a') - c -= 'a' - 10; - else if (c >= 'A') - c -= 'A' - 10; - else if (c >= '0') - c -= '0'; - else - continue; - value = (value << 4) | c; - if (i & 1) - data->appendBytes(&value, sizeof(value)); - } - gIOOptionsEntry->setProperty(sym, data); - } - if (sym) - 
sym->release(); - if (data) - data->release(); - } - } - if (!vars->haveFastBoot) - { - // set boot volume to zero - IODTPlatformExpert * platform = OSDynamicCast(IODTPlatformExpert, IOService::getPlatform()); - if (platform && (kIOReturnSuccess == platform->readXPRAM(kXPRamAudioVolume, - &vars->saveBootAudioVolume, sizeof(vars->saveBootAudioVolume)))) - { - uint8_t newVolume; - newVolume = vars->saveBootAudioVolume & 0xf8; - platform->writeXPRAM(kXPRamAudioVolume, - &newVolume, sizeof(newVolume)); - } + } + if (!gIOHibernateBootNextData) + { + uint16_t bits = 0x0082; + gIOHibernateBootNextData = OSData::withBytes(&bits, sizeof(bits)); + } + if (gIOHibernateBoot0082Key && gIOHibernateBoot0082Data && gIOHibernateBootNextKey && gIOHibernateBootNextData) + { + gIOHibernateBootNextSave = gIOOptionsEntry->copyProperty(gIOHibernateBootNextKey); + gIOOptionsEntry->setProperty(gIOHibernateBoot0082Key, gIOHibernateBoot0082Data); + gIOOptionsEntry->setProperty(gIOHibernateBootNextKey, gIOHibernateBootNextData); + } + // BootNext } -#endif /* !i386 && !x86_64 */ } - // -- - +#endif /* !i386 && !x86_64 */ } while (false); IOLockLock(gFSLock); - if ((kIOReturnSuccess == err) && (kFSOpening == gFSState)) + if ((kIOReturnSuccess == err) && (kFSOpening != gFSState)) + { + HIBLOG("hibernate file close due timeout\n"); + err = kIOReturnTimeout; + } + if (kIOReturnSuccess == err) { gFSState = kFSOpened; gIOHibernateVars = *vars; gFileVars = *vars->fileVars; + gFileVars.allocated = false; gIOHibernateVars.fileVars = &gFileVars; - gIOHibernateFileRef = gFileVars.fileRef; gIOHibernateCurrentHeader->signature = kIOHibernateHeaderSignature; gIOHibernateState = kIOHibernateStateHibernating; } else { - HIBLOG("hibernate file close due timeout\n"); - if (vars->fileVars && vars->fileVars->fileRef) kern_close_file_for_direct_io(vars->fileVars->fileRef, 0, 0, 0, 0, 0); + IOPolledFileIOVars * fileVars = vars->fileVars; IOHibernateDone(vars); + IOPolledFileClose(&fileVars, +#if DISABLE_TRIM + 0, 
NULL, 0, 0, 0); +#else + 0, NULL, 0, sizeof(IOHibernateImageHeader), setFileSize); +#endif gFSState = kFSIdle; } IOLockUnlock(gFSLock); @@ -1800,6 +720,136 @@ IOHibernateSystemSleep(void) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +static void +IOSetBootImageNVRAM(OSData * data) +{ + IORegistryEntry * regEntry; + + if (!gIOOptionsEntry) + { + regEntry = IORegistryEntry::fromPath("/options", gIODTPlane); + gIOOptionsEntry = OSDynamicCast(IODTNVRAM, regEntry); + if (regEntry && !gIOOptionsEntry) + regEntry->release(); + } + if (gIOOptionsEntry && gIOHibernateBootImageKey) + { + if (data) gIOOptionsEntry->setProperty(gIOHibernateBootImageKey, data); + else + { + gIOOptionsEntry->removeProperty(gIOHibernateBootImageKey); + gIOOptionsEntry->sync(); + } + } +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +/* + * Writes header to disk with signature, block size and file extents data. + * If there are more than 2 extents, then they are written on second block. 
+ */ +static IOReturn +IOWriteExtentsToFile(IOPolledFileIOVars * vars, uint32_t signature) +{ + IOHibernateImageHeader hdr; + IOItemCount count; + IOReturn err = kIOReturnSuccess; + int rc; + IOPolledFileExtent * fileExtents; + + fileExtents = (typeof(fileExtents)) vars->fileExtents->getBytesNoCopy(), + + memset(&hdr, 0, sizeof(IOHibernateImageHeader)); + count = vars->fileExtents->getLength(); + if (count > sizeof(hdr.fileExtentMap)) + { + hdr.fileExtentMapSize = count; + count = sizeof(hdr.fileExtentMap); + } + else + hdr.fileExtentMapSize = sizeof(hdr.fileExtentMap); + + bcopy(fileExtents, &hdr.fileExtentMap[0], count); + + // copy file block extent list if larger than header + if (hdr.fileExtentMapSize > sizeof(hdr.fileExtentMap)) + { + count = hdr.fileExtentMapSize - sizeof(hdr.fileExtentMap); + rc = kern_write_file(vars->fileRef, vars->blockSize, + (caddr_t)(((uint8_t *)fileExtents) + sizeof(hdr.fileExtentMap)), + count, IO_SKIP_ENCRYPTION); + if (rc != 0) { + HIBLOG("kern_write_file returned %d\n", rc); + err = kIOReturnIOError; + goto exit; + } + } + hdr.signature = signature; + hdr.deviceBlockSize = vars->blockSize; + + rc = kern_write_file(vars->fileRef, 0, (char *)&hdr, sizeof(hdr), IO_SKIP_ENCRYPTION); + if (rc != 0) { + HIBLOG("kern_write_file returned %d\n", rc); + err = kIOReturnIOError; + goto exit; + } + +exit: + return err; +} + +void +IOOpenDebugDataFile(const char *fname, uint64_t size) +{ + IOReturn err; + OSData * imagePath = NULL; + uint64_t padding; + + if (!gDebugImageLock) { + gDebugImageLock = IOLockAlloc(); + } + + // Try to get a lock, but don't block for getting lock + if (!IOLockTryLock(gDebugImageLock)) { + HIBLOG("IOOpenDebugDataFile: Failed to get lock\n"); + return; + } + + if (gDebugImageFileVars || !fname || !size) { + HIBLOG("IOOpenDebugDataFile: conditions failed\n"); + goto exit; + } + + padding = (PAGE_SIZE*2); // allocate couple more pages for header and fileextents + err = IOPolledFileOpen(fname, size+padding, 
32ULL*1024*1024*1024, + NULL, 0, + &gDebugImageFileVars, &imagePath, NULL, 0); + + if ((kIOReturnSuccess == err) && imagePath) + { + if ((gDebugImageFileVars->fileSize < (size+padding)) || + (gDebugImageFileVars->fileExtents->getLength() > PAGE_SIZE)) { + // Can't use the file + IOPolledFileClose(&gDebugImageFileVars, 0, 0, 0, 0, 0); + HIBLOG("IOOpenDebugDataFile: too many file extents\n"); + goto exit; + } + + // write extents for debug data usage in EFI + IOWriteExtentsToFile(gDebugImageFileVars, kIOHibernateHeaderOpenSignature); + IOSetBootImageNVRAM(imagePath); + kprintf("IOOpenDebugDataFile: opened debugdata file\n"); + } + +exit: + IOLockUnlock(gDebugImageLock); + + if (imagePath) imagePath->release(); + return; +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + DECLARE_IOHIBERNATEPROGRESSALPHA static void @@ -2040,6 +1090,7 @@ IOHibernateSystemWake(void) { if (kFSOpened == gFSState) { + IOPolledFilePollersClose(gIOHibernateVars.fileVars, kIOPolledPostflightState); IOHibernateDone(&gIOHibernateVars); } else @@ -2053,8 +1104,6 @@ IOHibernateSystemWake(void) static IOReturn IOHibernateDone(IOHibernateVars * vars) { - IORegistryEntry * next; - hibernate_teardown(vars->page_list, vars->page_list_wired, vars->page_list_pal); if (vars->videoMapping) @@ -2095,17 +1144,6 @@ IOHibernateDone(IOHibernateVars * vars) IOService::getPMRootDomain()->removeProperty(kIOHibernateGfxStatusKey); } - if (vars->fileVars) - { - if ((next = vars->fileVars->media)) do - { - next->removeProperty(kIOPolledInterfaceActiveKey); - next = next->getParentEntry(gIOServicePlane); - } - while (next); - IOPolledFileClose(vars->fileVars); - } - // invalidate nvram properties - (gIOOptionsEntry != 0) => nvram was touched #if defined(__i386__) || defined(__x86_64__) @@ -2139,10 +1177,7 @@ IOHibernateDone(IOHibernateVars * vars) } #endif - if (vars->srcBuffer) - vars->srcBuffer->release(); - if (vars->ioBuffer) - vars->ioBuffer->release(); + if (vars->srcBuffer) 
vars->srcBuffer->release(); bzero(&gIOHibernateHandoffPages[0], gIOHibernateHandoffPageCount * sizeof(gIOHibernateHandoffPages[0])); if (vars->handoffBuffer) { @@ -2187,8 +1222,6 @@ IOHibernateDone(IOHibernateVars * vars) } vars->handoffBuffer->release(); } - if (vars->fileExtents) - vars->fileExtents->release(); bzero(vars, sizeof(*vars)); @@ -2200,53 +1233,31 @@ IOHibernateDone(IOHibernateVars * vars) IOReturn IOHibernateSystemPostWake(void) { - struct kern_direct_file_io_ref_t * fileRef; - + gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature; if (kFSOpened == gFSState) { // invalidate & close the image file - gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature; - if ((fileRef = gIOHibernateFileRef)) - { - gIOHibernateFileRef = 0; - IOSleep(TRIM_DELAY); - kern_close_file_for_direct_io(fileRef, + IOSleep(TRIM_DELAY); + IOPolledFileIOVars * vars = &gFileVars; + IOPolledFileClose(&vars, #if DISABLE_TRIM - 0, 0, 0, 0, 0); + 0, NULL, 0, 0, 0); #else - 0, (caddr_t) gIOHibernateCurrentHeader, - sizeof(IOHibernateImageHeader), - 0, - gIOHibernateCurrentHeader->imageSize); + 0, (caddr_t)gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader), + sizeof(IOHibernateImageHeader), gIOHibernateCurrentHeader->imageSize); #endif - } - gFSState = kFSIdle; - } - - if (gDebugImageFileRef) { - kern_close_file_for_direct_io(gDebugImageFileRef, 0, 0, 0, 0, 0); - gDebugImageFileRef = NULL; } + gFSState = kFSIdle; - if (!gIOOptionsEntry) - { - IORegistryEntry * regEntry; - regEntry = IORegistryEntry::fromPath("/options", gIODTPlane); - gIOOptionsEntry = OSDynamicCast(IODTNVRAM, regEntry); - if (regEntry && !gIOOptionsEntry) - regEntry->release(); - } - if (gIOOptionsEntry) - { - const OSSymbol * sym; + IOSetBootImageNVRAM(0); - sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey); - if (sym) - { - gIOOptionsEntry->removeProperty(sym); - gIOOptionsEntry->sync(); - sym->release(); + if (gDebugImageLock) { + IOLockLock(gDebugImageLock); 
+ if (gDebugImageFileVars != 0) { + kprintf("IOHibernateSystemPostWake: Closing debugdata file\n"); + IOPolledFileClose(&gDebugImageFileVars, 0, 0, 0, 0, 0); } + IOLockUnlock(gDebugImageLock); } return (kIOReturnSuccess); @@ -2307,6 +1318,15 @@ SYSCTL_UINT(_kern, OID_AUTO, hibernatehidready, void IOHibernateSystemInit(IOPMrootDomain * rootDomain) { + gIOHibernateBootImageKey = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey); + +#if defined(__i386__) || defined(__x86_64__) + gIOHibernateRTCVariablesKey = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey); + gIOHibernateBoot0082Key = OSSymbol::withCString("8BE4DF61-93CA-11D2-AA0D-00E098032B8C:Boot0082"); + gIOHibernateBootNextKey = OSSymbol::withCString("8BE4DF61-93CA-11D2-AA0D-00E098032B8C:BootNext"); + gIOHibernateRTCVariablesKey = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey); +#endif /* defined(__i386__) || defined(__x86_64__) */ + OSData * data = OSData::withBytesNoCopy(&gIOHibernateState, sizeof(gIOHibernateState)); if (data) { @@ -2328,6 +1348,8 @@ IOHibernateSystemInit(IOPMrootDomain * rootDomain) sysctl_register_oid(&sysctl__kern_hibernatelockscreenready); sysctl_register_oid(&sysctl__kern_hibernatehidready); + gIOChosenEntry = IORegistryEntry::fromPath("/chosen", gIODTPlane); + gFSLock = IOLockAlloc(); } @@ -2341,45 +1363,20 @@ hibernate_setup_for_wake(void) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -#define C_ASSERT(e) typedef char __C_ASSERT__[(e) ? 
1 : -1] - -static bool -no_encrypt_page(vm_offset_t ppnum) -{ - if (pmap_is_noencrypt((ppnum_t)ppnum) == TRUE) - { - return true; - } - return false; -} - -static void -hibernate_pal_callback(void *vars_arg, vm_offset_t addr) -{ - IOHibernateVars *vars = (IOHibernateVars *)vars_arg; - /* Make sure it's not in either of the save lists */ - hibernate_set_page_state(vars->page_list, vars->page_list_wired, atop_64(addr), 1, kIOHibernatePageStateFree); - - /* Set it in the bitmap of pages owned by the PAL */ - hibernate_page_bitset(vars->page_list_pal, TRUE, atop_64(addr)); -} - -static struct hibernate_cryptvars_t *local_cryptvars; - -extern "C" int -hibernate_pal_write(void *buffer, size_t size) +static IOReturn +IOHibernatePolledFileWrite(IOPolledFileIOVars * vars, + const uint8_t * bytes, IOByteCount size, + IOPolledFileCryptVars * cryptvars) { - IOHibernateVars * vars = &gIOHibernateVars; + IOReturn err; - IOReturn err = IOPolledFileWrite(vars->fileVars, (const uint8_t *)buffer, size, local_cryptvars); - if (kIOReturnSuccess != err) { - kprintf("epic hibernate fail! 
%d\n", err); - return err; - } + err = IOPolledFileWrite(vars, bytes, size, cryptvars); + if ((kIOReturnSuccess == err) && hibernate_should_abort()) err = kIOReturnAborted; - return 0; + return (err); } +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ extern "C" uint32_t hibernate_write_image(void) @@ -2388,7 +1385,7 @@ hibernate_write_image(void) IOHibernateVars * vars = &gIOHibernateVars; IOPolledFileExtent * fileExtents; - C_ASSERT(sizeof(IOHibernateImageHeader) == 512); + assert_static(sizeof(IOHibernateImageHeader) == 512); uint32_t pageCount, pagesDone; IOReturn err; @@ -2398,8 +1395,7 @@ hibernate_write_image(void) uint8_t * data; uint8_t * compressed; uint8_t * scratch; - void * zerosCompressed; - IOByteCount pageCompressedSize, zerosCompressedLen; + IOByteCount pageCompressedSize; uint64_t compressedSize, uncompressedSize; uint64_t image1Size = 0; uint32_t bitmap_size; @@ -2411,6 +1407,8 @@ hibernate_write_image(void) uint32_t pageAndCount[2]; addr64_t phys64; IOByteCount segLen; + uintptr_t hibernateBase; + uintptr_t hibernateEnd; AbsoluteTime startTime, endTime; AbsoluteTime allTime, compTime; @@ -2423,17 +1421,19 @@ hibernate_write_image(void) uint32_t wiredPagesEncrypted; uint32_t dirtyPagesEncrypted; uint32_t wiredPagesClear; - uint32_t zeroPageCount; + uint32_t svPageCount; + uint32_t zvPageCount; - hibernate_cryptvars_t _cryptvars; - hibernate_cryptvars_t * cryptvars = 0; + IOPolledFileCryptVars _cryptvars; + IOPolledFileCryptVars * cryptvars = 0; wiredPagesEncrypted = 0; dirtyPagesEncrypted = 0; wiredPagesClear = 0; - zeroPageCount = 0; + svPageCount = 0; + zvPageCount = 0; - if (!vars->fileVars || !vars->fileVars->pollers || !vars->fileExtents) + if (!vars->fileVars || !vars->fileVars->pollers) return (false /* sleep */ ); if (kIOHibernateModeSleep & gIOHibernateMode) @@ -2444,8 +1444,6 @@ hibernate_write_image(void) restore1Sum = sum1 = sum2 = 0; - hibernate_pal_prepare(); - #if CRYPTO // encryption data. 
"iv" is the "initial vector". if (kIOHibernateModeEncrypt & gIOHibernateMode) @@ -2455,7 +1453,7 @@ hibernate_write_image(void) 0xdf, 0x9e, 0x5f, 0x32, 0xd7, 0x61, 0x63, 0xda }; cryptvars = &gIOHibernateCryptWakeContext; - bzero(cryptvars, sizeof(hibernate_cryptvars_t)); + bzero(cryptvars, sizeof(IOPolledFileCryptVars)); aes_encrypt_key(vars->cryptKey, kIOHibernateAESKeySize, &cryptvars->ctx.encrypt); @@ -2464,7 +1462,7 @@ hibernate_write_image(void) &cryptvars->ctx.decrypt); cryptvars = &_cryptvars; - bzero(cryptvars, sizeof(hibernate_cryptvars_t)); + bzero(cryptvars, sizeof(IOPolledFileCryptVars)); for (pageCount = 0; pageCount < sizeof(vars->wiredCryptKey); pageCount++) vars->wiredCryptKey[pageCount] ^= vars->volumeCryptKey[pageCount]; bzero(&vars->volumeCryptKey[0], sizeof(vars->volumeCryptKey)); @@ -2475,8 +1473,6 @@ hibernate_write_image(void) bcopy(&first_iv[0], &cryptvars->aes_iv[0], AES_BLOCK_SIZE); bzero(&vars->wiredCryptKey[0], sizeof(vars->wiredCryptKey)); bzero(&vars->cryptKey[0], sizeof(vars->cryptKey)); - - local_cryptvars = cryptvars; } #endif /* CRYPTO */ @@ -2493,7 +1489,7 @@ hibernate_write_image(void) HIBLOG("hibernate_page_list_setall found pageCount %d\n", pageCount); - fileExtents = (IOPolledFileExtent *) vars->fileExtents->getBytesNoCopy(); + fileExtents = (IOPolledFileExtent *) vars->fileVars->fileExtents->getBytesNoCopy(); #if 0 count = vars->fileExtents->getLength() / sizeof(IOPolledFileExtent); @@ -2516,13 +1512,14 @@ hibernate_write_image(void) { compressedSize = 0; uncompressedSize = 0; - zeroPageCount = 0; + svPageCount = 0; + zvPageCount = 0; IOPolledFileSeek(vars->fileVars, vars->fileVars->blockSize); HIBLOG("IOHibernatePollerOpen, ml_get_interrupts_enabled %d\n", ml_get_interrupts_enabled()); - err = IOHibernatePollerOpen(vars->fileVars, kIOPolledBeforeSleepState, vars->ioBuffer); + err = IOPolledFilePollersOpen(vars->fileVars, kIOPolledBeforeSleepState, true); HIBLOG("IOHibernatePollerOpen(%x)\n", err); pollerOpen = 
(kIOReturnSuccess == err); if (!pollerOpen) @@ -2530,21 +1527,17 @@ hibernate_write_image(void) // copy file block extent list if larger than header - count = vars->fileExtents->getLength(); + count = vars->fileVars->fileExtents->getLength(); if (count > sizeof(header->fileExtentMap)) { count -= sizeof(header->fileExtentMap); - err = IOPolledFileWrite(vars->fileVars, + err = IOHibernatePolledFileWrite(vars->fileVars, ((uint8_t *) &fileExtents[0]) + sizeof(header->fileExtentMap), count, cryptvars); if (kIOReturnSuccess != err) break; } - uintptr_t hibernateBase; - uintptr_t hibernateEnd; - hibernateBase = HIB_BASE; /* Defined in PAL headers */ - hibernateEnd = (segHIBB + segSizeHIB); // copy out restore1 code @@ -2585,11 +1578,11 @@ hibernate_write_image(void) count = ((uintptr_t) &gIOHibernateRestoreStack[0]) - trunc_page(hibernateBase); if (count) { - err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars); + err = IOHibernatePolledFileWrite(vars->fileVars, src, count, cryptvars); if (kIOReturnSuccess != err) break; } - err = IOPolledFileWrite(vars->fileVars, + err = IOHibernatePolledFileWrite(vars->fileVars, (uint8_t *) 0, &gIOHibernateRestoreStackEnd[0] - &gIOHibernateRestoreStack[0], cryptvars); @@ -2599,7 +1592,7 @@ hibernate_write_image(void) count = round_page(hibernateEnd) - ((uintptr_t) src); if (count) { - err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars); + err = IOHibernatePolledFileWrite(vars->fileVars, src, count, cryptvars); if (kIOReturnSuccess != err) break; } @@ -2622,7 +1615,7 @@ hibernate_write_image(void) phys64 = vars->previewBuffer->getPhysicalSegment(count, &segLen, kIOMemoryMapperNone); pageAndCount[0] = atop_64(phys64); pageAndCount[1] = atop_32(segLen); - err = IOPolledFileWrite(vars->fileVars, + err = IOHibernatePolledFileWrite(vars->fileVars, (const uint8_t *) &pageAndCount, sizeof(pageAndCount), cryptvars); if (kIOReturnSuccess != err) @@ -2648,15 +1641,16 @@ hibernate_write_image(void) phys64 = 
vars->previewBuffer->getPhysicalSegment(page, NULL, kIOMemoryMapperNone); sum1 += hibernate_sum_page(src + page, atop_64(phys64)); } - err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars); + err = IOHibernatePolledFileWrite(vars->fileVars, src, count, cryptvars); if (kIOReturnSuccess != err) break; } // mark areas for no save - + IOMemoryDescriptor * ioBuffer; + ioBuffer = IOPolledFileGetIOBuffer(vars->fileVars); for (count = 0; - (phys64 = vars->ioBuffer->getPhysicalSegment(count, &segLen, kIOMemoryMapperNone)); + (phys64 = ioBuffer->getPhysicalSegment(count, &segLen, kIOMemoryMapperNone)); count += segLen) { hibernate_set_page_state(vars->page_list, vars->page_list_wired, @@ -2679,7 +1673,7 @@ hibernate_write_image(void) bitmap_size = vars->page_list_wired->list_size; src = (uint8_t *) vars->page_list_wired; - err = IOPolledFileWrite(vars->fileVars, src, bitmap_size, cryptvars); + err = IOHibernatePolledFileWrite(vars->fileVars, src, bitmap_size, cryptvars); if (kIOReturnSuccess != err) break; @@ -2716,20 +1710,10 @@ hibernate_write_image(void) pageCount -= atop_32(segLen); } - (void)hibernate_pal_callback; - src = (uint8_t *) vars->srcBuffer->getBytesNoCopy(); compressed = src + page_size; scratch = compressed + page_size; - // compress a zero page - bzero(src, page_size); - zerosCompressed = vars->handoffBuffer->getBytesNoCopy(); - zerosCompressedLen = WKdm_compress_new((WK_word*) src, - (WK_word*) zerosCompressed, - (WK_word*) scratch, - page_size - 4); - pagesDone = 0; lastBlob = 0; @@ -2776,7 +1760,7 @@ hibernate_write_image(void) uint32_t checkIndex; for (checkIndex = 0; (checkIndex < count) - && (((kEncrypt & pageType) == 0) == no_encrypt_page(ppnum + checkIndex)); + && (((kEncrypt & pageType) == 0) == pmap_is_noencrypt(ppnum + checkIndex)); checkIndex++) {} if (!checkIndex) @@ -2799,7 +1783,7 @@ hibernate_write_image(void) { pageAndCount[0] = ppnum; pageAndCount[1] = count; - err = IOPolledFileWrite(vars->fileVars, + err = 
IOHibernatePolledFileWrite(vars->fileVars, (const uint8_t *) &pageAndCount, sizeof(pageAndCount), cryptvars); if (kIOReturnSuccess != err) @@ -2822,7 +1806,7 @@ hibernate_write_image(void) sum2 += sum; clock_get_uptime(&startTime); - wkresult = WKdm_compress_new((WK_word*) src, + wkresult = WKdm_compress_new((const WK_word*) src, (WK_word*) compressed, (WK_word*) scratch, page_size - 4); @@ -2834,27 +1818,30 @@ hibernate_write_image(void) compBytes += page_size; pageCompressedSize = (-1 == wkresult) ? page_size : wkresult; - if ((pageCompressedSize == zerosCompressedLen) - && !bcmp(compressed, zerosCompressed, zerosCompressedLen)) + if (pageCompressedSize == 0) { - pageCompressedSize = 0; - zeroPageCount++; - } - - if (kIOHibernateModeEncrypt & gIOHibernateMode) - pageCompressedSize = (pageCompressedSize + AES_BLOCK_SIZE - 1) & ~(AES_BLOCK_SIZE - 1); - - if (pageCompressedSize != page_size) - data = compressed; - else + pageCompressedSize = 4; data = src; + + if (*(uint32_t *)src) + svPageCount++; + else + zvPageCount++; + } + else + { + if (pageCompressedSize != page_size) + data = compressed; + else + data = src; + } tag = pageCompressedSize | kIOHibernateTagSignature; - err = IOPolledFileWrite(vars->fileVars, (const uint8_t *) &tag, sizeof(tag), cryptvars); + err = IOHibernatePolledFileWrite(vars->fileVars, (const uint8_t *) &tag, sizeof(tag), cryptvars); if (kIOReturnSuccess != err) break; - err = IOPolledFileWrite(vars->fileVars, data, (pageCompressedSize + 3) & ~3, cryptvars); + err = IOHibernatePolledFileWrite(vars->fileVars, data, (pageCompressedSize + 3) & ~3, cryptvars); if (kIOReturnSuccess != err) break; @@ -2901,14 +1888,14 @@ hibernate_write_image(void) if (kWiredEncrypt != pageType) { // end of image1/2 - fill to next block - err = IOPolledFileWrite(vars->fileVars, 0, 0, cryptvars); + err = IOHibernatePolledFileWrite(vars->fileVars, 0, 0, cryptvars); if (kIOReturnSuccess != err) break; } if (kWiredClear == pageType) { // enlarge wired image for test 
-// err = IOPolledFileWrite(vars->fileVars, 0, 0x60000000, cryptvars); +// err = IOHibernatePolledFileWrite(vars->fileVars, 0, 0x60000000, cryptvars); // end wired image header->encryptStart = vars->fileVars->encryptStart; @@ -2922,9 +1909,12 @@ hibernate_write_image(void) { if (kIOReturnOverrun == err) { - // update actual compression ratio on not enough space + // update actual compression ratio on not enough space (for retry) gIOHibernateCompression = (compressedSize << 8) / uncompressedSize; } + + // update partial amount written (for IOPolledFileClose cleanup/unmap) + header->imageSize = vars->fileVars->position; break; } @@ -2943,7 +1933,7 @@ hibernate_write_image(void) header->compression = (compressedSize << 8) / uncompressedSize; gIOHibernateCompression = header->compression; - count = vars->fileExtents->getLength(); + count = vars->fileVars->fileExtents->getLength(); if (count > sizeof(header->fileExtentMap)) { header->fileExtentMapSize = count; @@ -2957,17 +1947,12 @@ hibernate_write_image(void) header->deviceBlockSize = vars->fileVars->blockSize; IOPolledFileSeek(vars->fileVars, 0); - err = IOPolledFileWrite(vars->fileVars, + err = IOHibernatePolledFileWrite(vars->fileVars, (uint8_t *) header, sizeof(IOHibernateImageHeader), cryptvars); if (kIOReturnSuccess != err) break; - err = IOPolledFileWrite(vars->fileVars, 0, 0, cryptvars); - if (kIOReturnSuccess != err) - break; - err = IOHibernatePollerIODone(vars->fileVars, true); - if (kIOReturnSuccess != err) - break; + err = IOHibernatePolledFileWrite(vars->fileVars, 0, 0, cryptvars); } while (false); @@ -2998,14 +1983,11 @@ hibernate_write_image(void) uncompressedSize ? 
((int) ((compressedSize * 100ULL) / uncompressedSize)) : 0, sum1, sum2); - HIBLOG("zeroPageCount %d, wiredPagesEncrypted %d, wiredPagesClear %d, dirtyPagesEncrypted %d\n", - zeroPageCount, wiredPagesEncrypted, wiredPagesClear, dirtyPagesEncrypted); - - if (vars->fileVars->io) - (void) IOHibernatePollerIODone(vars->fileVars, false); + HIBLOG("svPageCount %d, zvPageCount %d, wiredPagesEncrypted %d, wiredPagesClear %d, dirtyPagesEncrypted %d\n", + svPageCount, zvPageCount, wiredPagesEncrypted, wiredPagesClear, dirtyPagesEncrypted); if (pollerOpen) - IOHibernatePollerClose(vars->fileVars, kIOPolledBeforeSleepState); + IOPolledFilePollersClose(vars->fileVars, kIOPolledBeforeSleepState); if (vars->consoleMapping) ProgressUpdate(gIOHibernateGraphicsInfo, @@ -3062,12 +2044,12 @@ hibernate_machine_init(void) uint64_t compBytes; uint32_t lastProgressStamp = 0; uint32_t progressStamp; - hibernate_cryptvars_t * cryptvars = 0; + IOPolledFileCryptVars * cryptvars = 0; IOHibernateVars * vars = &gIOHibernateVars; bzero(gIOHibernateStats, sizeof(hibernate_statistics_t)); - if (!vars->fileVars || !vars->fileVars->pollers || !vars->fileExtents) + if (!vars->fileVars || !vars->fileVars->pollers) return; sum = gIOHibernateCurrentHeader->actualImage1Sum; @@ -3217,18 +2199,17 @@ hibernate_machine_init(void) AbsoluteTime_to_scalar(&compTime) = 0; compBytes = 0; - HIBLOG("IOHibernatePollerOpen(), ml_get_interrupts_enabled %d\n", ml_get_interrupts_enabled()); - err = IOHibernatePollerOpen(vars->fileVars, kIOPolledAfterSleepState, 0); + HIBLOG("IOPolledFilePollersOpen(), ml_get_interrupts_enabled %d\n", ml_get_interrupts_enabled()); + err = IOPolledFilePollersOpen(vars->fileVars, kIOPolledAfterSleepState, false); clock_get_uptime(&startIOTime); endTime = startIOTime; SUB_ABSOLUTETIME(&endTime, &allTime); absolutetime_to_nanoseconds(endTime, &nsec); - HIBLOG("IOHibernatePollerOpen(%x) %qd ms\n", err, nsec / 1000000ULL); + HIBLOG("IOPolledFilePollersOpen(%x) %qd ms\n", err, nsec / 1000000ULL); 
IOPolledFileSeek(vars->fileVars, gIOHibernateCurrentHeader->image1Size); // kick off the read ahead - vars->fileVars->io = false; vars->fileVars->bufferHalf = 0; vars->fileVars->bufferLimit = 0; vars->fileVars->lastRead = 0; @@ -3278,30 +2259,39 @@ hibernate_machine_init(void) break; } - if (!compressedSize) bzero_phys(ptoa_64(ppnum), page_size); - else + err = IOPolledFileRead(vars->fileVars, src, (compressedSize + 3) & ~3, cryptvars); + if (kIOReturnSuccess != err) break; + + if (compressedSize < page_size) { - err = IOPolledFileRead(vars->fileVars, src, (compressedSize + 3) & ~3, cryptvars); - if (kIOReturnSuccess != err) break; - if (compressedSize < page_size) - { - decoOffset = page_size; - clock_get_uptime(&startTime); - WKdm_decompress_new((WK_word*) src, (WK_word*) compressed, (WK_word*) scratch, page_size); - clock_get_uptime(&endTime); - ADD_ABSOLUTETIME(&compTime, &endTime); - SUB_ABSOLUTETIME(&compTime, &startTime); - compBytes += page_size; + decoOffset = page_size; + clock_get_uptime(&startTime); + + if (compressedSize == 4) { + int i; + uint32_t *s, *d; + + s = (uint32_t *)src; + d = (uint32_t *)(uintptr_t)compressed; + + for (i = 0; i < (int)(PAGE_SIZE / sizeof(int32_t)); i++) + *d++ = *s; } - else decoOffset = 0; + else + WKdm_decompress_new((WK_word*) src, (WK_word*) compressed, (WK_word*) scratch, compressedSize); + clock_get_uptime(&endTime); + ADD_ABSOLUTETIME(&compTime, &endTime); + SUB_ABSOLUTETIME(&compTime, &startTime); + compBytes += page_size; + } + else decoOffset = 0; - sum += hibernate_sum_page((src + decoOffset), ppnum); - err = IOMemoryDescriptorReadToPhysical(vars->srcBuffer, decoOffset, ptoa_64(ppnum), page_size); - if (err) - { + sum += hibernate_sum_page((src + decoOffset), ppnum); + err = IOMemoryDescriptorReadToPhysical(vars->srcBuffer, decoOffset, ptoa_64(ppnum), page_size); + if (err) + { HIBLOG("IOMemoryDescriptorReadToPhysical [%ld] %x\n", (long)ppnum, err); break; - } } ppnum++; @@ -3332,12 +2322,9 @@ 
hibernate_machine_init(void) gIOHibernateCurrentHeader->actualImage2Sum = sum; gIOHibernateCompression = gIOHibernateCurrentHeader->compression; - if (vars->fileVars->io) - (void) IOHibernatePollerIODone(vars->fileVars, false); - clock_get_uptime(&endIOTime); - err = IOHibernatePollerClose(vars->fileVars, kIOPolledAfterSleepState); + err = IOPolledFilePollersClose(vars->fileVars, kIOPolledAfterSleepState); clock_get_uptime(&endTime); diff --git a/iokit/Kernel/IOHibernateInternal.h b/iokit/Kernel/IOHibernateInternal.h index 23f875d18..0c800aec5 100644 --- a/iokit/Kernel/IOHibernateInternal.h +++ b/iokit/Kernel/IOHibernateInternal.h @@ -42,7 +42,6 @@ struct IOHibernateVars class IOBufferMemoryDescriptor * handoffBuffer; class IOMemoryDescriptor * previewBuffer; OSData * previewData; - OSData * fileExtents; OSObject * saveBootDevice; struct IOPolledFileIOVars * fileVars; @@ -60,37 +59,6 @@ struct IOHibernateVars }; typedef struct IOHibernateVars IOHibernateVars; - -struct IOPolledFileIOVars -{ - struct kern_direct_file_io_ref_t * fileRef; - IORegistryEntry * media; - class OSArray * pollers; - IOByteCount blockSize; - uint8_t * buffer; - IOByteCount bufferSize; - IOByteCount bufferLimit; - IOByteCount bufferOffset; - IOByteCount bufferHalf; - IOByteCount extentRemaining; - IOByteCount lastRead; - IOByteCount readEnd; - uint32_t flags; - uint64_t fileSize; - uint64_t block0; - uint64_t position; - uint64_t extentPosition; - uint64_t encryptStart; - uint64_t encryptEnd; - uint64_t cryptBytes; - AbsoluteTime cryptTime; - IOPolledFileExtent * extentMap; - IOPolledFileExtent * currentExtent; - bool io; - IOReturn ioStatus; -}; -typedef struct IOPolledFileIOVars IOPolledFileIOVars; - #endif /* __cplusplus */ enum diff --git a/iokit/Kernel/IOHibernateRestoreKernel.c b/iokit/Kernel/IOHibernateRestoreKernel.c index daf5d2804..141a280a5 100644 --- a/iokit/Kernel/IOHibernateRestoreKernel.c +++ b/iokit/Kernel/IOHibernateRestoreKernel.c @@ -401,8 +401,17 @@ store_one_page(uint32_t 
procFlags, uint32_t * src, uint32_t compressedSize, if (compressedSize != PAGE_SIZE) { dst = pal_hib_map(DEST_COPY_AREA, dst); - if (compressedSize) WKdm_decompress_new((WK_word*) src, (WK_word*)(uintptr_t)dst, (WK_word*) &scratch[0], PAGE_SIZE); - else bzero((void *) dst, PAGE_SIZE); + if (compressedSize != 4) WKdm_decompress_new((WK_word*) src, (WK_word*)(uintptr_t)dst, (WK_word*) &scratch[0], compressedSize); + else { + int i; + uint32_t *s, *d; + + s = src; + d = (uint32_t *)(uintptr_t)dst; + + for (i = 0; i < (int)(PAGE_SIZE / sizeof(int32_t)); i++) + *d++ = *s; + } } else { @@ -412,8 +421,6 @@ store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize, return hibernate_sum_page((uint8_t *)(uintptr_t)dst, ppnum); } -#define C_ASSERT(e) typedef char __C_ASSERT__[(e) ? 1 : -1] - long hibernate_kernel_entrypoint(uint32_t p1, uint32_t p2, uint32_t p3, uint32_t p4) @@ -449,7 +456,7 @@ hibernate_kernel_entrypoint(uint32_t p1, uint64_t timeStart; timeStart = rdtsc64(); - C_ASSERT(sizeof(IOHibernateImageHeader) == 512); + assert_static(sizeof(IOHibernateImageHeader) == 512); headerPhys = ptoa_64(p1); @@ -697,6 +704,8 @@ hibernate_kernel_entrypoint(uint32_t p1, gIOHibernateCurrentHeader->trampolineTime = (((rdtsc64() - timeStart)) >> 8); +// debug_code('done', 0); + #if CONFIG_SLEEP #if defined(__i386__) || defined(__x86_64__) typedef void (*ResetProc)(void); diff --git a/iokit/Kernel/IOHistogramReporter.cpp b/iokit/Kernel/IOHistogramReporter.cpp index f51a1c936..21e92e935 100644 --- a/iokit/Kernel/IOHistogramReporter.cpp +++ b/iokit/Kernel/IOHistogramReporter.cpp @@ -349,7 +349,8 @@ IOHistogramReporter::tallyValue(int64_t value) hist_values.bucket_sum += value; hist_values.bucket_hits++; - if (setElementValues(element_index, (IOReportElementValues *)&hist_values) == kIOReturnSuccess) { + if (setElementValues(element_index, (IOReportElementValues *)&hist_values) + != kIOReturnSuccess) { goto finish; } diff --git a/iokit/Kernel/IOKitDebug.cpp 
b/iokit/Kernel/IOKitDebug.cpp index 2560a0687..d99e9399e 100644 --- a/iokit/Kernel/IOKitDebug.cpp +++ b/iokit/Kernel/IOKitDebug.cpp @@ -26,9 +26,14 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ + #include +extern "C" { +#include +} #include +#include #include #include @@ -43,25 +48,25 @@ #define DEBUG_INIT_VALUE 0 #endif -SInt64 gIOKitDebug = DEBUG_INIT_VALUE; -SInt64 gIOKitTrace = 0; +SInt64 gIOKitDebug = DEBUG_INIT_VALUE; +SInt64 gIOKitTrace = 0; #if DEVELOPMENT || DEBUG -#define IODEBUG_CTLFLAGS CTLFLAG_RW +#define IODEBUG_CTLFLAGS CTLFLAG_RW #else -#define IODEBUG_CTLFLAGS CTLFLAG_RD +#define IODEBUG_CTLFLAGS CTLFLAG_RD #endif SYSCTL_QUAD(_debug, OID_AUTO, iokit, IODEBUG_CTLFLAGS | CTLFLAG_LOCKED, &gIOKitDebug, "boot_arg io"); SYSCTL_QUAD(_debug, OID_AUTO, iotrace, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOKitTrace, "trace io"); -int debug_malloc_size; -int debug_iomalloc_size; +int debug_malloc_size; +int debug_iomalloc_size; -vm_size_t debug_iomallocpageable_size; -int debug_container_malloc_size; -// int debug_ivars_size; // in OSObject.cpp +vm_size_t debug_iomallocpageable_size; +int debug_container_malloc_size; +// int debug_ivars_size; // in OSObject.cpp extern "C" { @@ -73,11 +78,11 @@ extern "C" { void IOPrintPlane( const IORegistryPlane * plane ) { - IORegistryEntry * next; - IORegistryIterator * iter; - OSOrderedSet * all; - char format[] = "%xxxs"; - IOService * service; + IORegistryEntry * next; + IORegistryIterator * iter; + OSOrderedSet * all; + char format[] = "%xxxs"; + IOService * service; iter = IORegistryIterator::iterateOver( plane ); assert( iter ); @@ -86,20 +91,20 @@ void IOPrintPlane( const IORegistryPlane * plane ) DEBG("Count %d\n", all->getCount() ); all->release(); } else - DEBG("Empty\n"); + DEBG("Empty\n"); iter->reset(); while( (next = iter->getNextObjectRecursive())) { - snprintf(format + 1, sizeof(format) - 1, "%ds", 2 * next->getDepth( plane )); - DEBG( format, ""); - DEBG( "\033[33m%s", next->getName( plane )); - if( 
(next->getLocation( plane ))) + snprintf(format + 1, sizeof(format) - 1, "%ds", 2 * next->getDepth( plane )); + DEBG( format, ""); + DEBG( "\033[33m%s", next->getName( plane )); + if( (next->getLocation( plane ))) DEBG("@%s", next->getLocation( plane )); - DEBG("\033[0m getMetaClass()->getClassName()); + DEBG("\033[0m getMetaClass()->getClassName()); if( (service = OSDynamicCast(IOService, next))) DEBG(", busy %ld", (long) service->getBusyState()); - DEBG( ">\n"); -// IOSleep(250); + DEBG( ">\n"); +// IOSleep(250); } iter->release(); } @@ -118,12 +123,12 @@ void IOPrintMemory( void ) // OSMetaClass::printInstanceCounts(); IOLog("\n" - "ivar kalloc() 0x%08x\n" - "malloc() 0x%08x\n" + "ivar kalloc() 0x%08x\n" + "malloc() 0x%08x\n" "containers kalloc() 0x%08x\n" - "IOMalloc() 0x%08x\n" + "IOMalloc() 0x%08x\n" "----------------------------------------\n", - debug_ivars_size, + debug_ivars_size, debug_malloc_size, debug_container_malloc_size, debug_iomalloc_size @@ -132,12 +137,12 @@ void IOPrintMemory( void ) } /* extern "C" */ -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #define super OSObject OSDefineMetaClassAndStructors(IOKitDiagnostics, OSObject) -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ OSObject * IOKitDiagnostics::diagnostics( void ) { @@ -145,21 +150,21 @@ OSObject * IOKitDiagnostics::diagnostics( void ) diags = new IOKitDiagnostics; if( diags && !diags->init()) { - diags->release(); - diags = 0; + diags->release(); + diags = 0; } return( diags ); } void IOKitDiagnostics::updateOffset( OSDictionary * dict, - UInt32 value, const char * name ) + UInt64 value, const char * name ) { OSNumber * off; - off = OSNumber::withNumber( value, 32 ); + off = OSNumber::withNumber( value, 64 ); if( !off) - return; + return; 
dict->setObject( name, off ); off->release(); @@ -167,12 +172,12 @@ void IOKitDiagnostics::updateOffset( OSDictionary * dict, bool IOKitDiagnostics::serialize(OSSerialize *s) const { - OSDictionary * dict; - bool ok; + OSDictionary * dict; + bool ok; dict = OSDictionary::withCapacity( 5 ); if( !dict) - return( false ); + return( false ); updateOffset( dict, debug_ivars_size, "Instance allocation" ); updateOffset( dict, debug_container_malloc_size, "Container allocation" ); @@ -188,4 +193,863 @@ bool IOKitDiagnostics::serialize(OSSerialize *s) const return( ok ); } -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#if IOTRACKING + +#include +#include +#include + +__private_extern__ "C" void qsort( + void * array, + size_t nmembers, + size_t member_size, + int (*)(const void *, const void *)); + +extern "C" ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); +extern "C" ppnum_t pmap_valid_page(ppnum_t pn); + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +struct IOTRecursiveLock +{ + lck_mtx_t * mutex; + thread_t thread; + UInt32 count; +}; + +struct IOTrackingQueue +{ + queue_chain_t link; + IOTRecursiveLock lock; + queue_head_t sites; + const char * name; + size_t allocSize; + size_t minCaptureSize; + uint32_t siteCount; + uint8_t captureOn; + uint8_t isAlloc; +}; + +struct IOTrackingCallSite +{ + queue_chain_t link; + IOTrackingQueue * queue; + uint32_t crc; + IOTrackingCallSiteInfo info; + queue_chain_t instances; + IOTracking * addresses; +}; + +struct IOTrackingLeaksRef +{ + uintptr_t * instances; + uint32_t count; + uint32_t found; + size_t bytes; +}; + +enum +{ + kInstanceFlagAddress = 0x01UL, + kInstanceFlagReferenced = 0x02UL, + kInstanceFlags = 0x03UL +}; + +lck_mtx_t * gIOTrackingLock; +queue_head_t gIOTrackingQ; + +enum +{ + kTrackingAddressFlagAllocated = 0x00000001 +}; + +#if 
defined(__LP64__) +#define IOTrackingAddressFlags(ptr) (ptr->flags) +#else +#define IOTrackingAddressFlags(ptr) (ptr->tracking.flags) +#endif + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static void +IOTRecursiveLockLock(IOTRecursiveLock * lock) +{ + if (lock->thread == current_thread()) lock->count++; + else + { + lck_mtx_lock(lock->mutex); + assert(lock->thread == 0); + assert(lock->count == 0); + lock->thread = current_thread(); + lock->count = 1; + } +} + +static void +IOTRecursiveLockUnlock(IOTRecursiveLock * lock) +{ + assert(lock->thread == current_thread()); + if (0 == (--lock->count)) + { + lock->thread = 0; + lck_mtx_unlock(lock->mutex); + } +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +void +IOTrackingInit(void) +{ + queue_init(&gIOTrackingQ); + gIOTrackingLock = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOTrackingQueue * +IOTrackingQueueAlloc(const char * name, size_t allocSize, size_t minCaptureSize, bool isAlloc) +{ + IOTrackingQueue * queue; + queue = (typeof(queue)) kalloc(sizeof(IOTrackingQueue)); + bzero(queue, sizeof(IOTrackingQueue)); + + queue->name = name; + queue->allocSize = allocSize; + queue->minCaptureSize = minCaptureSize; + queue->lock.mutex = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL); + queue_init(&queue->sites); + + queue->captureOn = (0 != (kIOTrackingBoot & gIOKitDebug)); + queue->isAlloc = isAlloc; + + lck_mtx_lock(gIOTrackingLock); + queue_enter(&gIOTrackingQ, queue, IOTrackingQueue *, link); + lck_mtx_unlock(gIOTrackingLock); + + return (queue); +}; + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +void +IOTrackingQueueFree(IOTrackingQueue * queue) +{ + lck_mtx_lock(gIOTrackingLock); + IOTrackingReset(queue); + remque(&queue->link); + lck_mtx_unlock(gIOTrackingLock); + + 
lck_mtx_free(queue->lock.mutex, IOLockGroup); + + kfree(queue, sizeof(IOTrackingQueue)); +}; + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* fasthash + The MIT License + + Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com) + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + + +// Compression function for Merkle-Damgard construction. +// This function is generated using the framework provided. 
+#define mix(h) ({ \ + (h) ^= (h) >> 23; \ + (h) *= 0x2127599bf4325c37ULL; \ + (h) ^= (h) >> 47; }) + +static uint64_t +fasthash64(const void *buf, size_t len, uint64_t seed) +{ + const uint64_t m = 0x880355f21e6d1965ULL; + const uint64_t *pos = (const uint64_t *)buf; + const uint64_t *end = pos + (len / 8); + const unsigned char *pos2; + uint64_t h = seed ^ (len * m); + uint64_t v; + + while (pos != end) { + v = *pos++; + h ^= mix(v); + h *= m; + } + + pos2 = (const unsigned char*)pos; + v = 0; + + switch (len & 7) { + case 7: v ^= (uint64_t)pos2[6] << 48; + case 6: v ^= (uint64_t)pos2[5] << 40; + case 5: v ^= (uint64_t)pos2[4] << 32; + case 4: v ^= (uint64_t)pos2[3] << 24; + case 3: v ^= (uint64_t)pos2[2] << 16; + case 2: v ^= (uint64_t)pos2[1] << 8; + case 1: v ^= (uint64_t)pos2[0]; + h ^= mix(v); + h *= m; + } + + return mix(h); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static uint32_t +fasthash32(const void *buf, size_t len, uint32_t seed) +{ + // the following trick converts the 64-bit hashcode to Fermat + // residue, which shall retain information from both the higher + // and lower parts of hashcode. 
+ uint64_t h = fasthash64(buf, len, seed); + return h - (h >> 32); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +void +IOTrackingAdd(IOTrackingQueue * queue, IOTracking * mem, size_t size, bool address) +{ + IOTrackingCallSite * site; + uint32_t crc, num; + uintptr_t bt[kIOTrackingCallSiteBTs + 1]; + + if (mem->site) return; + if (!queue->captureOn) return; + if (size < queue->minCaptureSize) return; + + assert(!mem->link.next); + + num = fastbacktrace(&bt[0], kIOTrackingCallSiteBTs + 1); + num--; + crc = fasthash32(&bt[1], num * sizeof(bt[0]), 0x04C11DB7); + + IOTRecursiveLockLock(&queue->lock); + queue_iterate(&queue->sites, site, IOTrackingCallSite *, link) + { + if (crc == site->crc) break; + } + + if (queue_end(&queue->sites, (queue_entry_t) site)) + { + site = (typeof(site)) kalloc(sizeof(IOTrackingCallSite)); + + queue_init(&site->instances); + site->addresses = (IOTracking *) &site->instances; + site->queue = queue; + site->crc = crc; + site->info.count = 0; + memset(&site->info.size[0], 0, sizeof(site->info.size)); + bcopy(&bt[1], &site->info.bt[0], num * sizeof(site->info.bt[0])); + assert(num <= kIOTrackingCallSiteBTs); + bzero(&site->info.bt[num], (kIOTrackingCallSiteBTs - num) * sizeof(site->info.bt[0])); + + queue_enter_first(&queue->sites, site, IOTrackingCallSite *, link); + queue->siteCount++; + } + + if (address) + { + queue_enter/*last*/(&site->instances, mem, IOTrackingCallSite *, link); + if (queue_end(&site->instances, (queue_entry_t)site->addresses)) site->addresses = mem; + } + else queue_enter_first(&site->instances, mem, IOTrackingCallSite *, link); + + mem->site = site; + site->info.size[0] += size; + site->info.count++; + + IOTRecursiveLockUnlock(&queue->lock); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +void +IOTrackingRemove(IOTrackingQueue * queue, IOTracking * mem, size_t size) +{ + if (!mem->link.next) return; + + 
IOTRecursiveLockLock(&queue->lock); + + assert(mem->site); + + if (mem == mem->site->addresses) mem->site->addresses = (IOTracking *) queue_next(&mem->link); + remque(&mem->link); + + assert(mem->site->info.count); + mem->site->info.count--; + assert(mem->site->info.size[0] >= size); + mem->site->info.size[0] -= size; + if (!mem->site->info.count) + { + assert(queue_empty(&mem->site->instances)); + assert(!mem->site->info.size[0]); + assert(!mem->site->info.size[1]); + + remque(&mem->site->link); + assert(queue->siteCount); + queue->siteCount--; + kfree(mem->site, sizeof(IOTrackingCallSite)); + } + IOTRecursiveLockUnlock(&queue->lock); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +void +IOTrackingAlloc(IOTrackingQueue * queue, uintptr_t address, size_t size) +{ + IOTrackingAddress * tracking; + + if (!queue->captureOn) return; + if (size < queue->minCaptureSize) return; + + address = ~address; + tracking = (typeof(tracking)) kalloc(sizeof(IOTrackingAddress)); + bzero(tracking, sizeof(IOTrackingAddress)); + IOTrackingAddressFlags(tracking) |= kTrackingAddressFlagAllocated; + tracking->address = address; + tracking->size = size; + + IOTrackingAdd(queue, &tracking->tracking, size, true); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +void +IOTrackingFree(IOTrackingQueue * queue, uintptr_t address, size_t size) +{ + IOTrackingCallSite * site; + IOTrackingAddress * tracking; + bool done; + + address = ~address; + IOTRecursiveLockLock(&queue->lock); + done = false; + queue_iterate(&queue->sites, site, IOTrackingCallSite *, link) + { + for (tracking = (IOTrackingAddress *) site->addresses; + !done && !queue_end(&site->instances, (queue_entry_t) tracking); + tracking = (IOTrackingAddress *) queue_next(&tracking->tracking.link)) + { + if ((done = (address == tracking->address))) + { + IOTrackingRemove(queue, &tracking->tracking, size); + kfree(tracking, 
sizeof(IOTrackingAddress)); + } + } + if (done) break; + } + + IOTRecursiveLockUnlock(&queue->lock); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +void +IOTrackingAccumSize(IOTrackingQueue * queue, IOTracking * mem, size_t size) +{ + IOTRecursiveLockLock(&queue->lock); + if (mem->link.next) + { + assert(mem->site); + assert((size > 0) || (mem->site->info.size[1] >= -size)); + mem->site->info.size[1] += size; + }; + IOTRecursiveLockUnlock(&queue->lock); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +void +IOTrackingReset(IOTrackingQueue * queue) +{ + IOTrackingCallSite * site; + IOTracking * tracking; + IOTrackingAddress * trackingAddress; + bool addresses; + + IOTRecursiveLockLock(&queue->lock); + while (!queue_empty(&queue->sites)) + { + queue_remove_first(&queue->sites, site, IOTrackingCallSite *, link); + addresses = false; + while (!queue_empty(&site->instances)) + { + queue_remove_first(&site->instances, tracking, IOTracking *, link); + tracking->link.next = 0; + if (tracking == site->addresses) addresses = true; + if (addresses) + { + trackingAddress = (typeof(trackingAddress)) tracking; + if (kTrackingAddressFlagAllocated & IOTrackingAddressFlags(trackingAddress)) + { + kfree(tracking, sizeof(IOTrackingAddress)); + } + } + } + kfree(site, sizeof(IOTrackingCallSite)); + } + queue->siteCount = 0; + IOTRecursiveLockUnlock(&queue->lock); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static int +IOTrackingCallSiteInfoCompare(const void * left, const void * right) +{ + IOTrackingCallSiteInfo * l = (typeof(l)) left; + IOTrackingCallSiteInfo * r = (typeof(r)) right; + size_t lsize, rsize; + + rsize = r->size[0] + r->size[1]; + lsize = l->size[0] + l->size[1]; + + return ((rsize > lsize) ? 1 : ((rsize == lsize) ? 
0 : -1)); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static int +IOTrackingAddressCompare(const void * left, const void * right) +{ + IOTracking * instance; + uintptr_t inst, laddr, raddr; + + inst = ((typeof(inst) *) left)[0]; + instance = (typeof(instance)) (inst & ~kInstanceFlags); + if (kInstanceFlagAddress & inst) laddr = ~((IOTrackingAddress *)instance)->address; + else laddr = (uintptr_t) (instance + 1); + + inst = ((typeof(inst) *) right)[0]; + instance = (typeof(instance)) (inst & ~kInstanceFlags); + if (kInstanceFlagAddress & inst) raddr = ~((IOTrackingAddress *)instance)->address; + else raddr = (uintptr_t) (instance + 1); + + return ((laddr > raddr) ? 1 : ((laddr == raddr) ? 0 : -1)); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static void +IOTrackingLeakScan(void * refcon) +{ + IOTrackingLeaksRef * ref = (typeof(ref)) refcon; + uintptr_t * instances; + IOTracking * instance; + uint64_t vaddr, vincr; + ppnum_t ppn; + uintptr_t ptr, addr, inst; + size_t size; + uint32_t baseIdx, lim, ptrIdx, count; + boolean_t is; + +// if (cpu_number()) return; + + instances = ref->instances; + count = ref->count; + + for (vaddr = VM_MIN_KERNEL_AND_KEXT_ADDRESS; + vaddr < VM_MAX_KERNEL_ADDRESS; + ml_set_interrupts_enabled(is), vaddr += vincr) + { +#if !defined(__LP64__) + thread_block(NULL); +#endif + is = ml_set_interrupts_enabled(false); + + ppn = kernel_pmap_present_mapping(vaddr, &vincr); + // check noencrypt to avoid VM structs (map entries) with pointers + if (ppn && (!pmap_valid_page(ppn) || pmap_is_noencrypt(ppn))) ppn = 0; + if (!ppn) continue; + + for (ptrIdx = 0; ptrIdx < (page_size / sizeof(uintptr_t)); ptrIdx++) + { + ptr = ((uintptr_t *)vaddr)[ptrIdx]; + + for (lim = count, baseIdx = 0; lim; lim >>= 1) + { + inst = instances[baseIdx + (lim >> 1)]; + instance = (typeof(instance)) (inst & ~kInstanceFlags); + if (kInstanceFlagAddress & inst) + { + 
addr = ~((IOTrackingAddress *)instance)->address; + size = ((IOTrackingAddress *)instance)->size; + } + else + { + addr = (uintptr_t) (instance + 1); + size = instance->site->queue->allocSize; + } + if ((ptr >= addr) && (ptr < (addr + size))) + { + if (!(kInstanceFlagReferenced & inst)) + { + inst |= kInstanceFlagReferenced; + instances[baseIdx + (lim >> 1)] = inst; + ref->found++; + } + break; + } + if (ptr > addr) + { + // move right + baseIdx += (lim >> 1) + 1; + lim--; + } + // else move left + } + } + ref->bytes += page_size; + } +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static OSData * +IOTrackingLeaks(OSData * data) +{ + IOTrackingLeaksRef ref; + IOTrackingCallSiteInfo unslideInfo; + IOTrackingCallSite * site; + OSData * leakData; + uintptr_t * instances; + IOTracking * instance; + uintptr_t inst; + uint32_t count, idx, numSites, dups, siteCount; + + instances = (typeof(instances)) data->getBytesNoCopy(); + count = (data->getLength() / sizeof(*instances)); + qsort(instances, count, sizeof(*instances), &IOTrackingAddressCompare); + + bzero(&ref, sizeof(ref)); + ref.instances = instances; + ref.count = count; + + IOTrackingLeakScan(&ref); + + IOLog("leaks scanned %ld MB, instance count %d, found %d\n", ref.bytes / 1024 / 1024, count, ref.found); + + leakData = OSData::withCapacity(128 * sizeof(IOTrackingCallSiteInfo)); + + for (numSites = 0, idx = 0; idx < count; idx++) + { + inst = instances[idx]; + if (kInstanceFlagReferenced & inst) continue; + instance = (typeof(instance)) (inst & ~kInstanceFlags); + site = instance->site; + instances[numSites] = (uintptr_t) site; + numSites++; + } + + for (idx = 0; idx < numSites; idx++) + { + inst = instances[idx]; + if (!inst) continue; + site = (typeof(site)) inst; + for (siteCount = 1, dups = (idx + 1); dups < numSites; dups++) + { + if (instances[dups] == (uintptr_t) site) + { + siteCount++; + instances[dups] = 0; + } + } + unslideInfo.count = siteCount; + 
unslideInfo.size[0] = (site->info.size[0] * site->info.count) / siteCount; + unslideInfo.size[1] = (site->info.size[1] * site->info.count) / siteCount;; + for (uint32_t j = 0; j < kIOTrackingCallSiteBTs; j++) + { + unslideInfo.bt[j] = VM_KERNEL_UNSLIDE(site->info.bt[j]); + } + leakData->appendBytes(&unslideInfo, sizeof(unslideInfo)); + } + data->release(); + + return (leakData); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static bool +SkipName(uint32_t options, const char * name, size_t namesLen, const char * names) +{ + const char * scan; + const char * next; + bool exclude, found; + size_t qLen, sLen; + + if (!namesLen || !names) return (false); + // ...<0> + exclude = (0 != (kIOTrackingExcludeNames & options)); + qLen = strlen(name); + scan = names; + found = false; + do + { + sLen = scan[0]; + scan++; + next = scan + sLen; + if (next >= (names + namesLen)) break; + found = ((sLen == qLen) && !strncmp(scan, name, sLen)); + scan = next; + } + while (!found && (scan < (names + namesLen))); + + return (!(exclude ^ found)); +} + +#endif /* IOTRACKING */ + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +kern_return_t +IOTrackingDebug(uint32_t selector, uint32_t options, + const char * names, size_t namesLen, + size_t size, OSObject ** result) +{ + kern_return_t ret; + OSData * data; + + if (result) *result = 0; + data = 0; + ret = kIOReturnNotReady; + +#if IOTRACKING + + IOTrackingQueue * queue; + IOTracking * instance; + IOTrackingCallSite * site; + IOTrackingCallSiteInfo * siteInfos; + IOTrackingCallSiteInfo * siteInfo; + bool addresses; + uint32_t num, idx; + uintptr_t instFlags; + + if (!(kIOTracking & gIOKitDebug)) return (kIOReturnNotReady); + ret = kIOReturnNotFound; + + lck_mtx_lock(gIOTrackingLock); + queue_iterate(&gIOTrackingQ, queue, IOTrackingQueue *, link) + { + if (SkipName(options, queue->name, namesLen, names)) continue; + + switch (selector) + { + 
case kIOTrackingResetTracking: + { + IOTrackingReset(queue); + ret = kIOReturnSuccess; + break; + } + + case kIOTrackingStartCapture: + case kIOTrackingStopCapture: + { + queue->captureOn = (kIOTrackingStartCapture == selector); + ret = kIOReturnSuccess; + break; + } + + case kIOTrackingSetMinCaptureSize: + { + queue->minCaptureSize = size; + ret = kIOReturnSuccess; + break; + } + + case kIOTrackingLeaks: + { + if (!queue->isAlloc) break; + + if (!data) data = OSData::withCapacity(1024 * sizeof(uintptr_t)); + + IOTRecursiveLockLock(&queue->lock); + queue_iterate(&queue->sites, site, IOTrackingCallSite *, link) + { + addresses = false; + queue_iterate(&site->instances, instance, IOTracking *, link) + { + if (instance == site->addresses) addresses = true; + instFlags = (typeof(instFlags)) instance; + if (addresses) instFlags |= kInstanceFlagAddress; + data->appendBytes(&instFlags, sizeof(instFlags)); + } + } + // queue is locked + ret = kIOReturnSuccess; + break; + } + + case kIOTrackingGetTracking: + case kIOTrackingPrintTracking: + { + if (!data) data = OSData::withCapacity(128 * sizeof(IOTrackingCallSiteInfo)); + + IOTRecursiveLockLock(&queue->lock); + num = queue->siteCount; + idx = 0; + queue_iterate(&queue->sites, site, IOTrackingCallSite *, link) + { + assert(idx < num); + idx++; + + if (size && ((site->info.size[0] + site->info.size[1]) < size)) continue; + + IOTrackingCallSiteInfo unslideInfo; + unslideInfo.count = site->info.count; + memcpy(&unslideInfo.size[0], &site->info.size[0], sizeof(unslideInfo.size)); + + for (uint32_t j = 0; j < kIOTrackingCallSiteBTs; j++) + { + unslideInfo.bt[j] = VM_KERNEL_UNSLIDE(site->info.bt[j]); + } + data->appendBytes(&unslideInfo, sizeof(unslideInfo)); + } + assert(idx == num); + IOTRecursiveLockUnlock(&queue->lock); + ret = kIOReturnSuccess; + break; + } + default: + ret = kIOReturnUnsupported; + break; + } + } + + if ((kIOTrackingLeaks == selector) && data) + { + data = IOTrackingLeaks(data); + 
queue_iterate(&gIOTrackingQ, queue, IOTrackingQueue *, link) + { + if (SkipName(options, queue->name, namesLen, names)) continue; + if (!queue->isAlloc) continue; + IOTRecursiveLockUnlock(&queue->lock); + } + } + + lck_mtx_unlock(gIOTrackingLock); + + if (data) + { + siteInfos = (typeof(siteInfos)) data->getBytesNoCopy(); + num = (data->getLength() / sizeof(IOTrackingCallSiteInfo)); + qsort(siteInfos, num, sizeof(*siteInfos), &IOTrackingCallSiteInfoCompare); + + if (kIOTrackingPrintTracking == selector) + { + for (idx = 0; idx < num; idx++) + { + siteInfo = &siteInfos[idx]; + printf("\n0x%lx bytes (0x%lx + 0x%lx), %d call%s, [%d]\n", + siteInfo->size[0] + siteInfo->size[1], + siteInfo->size[0], siteInfo->size[1], + siteInfo->count, (siteInfo->count != 1) ? "s" : "", idx); + uintptr_t * bt = &siteInfo->bt[0]; + printf(" Backtrace 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + bt[0], bt[1], bt[2], bt[3], bt[4], bt[5], bt[6], bt[7], + bt[8], bt[9], bt[10], bt[11], bt[12], bt[13], bt[14], bt[15]); + kmod_dump_log((vm_offset_t *) &bt[0], kIOTrackingCallSiteBTs, FALSE); + } + data->release(); + data = 0; + } + } + + *result = data; + +#endif /* IOTRACKING */ + + return (ret); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#undef super +#define super IOUserClient + +OSDefineMetaClassAndStructors(IOKitDiagnosticsClient, IOUserClient) + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOUserClient * IOKitDiagnosticsClient::withTask(task_t owningTask) +{ + IOKitDiagnosticsClient * inst; + + inst = new IOKitDiagnosticsClient; + if (inst && !inst->init()) + { + inst->release(); + inst = 0; + } + + return (inst); +} + +IOReturn IOKitDiagnosticsClient::clientClose(void) +{ + terminate(); + return (kIOReturnSuccess); +} + 
+IOReturn IOKitDiagnosticsClient::setProperties(OSObject * properties) +{ + IOReturn kr = kIOReturnUnsupported; + return (kr); +} + +IOReturn IOKitDiagnosticsClient::externalMethod(uint32_t selector, IOExternalMethodArguments * args, + IOExternalMethodDispatch * dispatch, OSObject * target, void * reference) +{ + IOReturn ret = kIOReturnBadArgument; + const IOKitDiagnosticsParameters * params; + const char * names; + size_t namesLen; + OSObject * result; + + if (args->structureInputSize < sizeof(IOKitDiagnosticsParameters)) return (kIOReturnBadArgument); + params = (typeof(params)) args->structureInput; + if (!params) return (kIOReturnBadArgument); + + names = 0; + namesLen = args->structureInputSize - sizeof(IOKitDiagnosticsParameters); + if (namesLen) names = (typeof(names))(params + 1); + + ret = IOTrackingDebug(selector, params->options, names, namesLen, params->size, &result); + + if ((kIOReturnSuccess == ret) && args->structureVariableOutputData) *args->structureVariableOutputData = result; + else if (result) result->release(); + + return (ret); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ diff --git a/iokit/Kernel/IOKitKernelInternal.h b/iokit/Kernel/IOKitKernelInternal.h index a22bd8c60..37e6f9416 100644 --- a/iokit/Kernel/IOKitKernelInternal.h +++ b/iokit/Kernel/IOKitKernelInternal.h @@ -37,6 +37,7 @@ __BEGIN_DECLS #include #include #include +#include /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -68,14 +69,19 @@ kern_return_t IOIteratePageableMaps(vm_size_t size, IOIteratePageableMapsCallback callback, void * ref); vm_map_t IOPageableMapForAddress(uintptr_t address); +struct IOMemoryDescriptorMapAllocRef +{ + vm_map_t map; + mach_vm_address_t mapped; + mach_vm_size_t size; + vm_prot_t prot; + vm_tag_t tag; + IOOptionBits options; +}; + kern_return_t -IOMemoryDescriptorMapMemEntry(vm_map_t * map, ipc_port_t entry, IOOptionBits options, bool pageable, - mach_vm_size_t 
offset, mach_vm_address_t * address, mach_vm_size_t length); -kern_return_t -IOMemoryDescriptorMapCopy(vm_map_t * map, - IOOptionBits options, - mach_vm_size_t offset, - mach_vm_address_t * address, mach_vm_size_t length); +IOMemoryDescriptorMapAlloc(vm_map_t map, void * ref); + mach_vm_address_t IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxPhys, @@ -83,7 +89,6 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP void IOKernelFreePhysical(mach_vm_address_t address, mach_vm_size_t size); - extern vm_size_t debug_iomallocpageable_size; // osfmk/device/iokit_rpc.c @@ -106,49 +111,15 @@ extern void bcopy_phys(addr64_t from, addr64_t to, vm_size_t size); __END_DECLS -// Used for dedicated communications for IODMACommand -enum { - kIOMDWalkSegments = 0x01000000, - kIOMDFirstSegment = 1 | kIOMDWalkSegments, - kIOMDGetCharacteristics = 0x02000000, - kIOMDGetCharacteristicsMapped = 1 | kIOMDGetCharacteristics, - kIOMDDMAActive = 0x03000000, - kIOMDSetDMAActive = 1 | kIOMDDMAActive, - kIOMDSetDMAInactive = kIOMDDMAActive, - kIOMDAddDMAMapSpec = 0x04000000, - kIOMDDMAMap = 0x05000000, - kIOMDDMACommandOperationMask = 0xFF000000, -}; -struct IOMDDMACharacteristics { - UInt64 fLength; - UInt32 fSGCount; - UInt32 fPages; - UInt32 fPageAlign; - ppnum_t fHighestPage; - IODirection fDirection; - UInt8 fIsPrepared; -}; -struct IOMDDMAWalkSegmentArgs { - UInt64 fOffset; // Input/Output offset - UInt64 fIOVMAddr, fLength; // Output variables - UInt8 fMapped; // Input Variable, Require mapped IOVMA -}; -typedef UInt8 IOMDDMAWalkSegmentState[128]; - -struct IOMDDMAMapArgs { - IOMapper * fMapper; - IODMAMapSpecification fMapSpec; - uint64_t fOffset; - uint64_t fLength; - uint64_t fAlloc; - ppnum_t fAllocCount; - uint8_t fMapContig; -}; +#define __IODEQUALIFY(type, expr) \ + ({ typeof(expr) expr_ = (type)(uintptr_t)(expr); \ + (type)(uintptr_t)(expr_); }) + struct IODMACommandInternal { - IOMDDMAWalkSegmentState fState; - 
IOMDDMACharacteristics fMDSummary; + IOMDDMAWalkSegmentState fState; + IOMDDMACharacteristics fMDSummary; UInt64 fPreparedOffset; UInt64 fPreparedLength; @@ -171,8 +142,8 @@ struct IODMACommandInternal ppnum_t fCopyPageCount; - addr64_t fLocalMapperPageAlloc; - ppnum_t fLocalMapperPageCount; + uint64_t fLocalMapperAlloc; + uint64_t fLocalMapperAllocLength; class IOBufferMemoryDescriptor * fCopyMD; @@ -235,16 +206,24 @@ extern clock_sec_t gIOConsoleLockTime; extern OSSet * gIORemoveOnReadProperties; -extern "C" void IOKitResetTime( void ); extern "C" void IOKitInitializeTime( void ); extern "C" OSString * IOCopyLogNameForPID(int pid); #if defined(__i386__) || defined(__x86_64__) +#ifndef __cplusplus +#error xx +#endif + +extern const OSSymbol * gIOCreateEFIDevicePathSymbol; extern "C" void IOSetKeyStoreData(IOMemoryDescriptor * data); #endif +extern const OSSymbol * gAKSGetKey; void IOScreenLockTimeUpdate(clock_sec_t secs); +void IOCPUInitialize(void); +IOReturn IOInstallServicePlatformActions(IOService * service); +IOReturn IORemoveServicePlatformActions(IOService * service); #endif /* ! _IOKIT_KERNELINTERNAL_H */ diff --git a/iokit/Kernel/IOLib.cpp b/iokit/Kernel/IOLib.cpp index 3714d1d4f..44a436346 100644 --- a/iokit/Kernel/IOLib.cpp +++ b/iokit/Kernel/IOLib.cpp @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ @@ -73,6 +73,10 @@ do { \ #endif /* IOKITSTATS */ + +#define TRACK_ALLOC (IOTRACKING && (kIOTracking & gIOKitDebug)) + + extern "C" { @@ -87,7 +91,8 @@ __doprnt( va_list argp, void (*putc)(int, void *), void *arg, - int radix); + int radix, + int is_log); extern void cons_putc_locked(char); extern void bsd_log_lock(void); @@ -146,6 +151,12 @@ static iopa_t gIOPageablePageAllocator; uint32_t gIOPageAllocChunkBytes; +#if IOTRACKING +IOTrackingQueue * gIOMallocTracking; +IOTrackingQueue * gIOWireTracking; +IOTrackingQueue * gIOMapTracking; +#endif /* IOTRACKING */ + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ void IOLibInit(void) @@ -157,18 +168,25 @@ void IOLibInit(void) if(libInitialized) return; + IOLockGroup = lck_grp_alloc_init("IOKit", LCK_GRP_ATTR_NULL); + +#if IOTRACKING + IOTrackingInit(); + gIOMallocTracking = IOTrackingQueueAlloc(kIOMallocTrackingName, 0, 0, true); + gIOWireTracking = IOTrackingQueueAlloc(kIOWireTrackingName, 0, page_size, false); + gIOMapTracking = IOTrackingQueueAlloc(kIOMapTrackingName, 0, page_size, false); +#endif + gIOKitPageableSpace.maps[0].address = 0; ret = kmem_suballoc(kernel_map, &gIOKitPageableSpace.maps[0].address, kIOPageableMapSize, TRUE, - VM_FLAGS_ANYWHERE, + VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IOKIT), &gIOKitPageableSpace.maps[0].map); if (ret != KERN_SUCCESS) panic("failed to allocate iokit pageable map\n"); - IOLockGroup = lck_grp_alloc_init("IOKit", LCK_GRP_ATTR_NULL); - gIOKitPageableSpace.lock = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL); gIOKitPageableSpace.maps[0].end = gIOKitPageableSpace.maps[0].address + kIOPageableMapSize; gIOKitPageableSpace.hint = 0; @@ -182,11 +200,22 @@ void IOLibInit(void) iopa_init(&gIOBMDPageAllocator); iopa_init(&gIOPageablePageAllocator); + libInitialized = true; } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +static uint32_t +log2up(uint32_t size) +{ + if (size 
<= 1) size = 0; + else size = 32 - __builtin_clz(size - 1); + return (size); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + IOThread IOCreateThread(IOThreadFunc fcn, void *arg) { kern_return_t result; @@ -209,17 +238,49 @@ void IOExitThread(void) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#if IOTRACKING +struct IOLibMallocHeader +{ + IOTrackingAddress tracking; +}; +#endif + +#if IOTRACKING +#define sizeofIOLibMallocHeader (sizeof(IOLibMallocHeader) - (TRACK_ALLOC ? 0 : sizeof(IOTrackingAddress))) +#else +#define sizeofIOLibMallocHeader (0) +#endif + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ void * IOMalloc(vm_size_t size) { void * address; + vm_size_t allocSize; + + allocSize = size + sizeofIOLibMallocHeader; +#if IOTRACKING + if (sizeofIOLibMallocHeader && (allocSize <= size)) return (NULL); // overflow +#endif + address = kalloc_tag_bt(allocSize, VM_KERN_MEMORY_IOKIT); - address = (void *)kalloc(size); if ( address ) { +#if IOTRACKING + if (TRACK_ALLOC) { + IOLibMallocHeader * hdr; + hdr = (typeof(hdr)) address; + bzero(&hdr->tracking, sizeof(hdr->tracking)); + hdr->tracking.address = ~(((uintptr_t) address) + sizeofIOLibMallocHeader); + hdr->tracking.size = size; + IOTrackingAdd(gIOMallocTracking, &hdr->tracking.tracking, size, true); + } +#endif + address = (typeof(address)) (((uintptr_t) address) + sizeofIOLibMallocHeader); + #if IOALLOCDEBUG - debug_iomalloc_size += size; + OSAddAtomic(size, &debug_iomalloc_size); #endif - IOStatisticsAlloc(kIOStatisticsMalloc, size); + IOStatisticsAlloc(kIOStatisticsMalloc, size); } return address; @@ -228,31 +289,79 @@ void * IOMalloc(vm_size_t size) void IOFree(void * address, vm_size_t size) { if (address) { - kfree(address, size); + + address = (typeof(address)) (((uintptr_t) address) - sizeofIOLibMallocHeader); + +#if IOTRACKING + if (TRACK_ALLOC) { + IOLibMallocHeader * hdr; + hdr = (typeof(hdr)) address; + if 
(size != hdr->tracking.size) + { + OSReportWithBacktrace("bad IOFree size 0x%lx should be 0x%lx", size, hdr->tracking.size); + size = hdr->tracking.size; + } + IOTrackingRemove(gIOMallocTracking, &hdr->tracking.tracking, size); + } +#endif + + kfree(address, size + sizeofIOLibMallocHeader); #if IOALLOCDEBUG - debug_iomalloc_size -= size; + OSAddAtomic(-size, &debug_iomalloc_size); #endif - IOStatisticsAlloc(kIOStatisticsFree, size); + IOStatisticsAlloc(kIOStatisticsFree, size); } } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +vm_tag_t +IOMemoryTag(vm_map_t map) +{ + vm_tag_t tag; + + if (!vm_kernel_map_is_kernel(map)) return (VM_MEMORY_IOKIT); + + tag = vm_tag_bt(); + if (tag == VM_KERN_MEMORY_NONE) tag = VM_KERN_MEMORY_IOKIT; + + return (tag); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +struct IOLibPageMallocHeader +{ + mach_vm_size_t allocationSize; + mach_vm_address_t allocationAddress; +#if IOTRACKING + IOTrackingAddress tracking; +#endif +}; + +#if IOTRACKING +#define sizeofIOLibPageMallocHeader (sizeof(IOLibPageMallocHeader) - (TRACK_ALLOC ? 
0 : sizeof(IOTrackingAddress))) +#else +#define sizeofIOLibPageMallocHeader (sizeof(IOLibPageMallocHeader)) +#endif + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + void * IOMallocAligned(vm_size_t size, vm_size_t alignment) { - kern_return_t kr; - vm_offset_t address; - vm_offset_t allocationAddress; - vm_size_t adjustedSize; - uintptr_t alignMask; + kern_return_t kr; + vm_offset_t address; + vm_offset_t allocationAddress; + vm_size_t adjustedSize; + uintptr_t alignMask; + IOLibPageMallocHeader * hdr; if (size == 0) return 0; - if (alignment == 0) - alignment = 1; + alignment = (1UL << log2up(alignment)); alignMask = alignment - 1; - adjustedSize = size + sizeof(vm_size_t) + sizeof(vm_address_t); + adjustedSize = size + sizeofIOLibPageMallocHeader; if (size > adjustedSize) { address = 0; /* overflow detected */ @@ -260,9 +369,11 @@ void * IOMallocAligned(vm_size_t size, vm_size_t alignment) else if (adjustedSize >= page_size) { kr = kernel_memory_allocate(kernel_map, &address, - size, alignMask, 0); - if (KERN_SUCCESS != kr) - address = 0; + size, alignMask, 0, IOMemoryTag(kernel_map)); + if (KERN_SUCCESS != kr) address = 0; +#if IOTRACKING + else if (TRACK_ALLOC) IOTrackingAlloc(gIOMallocTracking, address, size); +#endif } else { @@ -271,22 +382,27 @@ void * IOMallocAligned(vm_size_t size, vm_size_t alignment) if (adjustedSize >= page_size) { kr = kernel_memory_allocate(kernel_map, &allocationAddress, - adjustedSize, 0, 0); - if (KERN_SUCCESS != kr) - allocationAddress = 0; + adjustedSize, 0, 0, IOMemoryTag(kernel_map)); + if (KERN_SUCCESS != kr) allocationAddress = 0; } else - allocationAddress = (vm_address_t) kalloc(adjustedSize); + allocationAddress = (vm_address_t) kalloc_tag_bt(adjustedSize, VM_KERN_MEMORY_IOKIT); if (allocationAddress) { - address = (allocationAddress + alignMask - + (sizeof(vm_size_t) + sizeof(vm_address_t))) + address = (allocationAddress + alignMask + sizeofIOLibPageMallocHeader) & (~alignMask); - 
*((vm_size_t *)(address - sizeof(vm_size_t) - sizeof(vm_address_t))) - = adjustedSize; - *((vm_address_t *)(address - sizeof(vm_address_t))) - = allocationAddress; + hdr = (typeof(hdr))(address - sizeofIOLibPageMallocHeader); + hdr->allocationSize = adjustedSize; + hdr->allocationAddress = allocationAddress; +#if IOTRACKING + if (TRACK_ALLOC) { + bzero(&hdr->tracking, sizeof(hdr->tracking)); + hdr->tracking.address = ~address; + hdr->tracking.size = size; + IOTrackingAdd(gIOMallocTracking, &hdr->tracking.tracking, size, true); + } +#endif } else address = 0; } @@ -295,7 +411,7 @@ void * IOMallocAligned(vm_size_t size, vm_size_t alignment) if( address) { #if IOALLOCDEBUG - debug_iomalloc_size += size; + OSAddAtomic(size, &debug_iomalloc_size); #endif IOStatisticsAlloc(kIOStatisticsMallocAligned, size); } @@ -305,33 +421,47 @@ void * IOMallocAligned(vm_size_t size, vm_size_t alignment) void IOFreeAligned(void * address, vm_size_t size) { - vm_address_t allocationAddress; - vm_size_t adjustedSize; + vm_address_t allocationAddress; + vm_size_t adjustedSize; + IOLibPageMallocHeader * hdr; if( !address) return; assert(size); - adjustedSize = size + sizeof(vm_size_t) + sizeof(vm_address_t); + adjustedSize = size + sizeofIOLibPageMallocHeader; if (adjustedSize >= page_size) { - +#if IOTRACKING + if (TRACK_ALLOC) IOTrackingFree(gIOMallocTracking, (uintptr_t) address, size); +#endif kmem_free( kernel_map, (vm_offset_t) address, size); } else { - adjustedSize = *((vm_size_t *)( (vm_address_t) address - - sizeof(vm_address_t) - sizeof(vm_size_t))); - allocationAddress = *((vm_address_t *)( (vm_address_t) address - - sizeof(vm_address_t) )); + hdr = (typeof(hdr)) (((uintptr_t)address) - sizeofIOLibPageMallocHeader); + adjustedSize = hdr->allocationSize; + allocationAddress = hdr->allocationAddress; - if (adjustedSize >= page_size) +#if IOTRACKING + if (TRACK_ALLOC) + { + if (size != hdr->tracking.size) + { + OSReportWithBacktrace("bad IOFreeAligned size 0x%lx should be 0x%lx", 
size, hdr->tracking.size); + size = hdr->tracking.size; + } + IOTrackingRemove(gIOMallocTracking, &hdr->tracking.tracking, size); + } +#endif + if (adjustedSize >= page_size) { kmem_free( kernel_map, allocationAddress, adjustedSize); - else - kfree((void *)allocationAddress, adjustedSize); + } else { + kfree((void *)allocationAddress, adjustedSize); + } } #if IOALLOCDEBUG - debug_iomalloc_size -= size; + OSAddAtomic(-size, &debug_iomalloc_size); #endif IOStatisticsAlloc(kIOStatisticsFreeAligned, size); @@ -342,31 +472,36 @@ void IOFreeAligned(void * address, vm_size_t size) void IOKernelFreePhysical(mach_vm_address_t address, mach_vm_size_t size) { - mach_vm_address_t allocationAddress; - mach_vm_size_t adjustedSize; + mach_vm_address_t allocationAddress; + mach_vm_size_t adjustedSize; + IOLibPageMallocHeader * hdr; if (!address) return; assert(size); - adjustedSize = (2 * size) + sizeof(mach_vm_size_t) + sizeof(mach_vm_address_t); + adjustedSize = (2 * size) + sizeofIOLibPageMallocHeader; if (adjustedSize >= page_size) { - +#if IOTRACKING + if (TRACK_ALLOC) IOTrackingFree(gIOMallocTracking, address, size); +#endif kmem_free( kernel_map, (vm_offset_t) address, size); } else { - adjustedSize = *((mach_vm_size_t *) - (address - sizeof(mach_vm_address_t) - sizeof(mach_vm_size_t))); - allocationAddress = *((mach_vm_address_t *) - (address - sizeof(mach_vm_address_t) )); + hdr = (typeof(hdr)) (((uintptr_t)address) - sizeofIOLibPageMallocHeader); + adjustedSize = hdr->allocationSize; + allocationAddress = hdr->allocationAddress; +#if IOTRACKING + if (TRACK_ALLOC) IOTrackingRemove(gIOMallocTracking, &hdr->tracking.tracking, size); +#endif kfree((void *)allocationAddress, adjustedSize); } IOStatisticsAlloc(kIOStatisticsFreeContiguous, size); #if IOALLOCDEBUG - debug_iomalloc_size -= size; + OSAddAtomic(-size, &debug_iomalloc_size); #endif } @@ -375,11 +510,12 @@ mach_vm_address_t IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxPhys, 
mach_vm_size_t alignment, bool contiguous) { - kern_return_t kr; - mach_vm_address_t address; - mach_vm_address_t allocationAddress; - mach_vm_size_t adjustedSize; - mach_vm_address_t alignMask; + kern_return_t kr; + mach_vm_address_t address; + mach_vm_address_t allocationAddress; + mach_vm_size_t adjustedSize; + mach_vm_address_t alignMask; + IOLibPageMallocHeader * hdr; if (size == 0) return (0); @@ -387,7 +523,8 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP alignment = 1; alignMask = alignment - 1; - adjustedSize = (2 * size) + sizeof(mach_vm_size_t) + sizeof(mach_vm_address_t); + adjustedSize = (2 * size) + sizeofIOLibPageMallocHeader; + if (adjustedSize < size) return (0); contiguous = (contiguous && (adjustedSize > page_size)) || (alignment > page_size); @@ -416,36 +553,49 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP if (contiguous || maxPhys) { kr = kmem_alloc_contig(kernel_map, &virt, size, - alignMask, atop(maxPhys), atop(alignMask), 0); + alignMask, atop(maxPhys), atop(alignMask), 0, IOMemoryTag(kernel_map)); } else { kr = kernel_memory_allocate(kernel_map, &virt, - size, alignMask, options); + size, alignMask, options, IOMemoryTag(kernel_map)); } if (KERN_SUCCESS == kr) + { address = virt; +#if IOTRACKING + if (TRACK_ALLOC) IOTrackingAlloc(gIOMallocTracking, address, size); +#endif + } else address = 0; } else { adjustedSize += alignMask; - allocationAddress = (mach_vm_address_t) kalloc(adjustedSize); + if (adjustedSize < size) return (0); + allocationAddress = (mach_vm_address_t) kalloc_tag_bt(adjustedSize, VM_KERN_MEMORY_IOKIT); if (allocationAddress) { - address = (allocationAddress + alignMask - + (sizeof(mach_vm_size_t) + sizeof(mach_vm_address_t))) + + address = (allocationAddress + alignMask + sizeofIOLibPageMallocHeader) & (~alignMask); if (atop_32(address) != atop_32(address + size - 1)) address = round_page(address); - *((mach_vm_size_t *)(address - 
sizeof(mach_vm_size_t) - - sizeof(mach_vm_address_t))) = adjustedSize; - *((mach_vm_address_t *)(address - sizeof(mach_vm_address_t))) - = allocationAddress; + hdr = (typeof(hdr))(address - sizeofIOLibPageMallocHeader); + hdr->allocationSize = adjustedSize; + hdr->allocationAddress = allocationAddress; +#if IOTRACKING + if (TRACK_ALLOC) { + bzero(&hdr->tracking, sizeof(hdr->tracking)); + hdr->tracking.address = ~address; + hdr->tracking.size = size; + IOTrackingAdd(gIOMallocTracking, &hdr->tracking.tracking, size, true); + } +#endif } else address = 0; } @@ -453,7 +603,7 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP if (address) { IOStatisticsAlloc(kIOStatisticsMallocContiguous, size); #if IOALLOCDEBUG - debug_iomalloc_size += size; + OSAddAtomic(size, &debug_iomalloc_size); #endif } @@ -608,7 +758,7 @@ kern_return_t IOIteratePageableMaps(vm_size_t size, &min, segSize, TRUE, - VM_FLAGS_ANYWHERE, + VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IOKIT), &map); if( KERN_SUCCESS != kr) { lck_mtx_unlock( gIOKitPageableSpace.lock ); @@ -631,7 +781,8 @@ kern_return_t IOIteratePageableMaps(vm_size_t size, struct IOMallocPageableRef { vm_offset_t address; - vm_size_t size; + vm_size_t size; + vm_tag_t tag; }; static kern_return_t IOMallocPageableCallback(vm_map_t map, void * _ref) @@ -639,12 +790,12 @@ static kern_return_t IOMallocPageableCallback(vm_map_t map, void * _ref) struct IOMallocPageableRef * ref = (struct IOMallocPageableRef *) _ref; kern_return_t kr; - kr = kmem_alloc_pageable( map, &ref->address, ref->size ); + kr = kmem_alloc_pageable( map, &ref->address, ref->size, ref->tag ); return( kr ); } -static void * IOMallocPageablePages(vm_size_t size, vm_size_t alignment) +static void * IOMallocPageablePages(vm_size_t size, vm_size_t alignment, vm_tag_t tag) { kern_return_t kr = kIOReturnNotReady; struct IOMallocPageableRef ref; @@ -655,6 +806,7 @@ static void * IOMallocPageablePages(vm_size_t size, vm_size_t alignment) return( 
0 ); ref.size = size; + ref.tag = tag; kr = IOIteratePageableMaps( size, &IOMallocPageableCallback, &ref ); if( kIOReturnSuccess != kr) ref.address = 0; @@ -691,19 +843,19 @@ static void IOFreePageablePages(void * address, vm_size_t size) static uintptr_t IOMallocOnePageablePage(iopa_t * a) { - return ((uintptr_t) IOMallocPageablePages(page_size, page_size)); + return ((uintptr_t) IOMallocPageablePages(page_size, page_size, VM_KERN_MEMORY_IOKIT)); } void * IOMallocPageable(vm_size_t size, vm_size_t alignment) { void * addr; - if (size >= (page_size - 4*gIOPageAllocChunkBytes)) addr = IOMallocPageablePages(size, alignment); + if (size >= (page_size - 4*gIOPageAllocChunkBytes)) addr = IOMallocPageablePages(size, alignment, IOMemoryTag(kernel_map)); else addr = ((void * ) iopa_alloc(&gIOPageablePageAllocator, &IOMallocOnePageablePage, size, alignment)); if (addr) { #if IOALLOCDEBUG - debug_iomallocpageable_size += size; + OSAddAtomicLong(size, &debug_iomallocpageable_size); #endif IOStatisticsAlloc(kIOStatisticsMallocPageable, size); } @@ -714,7 +866,7 @@ void * IOMallocPageable(vm_size_t size, vm_size_t alignment) void IOFreePageable(void * address, vm_size_t size) { #if IOALLOCDEBUG - debug_iomallocpageable_size -= size; + OSAddAtomicLong(-size, &debug_iomallocpageable_size); #endif IOStatisticsAlloc(kIOStatisticsFreePageable, size); @@ -768,14 +920,6 @@ iopa_allocinpage(iopa_page_t * pa, uint32_t count, uint64_t align) return (0); } -static uint32_t -log2up(uint32_t size) -{ - if (size <= 1) size = 0; - else size = 32 - __builtin_clz(size - 1); - return (size); -} - uintptr_t iopa_alloc(iopa_t * a, iopa_proc_t alloc, vm_size_t bytes, uint32_t balign) { @@ -798,7 +942,7 @@ iopa_alloc(iopa_t * a, iopa_proc_t alloc, vm_size_t bytes, uint32_t balign) align = align_masks[log2up((balign + gIOPageAllocChunkBytes - 1) / gIOPageAllocChunkBytes)]; IOLockLock(a->lock); - pa = (typeof(pa)) queue_first(&a->list); + __IGNORE_WCASTALIGN(pa = (typeof(pa)) queue_first(&a->list)); 
while (!queue_end(&a->list, &pa->link)) { addr = iopa_allocinpage(pa, count, align); @@ -807,7 +951,7 @@ iopa_alloc(iopa_t * a, iopa_proc_t alloc, vm_size_t bytes, uint32_t balign) a->bytecount += bytes; break; } - pa = (typeof(pa)) queue_next(&pa->link); + __IGNORE_WCASTALIGN(pa = (typeof(pa)) queue_next(&pa->link)); } IOLockUnlock(a->lock); @@ -941,6 +1085,15 @@ void IOSleep(unsigned milliseconds) delay_for_interval(milliseconds, kMillisecondScale); } +/* + * Spin for indicated number of milliseconds, and potentially an + * additional number of milliseconds up to the leeway values. + */ +void IOSleepWithLeeway(unsigned intervalMilliseconds, unsigned leewayMilliseconds) +{ + delay_for_interval_with_leeway(intervalMilliseconds, leewayMilliseconds, kMillisecondScale); +} + /* * Spin for indicated number of microseconds. */ @@ -985,11 +1138,12 @@ void IOLogv(const char *format, va_list ap) va_copy(ap2, ap); bsd_log_lock(); - __doprnt(format, ap, _iolog_logputc, NULL, 16); + __doprnt(format, ap, _iolog_logputc, NULL, 16, TRUE); bsd_log_unlock(); logwakeup(); - __doprnt(format, ap2, _iolog_consputc, NULL, 16); + __doprnt(format, ap2, _iolog_consputc, NULL, 16, TRUE); + va_end(ap2); } #if !__LP64__ @@ -1043,7 +1197,7 @@ OSString * IOCopyLogNameForPID(int pid) IOAlignment IOSizeToAlignment(unsigned int size) { - register int shift; + int shift; const int intsize = sizeof(unsigned int) * 8; for (shift = 1; shift < intsize; shift++) { diff --git a/iokit/Kernel/IOMapper.cpp b/iokit/Kernel/IOMapper.cpp index 294e13822..8f2b35992 100644 --- a/iokit/Kernel/IOMapper.cpp +++ b/iokit/Kernel/IOMapper.cpp @@ -30,6 +30,7 @@ #include #include #include +#include "IOKitKernelInternal.h" __BEGIN_DECLS extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); @@ -38,10 +39,10 @@ __END_DECLS #define super IOService OSDefineMetaClassAndAbstractStructors(IOMapper, IOService); -OSMetaClassDefineReservedUsed(IOMapper, 0); -OSMetaClassDefineReservedUsed(IOMapper, 1); 
-OSMetaClassDefineReservedUsed(IOMapper, 2); -OSMetaClassDefineReservedUsed(IOMapper, 3); +OSMetaClassDefineReservedUnused(IOMapper, 0); +OSMetaClassDefineReservedUnused(IOMapper, 1); +OSMetaClassDefineReservedUnused(IOMapper, 2); +OSMetaClassDefineReservedUnused(IOMapper, 3); OSMetaClassDefineReservedUnused(IOMapper, 4); OSMetaClassDefineReservedUnused(IOMapper, 5); OSMetaClassDefineReservedUnused(IOMapper, 6); @@ -80,6 +81,8 @@ bool IOMapper::start(IOService *provider) if (!initHardware(provider)) return false; + fPageSize = getPageSize(); + if (fIsSystem) { sMapperLock.lock(); IOMapper::gSystem = this; @@ -98,22 +101,8 @@ bool IOMapper::start(IOService *provider) return true; } -bool IOMapper::allocTable(IOByteCount size) -{ - assert(!fTable); - - fTableSize = size; - fTableHandle = NewARTTable(size, &fTable, &fTablePhys); - return fTableHandle != 0; -} - void IOMapper::free() { - if (fTableHandle) { - FreeARTTable(fTableHandle, fTableSize); - fTableHandle = 0; - } - super::free(); } @@ -186,135 +175,45 @@ IOMapper * IOMapper::copyMapperForDeviceWithIndex(IOService * device, unsigned i return (mapper); } -ppnum_t IOMapper::iovmAllocDMACommand(IODMACommand * command, IOItemCount pageCount) -{ - return (0); -} - -void IOMapper::iovmFreeDMACommand(IODMACommand * command, - ppnum_t addr, IOItemCount pageCount) -{ -} - -ppnum_t IOMapper::iovmMapMemory( - OSObject * memory, // dma command or iomd - ppnum_t offsetPage, - ppnum_t pageCount, - uint32_t options, - upl_page_info_t * pageList, - const IODMAMapSpecification * mapSpecification) -{ - return (0); -} - -void IOMapper::iovmInsert(ppnum_t addr, IOItemCount offset, - ppnum_t *pageList, IOItemCount pageCount) -{ - while (pageCount--) - iovmInsert(addr, offset++, *pageList++); -} - -void IOMapper::iovmInsert(ppnum_t addr, IOItemCount offset, - upl_page_info_t *pageList, IOItemCount pageCount) -{ - for (IOItemCount i = 0; i < pageCount; i++) - iovmInsert(addr, offset + i, pageList[i].phys_addr); -} - -OSData * 
IOMapper:: -NewARTTable(IOByteCount size, void ** virtAddrP, ppnum_t *physAddrP) -{ - if (!virtAddrP || !physAddrP) - return 0; - - kern_return_t kr; - vm_address_t address; - - size = round_page(size); - kr = kmem_alloc_contig(kernel_map, &address, size, PAGE_MASK, 0 /*max_pnum*/, 0 /*pnum_mask*/, false); - if (kr) - return 0; - - ppnum_t pagenum = pmap_find_phys(kernel_pmap, (addr64_t) address); - if (pagenum) - *physAddrP = pagenum; - else { - FreeARTTable((OSData *) address, size); - address = 0; - } - - *virtAddrP = (void *) address; - - return (OSData *) address; -} - -void IOMapper::FreeARTTable(OSData *artHandle, IOByteCount size) -{ - vm_address_t address = (vm_address_t) artHandle; - - size = round_page(size); - kmem_free(kernel_map, address, size); // Just panic if address is 0 -} - -bool IOMapper::getBypassMask(addr64_t *maskP) const -{ - return false; -} - __BEGIN_DECLS // These are C accessors to the system mapper for non-IOKit clients ppnum_t IOMapperIOVMAlloc(unsigned pages) { + IOReturn ret; + uint64_t dmaAddress, dmaLength; + IOMapper::checkForSystemMapper(); + ret = kIOReturnUnsupported; if (IOMapper::gSystem) - return IOMapper::gSystem->iovmAlloc((IOItemCount) pages); - else - return 0; + { + ret = IOMapper::gSystem->iovmMapMemory( + NULL, 0, ptoa_64(pages), + (kIODMAMapReadAccess | kIODMAMapWriteAccess), + NULL, NULL, NULL, + &dmaAddress, &dmaLength); + } + + if (kIOReturnSuccess == ret) return (atop_64(dmaAddress)); + return (0); } void IOMapperIOVMFree(ppnum_t addr, unsigned pages) { if (IOMapper::gSystem) - IOMapper::gSystem->iovmFree(addr, (IOItemCount) pages); -} - -ppnum_t IOMapperInsertPage(ppnum_t addr, unsigned offset, ppnum_t page) -{ - if (IOMapper::gSystem) { - if (!addr) panic("!addr"); - IOMapper::gSystem->iovmInsert(addr, (IOItemCount) offset, page); - return addr + offset; + { + IOMapper::gSystem->iovmUnmapMemory(NULL, NULL, ptoa_64(addr), ptoa_64(pages)); } - else - return page; } -void IOMapperInsertPPNPages(ppnum_t addr, 
unsigned offset, - ppnum_t *pageList, unsigned pageCount) -{ - if (!IOMapper::gSystem) - panic("IOMapperInsertPPNPages no system mapper"); - else - assert(!((vm_address_t) IOMapper::gSystem & 3)); - - IOMapper::gSystem-> - iovmInsert(addr, (IOItemCount) offset, pageList, pageCount); -} - -void IOMapperInsertUPLPages(ppnum_t addr, unsigned offset, - upl_page_info_t *pageList, unsigned pageCount) +ppnum_t IOMapperInsertPage(ppnum_t addr, unsigned offset, ppnum_t page) { - if (!IOMapper::gSystem) - panic("IOMapperInsertUPLPages no system mapper"); - else - assert(!((vm_address_t) IOMapper::gSystem & 3)); - - IOMapper::gSystem->iovmInsert(addr, - (IOItemCount) offset, - pageList, - (IOItemCount) pageCount); + if (!IOMapper::gSystem) return (page); + if (!addr) panic("!addr"); + IOMapper::gSystem->iovmInsert((kIODMAMapReadAccess | kIODMAMapWriteAccess), + ptoa_64(addr), ptoa_64(offset), ptoa_64(page), ptoa_64(1)); + return (addr + offset); } ///////////////////////////////////////////////////////////////////////////// @@ -332,7 +231,7 @@ UInt8 IOMappedRead8(IOPhysicalAddress address) IOMapper::checkForSystemMapper(); if (IOMapper::gSystem) { - addr64_t addr = IOMapper::gSystem->mapAddr(address); + addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address); return (UInt8) ml_phys_read_byte_64(addr); } else @@ -344,7 +243,7 @@ UInt16 IOMappedRead16(IOPhysicalAddress address) IOMapper::checkForSystemMapper(); if (IOMapper::gSystem) { - addr64_t addr = IOMapper::gSystem->mapAddr(address); + addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address); return (UInt16) ml_phys_read_half_64(addr); } else @@ -356,7 +255,7 @@ UInt32 IOMappedRead32(IOPhysicalAddress address) IOMapper::checkForSystemMapper(); if (IOMapper::gSystem) { - addr64_t addr = IOMapper::gSystem->mapAddr(address); + addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address); return (UInt32) ml_phys_read_word_64(addr); } else @@ -368,7 +267,7 @@ UInt64 IOMappedRead64(IOPhysicalAddress 
address) IOMapper::checkForSystemMapper(); if (IOMapper::gSystem) { - addr64_t addr = IOMapper::gSystem->mapAddr(address); + addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address); return (UInt64) ml_phys_read_double_64(addr); } else @@ -380,7 +279,7 @@ void IOMappedWrite8(IOPhysicalAddress address, UInt8 value) IOMapper::checkForSystemMapper(); if (IOMapper::gSystem) { - addr64_t addr = IOMapper::gSystem->mapAddr(address); + addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address); ml_phys_write_byte_64(addr, value); } else @@ -392,7 +291,7 @@ void IOMappedWrite16(IOPhysicalAddress address, UInt16 value) IOMapper::checkForSystemMapper(); if (IOMapper::gSystem) { - addr64_t addr = IOMapper::gSystem->mapAddr(address); + addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address); ml_phys_write_half_64(addr, value); } else @@ -404,7 +303,7 @@ void IOMappedWrite32(IOPhysicalAddress address, UInt32 value) IOMapper::checkForSystemMapper(); if (IOMapper::gSystem) { - addr64_t addr = IOMapper::gSystem->mapAddr(address); + addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address); ml_phys_write_word_64(addr, value); } else @@ -416,7 +315,7 @@ void IOMappedWrite64(IOPhysicalAddress address, UInt64 value) IOMapper::checkForSystemMapper(); if (IOMapper::gSystem) { - addr64_t addr = IOMapper::gSystem->mapAddr(address); + addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address); ml_phys_write_double_64(addr, value); } else diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index 0c7744386..4bd9659e7 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -25,12 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1998 Apple Computer, Inc. All rights reserved. 
- * - * HISTORY - * - */ #include @@ -43,9 +37,8 @@ #include #include -#ifndef __LP64__ #include -#endif /* !__LP64__ */ +#include #include #include @@ -74,16 +67,6 @@ __BEGIN_DECLS extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); extern void ipc_port_release_send(ipc_port_t port); -kern_return_t -memory_object_iopl_request( - ipc_port_t port, - memory_object_offset_t offset, - vm_size_t *upl_size, - upl_t *upl_ptr, - upl_page_info_array_t user_page_list, - unsigned int *page_list_count, - int *flags); - // osfmk/device/iokit_rpc.c unsigned int IODefaultCacheBits(addr64_t pa); unsigned int IOTranslateCacheBits(struct phys_entry *pp); @@ -151,8 +134,12 @@ struct ioGMDData { IOMapper * fMapper; uint8_t fDMAMapNumAddressBits; uint64_t fDMAMapAlignment; - addr64_t fMappedBase; - uint64_t fPreparationID; + uint64_t fMappedBase; + uint64_t fMappedLength; + uint64_t fPreparationID; +#if IOTRACKING + IOTracking fWireTracking; +#endif unsigned int fPageCnt; unsigned char fDiscontig:1; unsigned char fCompletionError:1; @@ -484,9 +471,12 @@ IOGeneralMemoryDescriptor::memoryReferenceCreate( IOOptionBits type = (_flags & kIOMemoryTypeMask); IOOptionBits cacheMode; unsigned int pagerFlags; + vm_tag_t tag; ref = memoryReferenceAlloc(kCapacity, NULL); if (!ref) return (kIOReturnNoMemory); + + tag = IOMemoryTag(kernel_map); entries = &ref->entries[0]; count = 0; @@ -497,6 +487,7 @@ IOGeneralMemoryDescriptor::memoryReferenceCreate( { nextAddr = getPhysicalSegment(offset, &physLen, kIOMemoryMapperNone); nextLen = physLen; + // default cache mode for physical if (kIODefaultCache == ((_flags & kIOMemoryBufferCacheMask) >> kIOMemoryBufferCacheShift)) { @@ -607,7 +598,7 @@ IOGeneralMemoryDescriptor::memoryReferenceCreate( } else { - // _task == 0, physical + // _task == 0, physical or kIOMemoryTypeUPL memory_object_t pager; vm_size_t size = ptoa_32(_pages); @@ -667,16 +658,7 @@ IOGeneralMemoryDescriptor::memoryReferenceCreate( return (err); } -struct IOMemoryDescriptorMapAllocRef 
-{ - vm_map_t map; - mach_vm_address_t mapped; - mach_vm_size_t size; - vm_prot_t prot; - IOOptionBits options; -}; - -static kern_return_t +kern_return_t IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref) { IOMemoryDescriptorMapAllocRef * ref = (typeof(ref))_ref; @@ -684,12 +666,13 @@ IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref) vm_map_offset_t addr; addr = ref->mapped; + err = vm_map_enter_mem_object(map, &addr, ref->size, (vm_map_offset_t) 0, (((ref->options & kIOMapAnywhere) ? VM_FLAGS_ANYWHERE : VM_FLAGS_FIXED) - | VM_MAKE_TAG(VM_MEMORY_IOKIT) + | VM_MAKE_TAG(ref->tag) | VM_FLAGS_IOKIT_ACCT), /* iokit accounting */ IPC_PORT_NULL, (memory_object_offset_t) 0, @@ -721,13 +704,14 @@ IOGeneralMemoryDescriptor::memoryReferenceMap( vm_map_offset_t addr, mapAddr; vm_map_offset_t pageOffset, entryOffset, remain, chunk; - mach_vm_address_t srcAddr, nextAddr; - mach_vm_size_t srcLen, nextLen; + mach_vm_address_t nextAddr; + mach_vm_size_t nextLen; IOByteCount physLen; IOMemoryEntry * entry; vm_prot_t prot, memEntryCacheMode; IOOptionBits type; IOOptionBits cacheMode; + vm_tag_t tag; /* * For the kIOMapPrefault option. 
@@ -747,6 +731,8 @@ IOGeneralMemoryDescriptor::memoryReferenceMap( memEntryCacheMode = (MAP_MEM_ONLY | VM_PROT_WRITE | prot | vmProtForCacheMode(cacheMode)); } + tag = IOMemoryTag(map); + if (_task) { // Find first range for offset @@ -788,9 +774,19 @@ IOGeneralMemoryDescriptor::memoryReferenceMap( // allocate VM size = round_page_64(size + pageOffset); + if (kIOMapOverwrite & options) + { + if ((map == kernel_map) && (kIOMemoryBufferPageable & _flags)) + { + map = IOPageableMapForAddress(addr); + } + err = KERN_SUCCESS; + } + else { IOMemoryDescriptorMapAllocRef ref; ref.map = map; + ref.tag = tag; ref.options = options; ref.size = size; ref.prot = prot; @@ -799,7 +795,6 @@ IOGeneralMemoryDescriptor::memoryReferenceMap( ref.mapped = 0; else ref.mapped = addr; - if ((ref.map == kernel_map) && (kIOMemoryBufferPageable & _flags)) err = IOIteratePageableMaps( ref.size, &IOMemoryDescriptorMapAlloc, &ref ); else @@ -815,7 +810,8 @@ IOGeneralMemoryDescriptor::memoryReferenceMap( * Prefaulting is only possible if we wired the memory earlier. Check the * memory type, and the underlying data. */ - if (options & kIOMapPrefault) { + if (options & kIOMapPrefault) + { /* * The memory must have been wired by calling ::prepare(), otherwise * we don't have the UPL. Without UPLs, pages cannot be pre-faulted @@ -829,7 +825,7 @@ IOGeneralMemoryDescriptor::memoryReferenceMap( { return kIOReturnBadArgument; } - + // Get the page list. 
ioGMDData* dataP = getDataP(_memoryEntries); ioPLBlock const* ioplList = getIOPLList(dataP); @@ -871,22 +867,9 @@ IOGeneralMemoryDescriptor::memoryReferenceMap( remain = size; mapAddr = addr; addr += pageOffset; - while (remain && nextLen && (KERN_SUCCESS == err)) - { - srcAddr = nextAddr; - srcLen = nextLen; - nextAddr = 0; - nextLen = 0; - // coalesce addr range - for (++rangeIdx; rangeIdx < _rangesCount; rangeIdx++) - { - getAddrLenForInd(nextAddr, nextLen, type, _ranges, rangeIdx); - if ((srcAddr + srcLen) != nextAddr) break; - srcLen += nextLen; - } - while (srcLen && (KERN_SUCCESS == err)) - { + while (remain && (KERN_SUCCESS == err)) + { entryOffset = offset - entry->offset; if ((page_mask & entryOffset) != pageOffset) { @@ -908,15 +891,15 @@ IOGeneralMemoryDescriptor::memoryReferenceMap( if (chunk) { if (chunk > remain) chunk = remain; - - if (options & kIOMapPrefault) { + if (options & kIOMapPrefault) + { UInt nb_pages = round_page(chunk) / PAGE_SIZE; err = vm_map_enter_mem_object_prefault(map, &mapAddr, chunk, 0 /* mask */, (VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE - | VM_MAKE_TAG(VM_MEMORY_IOKIT) + | VM_MAKE_TAG(tag) | VM_FLAGS_IOKIT_ACCT), /* iokit accounting */ entry->entry, entryOffset, @@ -928,13 +911,15 @@ IOGeneralMemoryDescriptor::memoryReferenceMap( // Compute the next index in the page list. 
currentPageIndex += nb_pages; assert(currentPageIndex <= _pages); - } else { + } + else + { err = vm_map_enter_mem_object(map, &mapAddr, chunk, 0 /* mask */, (VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE - | VM_MAKE_TAG(VM_MEMORY_IOKIT) + | VM_MAKE_TAG(tag) | VM_FLAGS_IOKIT_ACCT), /* iokit accounting */ entry->entry, entryOffset, @@ -943,7 +928,6 @@ IOGeneralMemoryDescriptor::memoryReferenceMap( prot, // max VM_INHERIT_NONE); } - if (KERN_SUCCESS != err) break; remain -= chunk; if (!remain) break; @@ -959,9 +943,8 @@ IOGeneralMemoryDescriptor::memoryReferenceMap( break; } } - } - if ((KERN_SUCCESS != err) && addr) + if ((KERN_SUCCESS != err) && addr && !(kIOMapOverwrite & options)) { (void) mach_vm_deallocate(map, trunc_page_64(addr), size); addr = 0; @@ -1188,7 +1171,7 @@ IOMemoryDescriptor::withSubRange(IOMemoryDescriptor * of, IOByteCount length, IODirection direction) { - return (IOSubMemoryDescriptor::withSubRange(of, offset, length, direction | kIOMemoryThreadSafe)); + return (IOSubMemoryDescriptor::withSubRange(of, offset, length, direction)); } #endif /* !__LP64__ */ @@ -1660,7 +1643,7 @@ void IOGeneralMemoryDescriptor::free() ioGMDData * dataP; if (_memoryEntries && (dataP = getDataP(_memoryEntries)) && dataP->fMappedBase) { - dataP->fMapper->iovmFree(atop_64(dataP->fMappedBase), _pages); + dataP->fMapper->iovmUnmapMemory(this, NULL, dataP->fMappedBase, dataP->fMappedLength); dataP->fMappedBase = 0; } } @@ -1952,31 +1935,35 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * if (_memoryEntries && data->fMapper) { - bool remap; - bool whole = ((data->fOffset == 0) && (data->fLength == _length)); + bool remap, keepMap; dataP = getDataP(_memoryEntries); if (data->fMapSpec.numAddressBits < dataP->fDMAMapNumAddressBits) dataP->fDMAMapNumAddressBits = data->fMapSpec.numAddressBits; if (data->fMapSpec.alignment > dataP->fDMAMapAlignment) dataP->fDMAMapAlignment = data->fMapSpec.alignment; - remap = (dataP->fDMAMapNumAddressBits < 64) - && 
((dataP->fMappedBase + _length) > (1ULL << dataP->fDMAMapNumAddressBits)); + keepMap = (data->fMapper == gIOSystemMapper); + keepMap &= ((data->fOffset == 0) && (data->fLength == _length)); + + remap = (!keepMap); + remap |= (dataP->fDMAMapNumAddressBits < 64) + && ((dataP->fMappedBase + _length) > (1ULL << dataP->fDMAMapNumAddressBits)); remap |= (dataP->fDMAMapAlignment > page_size); - remap |= (!whole); + if (remap || !dataP->fMappedBase) { // if (dataP->fMappedBase) OSReportWithBacktrace("kIOMDDMAMap whole %d remap %d params %d\n", whole, remap, params); - err = md->dmaMap(data->fMapper, &data->fMapSpec, data->fOffset, data->fLength, &data->fAlloc, &data->fAllocCount); - if ((kIOReturnSuccess == err) && whole && !dataP->fMappedBase) + err = md->dmaMap(data->fMapper, data->fCommand, &data->fMapSpec, data->fOffset, data->fLength, &data->fAlloc, &data->fAllocLength); + if (keepMap && (kIOReturnSuccess == err) && !dataP->fMappedBase) { - dataP->fMappedBase = data->fAlloc; - data->fAllocCount = 0; // IOMD owns the alloc now + dataP->fMappedBase = data->fAlloc; + dataP->fMappedLength = data->fAllocLength; + data->fAllocLength = 0; // IOMD owns the alloc now } } else { data->fAlloc = dataP->fMappedBase; - data->fAllocCount = 0; // IOMD owns the alloc + data->fAllocLength = 0; // give out IOMD map } data->fMapContig = !dataP->fDiscontig; } @@ -2077,7 +2064,7 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * bzero(&mapSpec, sizeof(mapSpec)); mapSpec.numAddressBits = dataP->fDMAMapNumAddressBits; mapSpec.alignment = dataP->fDMAMapAlignment; - err = md->dmaMap(dataP->fMapper, &mapSpec, 0, _length, &dataP->fMappedBase, NULL); + err = md->dmaMap(dataP->fMapper, NULL, &mapSpec, 0, _length, &dataP->fMappedBase, &dataP->fMappedLength); if (kIOReturnSuccess != err) return (err); } } @@ -2350,10 +2337,10 @@ IOGeneralMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount *l addr64_t origAddr = address; IOByteCount origLen = length; 
- address = mapper->mapAddr(origAddr); + address = mapper->mapToPhysicalAddress(origAddr); length = page_size - (address & (page_size - 1)); while ((length < origLen) - && ((address + length) == mapper->mapAddr(origAddr + length))) + && ((address + length) == mapper->mapToPhysicalAddress(origAddr + length))) length += page_size; if (length > origLen) length = origLen; @@ -2437,11 +2424,11 @@ IOMemoryDescriptor::getPhysicalSegment64(IOByteCount offset, IOByteCount *length { IOByteCount origLen; - phys64 = mapper->mapAddr(phys32); + phys64 = mapper->mapToPhysicalAddress(phys32); origLen = *lengthOfSegment; length = page_size - (phys64 & (page_size - 1)); while ((length < origLen) - && ((phys64 + length) == mapper->mapAddr(phys32 + length))) + && ((phys64 + length) == mapper->mapToPhysicalAddress(phys32 + length))) length += page_size; if (length > origLen) length = origLen; @@ -2522,7 +2509,7 @@ IOMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt data if (params) panic("class %s does not support IODMACommand::kIterateOnly", getMetaClass()->getClassName()); data->fMapContig = true; - err = md->dmaMap(data->fMapper, &data->fMapSpec, data->fOffset, data->fLength, &data->fAlloc, &data->fAllocCount); + err = md->dmaMap(data->fMapper, data->fCommand, &data->fMapSpec, data->fOffset, data->fLength, &data->fAlloc, &data->fAllocLength); return (err); } else return kIOReturnBadArgument; @@ -2607,10 +2594,23 @@ IOReturn IOMemoryDescriptor::setPurgeable( IOOptionBits newState, IOReturn IOMemoryDescriptor::getPageCounts( IOByteCount * residentPageCount, IOByteCount * dirtyPageCount ) { - IOReturn err = kIOReturnNotReady; + IOReturn err = kIOReturnNotReady; if (kIOMemoryThreadSafe & _flags) LOCK; if (_memRef) err = IOGeneralMemoryDescriptor::memoryReferenceGetPageCounts(_memRef, residentPageCount, dirtyPageCount); + else + { + IOMultiMemoryDescriptor * mmd; + IOSubMemoryDescriptor * smd; + if ((smd = OSDynamicCast(IOSubMemoryDescriptor, this))) + { + err = 
smd->getPageCounts(residentPageCount, dirtyPageCount); + } + else if ((mmd = OSDynamicCast(IOMultiMemoryDescriptor, this))) + { + err = mmd->getPageCounts(residentPageCount, dirtyPageCount); + } + } if (kIOMemoryThreadSafe & _flags) UNLOCK; return (err); @@ -2703,8 +2703,10 @@ IOReturn IOMemoryDescriptor::performOperation( IOOptionBits options, } #if defined(__i386__) || defined(__x86_64__) -extern vm_offset_t first_avail; -#define io_kernel_static_end first_avail + +#define io_kernel_static_start vm_kernel_stext +#define io_kernel_static_end vm_kernel_etext + #else #error io_kernel_static_end is undefined for this architecture #endif @@ -2713,7 +2715,7 @@ static kern_return_t io_get_kernel_static_upl( vm_map_t /* map */, uintptr_t offset, - vm_size_t *upl_size, + upl_size_t *upl_size, upl_t *upl, upl_page_info_array_t page_list, unsigned int *count, @@ -2762,7 +2764,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) if ((kIODirectionOutIn & forDirection) == kIODirectionNone) forDirection = (IODirection) (forDirection | getDirection()); - int uplFlags; // This Mem Desc's default flags for upl creation + upl_control_flags_t uplFlags; // This Mem Desc's default flags for upl creation switch (kIODirectionOutIn & forDirection) { case kIODirectionOut: @@ -2793,6 +2795,8 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) dataP->fMappedBase = 0; uplFlags |= UPL_SET_IO_WIRE | UPL_SET_LITE; + uplFlags |= UPL_MEMORY_TAG_MAKE(IOMemoryTag(kernel_map)); + if (kIODirectionPrepareToPhys32 & forDirection) { if (!mapper) uplFlags |= UPL_NEED_32BIT_ADDR; @@ -2857,15 +2861,18 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) theMap = IOPageableMapForAddress(kernelStart); } - int ioplFlags = uplFlags; + // ioplFlags is an in/out parameter + upl_control_flags_t ioplFlags = uplFlags; dataP = getDataP(_memoryEntries); pageInfo = getPageList(dataP); upl_page_list_ptr_t baseInfo = &pageInfo[pageIndex]; - 
vm_size_t ioplSize = round_page(numBytes); + upl_size_t ioplSize = round_page(numBytes); unsigned int numPageInfo = atop_32(ioplSize); - if ((theMap == kernel_map) && (kernelStart < io_kernel_static_end)) { + if ((theMap == kernel_map) + && (kernelStart >= io_kernel_static_start) + && (kernelStart < io_kernel_static_end)) { error = io_get_kernel_static_upl(theMap, kernelStart, &ioplSize, @@ -2877,7 +2884,8 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) else if (_memRef) { memory_object_offset_t entryOffset; - entryOffset = (mdOffset - iopl.fPageOffset - memRefEntry->offset); + entryOffset = mdOffset; + entryOffset = (entryOffset - iopl.fPageOffset - memRefEntry->offset); if (entryOffset >= memRefEntry->size) { memRefEntry++; if (memRefEntry >= &_memRef->entries[_memRef->count]) panic("memRefEntry"); @@ -2968,6 +2976,16 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) if (UPL_COPYOUT_FROM & uplFlags) _flags |= kIOMemoryPreparedReadOnly; + if ((kIOTracking & gIOKitDebug) + //&& !(_flags & kIOMemoryAutoPrepare) + ) + { + dataP = getDataP(_memoryEntries); +#if IOTRACKING + IOTrackingAdd(gIOWireTracking, &dataP->fWireTracking, ptoa(_pages), false); +#endif + } + return kIOReturnSuccess; abortExit: @@ -3028,106 +3046,52 @@ bool IOGeneralMemoryDescriptor::initMemoryEntries(size_t size, IOMapper * mapper IOReturn IOMemoryDescriptor::dmaMap( IOMapper * mapper, + IODMACommand * command, const IODMAMapSpecification * mapSpec, uint64_t offset, uint64_t length, - uint64_t * address, - ppnum_t * mapPages) + uint64_t * mapAddress, + uint64_t * mapLength) { - IOMDDMAWalkSegmentState walkState; - IOMDDMAWalkSegmentArgs * walkArgs = (IOMDDMAWalkSegmentArgs *) (void *)&walkState; - IOOptionBits mdOp; - IOReturn ret; - IOPhysicalLength segLen; - addr64_t phys, align, pageOffset; - ppnum_t base, pageIndex, pageCount; - uint64_t index; - uint32_t mapOptions = 0; + IOReturn ret; + uint32_t mapOptions; + mapOptions = 0; + 
mapOptions |= kIODMAMapReadAccess; if (!(kIOMemoryPreparedReadOnly & _flags)) mapOptions |= kIODMAMapWriteAccess; - walkArgs->fMapped = false; - mdOp = kIOMDFirstSegment; - pageCount = 0; - for (index = 0; index < length; ) - { - if (index && (page_mask & (index + pageOffset))) break; - - walkArgs->fOffset = offset + index; - ret = dmaCommandOperation(mdOp, &walkState, sizeof(walkState)); - mdOp = kIOMDWalkSegments; - if (ret != kIOReturnSuccess) break; - phys = walkArgs->fIOVMAddr; - segLen = walkArgs->fLength; - - align = (phys & page_mask); - if (!index) pageOffset = align; - else if (align) break; - pageCount += atop_64(round_page_64(align + segLen)); - index += segLen; - } - - if (index < length) return (kIOReturnVMError); + ret = mapper->iovmMapMemory(this, offset, length, mapOptions, + mapSpec, command, NULL, mapAddress, mapLength); - base = mapper->iovmMapMemory(this, offset, pageCount, - mapOptions, NULL, mapSpec); - - if (!base) return (kIOReturnNoResources); - - mdOp = kIOMDFirstSegment; - for (pageIndex = 0, index = 0; index < length; ) - { - walkArgs->fOffset = offset + index; - ret = dmaCommandOperation(mdOp, &walkState, sizeof(walkState)); - mdOp = kIOMDWalkSegments; - if (ret != kIOReturnSuccess) break; - phys = walkArgs->fIOVMAddr; - segLen = walkArgs->fLength; - - ppnum_t page = atop_64(phys); - ppnum_t count = atop_64(round_page_64(phys + segLen)) - page; - while (count--) - { - mapper->iovmInsert(base, pageIndex, page); - page++; - pageIndex++; - } - index += segLen; - } - if (pageIndex != pageCount) panic("pageIndex"); - - *address = ptoa_64(base) + pageOffset; - if (mapPages) *mapPages = pageCount; - - return (kIOReturnSuccess); + return (ret); } IOReturn IOGeneralMemoryDescriptor::dmaMap( IOMapper * mapper, + IODMACommand * command, const IODMAMapSpecification * mapSpec, uint64_t offset, uint64_t length, - uint64_t * address, - ppnum_t * mapPages) + uint64_t * mapAddress, + uint64_t * mapLength) { IOReturn err = kIOReturnSuccess; ioGMDData * 
dataP; IOOptionBits type = _flags & kIOMemoryTypeMask; - *address = 0; + *mapAddress = 0; if (kIOMemoryHostOnly & _flags) return (kIOReturnSuccess); if ((type == kIOMemoryTypePhysical) || (type == kIOMemoryTypePhysical64) || offset || (length != _length)) { - err = super::dmaMap(mapper, mapSpec, offset, length, address, mapPages); + err = super::dmaMap(mapper, command, mapSpec, offset, length, mapAddress, mapLength); } else if (_memoryEntries && _pages && (dataP = getDataP(_memoryEntries))) { const ioPLBlock * ioplList = getIOPLList(dataP); upl_page_info_t * pageList; uint32_t mapOptions = 0; - ppnum_t base; IODMAMapSpecification mapSpec; bzero(&mapSpec, sizeof(mapSpec)); @@ -3141,18 +3105,27 @@ IOReturn IOGeneralMemoryDescriptor::dmaMap( pageList = (upl_page_info_t *) ioplList->fPageInfo; mapOptions |= kIODMAMapPagingPath; } - else - pageList = getPageList(dataP); + else pageList = getPageList(dataP); - if (!(kIOMemoryPreparedReadOnly & _flags)) mapOptions |= kIODMAMapWriteAccess; + if ((_length == ptoa_64(_pages)) && !(page_mask & ioplList->fPageOffset)) + { + mapOptions |= kIODMAMapPageListFullyOccupied; + } + + mapOptions |= kIODMAMapReadAccess; + if (!(kIOMemoryPreparedReadOnly & _flags)) mapOptions |= kIODMAMapWriteAccess; // Check for direct device non-paged memory if (ioplList->fFlags & kIOPLOnDevice) mapOptions |= kIODMAMapPhysicallyContiguous; - base = mapper->iovmMapMemory( - this, offset, _pages, mapOptions, &pageList[0], &mapSpec); - *address = ptoa_64(base) + (ioplList->fPageOffset & PAGE_MASK); - if (mapPages) *mapPages = _pages; + IODMAMapPageList dmaPageList = + { + .pageOffset = ioplList->fPageOffset & page_mask, + .pageListCount = _pages, + .pageList = &pageList[0] + }; + err = mapper->iovmMapMemory(this, offset, length, mapOptions, &mapSpec, + command, &dmaPageList, mapAddress, mapLength); } return (err); @@ -3260,11 +3233,19 @@ IOReturn IOGeneralMemoryDescriptor::complete(IODirection forDirection) #endif /* IOMD_DEBUG_DMAACTIVE */ if 
(dataP->fMappedBase) { - dataP->fMapper->iovmFree(atop_64(dataP->fMappedBase), _pages); + dataP->fMapper->iovmUnmapMemory(this, NULL, dataP->fMappedBase, dataP->fMappedLength); dataP->fMappedBase = 0; } // Only complete iopls that we created which are for TypeVirtual if (kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type) { +#if IOTRACKING + if ((kIOTracking & gIOKitDebug) + //&& !(_flags & kIOMemoryAutoPrepare) + ) + { + IOTrackingRemove(gIOWireTracking, &dataP->fWireTracking, ptoa(_pages)); + } +#endif for (ind = 0; ind < count; ind++) if (ioplList[ind].fIOPL) { if (dataP->fCompletionError) @@ -3296,7 +3277,6 @@ IOReturn IOGeneralMemoryDescriptor::doMap( IOOptionBits options, IOByteCount __offset, IOByteCount __length ) - { #ifndef __LP64__ if (!(kIOMap64Bit & options)) panic("IOGeneralMemoryDescriptor::doMap !64bit"); @@ -3361,10 +3341,10 @@ IOReturn IOGeneralMemoryDescriptor::doMap( { do { - upl_t redirUPL2; - vm_size_t size; - int flags; - unsigned int lock_count; + upl_t redirUPL2; + upl_size_t size; + upl_control_flags_t flags; + unsigned int lock_count; if (!_memRef || (1 != _memRef->count)) { @@ -3374,7 +3354,8 @@ IOReturn IOGeneralMemoryDescriptor::doMap( size = round_page(mapping->fLength); flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL - | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS; + | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS + | UPL_MEMORY_TAG_MAKE(IOMemoryTag(kernel_map)); if (KERN_SUCCESS != memory_object_iopl_request(_memRef->entries[0].entry, 0, &size, &redirUPL2, NULL, NULL, @@ -3420,14 +3401,14 @@ IOReturn IOGeneralMemoryDescriptor::doMap( else { err = memoryReferenceMap(_memRef, mapping->fAddressMap, offset, length, options, &mapping->fAddress); - +#if IOTRACKING + if (err == KERN_SUCCESS) IOTrackingAdd(gIOMapTracking, &mapping->fTracking, length, false); +#endif if ((err == KERN_SUCCESS) && pager) { err = populateDevicePager(pager, mapping->fAddressMap, mapping->fAddress, offset, length, 
options); - if (err != KERN_SUCCESS) - { - doUnmap(mapping->fAddressMap, (IOVirtualAddress) mapping, 0); - } + + if (err != KERN_SUCCESS) doUnmap(mapping->fAddressMap, (IOVirtualAddress) mapping, 0); else if (kIOMapDefaultCache == (options & kIOMapCacheMask)) { mapping->fOptions |= ((_flags & kIOMemoryBufferCacheMask) >> kIOMemoryBufferCacheShift); @@ -3561,8 +3542,17 @@ IOReturn IOMemoryDescriptor::populateDevicePager( mach_vm_size_t page; mach_vm_size_t pageOffset; mach_vm_size_t pagerOffset; - IOPhysicalLength segLen; + IOPhysicalLength segLen, chunk; addr64_t physAddr; + IOOptionBits type; + + type = _flags & kIOMemoryTypeMask; + + if (reserved->dp.pagerContig) + { + sourceOffset = 0; + pagerOffset = 0; + } physAddr = getPhysicalSegment( sourceOffset, &segLen, kIOMemoryMapperNone ); assert( physAddr ); @@ -3583,26 +3573,24 @@ IOReturn IOMemoryDescriptor::populateDevicePager( if (kIOReturnSuccess != err) break; - if (reserved && reserved->dp.pagerContig) +#if DEBUG || DEVELOPMENT + if ((kIOMemoryTypeUPL != type) + && pmap_has_managed_page(atop_64(physAddr), atop_64(physAddr + segLen - 1))) { - IOPhysicalLength allLen; - addr64_t allPhys; + OSReportWithBacktrace("IOMemoryDescriptor physical with managed page 0x%qx:0x%qx", physAddr, segLen); + } +#endif /* DEBUG || DEVELOPMENT */ + + chunk = (reserved->dp.pagerContig ? 
round_page(segLen) : page_size); + for (page = 0; + (page < segLen) && (KERN_SUCCESS == err); + page += chunk) + { + err = device_pager_populate_object(pager, pagerOffset, + (ppnum_t)(atop_64(physAddr + page)), chunk); + pagerOffset += chunk; + } - allPhys = getPhysicalSegment( 0, &allLen, kIOMemoryMapperNone ); - assert( allPhys ); - err = device_pager_populate_object( pager, 0, atop_64(allPhys), round_page(allLen) ); - } - else - { - for( page = 0; - (page < segLen) && (KERN_SUCCESS == err); - page += page_size) - { - err = device_pager_populate_object(pager, pagerOffset, - (ppnum_t)(atop_64(physAddr + page)), page_size); - pagerOffset += page_size; - } - } assert (KERN_SUCCESS == err); if (err) break; @@ -3637,31 +3625,32 @@ IOReturn IOMemoryDescriptor::doUnmap( IOByteCount __length ) { IOReturn err; + IOMemoryMap * mapping; mach_vm_address_t address; mach_vm_size_t length; - if (__length) - { - address = __address; - length = __length; - } - else - { - addressMap = ((IOMemoryMap *) __address)->fAddressMap; - address = ((IOMemoryMap *) __address)->fAddress; - length = ((IOMemoryMap *) __address)->fLength; - } + if (__length) panic("doUnmap"); - if ((addressMap == kernel_map) && (kIOMemoryBufferPageable & _flags)) - addressMap = IOPageableMapForAddress( address ); + mapping = (IOMemoryMap *) __address; + addressMap = mapping->fAddressMap; + address = mapping->fAddress; + length = mapping->fLength; + if (kIOMapOverwrite & mapping->fOptions) err = KERN_SUCCESS; + else + { + if ((addressMap == kernel_map) && (kIOMemoryBufferPageable & _flags)) + addressMap = IOPageableMapForAddress( address ); #if DEBUG - if( kIOLogMapping & gIOKitDebug) - IOLog("IOMemoryDescriptor::doUnmap map %p, 0x%qx:0x%qx\n", - addressMap, address, length ); + if( kIOLogMapping & gIOKitDebug) IOLog("IOMemoryDescriptor::doUnmap map %p, 0x%qx:0x%qx\n", + addressMap, address, length ); #endif + err = mach_vm_deallocate( addressMap, address, length ); + } - err = mach_vm_deallocate( addressMap, 
address, length ); +#if IOTRACKING + IOTrackingRemove(gIOMapTracking, &mapping->fTracking, length); +#endif return (err); } @@ -3774,7 +3763,7 @@ IOReturn IOMemoryMap::unmap( void ) LOCK; if( fAddress && fAddressMap && (0 == fSuperMap) && fMemory - && (0 == (fOptions & kIOMapStatic))) { + && (0 == (kIOMapStatic & fOptions))) { err = fMemory->doUnmap(fAddressMap, (IOVirtualAddress) this, 0); @@ -3797,8 +3786,11 @@ IOReturn IOMemoryMap::unmap( void ) void IOMemoryMap::taskDied( void ) { LOCK; - if (fUserClientUnmap) - unmap(); + if (fUserClientUnmap) unmap(); +#if IOTRACKING + else IOTrackingRemove(gIOMapTracking, &fTracking, fLength); +#endif + if( fAddressMap) { vm_map_deallocate(fAddressMap); fAddressMap = 0; @@ -3958,10 +3950,13 @@ IOReturn IOMemoryMap::wireRange( IOReturn kr; mach_vm_address_t start = trunc_page_64(fAddress + offset); mach_vm_address_t end = round_page_64(fAddress + offset + length); - - if (kIODirectionOutIn & options) + vm_prot_t prot; + + prot = (kIODirectionOutIn & options); + if (prot) { - kr = vm_map_wire(fAddressMap, start, end, (kIODirectionOutIn & options), FALSE); + prot |= VM_PROT_MEMORY_TAG_MAKE(IOMemoryTag(kernel_map)); + kr = vm_map_wire(fAddressMap, start, end, prot, FALSE); } else { @@ -4009,9 +4004,13 @@ void IOMemoryDescriptor::initialize( void ) void IOMemoryDescriptor::free( void ) { - if( _mappings) - _mappings->release(); + if( _mappings) _mappings->release(); + if (reserved) + { + IODelete(reserved, IOMemoryDescriptorReserved, 1); + reserved = NULL; + } super::free(); } @@ -4117,9 +4116,10 @@ IOReturn IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, if (!fRedirUPL && fMemory->_memRef && (1 == fMemory->_memRef->count)) { - vm_size_t size = round_page(fLength); - int flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL - | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS; + upl_size_t size = round_page(fLength); + upl_control_flags_t flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL + | UPL_SET_LITE | UPL_SET_IO_WIRE | 
UPL_BLOCK_ACCESS + | UPL_MEMORY_TAG_MAKE(IOMemoryTag(kernel_map)); if (KERN_SUCCESS != memory_object_iopl_request(fMemory->_memRef->entries[0].entry, 0, &size, &fRedirUPL, NULL, NULL, &flags)) @@ -4451,144 +4451,6 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -#if DEVELOPMENT || DEBUG - -extern "C" void IOMemoryDescriptorTest(int x) -{ - IOGeneralMemoryDescriptor * md; - - vm_offset_t data[2]; - vm_size_t bsize = 16*1024*1024; - - vm_size_t srcsize, srcoffset, mapoffset, size; - - kern_return_t kr; - - kr = vm_allocate(kernel_map, &data[0], bsize, VM_FLAGS_ANYWHERE); - vm_inherit(kernel_map, data[0] + 1*4096, 4096, VM_INHERIT_NONE); - vm_inherit(kernel_map, data[0] + 16*4096, 4096, VM_INHERIT_NONE); - - kprintf("data 0x%lx, 0x%lx\n", (long)data[0], (long)data[1]); - - uint32_t idx, offidx; - for (idx = 0; idx < (bsize / sizeof(uint32_t)); idx++) - { - ((uint32_t*)data[0])[idx] = idx; - } - - for (srcoffset = 0; srcoffset < bsize; srcoffset = ((srcoffset << 1) + 0x40c)) - { - for (srcsize = 4; srcsize < (bsize - srcoffset - 1); srcsize = ((srcsize << 1) + 0x3fc)) - { - IOAddressRange ranges[3]; - uint32_t rangeCount = 1; - - bzero(&ranges[0], sizeof(ranges)); - ranges[0].address = data[0] + srcoffset; - ranges[0].length = srcsize; - - if (srcsize > 5*page_size) - { - ranges[0].length = 7634; - ranges[1].length = 9870; - ranges[2].length = srcsize - ranges[0].length - ranges[1].length; - ranges[1].address = ranges[0].address + ranges[0].length; - ranges[2].address = ranges[1].address + ranges[1].length; - rangeCount = 3; - } - else if ((srcsize > 2*page_size) && !(page_mask & srcoffset)) - { - ranges[0].length = 4096; - ranges[1].length = 4096; - ranges[2].length = srcsize - ranges[0].length - ranges[1].length; - ranges[0].address = data[0] + srcoffset + 4096; - ranges[1].address = data[0] + srcoffset; - ranges[2].address = ranges[0].address + ranges[0].length; - 
rangeCount = 3; - } - - md = OSDynamicCast(IOGeneralMemoryDescriptor, - IOMemoryDescriptor::withAddressRanges(&ranges[0], rangeCount, kIODirectionInOut, kernel_task)); - assert(md); - - kprintf("IOMemoryReferenceCreate [0x%lx @ 0x%lx]\n[0x%llx, 0x%llx],\n[0x%llx, 0x%llx],\n[0x%llx, 0x%llx]\n", - (long) srcsize, (long) srcoffset, - (long long) ranges[0].address - data[0], (long long) ranges[0].length, - (long long) ranges[1].address - data[0], (long long) ranges[1].length, - (long long) ranges[2].address - data[0], (long long) ranges[2].length); - - if (kIOReturnSuccess == kr) - { - for (mapoffset = 0; mapoffset < srcsize; mapoffset = ((mapoffset << 1) + 0xf00)) - { - for (size = 4; size < (srcsize - mapoffset - 1); size = ((size << 1) + 0x20)) - { - IOMemoryMap * map; - mach_vm_address_t addr = 0; - uint32_t data; - - kprintf("createMappingInTask(kernel_task, 0, kIOMapAnywhere, mapoffset, size); - if (map) addr = map->getAddress(); - else kr = kIOReturnError; - - kprintf(">mapRef 0x%x %llx\n", kr, addr); - - if (kIOReturnSuccess != kr) break; - kr = md->prepare(); - if (kIOReturnSuccess != kr) - { - kprintf("prepare() fail 0x%x\n", kr); - break; - } - for (idx = 0; idx < size; idx += sizeof(uint32_t)) - { - offidx = (idx + mapoffset + srcoffset); - if ((srcsize <= 5*page_size) && (srcsize > 2*page_size) && !(page_mask & srcoffset)) - { - if (offidx < 8192) offidx ^= 0x1000; - } - offidx /= sizeof(uint32_t); - - if (offidx != ((uint32_t*)addr)[idx/sizeof(uint32_t)]) - { - kprintf("vm mismatch @ 0x%x, 0x%lx, 0x%lx, \n", idx, (long) srcoffset, (long) mapoffset); - kr = kIOReturnBadMedia; - } - else - { - if (sizeof(data) != md->readBytes(mapoffset + idx, &data, sizeof(data))) data = 0; - if (offidx != data) - { - kprintf("phys mismatch @ 0x%x, 0x%lx, 0x%lx, \n", idx, (long) srcoffset, (long) mapoffset); - kr = kIOReturnBadMedia; - } - } - } - md->complete(); - map->release(); - kprintf("unmapRef %llx\n", addr); - } - if (kIOReturnSuccess != kr) break; - } - } - if 
(kIOReturnSuccess != kr) break; - } - if (kIOReturnSuccess != kr) break; - } - - if (kIOReturnSuccess != kr) kprintf("FAIL: src 0x%lx @ 0x%lx, map 0x%lx @ 0x%lx\n", - (long) srcsize, (long) srcoffset, (long) size, (long) mapoffset); - - vm_deallocate(kernel_map, data[0], bsize); -// vm_deallocate(kernel_map, data[1], size); -} - -#endif /* DEVELOPMENT || DEBUG */ - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 0); #ifdef __LP64__ OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 1); diff --git a/iokit/Kernel/IOMultiMemoryDescriptor.cpp b/iokit/Kernel/IOMultiMemoryDescriptor.cpp index 6d209ab1f..fd17233c1 100644 --- a/iokit/Kernel/IOMultiMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMultiMemoryDescriptor.cpp @@ -67,6 +67,8 @@ bool IOMultiMemoryDescriptor::initWithDescriptors( IODirection withDirection, bool asReference ) { + unsigned index; + IOOptionBits copyFlags; // // Initialize an IOMultiMemoryDescriptor. 
The "buffer" is made up of several // memory descriptors, that are to be chained end-to-end to make up a single @@ -117,7 +119,7 @@ bool IOMultiMemoryDescriptor::initWithDescriptors( /* bytes */ withCount * sizeof(IOMemoryDescriptor *) ); } - for ( unsigned index = 0; index < withCount; index++ ) + for ( index = 0; index < withCount; index++ ) { descriptors[index]->retain(); _length += descriptors[index]->getLength(); @@ -126,6 +128,16 @@ bool IOMultiMemoryDescriptor::initWithDescriptors( (withDirection & kIOMemoryDirectionMask)); } + enum { kCopyFlags = kIOMemoryBufferPageable }; + copyFlags = 0; + for ( index = 0; index < withCount; index++ ) + { + if (!index) copyFlags = (kCopyFlags & descriptors[index]->_flags); + else if (copyFlags != (kCopyFlags & descriptors[index]->_flags)) break; + } + if (index < withCount) return (false); + _flags |= copyFlags; + return true; } @@ -174,9 +186,9 @@ IOReturn IOMultiMemoryDescriptor::prepare(IODirection forDirection) if ( status != kIOReturnSuccess ) { - for ( unsigned indexUndo = 0; indexUndo <= index; indexUndo++ ) + for ( unsigned indexUndo = 0; indexUndo < index; indexUndo++ ) { - statusUndo = _descriptors[index]->complete(forDirection); + statusUndo = _descriptors[indexUndo]->complete(forDirection); assert(statusUndo == kIOReturnSuccess); } } @@ -212,10 +224,9 @@ IOReturn IOMultiMemoryDescriptor::complete(IODirection forDirection) return statusFinal; } -addr64_t IOMultiMemoryDescriptor::getPhysicalSegment( - IOByteCount offset, - IOByteCount * length, - IOOptionBits options ) +addr64_t IOMultiMemoryDescriptor::getPhysicalSegment(IOByteCount offset, + IOByteCount * length, + IOOptionBits options) { // // This method returns the physical address of the byte at the given offset @@ -238,3 +249,140 @@ addr64_t IOMultiMemoryDescriptor::getPhysicalSegment( return 0; } + +#include "IOKitKernelInternal.h" + +IOReturn IOMultiMemoryDescriptor::doMap(vm_map_t __addressMap, + IOVirtualAddress * __address, + IOOptionBits options, + 
IOByteCount __offset, + IOByteCount __length) +{ + IOMemoryMap * mapping = (IOMemoryMap *) *__address; + vm_map_t map = mapping->fAddressMap; + mach_vm_size_t offset = mapping->fOffset; + mach_vm_size_t length = mapping->fLength; + mach_vm_address_t address = mapping->fAddress; + + kern_return_t err; + IOOptionBits subOptions; + mach_vm_size_t mapOffset; + mach_vm_size_t bytesRemaining, chunk; + mach_vm_address_t nextAddress; + IOMemoryDescriptorMapAllocRef ref; + vm_prot_t prot; + + do + { + prot = VM_PROT_READ; + if (!(kIOMapReadOnly & options)) prot |= VM_PROT_WRITE; + ref.map = map; + ref.tag = IOMemoryTag(map); + ref.options = options; + ref.size = length; + ref.prot = prot; + if (options & kIOMapAnywhere) + // vm_map looks for addresses above here, even when VM_FLAGS_ANYWHERE + ref.mapped = 0; + else + ref.mapped = mapping->fAddress; + + if ((ref.map == kernel_map) && (kIOMemoryBufferPageable & _flags)) + err = IOIteratePageableMaps(ref.size, &IOMemoryDescriptorMapAlloc, &ref); + else + err = IOMemoryDescriptorMapAlloc(ref.map, &ref); + + if (KERN_SUCCESS != err) break; + + address = ref.mapped; + mapping->fAddress = address; + + mapOffset = offset; + bytesRemaining = length; + nextAddress = address; + assert(mapOffset <= _length); + subOptions = (options & ~kIOMapAnywhere) | kIOMapOverwrite; + + for (unsigned index = 0; bytesRemaining && (index < _descriptorsCount); index++) + { + chunk = _descriptors[index]->getLength(); + if (mapOffset >= chunk) + { + mapOffset -= chunk; + continue; + } + chunk -= mapOffset; + if (chunk > bytesRemaining) chunk = bytesRemaining; + IOMemoryMap * subMap; + subMap = _descriptors[index]->createMappingInTask(mapping->fAddressTask, nextAddress, subOptions, mapOffset, chunk ); + if (!subMap) break; + subMap->release(); // kIOMapOverwrite means it will not deallocate + + bytesRemaining -= chunk; + nextAddress += chunk; + mapOffset = 0; + } + if (bytesRemaining) err = kIOReturnUnderrun; + } + while (false); + + if (kIOReturnSuccess 
== err) + { +#if IOTRACKING + IOTrackingAdd(gIOMapTracking, &mapping->fTracking, length, false); +#endif + } + else + { + mapping->release(); + mapping = 0; + } + + return (err); +} + +IOReturn IOMultiMemoryDescriptor::setPurgeable( IOOptionBits newState, + IOOptionBits * oldState ) +{ + IOReturn err = kIOReturnSuccess; /* defined result when there are no sub-descriptors */ + IOOptionBits totalState, state; + + totalState = kIOMemoryPurgeableNonVolatile; + for (unsigned index = 0; index < _descriptorsCount; index++) + { + err = _descriptors[index]->setPurgeable(newState, &state); + if (kIOReturnSuccess != err) break; + + if (kIOMemoryPurgeableEmpty == state) totalState = kIOMemoryPurgeableEmpty; + else if (kIOMemoryPurgeableEmpty == totalState) continue; + else if (kIOMemoryPurgeableVolatile == totalState) continue; + else if (kIOMemoryPurgeableVolatile == state) totalState = kIOMemoryPurgeableVolatile; + else totalState = kIOMemoryPurgeableNonVolatile; + } + if (oldState) *oldState = totalState; + + return (err); +} + +IOReturn IOMultiMemoryDescriptor::getPageCounts(IOByteCount * pResidentPageCount, + IOByteCount * pDirtyPageCount) +{ + IOReturn err; + IOByteCount totalResidentPageCount, totalDirtyPageCount; + IOByteCount residentPageCount, dirtyPageCount; + + err = kIOReturnSuccess; + totalResidentPageCount = totalDirtyPageCount = 0; + for (unsigned index = 0; index < _descriptorsCount; index++) + { + err = _descriptors[index]->getPageCounts(&residentPageCount, &dirtyPageCount); + if (kIOReturnSuccess != err) break; + totalResidentPageCount += residentPageCount; + totalDirtyPageCount += dirtyPageCount; + } + + if (pResidentPageCount) *pResidentPageCount = totalResidentPageCount; + if (pDirtyPageCount) *pDirtyPageCount = totalDirtyPageCount; + + return (err); +} diff --git a/iokit/Kernel/IONVRAM.cpp b/iokit/Kernel/IONVRAM.cpp index 5af96b290..17cd841bc 100644 --- a/iokit/Kernel/IONVRAM.cpp +++ b/iokit/Kernel/IONVRAM.cpp @@ -32,9 +32,17 @@ #include #include #include +#include #include #include +#if CONFIG_MACF +extern "C" { +#include
+#include +}; +#endif /* MAC */ + #define super IOService #define kIONVRAMPrivilege kIOClientPrivilegeAdministrator @@ -67,7 +75,7 @@ bool IODTNVRAM::init(IORegistryEntry *old, const IORegistryPlane *plane) // race condition possible between // IODTNVRAM and IONVRAMController (restore loses boot-args) initProxyData(); - + return true; } @@ -109,6 +117,8 @@ void IODTNVRAM::registerNVRAMController(IONVRAMController *nvram) if (!_isProxied) { _nvramController->read(0, _nvramImage, kIODTNVRAMImageSize); initNVRAMImage(); + } else { + syncOFVariables(); } } @@ -217,9 +227,10 @@ void IODTNVRAM::initNVRAMImage(void) // Set the partition checksum. _nvramImage[freePartitionOffset + 1] = calculatePartitionChecksum(_nvramImage + freePartitionOffset); - - // Set the nvram image as dirty. - _nvramImageDirty = true; + + if (_nvramController != 0) { + _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize); + } } } else { _piImage = _nvramImage + _piPartitionOffset; @@ -231,20 +242,21 @@ void IODTNVRAM::initNVRAMImage(void) initOFVariables(); } -void IODTNVRAM::sync(void) +void IODTNVRAM::syncInternal(bool rateLimit) { - if (!_nvramImageDirty && !_ofImageDirty) return; - - // Don't try to sync OF Variables if the system has already paniced. - if (!_systemPaniced) syncOFVariables(); - // Don't try to perform controller operations if none has been registered. if (_nvramController == 0) return; + + // Rate limit requests to sync. 
Drivers that need this rate limiting will + // shadow the data and only write to flash when they get a sync call + if (rateLimit && !safeToSync()) return; - _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize); _nvramController->sync(); - - _nvramImageDirty = false; +} + +void IODTNVRAM::sync(void) +{ + syncInternal(false); } bool IODTNVRAM::serializeProperties(OSSerialize *s) const @@ -280,7 +292,11 @@ bool IODTNVRAM::serializeProperties(OSSerialize *s) const variablePerm = getOFVariablePerm(key); if ((hasPrivilege || (variablePerm != kOFVariablePermRootOnly)) && - ( ! (variablePerm == kOFVariablePermKernelOnly && current_task() != kernel_task) )) {} + ( ! (variablePerm == kOFVariablePermKernelOnly && current_task() != kernel_task) ) +#if CONFIG_MACF + && (current_task() == kernel_task || mac_iokit_check_nvram_get(kauth_cred_get(), key->getCStringNoCopy()) == 0) +#endif + ) { } else dict->removeObject(key); } } @@ -309,6 +325,12 @@ OSObject *IODTNVRAM::copyProperty(const OSSymbol *aKey) const } if (variablePerm == kOFVariablePermKernelOnly && current_task() != kernel_task) return 0; +#if CONFIG_MACF + if (current_task() != kernel_task && + mac_iokit_check_nvram_get(kauth_cred_get(), aKey->getCStringNoCopy()) != 0) + return 0; +#endif + IOLockLock(_ofLock); theObject = _ofDict->getObject(aKey); if (theObject) theObject->retain(); @@ -370,6 +392,12 @@ bool IODTNVRAM::setProperty(const OSSymbol *aKey, OSObject *anObject) // Don't allow change of 'aapl,panic-info'. if (aKey->isEqualTo(kIODTNVRAMPanicInfoKey)) return false; + +#if CONFIG_MACF + if (current_task() != kernel_task && + mac_iokit_check_nvram_set(kauth_cred_get(), aKey->getCStringNoCopy(), anObject) != 0) + return false; +#endif // Make sure the object is of the correct type. 
propType = getOFVariableType(aKey); @@ -403,9 +431,9 @@ bool IODTNVRAM::setProperty(const OSSymbol *aKey, OSObject *anObject) IOLockLock(_ofLock); result = _ofDict->setObject(aKey, propObject); IOLockUnlock(_ofLock); - + if (result) { - _ofImageDirty = true; + syncOFVariables(); } return result; @@ -429,15 +457,24 @@ void IODTNVRAM::removeProperty(const OSSymbol *aKey) // Don't allow change of 'aapl,panic-info'. if (aKey->isEqualTo(kIODTNVRAMPanicInfoKey)) return; +#if CONFIG_MACF + if (current_task() != kernel_task && + mac_iokit_check_nvram_delete(kauth_cred_get(), aKey->getCStringNoCopy()) != 0) + return; +#endif + // If the object exists, remove it from the dictionary. IOLockLock(_ofLock); result = _ofDict->getObject(aKey) != 0; if (result) { _ofDict->removeObject(aKey); - _ofImageDirty = true; } IOLockUnlock(_ofLock); + + if (result) { + syncOFVariables(); + } } IOReturn IODTNVRAM::setProperties(OSObject *properties) @@ -472,14 +509,15 @@ IOReturn IODTNVRAM::setProperties(OSObject *properties) } else { result = false; } - } else if(key->isEqualTo(kIONVRAMSyncNowPropertyKey)) { + } else if(key->isEqualTo(kIONVRAMSyncNowPropertyKey) || key->isEqualTo(kIONVRAMForceSyncNowPropertyKey)) { tmpStr = OSDynamicCast(OSString, object); if (tmpStr != 0) { - result = true; // We are not going to gaurantee sync, this is best effort + result = true; + + // We still want to throttle NVRAM commit rate for SyncNow. ForceSyncNow is provided as a really big hammer. 
- if(safeToSync()) - sync(); + syncInternal(key->isEqualTo(kIONVRAMSyncNowPropertyKey)); } else { result = false; @@ -587,7 +625,9 @@ IOReturn IODTNVRAM::writeNVRAMPartition(const OSSymbol *partitionID, bcopy(buffer, _nvramImage + partitionOffset + offset, length); - _nvramImageDirty = true; + if (_nvramController != 0) { + _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize); + } return kIOReturnSuccess; } @@ -605,7 +645,9 @@ IOByteCount IODTNVRAM::savePanicInfo(UInt8 *buffer, IOByteCount length) // Save the Panic Info length. *(UInt32 *)_piImage = length; - _nvramImageDirty = true; + if (_nvramController != 0) { + _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize); + } /* * This prevents OF variables from being committed if the system has panicked */ @@ -701,7 +743,9 @@ IOReturn IODTNVRAM::initOFVariables(void) // Clear the length from _piImage and mark dirty. *(UInt32 *)_piImage = 0; - _nvramImageDirty = true; + if (_nvramController != 0) { + _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize); + } } } @@ -717,9 +761,7 @@ IOReturn IODTNVRAM::syncOFVariables(void) OSObject *tmpObject; OSCollectionIterator *iter; - if ((_ofImage == 0) || (_ofDict == 0)) return kIOReturnNotReady; - - if (!_ofImageDirty) return kIOReturnSuccess; + if ((_ofImage == 0) || (_ofDict == 0) || _systemPaniced) return kIOReturnNotReady; buffer = tmpBuffer = IONew(UInt8, _ofPartitionSize); if (buffer == 0) return kIOReturnNoMemory; @@ -759,8 +801,9 @@ IOReturn IODTNVRAM::syncOFVariables(void) if (!ok) return kIOReturnBadArgument; - _ofImageDirty = false; - _nvramImageDirty = true; + if (_nvramController != 0) { + _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize); + } return kIOReturnSuccess; } @@ -1427,12 +1470,13 @@ IOReturn IODTNVRAM::writeNVRAMPropertyType1(IORegistryEntry *entry, if (ok) { ok = _ofDict->setObject(_registryPropertiesKey, data); - if (ok) _ofImageDirty = true; } IOLockUnlock(_ofLock); if (data) data->release(); + if (ok) 
syncOFVariables(); + return ok ? kIOReturnSuccess : kIOReturnNoMemory; } diff --git a/iokit/Kernel/IOPMPowerStateQueue.h b/iokit/Kernel/IOPMPowerStateQueue.h index 2b34e768b..bccf6f45f 100644 --- a/iokit/Kernel/IOPMPowerStateQueue.h +++ b/iokit/Kernel/IOPMPowerStateQueue.h @@ -51,8 +51,8 @@ class IOPMPowerStateQueue : public IOEventSource IOLock * queueLock; protected: - virtual bool checkForWork( void ); - virtual bool init( OSObject * owner, Action action ); + virtual bool checkForWork( void ) APPLE_KEXT_OVERRIDE; + virtual bool init( OSObject * owner, Action action ) APPLE_KEXT_OVERRIDE; public: static IOPMPowerStateQueue * PMPowerStateQueue( OSObject * owner, Action action ); diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index 73738c14b..a27622128 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -47,6 +47,7 @@ #include "IOPMPowerStateQueue.h" #include #include +#include "IOKitKernelInternal.h" #if HIBERNATION #include #endif @@ -63,6 +64,7 @@ __BEGIN_DECLS #include +#include __END_DECLS #if defined(__i386__) || defined(__x86_64__) @@ -172,7 +174,7 @@ extern "C" addr64_t kvtophys(vm_offset_t va); extern "C" int stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytesTraced); static void idleSleepTimerExpired( thread_call_param_t, thread_call_param_t ); -static void notifySystemShutdown( IOService * root, unsigned long event ); +static void notifySystemShutdown( IOService * root, uint32_t messageType ); static void handleAggressivesFunction( thread_call_param_t, thread_call_param_t ); static void pmEventTimeStamp(uint64_t *recordTS); @@ -192,13 +194,6 @@ static const OSSymbol *sleepMessagePEFunction = NULL; | kIOPMSupportedOnBatt \ | kIOPMSupportedOnUPS) -enum -{ - // not idle around autowake time, secs - kAutoWakePreWindow = 45, - kAutoWakePostWindow = 15 -}; - #define kLocalEvalClamshellCommand (1 << 15) #define kIdleSleepRetryInterval (3 * 60) @@ -316,6 
+311,7 @@ static uint32_t gAggressivesState = 0; uuid_string_t bootsessionuuid_string; static uint32_t gDarkWakeFlags = kDarkWakeFlagHIDTickleNone; +static uint32_t gNoIdleFlag = 0; static PMStatsStruct gPMStats; #if HIBERNATION @@ -330,6 +326,19 @@ struct timeval gIOLastWakeTime; static char gWakeReasonString[128]; static bool gWakeReasonSysctlRegistered = false; +#if defined(__i386__) || defined(__x86_64__) +static bool gSpinDumpBufferFull = false; +#endif + +static unsigned int gPMHaltBusyCount; +static unsigned int gPMHaltIdleCount; +static int gPMHaltDepth; +static uint32_t gPMHaltMessageType; +static IOLock * gPMHaltLock = 0; +static OSArray * gPMHaltArray = 0; +static const OSSymbol * gPMHaltClientAcknowledgeKey = 0; +static bool gPMQuiesced; + // Constants used as arguments to IOPMrootDomain::informCPUStateChange #define kCPUUnknownIndex 9999999 enum { @@ -357,7 +366,7 @@ class PMSettingHandle : public OSObject private: PMSettingObject *pmso; - void free(void); + void free(void) APPLE_KEXT_OVERRIDE; }; /* @@ -381,7 +390,7 @@ class PMSettingObject : public OSObject uint32_t settingCount; bool disabled; - void free(void); + void free(void) APPLE_KEXT_OVERRIDE; public: static PMSettingObject *pmSettingObject( @@ -431,7 +440,7 @@ class PMTraceWorker : public OSObject void traceLoginWindowPhase(uint8_t phase); int recordTopLevelPCIDevice(IOService *); void RTC_TRACE(void); - virtual bool serialize(OSSerialize *s) const; + virtual bool serialize(OSSerialize *s) const APPLE_KEXT_OVERRIDE; IOPMTracePointHandler tracePointHandler; void * tracePointTarget; @@ -525,7 +534,7 @@ class PMHaltWorker : public OSObject static void main( void * arg, wait_result_t waitResult ); static void work( PMHaltWorker * me ); static void checkTimeout( PMHaltWorker * me, AbsoluteTime * now ); - virtual void free( void ); + virtual void free( void ) APPLE_KEXT_OVERRIDE; }; OSDefineMetaClassAndFinalStructors( PMHaltWorker, OSObject ) @@ -582,6 +591,9 @@ extern "C" void 
IOSystemShutdownNotification(void) { IOPMRootDomainWillShutdown(); +#if HIBERNATION + IOHibernateSystemPostWake(); +#endif if (OSCompareAndSwap(0, 1, &gPagingOff)) { gRootDomain->handlePlatformHaltRestart(kPEPagingOff); @@ -646,6 +658,16 @@ IOPMrootDomain * IOPMrootDomain::construct( void ) return( root ); } +//****************************************************************************** +// updateConsoleUsersCallout +// +//****************************************************************************** + +static void updateConsoleUsersCallout(thread_call_param_t p0, thread_call_param_t p1) +{ + IOService::updateConsoleUsers(NULL, kIOMessageSystemHasPoweredOn); +} + //****************************************************************************** static void disk_sync_callout( thread_call_param_t p0, thread_call_param_t p1 ) @@ -664,6 +686,9 @@ static void disk_sync_callout( thread_call_param_t p0, thread_call_param_t p1 ) else { IOHibernateSystemPostWake(); + + if (gRootDomain) + gRootDomain->sleepWakeDebugSaveSpinDumpFile(); } #endif @@ -679,7 +704,7 @@ static void hib_debugSetup_callout( thread_call_param_t p0, thread_call_param_t uint32_t notifyRef = (uint32_t)(uintptr_t) p1; #if HIBERNATION - IOHibernateOpenForDebugData(); + IOOpenDebugDataFile(kSleepWakeStackBinFilename, SWD_BUF_SIZE); #endif rootDomain->allowPowerChange(notifyRef); @@ -753,6 +778,8 @@ static SYSCTL_PROC(_kern, OID_AUTO, willshutdown, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, 0, sysctl_willshutdown, "I", ""); +extern struct sysctl_oid sysctl__kern_iokittest; + static int sysctl_progressmeterenable @@ -791,6 +818,26 @@ static SYSCTL_PROC(_kern, OID_AUTO, progressmeter, 0, 0, sysctl_progressmeter, "I", ""); + + +static int +sysctl_consoleoptions +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int error; + int new_value, changed; + + error = sysctl_io_number(req, vc_user_options, sizeof(int), &new_value, 
&changed); + + if (changed) vc_set_options(new_value); + + return (error); +} + +static SYSCTL_PROC(_kern, OID_AUTO, consoleoptions, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, + 0, 0, sysctl_consoleoptions, "I", ""); + static int sysctl_wakereason SYSCTL_HANDLER_ARGS { @@ -807,7 +854,33 @@ SYSCTL_PROC(_kern, OID_AUTO, wakereason, CTLTYPE_STRING| CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, NULL, 0, sysctl_wakereason, "A", "wakereason"); +static int +sysctl_targettype SYSCTL_HANDLER_ARGS +{ + IOService * root; + OSObject * obj; + OSData * data; + char tt[32]; + + tt[0] = '\0'; + root = IOService::getServiceRoot(); + if (root && (obj = root->copyProperty(gIODTTargetTypeKey))) + { + if ((data = OSDynamicCast(OSData, obj))) + { + strlcpy(tt, (const char *) data->getBytesNoCopy(), sizeof(tt)); + } + obj->release(); + } + return sysctl_io_string(req, tt, 0, 0, NULL); +} + +SYSCTL_PROC(_hw, OID_AUTO, targettype, + CTLTYPE_STRING| CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, + NULL, 0, sysctl_targettype, "A", "targettype"); + static SYSCTL_INT(_debug, OID_AUTO, darkwake, CTLFLAG_RW, &gDarkWakeFlags, 0, ""); +static SYSCTL_INT(_debug, OID_AUTO, noidle, CTLFLAG_RW, &gNoIdleFlag, 0, ""); static const OSSymbol * gIOPMSettingAutoWakeCalendarKey; static const OSSymbol * gIOPMSettingAutoWakeSecondsKey; @@ -877,6 +950,7 @@ bool IOPMrootDomain::start( IOService * nub ) }; PE_parse_boot_argn("darkwake", &gDarkWakeFlags, sizeof(gDarkWakeFlags)); + PE_parse_boot_argn("noidle", &gNoIdleFlag, sizeof(gNoIdleFlag)); queue_init(&aggressivesQueue); aggressivesThreadCall = thread_call_allocate(handleAggressivesFunction, this); @@ -899,6 +973,10 @@ bool IOPMrootDomain::start( IOService * nub ) &hib_debugSetup_callout, (thread_call_param_t) this); + updateConsoleUsersEntry = thread_call_allocate( + &updateConsoleUsersCallout, + (thread_call_param_t) this); + #if DARK_TO_FULL_EVALUATE_CLAMSHELL fullWakeThreadCall = 
thread_call_allocate( OSMemberFunctionCast(thread_call_func_t, this, @@ -952,6 +1030,7 @@ bool IOPMrootDomain::start( IOService * nub ) _statsResponseTypeKey = OSSymbol::withCString(kIOPMStatsApplicationResponseTypeKey); _statsMessageTypeKey = OSSymbol::withCString(kIOPMStatsMessageTypeKey); _statsPowerCapsKey = OSSymbol::withCString(kIOPMStatsPowerCapabilityKey); + assertOnWakeSecs = -1; // Invalid value to prevent updates pmStatsLock = IOLockAlloc(); idxPMCPUClamshell = kCPUUnknownIndex; @@ -1047,7 +1126,10 @@ bool IOPMrootDomain::start( IOService * nub ) // IOBacklightDisplay can take a long time to load at boot, or it may // not load at all if you're booting with clamshell closed. We publish // 'DisplayDims' here redundantly to get it published early and at all. - psIterator = getMatchingServices( serviceMatching("IOPMPowerSource") ); + OSDictionary * matching; + matching = serviceMatching("IOPMPowerSource"); + psIterator = getMatchingServices( matching ); + if (matching) matching->release(); if( psIterator && psIterator->getNextObject() ) { // There's at least one battery on the system, so we publish @@ -1061,9 +1143,13 @@ bool IOPMrootDomain::start( IOService * nub ) sysctl_register_oid(&sysctl__kern_sleeptime); sysctl_register_oid(&sysctl__kern_waketime); sysctl_register_oid(&sysctl__kern_willshutdown); + sysctl_register_oid(&sysctl__kern_iokittest); + sysctl_register_oid(&sysctl__hw_targettype); + sysctl_register_oid(&sysctl__kern_progressmeterenable); sysctl_register_oid(&sysctl__kern_progressmeter); sysctl_register_oid(&sysctl__kern_wakereason); + sysctl_register_oid(&sysctl__kern_consoleoptions); #if HIBERNATION IOHibernateSystemInit(this); @@ -1212,25 +1298,6 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj ) // Relay our allowed PM settings onto our registered PM clients else if ((allowedPMSettings->getNextIndexOfObject(key, 0) != (unsigned int) -1)) { - if ((gIOPMSettingAutoWakeSecondsKey == key) && ((n = OSDynamicCast(OSNumber, obj)))) 
- { - UInt32 rsecs = n->unsigned32BitValue(); - if (!rsecs) - autoWakeStart = autoWakeEnd = 0; - else - { - AbsoluteTime deadline; - clock_interval_to_deadline(rsecs + kAutoWakePostWindow, kSecondScale, &deadline); - autoWakeEnd = AbsoluteTime_to_scalar(&deadline); - if (rsecs > kAutoWakePreWindow) - rsecs -= kAutoWakePreWindow; - else - rsecs = 0; - clock_interval_to_deadline(rsecs, kSecondScale, &deadline); - autoWakeStart = AbsoluteTime_to_scalar(&deadline); - } - } - return_value = setPMSetting(key, obj); if (kIOReturnSuccess != return_value) break; @@ -1793,6 +1860,10 @@ void IOPMrootDomain::startIdleSleepTimer( uint32_t inSeconds ) AbsoluteTime deadline; ASSERT_GATED(); + if (gNoIdleFlag) { + DLOG("idle timer not set (noidle=%d)\n", gNoIdleFlag); + return; + } if (inSeconds) { clock_interval_to_deadline(inSeconds, kSecondScale, &deadline); @@ -1819,6 +1890,18 @@ void IOPMrootDomain::cancelIdleSleepTimer( void ) DLOG("idle timer cancelled\n"); thread_call_cancel(extraSleepTimer); idleSleepTimerPending = false; + + if (!assertOnWakeSecs && systemWakeTime) { + AbsoluteTime now; + clock_usec_t microsecs; + clock_get_uptime(&now); + SUB_ABSOLUTETIME(&now, &systemWakeTime); + absolutetime_to_microtime(now, &assertOnWakeSecs, &microsecs); + if (assertOnWakeReport) { + HISTREPORT_TALLYVALUE(assertOnWakeReport, (int64_t)assertOnWakeSecs); + DLOG("Updated assertOnWake %lu\n", (unsigned long)assertOnWakeSecs); + } + } } } @@ -1859,13 +1942,6 @@ void IOPMrootDomain::handleSleepTimerExpiration( void ) idleSleepTimerPending = false; clock_get_uptime(&time); - if ((AbsoluteTime_to_scalar(&time) > autoWakeStart) && - (AbsoluteTime_to_scalar(&time) < autoWakeEnd)) - { - thread_call_enter_delayed(extraSleepTimer, *((AbsoluteTime *) &autoWakeEnd)); - return; - } - setQuickSpinDownTimeout(); adjustPowerState(true); } @@ -2007,6 +2083,7 @@ IOReturn IOPMrootDomain::privateSleepSystem( uint32_t sleepReason ) void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) { +
uint64_t now; ASSERT_GATED(); DLOG("PowerChangeDone: %u->%u\n", (uint32_t) previousPowerState, (uint32_t) getPowerState()); @@ -2024,13 +2101,27 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) clock_sec_t secs; clock_usec_t microsecs; - clock_get_calendar_microtime(&secs, &microsecs); + clock_get_calendar_absolute_and_microtime(&secs, &microsecs, &now); logtime(secs); gIOLastSleepTime.tv_sec = secs; gIOLastSleepTime.tv_usec = microsecs; gIOLastWakeTime.tv_sec = 0; gIOLastWakeTime.tv_usec = 0; + if (wake2DarkwakeDelay && sleepDelaysReport) { + clock_usec_t microsecs; + clock_sec_t wake2DarkwakeSecs, darkwake2SleepSecs; + // Update 'wake2DarkwakeDelay' histogram if this is a fullwake->sleep transition + + SUB_ABSOLUTETIME(&now, &ts_sleepStart); + absolutetime_to_microtime(now, &darkwake2SleepSecs, &microsecs); + absolutetime_to_microtime(wake2DarkwakeDelay, &wake2DarkwakeSecs, &microsecs); + HISTREPORT_TALLYVALUE(sleepDelaysReport, + (int64_t)(wake2DarkwakeSecs+darkwake2SleepSecs)); + + DLOG("Updated sleepDelaysReport %lu %lu\n", (unsigned long)wake2DarkwakeSecs, (unsigned long)darkwake2SleepSecs); + wake2DarkwakeDelay = 0; + } #if HIBERNATION LOG("System %sSleep\n", gIOHibernateState ?
"Safe" : ""); @@ -2047,6 +2138,7 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) event->release(); } } + assertOnWakeSecs = 0; ((IOService *)this)->stop_watchdog_timer(); //14456299 getPlatform()->sleepKernel(); @@ -2064,8 +2156,17 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) // sleep transition complete gSleepOrShutdownPending = 0; - // trip the reset of the calendar clock - clock_wakeup_calendar(); + // trip the reset of the calendar clock + { + clock_sec_t wakeSecs; + clock_usec_t wakeMicrosecs; + + clock_initialize_calendar(); + + clock_get_calendar_microtime(&wakeSecs, &wakeMicrosecs); + gIOLastWakeTime.tv_sec = wakeSecs; + gIOLastWakeTime.tv_usec = wakeMicrosecs; + } #if HIBERNATION LOG("System %sWake\n", gIOHibernateState ? "SafeSleep " : ""); @@ -2097,7 +2198,6 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) userWasActive = false; fullWakeReason = kFullWakeReasonNone; - OSString * wakeType = OSDynamicCast( OSString, getProperty(kIOPMRootDomainWakeTypeKey)); OSString * wakeReason = OSDynamicCast( @@ -2118,9 +2218,9 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) } else if ((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) != 0) { +#if HIBERNATION OSNumber * hibOptions = OSDynamicCast( OSNumber, getProperty(kIOHibernateOptionsKey)); - if (hibernateAborted || ((hibOptions && !(hibOptions->unsigned32BitValue() & kIOHibernateOptionDarkWake)))) { @@ -2131,6 +2231,7 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) hibOptions ? 
hibOptions->unsigned32BitValue() : 0); } else +#endif if (wakeType && ( wakeType->isEqualTo(kIOPMRootDomainWakeTypeUser) || wakeType->isEqualTo(kIOPMRootDomainWakeTypeAlarm))) @@ -2165,9 +2266,11 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) { darkWakeMaintenance = true; darkWakeSleepService = true; +#if HIBERNATION if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) { sleepToStandby = true; } +#endif } else if (wakeType && @@ -2213,6 +2316,10 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) fullWakeReason = kFullWakeReasonLocalUser; reportUserInput(); } + else if (displayPowerOnRequested && checkSystemCanSustainFullWake()) + { + handleDisplayPowerOn(); + } else if (!darkWakeMaintenance) { // Early/late tickle for non-maintenance wake. @@ -2232,8 +2339,24 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) #endif sleepCnt++; + thread_call_enter(updateConsoleUsersEntry); + changePowerStateToPriv(ON_STATE); } break; +#if !__i386__ && !__x86_64__ + case ON_STATE: { + if (previousPowerState != ON_STATE) + { + DLOG("Force re-evaluating aggressiveness\n"); + /* Force re-evaluate the aggressiveness values to set appropriate idle sleep timer */ + pmPowerStateQueue->submitPowerEvent( + kPowerEventPolicyStimulus, + (void *) kStimulusNoIdleSleepPreventers ); + } + break; + } + +#endif } } @@ -2255,6 +2378,7 @@ IOReturn IOPMrootDomain::requestPowerDomainState ( return super::requestPowerDomainState(0, childConnection, specification); } + //****************************************************************************** // updatePreventIdleSleepList // @@ -2310,6 +2434,8 @@ bool IOPMrootDomain::updatePreventIdleSleepList( changePowerStateTo(SLEEP_STATE); evaluatePolicy( kStimulusNoIdleSleepPreventers ); } + messageClient(kIOPMMessageIdleSleepPreventers, systemCapabilityNotifier, + &newCount, sizeof(newCount)); #if defined(__i386__) || defined(__x86_64__) if (addNotRemove && (service == 
wrangler) && !checkSystemCanSustainFullWake()) @@ -2330,7 +2456,7 @@ bool IOPMrootDomain::updatePreventIdleSleepList( void IOPMrootDomain::updatePreventSystemSleepList( IOService * service, bool addNotRemove ) { - unsigned int oldCount; + unsigned int oldCount, newCount; ASSERT_GATED(); if (this == service) @@ -2342,6 +2468,17 @@ void IOPMrootDomain::updatePreventSystemSleepList( preventSystemSleepList->setObject(service); DLOG("prevent system sleep list: %s+ (%u)\n", service->getName(), preventSystemSleepList->getCount()); + if (!assertOnWakeSecs && systemWakeTime) { + AbsoluteTime now; + clock_usec_t microsecs; + clock_get_uptime(&now); + SUB_ABSOLUTETIME(&now, &systemWakeTime); + absolutetime_to_microtime(now, &assertOnWakeSecs, &microsecs); + if (assertOnWakeReport) { + HISTREPORT_TALLYVALUE(assertOnWakeReport, (int64_t)assertOnWakeSecs); + DLOG("Updated assertOnWake %lu\n", (unsigned long)assertOnWakeSecs); + } + } } else if (preventSystemSleepList->member(service)) { @@ -2356,6 +2493,62 @@ void IOPMrootDomain::updatePreventSystemSleepList( evaluatePolicy( kStimulusDarkWakeEvaluate ); } } + newCount = preventSystemSleepList->getCount(); + messageClient(kIOPMMessageSystemSleepPreventers, systemCapabilityNotifier, + &newCount, sizeof(newCount)); +} + +void IOPMrootDomain::copySleepPreventersList(OSArray **idleSleepList, OSArray **systemSleepList) +{ + + OSCollectionIterator *iterator = NULL; + OSObject *object = NULL; + OSArray *array = NULL; + + if (!getPMworkloop()->inGate()) + { + getPMworkloop()->runAction( + OSMemberFunctionCast(IOWorkLoop::Action, this, + &IOPMrootDomain::IOPMrootDomain::copySleepPreventersList), + this, (void *)idleSleepList, (void *)systemSleepList); + return; + } + + if (idleSleepList && preventIdleSleepList && (preventIdleSleepList->getCount() != 0)) + { + iterator = OSCollectionIterator::withCollection(preventIdleSleepList); + array = OSArray::withCapacity(5); + + while ((object = iterator->getNextObject())) + { + IOService *service = 
OSDynamicCast(IOService, object); + if (object) + { + array->setObject(OSSymbol::withCString(service->getName())); + } + } + + iterator->release(); + *idleSleepList = array; + } + + if (systemSleepList && preventSystemSleepList && (preventSystemSleepList->getCount() != 0)) + { + iterator = OSCollectionIterator::withCollection(preventSystemSleepList); + array = OSArray::withCapacity(5); + + while ((object = iterator->getNextObject())) + { + IOService *service = OSDynamicCast(IOService, object); + if (object) + { + array->setObject(OSSymbol::withCString(service->getName())); + } + } + + iterator->release(); + *systemSleepList = array; + } } //****************************************************************************** @@ -2474,6 +2667,9 @@ void IOPMrootDomain::askChangeDownDone( void IOPMrootDomain::systemDidNotSleep( void ) { + // reset console lock state + thread_call_enter(updateConsoleUsersEntry); + if (!wrangler) { if (idleSeconds) @@ -2494,6 +2690,30 @@ void IOPMrootDomain::systemDidNotSleep( void ) preventTransitionToUserActive(false); IOService::setAdvisoryTickleEnable( true ); + + // After idle revert and cancel, send a did-change message to powerd + // to balance the previous will-change message. Kernel clients do not + // need this since sleep cannot be canceled once they are notified. + + if (toldPowerdCapWillChange && systemCapabilityNotifier && + (_pendingCapability != _currentCapability) && + ((_systemMessageClientMask & kSystemMessageClientPowerd) != 0)) + { + // Differs from a real capability gain change where notifyRef != 0, + // but it is zero here since no response is expected. 
+ + IOPMSystemCapabilityChangeParameters params; + + bzero(&params, sizeof(params)); + params.fromCapabilities = _pendingCapability; + params.toCapabilities = _currentCapability; + params.changeFlags = kIOPMSystemCapabilityDidChange; + + DLOG("MESG cap %x->%x did change\n", + params.fromCapabilities, params.toCapabilities); + messageClient(kIOMessageSystemCapabilityChange, systemCapabilityNotifier, + &params, sizeof(params)); + } } //****************************************************************************** @@ -2531,7 +2751,6 @@ void IOPMrootDomain::tellNoChangeDown( unsigned long stateNum ) void IOPMrootDomain::tellChangeUp( unsigned long stateNum ) { - DLOG("tellChangeUp %u->%u\n", (uint32_t) getPowerState(), (uint32_t) stateNum); @@ -2556,7 +2775,6 @@ void IOPMrootDomain::tellChangeUp( unsigned long stateNum ) tellClients( kIOMessageSystemWillPowerOn ); } - tracePoint( kIOPMTracePointWakeApplications ); tellClients( kIOMessageSystemHasPoweredOn ); } @@ -2583,11 +2801,14 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( if (messageType == kIOMessageSystemWillSleep) { #if HIBERNATION - uint32_t mem_only = 0; + static int32_t mem_only = -1; IOPowerStateChangeNotification *notify = (IOPowerStateChangeNotification *)messageArgs; - PE_parse_boot_argn("swd_mem_only", &mem_only, sizeof(mem_only)); + if ((mem_only == -1) && + (PE_parse_boot_argn("swd_mem_only", &mem_only, sizeof(mem_only)) == false)) { + mem_only = 0; + } if ((mem_only != 1) && (gRootDomain->sleepWakeDebugIsWdogEnabled())) { notify->returnValue = 30 * 1000 * 1000; @@ -2851,13 +3072,29 @@ hibernate_should_abort(void) void IOPMrootDomain::willNotifyPowerChildren( IOPMPowerStateIndex newPowerState ) { -#if HIBERNATION + OSDictionary *dict; + OSNumber *secs; + if (SLEEP_STATE == newPowerState) { +#if HIBERNATION IOHibernateSystemSleep(); IOHibernateIOKitSleep(); - } #endif + if (gRootDomain->activitySinceSleep()) { + dict = OSDictionary::withCapacity(1); + secs = OSNumber::withNumber(1, 32); + + if (dict && secs) 
{ + dict->setObject(gIOPMSettingDebugWakeRelativeKey, secs); + gRootDomain->setProperties(dict); + MSG("Reverting sleep with relative wake\n"); + } + if (dict) dict->release(); + if (secs) secs->release(); + } + + } } //****************************************************************************** @@ -4083,6 +4320,7 @@ struct HaltRestartApplierContext { IOPMPowerFlags PowerFlags; UInt32 MessageType; UInt32 Counter; + const char * LogString; }; static void @@ -4091,7 +4329,7 @@ platformHaltRestartApplier( OSObject * object, void * context ) IOPowerStateChangeNotification notify; HaltRestartApplierContext * ctx; AbsoluteTime startTime; - UInt32 deltaTime; + uint32_t deltaTime; ctx = (HaltRestartApplierContext *) context; @@ -4117,20 +4355,26 @@ platformHaltRestartApplier( OSObject * object, void * context ) if (notifier) { LOG("%s handler %p took %u ms\n", - (ctx->MessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" : - (ctx->MessageType == kIOMessageSystemPagingOff) ? "PagingOff" : "Restart", - OBFUSCATE(notifier->handler), (uint32_t) deltaTime ); + ctx->LogString, OBFUSCATE(notifier->handler), deltaTime); } } ctx->Counter++; } +static void quiescePowerTreeCallback( void * target, void * param ) +{ + IOLockLock(gPMHaltLock); + gPMQuiesced = true; + thread_wakeup(param); + IOLockUnlock(gPMHaltLock); +} + void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type ) { HaltRestartApplierContext ctx; AbsoluteTime startTime; - UInt32 deltaTime; + uint32_t deltaTime; memset(&ctx, 0, sizeof(ctx)); ctx.RootDomain = this; @@ -4142,16 +4386,19 @@ void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type ) case kPEUPSDelayHaltCPU: ctx.PowerState = OFF_STATE; ctx.MessageType = kIOMessageSystemWillPowerOff; + ctx.LogString = "PowerOff"; break; case kPERestartCPU: ctx.PowerState = RESTART_STATE; ctx.MessageType = kIOMessageSystemWillRestart; + ctx.LogString = "Restart"; break; case kPEPagingOff: ctx.PowerState = ON_STATE; ctx.MessageType = kIOMessageSystemPagingOff; 
+ ctx.LogString = "PagingOff"; IOService::updateConsoleUsers(NULL, kIOMessageSystemPagingOff); #if HIBERNATION IOHibernateSystemRestart(); @@ -4186,11 +4433,29 @@ void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type ) IOCPURunPlatformHaltRestartActions(pe_type); + // Wait for PM to quiesce + if ((kPEPagingOff != pe_type) && gPMHaltLock) + { + AbsoluteTime quiesceTime = mach_absolute_time(); + + IOLockLock(gPMHaltLock); + gPMQuiesced = false; + if (quiescePowerTree(this, &quiescePowerTreeCallback, &gPMQuiesced) == + kIOReturnSuccess) + { + while (!gPMQuiesced) + { + IOLockSleep(gPMHaltLock, &gPMQuiesced, THREAD_UNINT); + } + } + IOLockUnlock(gPMHaltLock); + + deltaTime = computeDeltaTimeMS(&quiesceTime); + DLOG("PM quiesce took %u ms\n", deltaTime); + } + deltaTime = computeDeltaTimeMS(&startTime); - LOG("%s all drivers took %u ms\n", - (ctx.MessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" : - (ctx.MessageType == kIOMessageSystemPagingOff) ? "PagingOff" : "Restart", - (uint32_t) deltaTime ); + LOG("%s all drivers took %u ms\n", ctx.LogString, deltaTime); } //****************************************************************************** @@ -4442,6 +4707,7 @@ void IOPMrootDomain::handleOurPowerChangeStart( _systemTransitionType = kSystemTransitionNone; _systemMessageClientMask = 0; capabilityLoss = false; + toldPowerdCapWillChange = false; if (lowBatteryCondition) { @@ -4565,6 +4831,10 @@ void IOPMrootDomain::handleOurPowerChangeStart( // Publish a UUID for the Sleep --> Wake cycle handlePublishSleepWakeUUID(true); + if (sleepDelaysReport) { + clock_get_uptime(&ts_sleepStart); + DLOG("sleepDelaysReport f->9 start at 0x%llx\n", ts_sleepStart); + } } } @@ -4587,6 +4857,10 @@ void IOPMrootDomain::handleOurPowerChangeStart( publishSleepReason = true; lastSleepReason = sleepReason; + if (sleepDelaysReport) { + clock_get_uptime(&ts_sleepStart); + DLOG("sleepDelaysReport 9->0 start at 0x%llx\n", ts_sleepStart); + } } // 3. System wake. 
@@ -4704,6 +4978,13 @@ void IOPMrootDomain::handleOurPowerChangeDone( // userIsActive will be cleared by wrangler powering down wranglerTickled = false; fullWakeReason = kFullWakeReasonNone; + + if (ts_sleepStart) { + clock_get_uptime(&wake2DarkwakeDelay); + SUB_ABSOLUTETIME(&wake2DarkwakeDelay, &ts_sleepStart); + DLOG("sleepDelaysReport f->9 end 0x%llx\n", wake2DarkwakeDelay); + ts_sleepStart = 0; + } } } @@ -4802,6 +5083,7 @@ void IOPMrootDomain::handleOurPowerChangeDone( _systemTransitionType = kSystemTransitionNone; _systemMessageClientMask = 0; + toldPowerdCapWillChange = false; logGraphicsClamp = false; } @@ -4964,7 +5246,8 @@ void IOPMrootDomain::handleActivityTickleForDisplayWrangler( clock_get_uptime(&userActivityTime); bool aborting = ((lastSleepReason == kIOPMSleepReasonIdle) - || (lastSleepReason == kIOPMSleepReasonMaintenance)); + || (lastSleepReason == kIOPMSleepReasonMaintenance) + || (lastSleepReason == kIOPMSleepReasonSoftware)); if (aborting) { userActivityCount++; DLOG("display wrangler tickled1 %d lastSleepReason %d\n", @@ -5230,6 +5513,12 @@ bool IOPMrootDomain::systemMessageFilter( capArgs->changeFlags = kIOPMSystemCapabilityWillChange; else capArgs->changeFlags = kIOPMSystemCapabilityDidChange; + + if ((object == (void *) systemCapabilityNotifier) && + context->isPreChange) + { + toldPowerdCapWillChange = true; + } } // Capability change messages only go to the PM configd plugin. 
@@ -5505,10 +5794,13 @@ void IOPMrootDomain::reportUserInput( void ) { #if !NO_KERNEL_HID OSIterator * iter; + OSDictionary * matching; if(!wrangler) { - iter = getMatchingServices(serviceMatching("IODisplayWrangler")); + matching = serviceMatching("IODisplayWrangler"); + iter = getMatchingServices(matching); + if (matching) matching->release(); if(iter) { wrangler = (IOService *) iter->getNextObject(); @@ -5567,11 +5859,8 @@ bool IOPMrootDomain::latchDisplayWranglerTickle( bool latch ) void IOPMrootDomain::setDisplayPowerOn( uint32_t options ) { - if (checkSystemCanSustainFullWake()) - { - pmPowerStateQueue->submitPowerEvent( kPowerEventSetDisplayPowerOn, - (void *) 0, options ); - } + pmPowerStateQueue->submitPowerEvent( kPowerEventSetDisplayPowerOn, + (void *) 0, options ); } // MARK: - @@ -5766,6 +6055,41 @@ void IOPMrootDomain::adjustPowerState( bool sleepASAP ) } } +void IOPMrootDomain::handleDisplayPowerOn( ) +{ + if (!wrangler) return; + if (displayPowerOnRequested) + { + if (!checkSystemCanSustainFullWake()) return; + + // Force wrangler to max power state. If system is in dark wake + // this alone won't raise the wrangler's power state. + + wrangler->changePowerStateForRootDomain(kWranglerPowerStateMax); + + // System in dark wake, always requesting full wake should + // not have any bad side-effects, even if the request fails. + + if (!CAP_CURRENT(kIOPMSystemCapabilityGraphics)) + { + setProperty(kIOPMRootDomainWakeTypeKey, kIOPMRootDomainWakeTypeNotification); + requestFullWake( kFullWakeReasonDisplayOn ); + } + } + else + { + // Relenquish desire to power up display. + // Must first transition to state 1 since wrangler doesn't + // power off the displays at state 0. At state 0 the root + // domain is removed from the wrangler's power client list. 
+ + wrangler->changePowerStateForRootDomain(kWranglerPowerStateMin + 1); + wrangler->changePowerStateForRootDomain(kWranglerPowerStateMin); + + } + +} + //****************************************************************************** // dispatchPowerEvent // @@ -5919,30 +6243,13 @@ void IOPMrootDomain::dispatchPowerEvent( if (!wrangler) break; if (arg1 != 0) { - // Force wrangler to max power state. If system is in dark wake - // this alone won't raise the wrangler's power state. - - wrangler->changePowerStateForRootDomain(kWranglerPowerStateMax); - - // System in dark wake, always requesting full wake should - // not have any bad side-effects, even if the request fails. - - if (!CAP_CURRENT(kIOPMSystemCapabilityGraphics)) - { - setProperty(kIOPMRootDomainWakeTypeKey, kIOPMRootDomainWakeTypeNotification); - requestFullWake( kFullWakeReasonDisplayOn ); - } + displayPowerOnRequested = true; } else { - // Relenquish desire to power up display. - // Must first transition to state 1 since wrangler doesn't - // power off the displays at state 0. At state 0 the root - // domain is removed from the wrangler's power client list. 
- - wrangler->changePowerStateForRootDomain(kWranglerPowerStateMin + 1); - wrangler->changePowerStateForRootDomain(kWranglerPowerStateMin); + displayPowerOnRequested = false; } + handleDisplayPowerOn(); break; } } @@ -6086,15 +6393,13 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) } /* - * Sleep if system is in dark wake + * Forward DW thermal notification to client, if system is not going to sleep */ - if (msg & kIOPMDWOverTemp) + if ((msg & kIOPMDWOverTemp) && (_systemTransitionType != kSystemTransitionSleep)) { DLOG("DarkWake thermal limits message received!\n"); - // Inform cap client that we're going to sleep messageClients(kIOPMMessageDarkWakeThermalEmergency); - } /* @@ -6791,8 +7096,20 @@ void IOPMrootDomain::evaluateAssertions(IOPMDriverAssertionType newAssertions, I } } - if (changedBits & kIOPMDriverAssertionCPUBit) + if (changedBits & kIOPMDriverAssertionCPUBit) { evaluatePolicy(kStimulusDarkWakeEvaluate); + if (!assertOnWakeSecs && systemWakeTime) { + AbsoluteTime now; + clock_usec_t microsecs; + clock_get_uptime(&now); + SUB_ABSOLUTETIME(&now, &systemWakeTime); + absolutetime_to_microtime(now, &assertOnWakeSecs, &microsecs); + if (assertOnWakeReport) { + HISTREPORT_TALLYVALUE(assertOnWakeReport, (int64_t)assertOnWakeSecs); + DLOG("Updated assertOnWake %lu\n", (unsigned long)assertOnWakeSecs); + } + } + } if (changedBits & kIOPMDriverAssertionReservedBit7) { bool value = (newAssertions & kIOPMDriverAssertionReservedBit7) ? 
true : false; @@ -6935,7 +7252,7 @@ void IOPMrootDomain::pmStatsRecordApplicationResponse( if (response->isEqualTo(gIOPMStatsDriverPSChangeSlow)) { powerCaps = OSNumber::withNumber(powerState, 32); -#if !defined(__i386__) && !defined(__x86_64__) +#if !defined(__i386__) && !defined(__x86_64__) && (DEVELOPMENT || DEBUG) IOLog("%s::powerStateChange type(%d) to(%lu) async took %d ms\n", name, messageType, powerState, delay_ms); @@ -7065,26 +7382,137 @@ void IOPMrootDomain::traceDetail( uint32_t detail ) } +void IOPMrootDomain::configureReportGated(uint64_t channel_id, uint64_t action, void *result) +{ + size_t reportSize; + void **report = NULL; + uint32_t bktCnt; + uint32_t bktSize; + uint32_t *clientCnt; + + ASSERT_GATED(); + + report = NULL; + if (channel_id == kAssertDelayChID) { + report = &assertOnWakeReport; + bktCnt = kAssertDelayBcktCnt; + bktSize = kAssertDelayBcktSize; + clientCnt = &assertOnWakeClientCnt; + } + else if (channel_id == kSleepDelaysChID) { + report = &sleepDelaysReport; + bktCnt = kSleepDelaysBcktCnt; + bktSize = kSleepDelaysBcktSize; + clientCnt = &sleepDelaysClientCnt; + } + + switch (action) + { + case kIOReportEnable: + + if (*report) { + (*clientCnt)++; + break; + } + + reportSize = HISTREPORT_BUFSIZE(bktCnt); + *report = IOMalloc(reportSize); + if (*report == NULL) { + break; + } + bzero(*report, reportSize); + HISTREPORT_INIT(bktCnt, bktSize, *report, reportSize, + getRegistryEntryID(), channel_id, kIOReportCategoryPower); + + if (channel_id == kAssertDelayChID) + assertOnWakeSecs = 0; + + break; + + case kIOReportDisable: + if (*clientCnt == 0) { + break; + } + if (*clientCnt == 1) + { + IOFree(*report, HISTREPORT_BUFSIZE(bktCnt)); + *report = NULL; + } + (*clientCnt)--; + + if (channel_id == kAssertDelayChID) + assertOnWakeSecs = -1; // Invalid value to prevent updates + + break; + + case kIOReportGetDimensions: + if (*report) { + HISTREPORT_UPDATERES(*report, kIOReportGetDimensions, result); + } + break; + } + + return; +} + IOReturn 
IOPMrootDomain::configureReport(IOReportChannelList *channelList, IOReportConfigureAction action, void *result, void *destination) { unsigned cnt; - if (action != kIOReportGetDimensions) goto exit; + uint64_t configAction = (uint64_t)action; for (cnt = 0; cnt < channelList->nchannels; cnt++) { if ( (channelList->channels[cnt].channel_id == kSleepCntChID) || (channelList->channels[cnt].channel_id == kDarkWkCntChID) || (channelList->channels[cnt].channel_id == kUserWkCntChID) ) { + if (action != kIOReportGetDimensions) continue; SIMPLEREPORT_UPDATERES(kIOReportGetDimensions, result); } + else if ((channelList->channels[cnt].channel_id == kAssertDelayChID) || + (channelList->channels[cnt].channel_id == kSleepDelaysChID)) { + gIOPMWorkLoop->runAction( + OSMemberFunctionCast(IOWorkLoop::Action, this, &IOPMrootDomain::configureReportGated), + (OSObject *)this, (void *)channelList->channels[cnt].channel_id, + (void *)configAction, (void *)result); + } } -exit: return super::configureReport(channelList, action, result, destination); } +IOReturn IOPMrootDomain::updateReportGated(uint64_t ch_id, void *result, IOBufferMemoryDescriptor *dest) +{ + + uint32_t size2cpy; + void *data2cpy; + void **report; + + ASSERT_GATED(); + + report = NULL; + if (ch_id == kAssertDelayChID) { + report = &assertOnWakeReport; + } + else if (ch_id == kSleepDelaysChID) { + report = &sleepDelaysReport; + } + + if (*report == NULL) { + return kIOReturnNotOpen; + } + + HISTREPORT_UPDATEPREP(*report, data2cpy, size2cpy); + if (size2cpy > (dest->getCapacity() - dest->getLength()) ) { + return kIOReturnOverrun; + } + + HISTREPORT_UPDATERES(*report, kIOReportCopyChannelData, result); + dest->appendBytes(data2cpy, size2cpy); + + return kIOReturnSuccess; +} IOReturn IOPMrootDomain::updateReport(IOReportChannelList *channelList, IOReportUpdateAction action, @@ -7103,7 +7531,15 @@ IOReturn IOPMrootDomain::updateReport(IOReportChannelList *channelList, for (cnt = 0; cnt < channelList->nchannels; cnt++) { ch_id 
= channelList->channels[cnt].channel_id ; - if ((ch_id == kSleepCntChID) || + if ((ch_id == kAssertDelayChID) || (ch_id == kSleepDelaysChID)) { + gIOPMWorkLoop->runAction( + OSMemberFunctionCast(IOWorkLoop::Action, this, &IOPMrootDomain::updateReportGated), + (OSObject *)this, (void *)ch_id, + (void *)result, (void *)dest); + continue; + + } + else if ((ch_id == kSleepCntChID) || (ch_id == kDarkWkCntChID) || (ch_id == kUserWkCntChID)) { SIMPLEREPORT_INIT(buf, sizeof(buf), getRegistryEntryID(), ch_id, kIOReportCategoryPower); } @@ -7312,7 +7748,7 @@ void PMTraceWorker::tracePCIPowerChange( uint64_t PMTraceWorker::getPMStatusCode( ) { - return (((uint64_t)traceData32 << 32) | (tracePhase << 24) | + return (((uint64_t)traceData32 << 32) | ((uint64_t)tracePhase << 24) | (loginWindowPhase << 16) | (traceData8 << 8)); } @@ -7325,14 +7761,6 @@ uint64_t PMTraceWorker::getPMStatusCode( ) // //****************************************************************************** -static unsigned int gPMHaltBusyCount; -static unsigned int gPMHaltIdleCount; -static int gPMHaltDepth; -static unsigned long gPMHaltEvent; -static IOLock * gPMHaltLock = 0; -static OSArray * gPMHaltArray = 0; -static const OSSymbol * gPMHaltClientAcknowledgeKey = 0; - PMHaltWorker * PMHaltWorker::worker( void ) { PMHaltWorker * me; @@ -7455,7 +7883,7 @@ void PMHaltWorker::work( PMHaltWorker * me ) me->timeout = false; IOLockUnlock(me->lock); - service->systemWillShutdown( gPMHaltEvent ); + service->systemWillShutdown( gPMHaltMessageType ); // Wait for driver acknowledgement IOLockLock(me->lock); @@ -7472,10 +7900,10 @@ void PMHaltWorker::work( PMHaltWorker * me ) if ((deltaTime > kPMHaltTimeoutMS) || timeout || (gIOKitDebug & kIOLogPMRootDomain)) { - LOG("%s driver %s (%p) took %u ms\n", - (gPMHaltEvent == kIOMessageSystemWillPowerOff) ? + LOG("%s driver %s (0x%llx) took %u ms\n", + (gPMHaltMessageType == kIOMessageSystemWillPowerOff) ? 
"PowerOff" : "Restart", - service->getName(), OBFUSCATE(service), + service->getName(), service->getRegistryEntryID(), (uint32_t) deltaTime ); } @@ -7506,7 +7934,7 @@ void PMHaltWorker::checkTimeout( PMHaltWorker * me, AbsoluteTime * now ) { me->timeout = true; MSG("%s still waiting on %s\n", - (gPMHaltEvent == kIOMessageSystemWillPowerOff) ? + (gPMHaltMessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" : "Restart", me->service->getName()); } @@ -7514,7 +7942,6 @@ void PMHaltWorker::checkTimeout( PMHaltWorker * me, AbsoluteTime * now ) IOLockUnlock(me->lock); } - //****************************************************************************** // acknowledgeSystemWillShutdown // @@ -7555,7 +7982,7 @@ void IOPMrootDomain::acknowledgeSystemWillShutdown( IOService * from ) //****************************************************************************** static void -notifySystemShutdown( IOService * root, unsigned long event ) +notifySystemShutdown( IOService * root, uint32_t messageType ) { #define PLACEHOLDER ((OSSet *)gPMHaltArray) IORegistryIterator * iter; @@ -7573,7 +8000,7 @@ notifySystemShutdown( IOService * root, unsigned long event ) void * baseFunc; bool ok; - DLOG("%s event = %lx\n", __FUNCTION__, event); + DLOG("%s msgType = 0x%x\n", __FUNCTION__, messageType); baseFunc = OSMemberFunctionCast(void *, root, &IOService::systemWillShutdown); @@ -7607,7 +8034,7 @@ notifySystemShutdown( IOService * root, unsigned long event ) if (!gPMHaltClientAcknowledgeKey) goto done; } - gPMHaltEvent = event; + gPMHaltMessageType = messageType; // Depth-first walk of PM plane @@ -7837,18 +8264,18 @@ OSObject * IOPMrootDomain::copyProperty( const char * aKey) const if (!strncmp(aKey, kIOPMSleepWakeWdogRebootKey, sizeof(kIOPMSleepWakeWdogRebootKey))) { if (swd_flags & SWD_BOOT_BY_SW_WDOG) - return OSBoolean::withBoolean(true); + return kOSBooleanTrue; else - return OSBoolean::withBoolean(false); + return kOSBooleanFalse; } if (!strncmp(aKey, 
kIOPMSleepWakeWdogLogsValidKey, sizeof(kIOPMSleepWakeWdogLogsValidKey))) { if (swd_flags & SWD_VALID_LOGS) - return OSBoolean::withBoolean(true); + return kOSBooleanTrue; else - return OSBoolean::withBoolean(false); + return kOSBooleanFalse; } @@ -7859,16 +8286,16 @@ OSObject * IOPMrootDomain::copyProperty( const char * aKey) const */ if (!strcmp(aKey, "DesktopMode")) { if (desktopMode) - return OSBoolean::withBoolean(true); + return kOSBooleanTrue; else - return OSBoolean::withBoolean(false); + return kOSBooleanFalse; } if (!strcmp(aKey, "DisplayIdleForDemandSleep")) { if (displayIdleForDemandSleep) { - return OSBoolean::withBoolean(true); + return kOSBooleanTrue; } else { - return OSBoolean::withBoolean(false); + return kOSBooleanFalse; } } @@ -7876,8 +8303,12 @@ OSObject * IOPMrootDomain::copyProperty( const char * aKey) const { OSArray * array = 0; WAKEEVENT_LOCK(); - if (_systemWakeEventsArray && _systemWakeEventsArray->getCount()) - array = OSArray::withArray(_systemWakeEventsArray); + if (_systemWakeEventsArray && _systemWakeEventsArray->getCount()) { + OSCollection *collection = _systemWakeEventsArray->copyCollection(); + if (collection && !(array = OSDynamicCast(OSArray, collection))) { + collection->release(); + } + } WAKEEVENT_UNLOCK(); return array; } @@ -7887,13 +8318,30 @@ OSObject * IOPMrootDomain::copyProperty( const char * aKey) const OSArray * array = 0; IOLockLock(pmStatsLock); if (pmStatsAppResponses && pmStatsAppResponses->getCount()) { - array = OSArray::withArray(pmStatsAppResponses); + OSCollection *collection = pmStatsAppResponses->copyCollection(); + if (collection && !(array = OSDynamicCast(OSArray, collection))) { + collection->release(); + } pmStatsAppResponses->flushCollection(); } IOLockUnlock(pmStatsLock); return array; } + if (!strcmp(aKey, kIOPMIdleSleepPreventersKey)) + { + OSArray *idleSleepList = NULL; + gRootDomain->copySleepPreventersList(&idleSleepList, NULL); + return idleSleepList; + } + + if (!strcmp(aKey, 
kIOPMSystemSleepPreventersKey)) + { + OSArray *systemSleepList = NULL; + gRootDomain->copySleepPreventersList(NULL, &systemSleepList); + return systemSleepList; + } + return NULL; } @@ -8618,17 +9066,17 @@ IOReturn IOPMrootDomain::restartWithStackshot() if ((swd_flags & SWD_WDOG_ENABLED) == 0) return kIOReturnError; - takeStackshot(true, true); + takeStackshot(true, true, false); return kIOReturnSuccess; } void IOPMrootDomain::sleepWakeDebugTrig(bool wdogTrigger) { - takeStackshot(wdogTrigger, false); + takeStackshot(wdogTrigger, false, false); } -void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog) +void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog, bool isSpinDump) { swd_hdr * hdr = NULL; addr64_t data[3]; @@ -8646,8 +9094,18 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog) IOMemoryMap * logBufMap = NULL; swd_stackshot_hdr *stackshotHdr = NULL; - if ( kIOSleepWakeWdogOff & gIOKitDebug ) - return; + + uint32_t bufSize; + uint32_t initialStackSize; + + if (isSpinDump) { + if (_systemTransitionType != kSystemTransitionSleep && + _systemTransitionType != kSystemTransitionWake) + return; + } else { + if ( kIOSleepWakeWdogOff & gIOKitDebug ) + return; + } if (wdogTrigger) { if (PE_parse_boot_argn("swd_panic", &wdog_panic, sizeof(wdog_panic)) && @@ -8659,8 +9117,7 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog) else if (swd_flags & SWD_BOOT_BY_SW_WDOG) { // If current boot is due to this watch dog trigger restart in previous boot, // then don't trigger again until at least 1 successful sleep & wake. 
- sleepCnt = displayWakeCnt = 1; - if (!(sleepCnt && displayWakeCnt)) { + if (!(sleepCnt && (displayWakeCnt || darkWakeCnt))) { IOLog("Shutting down due to repeated Sleep/Wake failures\n"); PEHaltRestart(kPEHaltCPU); return; @@ -8669,19 +9126,36 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog) } - if (sleepWakeDebugIsWdogEnabled() == false) - return; + if (isSpinDump) { + if (gSpinDumpBufferFull) + return; + if (swd_spindump_buffer == NULL) { + sleepWakeDebugSpinDumpMemAlloc(); + if (swd_spindump_buffer == NULL) return; + } + + bufSize = SWD_SPINDUMP_SIZE; + initialStackSize = SWD_INITIAL_SPINDUMP_SIZE; + } else { + if (sleepWakeDebugIsWdogEnabled() == false) + return; - if (swd_buffer == NULL) { - sleepWakeDebugMemAlloc(); - if (swd_buffer == NULL) return; + if (swd_buffer == NULL) { + sleepWakeDebugMemAlloc(); + if (swd_buffer == NULL) return; + } + + bufSize = SWD_BUF_SIZE; + initialStackSize = SWD_INITIAL_STACK_SIZE; } if (!OSCompareAndSwap(0, 1, &gRootDomain->swd_lock)) return; - - hdr = (swd_hdr *)swd_buffer; + if (isSpinDump) + hdr = (swd_hdr *)swd_spindump_buffer; + else + hdr = (swd_hdr *)swd_buffer; memset(hdr->UUID, 0x20, sizeof(hdr->UUID)); if ((UUIDstring = OSDynamicCast(OSString, getProperty(kIOPMSleepWakeUUIDKey))) != NULL ) { @@ -8696,7 +9170,7 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog) } dstAddr = (char*)hdr + hdr->spindump_offset; - bytesRemaining = SWD_BUF_SIZE - hdr->spindump_offset; + bytesRemaining = bufSize - hdr->spindump_offset; /* if AppleOSXWatchdog triggered the stackshot, set the flag in the heaer */ hdr->is_osx_watchdog = isOSXWatchdog; @@ -8722,7 +9196,7 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog) * to SWD_INITIAL_STACK_SIZE */ pid = -1; - size = (bytesRemaining > SWD_INITIAL_STACK_SIZE) ? SWD_INITIAL_STACK_SIZE : bytesRemaining; + size = (bytesRemaining > initialStackSize) ? 
initialStackSize : bytesRemaining; flags = STACKSHOT_SAVE_LOADINFO | STACKSHOT_SAVE_KEXT_LOADINFO|STACKSHOT_SAVE_KERNEL_FRAMES_ONLY; } else { @@ -8743,7 +9217,7 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog) IOSleep(10); // 10 ms } - hdr->spindump_size = (SWD_BUF_SIZE - bytesRemaining - hdr->spindump_offset); + hdr->spindump_size = (bufSize - bytesRemaining - hdr->spindump_offset); memset(hdr->cps, 0x20, sizeof(hdr->cps)); @@ -8753,6 +9227,12 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog) snprintf(hdr->PMStatusCode, sizeof(hdr->PMStatusCode), "\nCode: %08x %08x", (uint32_t)((code >> 32) & 0xffffffff), (uint32_t)(code & 0xffffffff)); memset(hdr->reason, 0x20, sizeof(hdr->reason)); + if (isSpinDump) { + snprintf(hdr->reason, sizeof(hdr->reason), "\nStackshot reason: PSC Delay\n\n"); + gRootDomain->swd_lock = 0; + gSpinDumpBufferFull = true; + return; + } snprintf(hdr->reason, sizeof(hdr->reason), "\nStackshot reason: Watchdog\n\n"); @@ -8834,6 +9314,41 @@ void IOPMrootDomain::sleepWakeDebugMemAlloc( ) gRootDomain->swd_lock = 0; } +void IOPMrootDomain::sleepWakeDebugSpinDumpMemAlloc( ) +{ + vm_size_t size = SWD_SPINDUMP_SIZE; + + swd_hdr *hdr = NULL; + + IOBufferMemoryDescriptor *memDesc = NULL; + + if (!OSCompareAndSwap(0, 1, &gRootDomain->swd_lock)) + return; + + memDesc = IOBufferMemoryDescriptor::inTaskWithOptions( + kernel_task, kIODirectionIn|kIOMemoryMapperNone, + SWD_SPINDUMP_SIZE); + + if (memDesc == NULL) + { + DLOG("Failed to allocate Memory descriptor for sleepWake debug spindump\n"); + goto exit; + } + + + hdr = (swd_hdr *)memDesc->getBytesNoCopy(); + memset(hdr, 0, sizeof(swd_hdr)); + + hdr->signature = SWD_HDR_SIGNATURE; + hdr->alloc_size = size; + + hdr->spindump_offset = sizeof(swd_hdr); + swd_spindump_buffer = (void *)hdr; + +exit: + gRootDomain->swd_lock = 0; +} + void IOPMrootDomain::sleepWakeDebugEnableWdog() { swd_flags |= SWD_WDOG_ENABLED; @@ -8844,7 +9359,28 @@ void 
IOPMrootDomain::sleepWakeDebugEnableWdog() bool IOPMrootDomain::sleepWakeDebugIsWdogEnabled() { return ((swd_flags & SWD_WDOG_ENABLED) && - !systemBooting && !systemShutdown); + !systemBooting && !systemShutdown && !gWillShutdown); +} + +void IOPMrootDomain::sleepWakeDebugSaveSpinDumpFile() +{ + swd_hdr *hdr = NULL; + errno_t error = EIO; + + if (swd_spindump_buffer && gSpinDumpBufferFull) { + hdr = (swd_hdr *)swd_spindump_buffer; + + error = sleepWakeDebugSaveFile("/var/tmp/SleepWakeDelayStacks.dump", + (char*)hdr+hdr->spindump_offset, hdr->spindump_size); + + if (error) return; + + sleepWakeDebugSaveFile("/var/tmp/SleepWakeDelayLog.dump", + (char*)hdr+offsetof(swd_hdr, UUID), + sizeof(swd_hdr)-offsetof(swd_hdr, UUID)); + + gSpinDumpBufferFull = false; + } } errno_t IOPMrootDomain::sleepWakeDebugSaveFile(const char *name, char *buf, int len) @@ -8929,7 +9465,7 @@ errno_t IOPMrootDomain::sleepWakeDebugCopyFile( bytesToRead = (round_page(numBytes) > tmpBufSize) ? tmpBufSize : round_page(numBytes); readFileOffset = trunc_page(srcOffset); - DLOG("Read file (numBytes:0x%llx)\n", bytesToRead); + DLOG("Read file (numBytes:0x%llx offset:0x%llx)\n", bytesToRead, readFileOffset); error = vn_rdwr(UIO_READ, srcVp, tmpBuf, bytesToRead, readFileOffset, UIO_SYSSPACE, IO_SKIP_ENCRYPTION|IO_SYNC|IO_NODELOCKED|IO_UNIT|IO_NOCACHE, vfs_context_ucred(srcCtx), (int *) 0, @@ -8946,6 +9482,7 @@ errno_t IOPMrootDomain::sleepWakeDebugCopyFile( if (crc) { newcrc = crc32(newcrc, (void *)srcDataOffset, bytesToWrite); } + DLOG("Write file (numBytes:0x%llx offset:0x%llx)\n", bytesToWrite, writeFileOffset); error = vn_rdwr(UIO_WRITE, vp, (char *)srcDataOffset, bytesToWrite, writeFileOffset, UIO_SYSSPACE, IO_SYNC|IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, @@ -8988,11 +9525,70 @@ errno_t IOPMrootDomain::sleepWakeDebugCopyFile( +} +void IOPMrootDomain::checkForValidDebugData(const char *fname, vfs_context_t *ctx, + void *tmpBuf, struct vnode **vp) +{ + int rc; + uint64_t hdrOffset; + 
+ struct vnode_attr va; + IOHibernateImageHeader *imageHdr; + + *vp = NULL; + if (vnode_open(fname, (FREAD | O_NOFOLLOW), 0, + VNODE_LOOKUP_NOFOLLOW, vp, *ctx) != 0) + { + DMSG("sleepWakeDebugDumpFromFile: Failed to open the file %s\n", fname); + goto err; + } + VATTR_INIT(&va); + VATTR_WANTED(&va, va_nlink); + VATTR_WANTED(&va, va_data_alloc); + if ((*vp)->v_type != VREG || + vnode_getattr((*vp), &va, *ctx) || va.va_nlink != 1) { + DMSG("sleepWakeDebugDumpFromFile: Bailing as %s is not a regular file\n", fname); + goto err; + } + + /* Read the sleepimage file header */ + rc = vn_rdwr(UIO_READ, *vp, (char *)tmpBuf, round_page(sizeof(IOHibernateImageHeader)), 0, + UIO_SYSSPACE, IO_SKIP_ENCRYPTION|IO_SYNC|IO_NODELOCKED|IO_UNIT|IO_NOCACHE, + vfs_context_ucred(*ctx), (int *) 0, + vfs_context_proc(*ctx)); + if (rc != 0) { + DMSG("sleepWakeDebugDumpFromFile: Failed to read header size %lu(rc=%d) from %s\n", + round_page(sizeof(IOHibernateImageHeader)), rc, fname); + goto err; + } + + imageHdr = ((IOHibernateImageHeader *)tmpBuf); + if (imageHdr->signature != kIOHibernateHeaderDebugDataSignature) { + DMSG("sleepWakeDebugDumpFromFile: File %s header has unexpected value 0x%x\n", + fname, imageHdr->signature); + goto err; + } + + /* Sleep/Wake debug header(swd_hdr) is at the beggining of the second block */ + hdrOffset = imageHdr->deviceBlockSize; + if (hdrOffset + sizeof(swd_hdr) >= va.va_data_alloc) { + DMSG("sleepWakeDebugDumpFromFile: header is crossing file size(0x%llx) in file %s\n", + va.va_data_alloc, fname); + goto err; + } + + return; + +err: + if (*vp) vnode_close(*vp, FREAD, *ctx); + *vp = NULL; + + return; } void IOPMrootDomain::sleepWakeDebugDumpFromFile( ) { - +#if HIBERNATION int rc; char hibernateFilename[MAXPATHLEN+1]; char PMStatusCode[100]; @@ -9008,9 +9604,7 @@ void IOPMrootDomain::sleepWakeDebugDumpFromFile( ) struct vnode *vp = NULL; vfs_context_t ctx = NULL; - struct vnode_attr va; IOBufferMemoryDescriptor *tmpBufDesc = NULL; - IOHibernateImageHeader 
*imageHdr; DLOG("sleepWakeDebugDumpFromFile\n"); if ((swd_flags & SWD_LOGS_IN_FILE) == 0) @@ -9020,20 +9614,6 @@ void IOPMrootDomain::sleepWakeDebugDumpFromFile( ) return; - hibernateFilename[0] = 0; - if ((obj = copyProperty(kIOHibernateFileKey))) - { - if ((str = OSDynamicCast(OSString, obj))) - strlcpy(hibernateFilename, str->getCStringNoCopy(), - sizeof(hibernateFilename)); - obj->release(); - } - if (!hibernateFilename[0]) { - DMSG("sleepWakeDebugDumpFromFile: Failed to hib file name\n"); - goto exit; - } - DLOG("sleepWakeDebugDumpFromFile: Hib file name %s\n", hibernateFilename); - /* Allocate a temp buffer to copy data between files */ tmpBufSize = 2*4096; tmpBufDesc = IOBufferMemoryDescriptor:: @@ -9048,44 +9628,37 @@ void IOPMrootDomain::sleepWakeDebugDumpFromFile( ) tmpBuf = tmpBufDesc->getBytesNoCopy(); ctx = vfs_context_create(vfs_context_current()); - if (vnode_open(hibernateFilename, (FREAD | O_NOFOLLOW), 0, - VNODE_LOOKUP_NOFOLLOW, &vp, ctx) != 0) - { - DMSG("sleepWakeDebugDumpFromFile: Failed to open the hibernate file %s\n", hibernateFilename); - goto exit; - } - VATTR_INIT(&va); - VATTR_WANTED(&va, va_nlink); - VATTR_WANTED(&va, va_data_alloc); - if (vp->v_type != VREG || - vnode_getattr(vp, &va, ctx) || va.va_nlink != 1) { - DMSG("sleepWakeDebugDumpFromFile: Bailing as this is not a regular file\n"); - goto exit; - } - /* Read the sleepimage file header */ - rc = vn_rdwr(UIO_READ, vp, (char *)tmpBuf, round_page(sizeof(IOHibernateImageHeader)), 0, - UIO_SYSSPACE, IO_SKIP_ENCRYPTION|IO_SYNC|IO_NODELOCKED|IO_UNIT|IO_NOCACHE, - vfs_context_ucred(ctx), (int *) 0, - vfs_context_proc(ctx)); - if (rc != 0) { - DMSG("sleepWakeDebugDumpFromFile: Failed to read header size %lu(rc=%d)\n", round_page(sizeof(IOHibernateImageHeader)), rc); - goto exit; - } + /* First check if 'kSleepWakeStackBinFilename' has valid data */ + checkForValidDebugData(kSleepWakeStackBinFilename, &ctx, tmpBuf, &vp); + if (vp == NULL) { + /* Check if the debug data is saved to 
hibernation file */ + hibernateFilename[0] = 0; + if ((obj = copyProperty(kIOHibernateFileKey))) + { + if ((str = OSDynamicCast(OSString, obj))) + strlcpy(hibernateFilename, str->getCStringNoCopy(), + sizeof(hibernateFilename)); + obj->release(); + } + if (!hibernateFilename[0]) { + DMSG("sleepWakeDebugDumpFromFile: Failed to get hibernation file name\n"); + goto exit; + } - imageHdr = ((IOHibernateImageHeader *)tmpBuf); - if (imageHdr->signature != kIOHibernateHeaderDebugDataSignature) { - DMSG("sleepWakeDebugDumpFromFile: File header has unexpected value 0x%x\n", imageHdr->signature); - goto exit; + checkForValidDebugData(hibernateFilename, &ctx, tmpBuf, &vp); + if (vp == NULL) { + DMSG("sleepWakeDebugDumpFromFile: No valid debug data is found\n"); + goto exit; + } + DLOG("Getting SW Stacks image from file %s\n", hibernateFilename); } - - /* Sleep/Wake debug header(swd_hdr) is at the beggining of the second block */ - hdrOffset = imageHdr->deviceBlockSize; - if (hdrOffset + sizeof(swd_hdr) >= va.va_data_alloc) { - DMSG("sleepWakeDebugDumpFromFile: header is crossing file size(0x%llx)\n", va.va_data_alloc); - goto exit; + else { + DLOG("Getting SW Stacks image from file %s\n", kSleepWakeStackBinFilename); } + hdrOffset = ((IOHibernateImageHeader *)tmpBuf)->deviceBlockSize; + DLOG("Reading swd_hdr len 0x%lx offset 0x%lx\n", round_page(sizeof(swd_hdr)), trunc_page(hdrOffset)); /* Read the sleep/wake debug header(swd_hdr) */ rc = vn_rdwr(UIO_READ, vp, (char *)tmpBuf, round_page(sizeof(swd_hdr)), trunc_page(hdrOffset), @@ -9147,7 +9720,7 @@ void IOPMrootDomain::sleepWakeDebugDumpFromFile( ) if (vp) vnode_close(vp, FREAD, ctx); if (ctx) vfs_context_rele(ctx); if (tmpBufDesc) tmpBufDesc->release(); - +#endif /* HIBERNATION */ } void IOPMrootDomain::sleepWakeDebugDumpFromMem(IOMemoryMap *logBufMap) @@ -9340,7 +9913,7 @@ void IOPMrootDomain::sleepWakeDebugTrig(bool restart) { } -void IOPMrootDomain::takeStackshot(bool restart, bool isOSXWatchdog) +void 
IOPMrootDomain::takeStackshot(bool restart, bool isOSXWatchdog, bool isSpinDump) { #pragma unused(restart) #pragma unused(isOSXWatchdog) diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp index 8bcc9342e..31ab8b700 100644 --- a/iokit/Kernel/IOPlatformExpert.cpp +++ b/iokit/Kernel/IOPlatformExpert.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include @@ -819,6 +820,10 @@ int PEHaltRestart(unsigned int type) replies. */ } + else if(type == kPEPanicRestartCPU || type == kPEPanicSync) + { + IOCPURunPlatformPanicActions(type); + } if (gIOPlatform) return gIOPlatform->haltRestart(type); else return -1; @@ -901,6 +906,38 @@ boolean_t PEReadNVRAMProperty(const char *symbol, void *value, return FALSE; } +boolean_t +PEWriteNVRAMBooleanProperty(const char *symbol, boolean_t value) +{ + const OSSymbol *sym = NULL; + OSBoolean *data = NULL; + bool ret = false; + + if (symbol == NULL) { + goto exit; + } + + if (init_gIOOptionsEntry() < 0) { + goto exit; + } + + if ((sym = OSSymbol::withCStringNoCopy(symbol)) == NULL) { + goto exit; + } + + data = value ? kOSBooleanTrue : kOSBooleanFalse; + ret = gIOOptionsEntry->setProperty(sym, data); + + sym->release(); + + /* success, force the NVRAM to flush writes */ + if (ret == true) { + gIOOptionsEntry->sync(); + } + +exit: + return ret; +} boolean_t PEWriteNVRAMProperty(const char *symbol, const void *value, const unsigned int len) @@ -1188,6 +1225,7 @@ void IODTPlatformExpert::processTopLevel( IORegistryEntry * rootEntry ) } else { dtNVRAM->attach(this); dtNVRAM->registerService(); + options->release(); } } } @@ -1195,7 +1233,10 @@ void IODTPlatformExpert::processTopLevel( IORegistryEntry * rootEntry ) // Publish the cpus. 
cpus = rootEntry->childFromPath( "cpus", gIODTPlane); if ( cpus) + { createNubs( this, IODTFindMatchingEntries( cpus, kIODTExclusive, 0)); + cpus->release(); + } // publish top level, minus excludeList createNubs( this, IODTFindMatchingEntries( rootEntry, kIODTExclusive, excludeList())); @@ -1476,6 +1517,40 @@ IOReturn IOPlatformExpertDevice::setProperties( OSObject * properties ) return kIOReturnUnsupported; } +IOReturn IOPlatformExpertDevice::newUserClient( task_t owningTask, void * securityID, + UInt32 type, OSDictionary * properties, + IOUserClient ** handler ) +{ + IOReturn err = kIOReturnSuccess; + IOUserClient * newConnect = 0; + IOUserClient * theConnect = 0; + + switch (type) + { + case kIOKitDiagnosticsClientType: + newConnect = IOKitDiagnosticsClient::withTask(owningTask); + if (!newConnect) err = kIOReturnNotPermitted; + break; + default: + err = kIOReturnBadArgument; + } + + if (newConnect) + { + if ((false == newConnect->attach(this)) + || (false == newConnect->start(this))) + { + newConnect->detach( this ); + newConnect->release(); + } + else + theConnect = newConnect; + } + + *handler = theConnect; + return (err); +} + void IOPlatformExpertDevice::free() { if (workLoop) @@ -1526,7 +1601,7 @@ class IOPanicPlatform : IOPlatformExpert { OSDeclareDefaultStructors(IOPanicPlatform); public: - bool start(IOService * provider); + bool start(IOService * provider) APPLE_KEXT_OVERRIDE; }; diff --git a/iokit/Kernel/IOPolledInterface.cpp b/iokit/Kernel/IOPolledInterface.cpp index 440ac3235..1917714fc 100644 --- a/iokit/Kernel/IOPolledInterface.cpp +++ b/iokit/Kernel/IOPolledInterface.cpp @@ -26,8 +26,19 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#include +#include + +#include +#include #include +#include #include +#include +#include +#include +#include "IOKitKernelInternal.h" + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -50,13 +61,929 @@ OSMetaClassDefineReservedUnused(IOPolledInterface, 13); 
OSMetaClassDefineReservedUnused(IOPolledInterface, 14); OSMetaClassDefineReservedUnused(IOPolledInterface, 15); -#if !HIBERNATION -/* KPI stub if hibernate is configured off */ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifndef kIOMediaPreferredBlockSizeKey +#define kIOMediaPreferredBlockSizeKey "Preferred Block Size" +#endif + +enum { kDefaultIOSize = 128*1024 }; + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +class IOPolledFilePollers : public OSObject +{ + OSDeclareDefaultStructors(IOPolledFilePollers) + +public: + IOService * media; + OSArray * pollers; + IOBufferMemoryDescriptor * ioBuffer; + bool abortable; + bool io; + IOReturn ioStatus; + uint32_t openCount; + uint32_t openState; + + static IOPolledFilePollers * copyPollers(IOService * media); +}; + +OSDefineMetaClassAndStructors(IOPolledFilePollers, OSObject) + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOPolledFilePollers * +IOPolledFilePollers::copyPollers(IOService * media) +{ + IOPolledFilePollers * vars; + IOReturn err; + IOService * service; + OSObject * obj; + IORegistryEntry * next; + IORegistryEntry * child; + + if ((obj = media->copyProperty(kIOPolledInterfaceStackKey))) + { + return (OSDynamicCast(IOPolledFilePollers, obj)); + } + + do + { + vars = OSTypeAlloc(IOPolledFilePollers); + vars->init(); + + vars->pollers = OSArray::withCapacity(4); + if (!vars->pollers) + { + err = kIOReturnNoMemory; + break; + } + + next = vars->media = media; + do + { + IOPolledInterface * poller; + OSObject * obj; + + obj = next->getProperty(kIOPolledInterfaceSupportKey); + if (kOSBooleanFalse == obj) + { + vars->pollers->flushCollection(); + break; + } + else if ((poller = OSDynamicCast(IOPolledInterface, obj))) + vars->pollers->setObject(poller); + + if ((service = OSDynamicCast(IOService, next)) + && service->getDeviceMemory() + && !vars->pollers->getCount()) break; + + child = next; + } + while ((next = 
child->getParentEntry(gIOServicePlane)) + && child->isParent(next, gIOServicePlane, true)); + + if (!vars->pollers->getCount()) + { + err = kIOReturnUnsupported; + break; + } + } + while (false); + + media->setProperty(kIOPolledInterfaceStackKey, vars); + + return (vars); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static IOReturn +IOPolledFilePollersIODone(IOPolledFilePollers * vars, bool abortable); + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static IOReturn +IOPolledFilePollersProbe(IOPolledFilePollers * vars) +{ + IOReturn err = kIOReturnError; + int32_t idx; + IOPolledInterface * poller; + + for (idx = vars->pollers->getCount() - 1; idx >= 0; idx--) + { + poller = (IOPolledInterface *) vars->pollers->getObject(idx); + err = poller->probe(vars->media); + if (err) + { + HIBLOG("IOPolledInterface::probe[%d] 0x%x\n", idx, err); + break; + } + } + + return (err); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOReturn +IOPolledFilePollersOpen(IOPolledFileIOVars * filevars, uint32_t state, bool abortable) +{ + + IOPolledFilePollers * vars = filevars->pollers; + IOBufferMemoryDescriptor * ioBuffer; + IOPolledInterface * poller; + IOService * next; + IOReturn err = kIOReturnError; + int32_t idx; + + vars->abortable = abortable; + ioBuffer = 0; + + if (kIOPolledAfterSleepState == state) + { + vars->ioStatus = 0; + vars->io = false; + } + (void) IOPolledFilePollersIODone(vars, false); + + if ((kIOPolledPreflightState == state) || (kIOPolledPreflightCoreDumpState == state)) + { + ioBuffer = vars->ioBuffer; + if (!ioBuffer) + { + vars->ioBuffer = ioBuffer = IOBufferMemoryDescriptor::withOptions(kIODirectionInOut, + 2 * kDefaultIOSize, page_size); + if (!ioBuffer) return (kIOReturnNoMemory); + } + } + + for (idx = vars->pollers->getCount() - 1; idx >= 0; idx--) + { + poller = (IOPolledInterface *) vars->pollers->getObject(idx); + err = poller->open(state, 
ioBuffer); + if ((kIOReturnSuccess != err) && (kIOPolledPreflightCoreDumpState == state)) + { + err = poller->open(kIOPolledPreflightState, ioBuffer); + } + if (kIOReturnSuccess != err) + { + HIBLOG("IOPolledInterface::open[%d] 0x%x\n", idx, err); + break; + } + } + if (kIOReturnSuccess == err) + { + next = vars->media; + while (next) + { + next->setProperty(kIOPolledInterfaceActiveKey, kOSBooleanTrue); + next = next->getProvider(); + } + } + + return (err); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOReturn +IOPolledFilePollersClose(IOPolledFileIOVars * filevars, uint32_t state) +{ + IOPolledFilePollers * vars = filevars->pollers; + IOPolledInterface * poller; + IORegistryEntry * next; + IOReturn err; + int32_t idx; + + (void) IOPolledFilePollersIODone(vars, false); + + if (kIOPolledPostflightState == state) + { + vars->openCount--; + if (vars->openCount) + { + // 21207427 + IOPolledFilePollersOpen(filevars, vars->openState, vars->abortable); + return (kIOReturnSuccess); + } + } + + for (idx = 0, err = kIOReturnSuccess; + (poller = (IOPolledInterface *) vars->pollers->getObject(idx)); + idx++) + { + err = poller->close(state); + if (err) HIBLOG("IOPolledInterface::close[%d] 0x%x\n", idx, err); + } + + if (kIOPolledPostflightState == state) + { + next = vars->media; + while (next) + { + next->removeProperty(kIOPolledInterfaceActiveKey); + next = next->getParentEntry(gIOServicePlane); + } + + if (vars->ioBuffer) + { + vars->ioBuffer->release(); + vars->ioBuffer = 0; + } + } + return (err); +} +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOMemoryDescriptor * +IOPolledFileGetIOBuffer(IOPolledFileIOVars * vars) +{ + return (vars->pollers->ioBuffer); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static void +IOPolledIOComplete(void * target, + void * parameter, + IOReturn status, + UInt64 actualByteCount) +{ + IOPolledFilePollers * vars = 
(IOPolledFilePollers *) parameter; + + vars->ioStatus = status; +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static IOReturn +IOStartPolledIO(IOPolledFilePollers * vars, + uint32_t operation, uint32_t bufferOffset, + uint64_t deviceOffset, uint64_t length) +{ + IOReturn err; + IOPolledInterface * poller; + IOPolledCompletion completion; + + err = vars->ioStatus; + if (kIOReturnSuccess != err) return (err); + + completion.target = 0; + completion.action = &IOPolledIOComplete; + completion.parameter = vars; + + vars->ioStatus = -1; + + poller = (IOPolledInterface *) vars->pollers->getObject(0); + err = poller->startIO(operation, bufferOffset, deviceOffset, length, completion); + if (err) + HIBLOG("IOPolledInterface::startIO[%d] 0x%x\n", 0, err); + + return (err); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static IOReturn +IOPolledFilePollersIODone(IOPolledFilePollers * vars, bool abortable) +{ + IOReturn err = kIOReturnSuccess; + int32_t idx = 0; + IOPolledInterface * poller; + AbsoluteTime deadline; + + if (!vars->io) return (kIOReturnSuccess); + + abortable &= vars->abortable; + + clock_interval_to_deadline(2000, kMillisecondScale, &deadline); + + while (-1 == vars->ioStatus) + { + for (idx = 0; + (poller = (IOPolledInterface *) vars->pollers->getObject(idx)); + idx++) + { + IOReturn newErr; + newErr = poller->checkForWork(); + if ((newErr == kIOReturnAborted) && !abortable) + newErr = kIOReturnSuccess; + if (kIOReturnSuccess == err) + err = newErr; + } + if ((false) && (kIOReturnSuccess == err) && (mach_absolute_time() > AbsoluteTime_to_scalar(&deadline))) + { + HIBLOG("IOPolledInterface::forced timeout\n"); + vars->ioStatus = kIOReturnTimeout; + } + } + vars->io = false; + +#if HIBERNATION + if ((kIOReturnSuccess == err) && abortable && hibernate_should_abort()) + { + err = kIOReturnAborted; + HIBLOG("IOPolledInterface::checkForWork sw abort\n"); + } +#endif + + if (err) + { + 
HIBLOG("IOPolledInterface::checkForWork[%d] 0x%x\n", idx, err); + } + else + { + err = vars->ioStatus; + if (kIOReturnSuccess != err) HIBLOG("IOPolledInterface::ioStatus 0x%x\n", err); + } + + return (err); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +struct _OpenFileContext +{ + OSData * extents; + uint64_t size; +}; + +static void +file_extent_callback(void * ref, uint64_t start, uint64_t length) +{ + _OpenFileContext * ctx = (_OpenFileContext *) ref; + IOPolledFileExtent extent; + + extent.start = start; + extent.length = length; + ctx->extents->appendBytes(&extent, sizeof(extent)); + ctx->size += length; +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static IOService * +IOCopyMediaForDev(dev_t device) +{ + OSDictionary * matching; + OSNumber * num; + OSIterator * iter; + IOService * result = 0; + + matching = IOService::serviceMatching("IOMedia"); + if (!matching) + return (0); + do + { + num = OSNumber::withNumber(major(device), 32); + if (!num) + break; + matching->setObject(kIOBSDMajorKey, num); + num->release(); + num = OSNumber::withNumber(minor(device), 32); + if (!num) + break; + matching->setObject(kIOBSDMinorKey, num); + num->release(); + if (!num) + break; + iter = IOService::getMatchingServices(matching); + if (iter) + { + result = (IOService *) iter->getNextObject(); + result->retain(); + iter->release(); + } + } + while (false); + matching->release(); + + return (result); +} + +static IOReturn +IOGetVolumeCryptKey(dev_t block_dev, OSString ** pKeyUUID, + uint8_t * volumeCryptKey, size_t keySize) +{ + IOReturn err; + IOService * part; + OSString * keyUUID = 0; + OSString * keyStoreUUID = 0; + uuid_t volumeKeyUUID; + aks_volume_key_t vek; + + static IOService * sKeyStore; + + part = IOCopyMediaForDev(block_dev); + if (!part) return (kIOReturnNotFound); + + err = 
part->callPlatformFunction(PLATFORM_FUNCTION_GET_MEDIA_ENCRYPTION_KEY_UUID, false, + (void *) &keyUUID, (void *) &keyStoreUUID, NULL, NULL); + if ((kIOReturnSuccess == err) && keyUUID && keyStoreUUID) + { +// IOLog("got volume key %s\n", keyStoreUUID->getCStringNoCopy()); + + if (!sKeyStore) + sKeyStore = (IOService *) IORegistryEntry::fromPath(AKS_SERVICE_PATH, gIOServicePlane); + if (sKeyStore) + err = uuid_parse(keyStoreUUID->getCStringNoCopy(), volumeKeyUUID); + else + err = kIOReturnNoResources; + if (kIOReturnSuccess == err) + err = sKeyStore->callPlatformFunction(gAKSGetKey, true, volumeKeyUUID, &vek, NULL, NULL); + if (kIOReturnSuccess != err) + IOLog("volume key err 0x%x\n", err); + else + { + if (vek.key.keybytecount < keySize) keySize = vek.key.keybytecount; + bcopy(&vek.key.keybytes[0], volumeCryptKey, keySize); + } + bzero(&vek, sizeof(vek)); + + } + part->release(); + if (pKeyUUID) *pKeyUUID = keyUUID; + + return (err); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + IOReturn -IOPolledInterface::checkAllForWork(void) +IOPolledFileOpen(const char * filename, + uint64_t setFileSize, uint64_t fsFreeSize, + void * write_file_addr, size_t write_file_len, + IOPolledFileIOVars ** fileVars, + OSData ** imagePath, + uint8_t * volumeCryptKey, size_t keySize) { - IOReturn err = kIOReturnNotReady; + IOReturn err = kIOReturnSuccess; + IOPolledFileIOVars * vars; + _OpenFileContext ctx; + OSData * extentsData; + OSNumber * num; + IOService * part = 0; + dev_t block_dev; + dev_t image_dev; + AbsoluteTime startTime, endTime; + uint64_t nsec; + + vars = IONew(IOPolledFileIOVars, 1); + if (!vars) return (kIOReturnNoMemory); + bzero(vars, sizeof(*vars)); + vars->allocated = true; + + do + { + extentsData = OSData::withCapacity(32); + ctx.extents = extentsData; + ctx.size = 0; + clock_get_uptime(&startTime); + + vars->fileRef = kern_open_file_for_direct_io(filename, + (write_file_addr != NULL) || (0 != setFileSize), + 
&file_extent_callback, &ctx, + setFileSize, + fsFreeSize, + // write file: + 0, write_file_addr, write_file_len, + // results + &block_dev, + &image_dev, + &vars->block0, + &vars->maxiobytes, + &vars->flags); +#if 0 + uint32_t msDelay = (131071 & random()); + HIBLOG("sleep %d\n", msDelay); + IOSleep(msDelay); +#endif + clock_get_uptime(&endTime); + SUB_ABSOLUTETIME(&endTime, &startTime); + absolutetime_to_nanoseconds(endTime, &nsec); + + if (!vars->fileRef) err = kIOReturnNoSpace; + + HIBLOG("kern_open_file_for_direct_io took %qd ms\n", nsec / 1000000ULL); + if (kIOReturnSuccess != err) break; + + HIBLOG("Opened file %s, size %qd, extents %ld, maxio %qx ssd %d\n", filename, ctx.size, + (extentsData->getLength() / sizeof(IOPolledFileExtent)) - 1, + vars->maxiobytes, kIOPolledFileSSD & vars->flags); + assert(!vars->block0); + if (extentsData->getLength() < sizeof(IOPolledFileExtent)) + { + err = kIOReturnNoSpace; + break; + } + + vars->fileSize = ctx.size; + vars->extentMap = (IOPolledFileExtent *) extentsData->getBytesNoCopy(); + + part = IOCopyMediaForDev(image_dev); + if (!part) + { + err = kIOReturnNotFound; + break; + } + + if (!(vars->pollers = IOPolledFilePollers::copyPollers(part))) break; + + if ((num = OSDynamicCast(OSNumber, part->getProperty(kIOMediaPreferredBlockSizeKey)))) + vars->blockSize = num->unsigned32BitValue(); + if (vars->blockSize < 4096) vars->blockSize = 4096; + + HIBLOG("polled file major %d, minor %d, blocksize %ld, pollers %d\n", + major(image_dev), minor(image_dev), (long)vars->blockSize, + vars->pollers->pollers->getCount()); + + OSString * keyUUID = NULL; + if (volumeCryptKey) + { + err = IOGetVolumeCryptKey(block_dev, &keyUUID, volumeCryptKey, keySize); + } + + *fileVars = vars; + vars->fileExtents = extentsData; + + // make imagePath + OSData * data; + if (imagePath) + { +#if defined(__i386__) || defined(__x86_64__) + char str2[24 + sizeof(uuid_string_t) + 2]; + + if (keyUUID) + snprintf(str2, sizeof(str2), "%qx:%s", + 
vars->extentMap[0].start, keyUUID->getCStringNoCopy()); + else + snprintf(str2, sizeof(str2), "%qx", vars->extentMap[0].start); + + err = IOService::getPlatform()->callPlatformFunction( + gIOCreateEFIDevicePathSymbol, false, + (void *) part, (void *) str2, + (void *) (uintptr_t) true, (void *) &data); +#else + data = 0; + err = kIOReturnSuccess; +#endif + if (kIOReturnSuccess != err) + { + HIBLOG("error 0x%x getting path\n", err); + break; + } + *imagePath = data; + } + } + while (false); + + if (kIOReturnSuccess != err) + { + HIBLOG("error 0x%x opening polled file\n", err); + IOPolledFileClose(&vars, 0, 0, 0, 0, 0); + } + + if (part) part->release(); - return err; + return (err); } -#endif /* !HIBERNATION */ + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOReturn +IOPolledFileClose(IOPolledFileIOVars ** pVars, + off_t write_offset, void * addr, size_t write_length, + off_t discard_offset, off_t discard_end) +{ + IOPolledFileIOVars * vars; + + vars = *pVars; + if (!vars) return(kIOReturnSuccess); + + if (vars->fileRef) + { + kern_close_file_for_direct_io(vars->fileRef, write_offset, addr, write_length, + discard_offset, discard_end); + vars->fileRef = NULL; + } + if (vars->fileExtents) + { + vars->fileExtents->release(); + vars->fileExtents = 0; + } + if (vars->pollers) + { + vars->pollers->release(); + vars->pollers = 0; + } + + if (vars->allocated) IODelete(vars, IOPolledFileIOVars, 1); + else bzero(vars, sizeof(IOPolledFileIOVars)); + *pVars = NULL; + + return (kIOReturnSuccess); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOReturn +IOPolledFilePollersSetup(IOPolledFileIOVars * vars, + uint32_t openState) +{ + IOReturn err; + + err = kIOReturnSuccess; + do + { + if (!vars->pollers->openCount) + { + err = IOPolledFilePollersProbe(vars->pollers); + if (kIOReturnSuccess != err) break; + err = IOPolledFilePollersOpen(vars, openState, false); + if (kIOReturnSuccess != err) break; + 
vars->pollers->openState = openState; + } + vars->pollers->openCount++; + vars->pollers->io = false; + vars->buffer = (uint8_t *) vars->pollers->ioBuffer->getBytesNoCopy(); + vars->bufferHalf = 0; + vars->bufferOffset = 0; + vars->bufferSize = (vars->pollers->ioBuffer->getLength() >> 1); + + if (vars->maxiobytes < vars->bufferSize) vars->bufferSize = vars->maxiobytes; + } + while (false); + + if (kIOReturnSuccess != err) HIBLOG("IOPolledFilePollersSetup(%d) error 0x%x\n", openState, err); + + return (err); +} + + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOReturn +IOPolledFileSeek(IOPolledFileIOVars * vars, uint64_t position) +{ + IOPolledFileExtent * extentMap; + + extentMap = vars->extentMap; + + vars->position = position; + + while (position >= extentMap->length) + { + position -= extentMap->length; + extentMap++; + } + + vars->currentExtent = extentMap; + vars->extentRemaining = extentMap->length - position; + vars->extentPosition = vars->position - position; + + if (vars->bufferSize <= vars->extentRemaining) + vars->bufferLimit = vars->bufferSize; + else + vars->bufferLimit = vars->extentRemaining; + + return (kIOReturnSuccess); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOReturn +IOPolledFileWrite(IOPolledFileIOVars * vars, + const uint8_t * bytes, IOByteCount size, + IOPolledFileCryptVars * cryptvars) +{ + IOReturn err = kIOReturnSuccess; + IOByteCount copy; + bool flush = false; + + do + { + if (!bytes && !size) + { + // seek to end of block & flush + size = vars->position & (vars->blockSize - 1); + if (size) + size = vars->blockSize - size; + flush = true; + // use some garbage for the fill + bytes = vars->buffer + vars->bufferOffset; + } + + copy = vars->bufferLimit - vars->bufferOffset; + if (copy > size) + copy = size; + else + flush = true; + + if (bytes) + { + bcopy(bytes, vars->buffer + vars->bufferHalf + vars->bufferOffset, copy); + bytes += copy; + } + else + 
bzero(vars->buffer + vars->bufferHalf + vars->bufferOffset, copy); + + size -= copy; + vars->bufferOffset += copy; + vars->position += copy; + + if (flush && vars->bufferOffset) + { + uint64_t offset = (vars->position - vars->bufferOffset + - vars->extentPosition + vars->currentExtent->start); + uint32_t length = (vars->bufferOffset); + +#if CRYPTO + if (cryptvars && vars->encryptStart + && (vars->position > vars->encryptStart) + && ((vars->position - length) < vars->encryptEnd)) + { + AbsoluteTime startTime, endTime; + + uint64_t encryptLen, encryptStart; + encryptLen = vars->position - vars->encryptStart; + if (encryptLen > length) + encryptLen = length; + encryptStart = length - encryptLen; + if (vars->position > vars->encryptEnd) + encryptLen -= (vars->position - vars->encryptEnd); + + clock_get_uptime(&startTime); + + // encrypt the buffer + aes_encrypt_cbc(vars->buffer + vars->bufferHalf + encryptStart, + &cryptvars->aes_iv[0], + encryptLen / AES_BLOCK_SIZE, + vars->buffer + vars->bufferHalf + encryptStart, + &cryptvars->ctx.encrypt); + + clock_get_uptime(&endTime); + ADD_ABSOLUTETIME(&vars->cryptTime, &endTime); + SUB_ABSOLUTETIME(&vars->cryptTime, &startTime); + vars->cryptBytes += encryptLen; + + // save initial vector for following encrypts + bcopy(vars->buffer + vars->bufferHalf + encryptStart + encryptLen - AES_BLOCK_SIZE, + &cryptvars->aes_iv[0], + AES_BLOCK_SIZE); + } +#endif /* CRYPTO */ + + err = IOPolledFilePollersIODone(vars->pollers, true); + if (kIOReturnSuccess != err) + break; + +if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position); +//if (length != vars->bufferSize) HIBLOG("short write of %qx ends@ %qx\n", length, offset + length); + + err = IOStartPolledIO(vars->pollers, kIOPolledWrite, vars->bufferHalf, offset, length); + if (kIOReturnSuccess != err) + break; + vars->pollers->io = true; + + vars->extentRemaining -= vars->bufferOffset; + if (!vars->extentRemaining) + { + vars->currentExtent++; + 
vars->extentRemaining = vars->currentExtent->length; + vars->extentPosition = vars->position; + } + + vars->bufferHalf = vars->bufferHalf ? 0 : vars->bufferSize; + vars->bufferOffset = 0; + if (vars->bufferSize <= vars->extentRemaining) + vars->bufferLimit = vars->bufferSize; + else + vars->bufferLimit = vars->extentRemaining; + + if (!vars->extentRemaining) + { + err = kIOReturnOverrun; + break; + } + + flush = false; + } + } + while (size); + + return (err); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOReturn +IOPolledFileRead(IOPolledFileIOVars * vars, + uint8_t * bytes, IOByteCount size, + IOPolledFileCryptVars * cryptvars) +{ + IOReturn err = kIOReturnSuccess; + IOByteCount copy; + +// bytesWritten += size; + + do + { + copy = vars->bufferLimit - vars->bufferOffset; + if (copy > size) + copy = size; + + if (bytes) + { + bcopy(vars->buffer + vars->bufferHalf + vars->bufferOffset, bytes, copy); + bytes += copy; + } + size -= copy; + vars->bufferOffset += copy; +// vars->position += copy; + + if ((vars->bufferOffset == vars->bufferLimit) && (vars->position < vars->readEnd)) + { + if (!vars->pollers->io) cryptvars = 0; + err = IOPolledFilePollersIODone(vars->pollers, true); + if (kIOReturnSuccess != err) + break; + +if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position); + + vars->position += vars->lastRead; + vars->extentRemaining -= vars->lastRead; + vars->bufferLimit = vars->lastRead; + + if (!vars->extentRemaining) + { + vars->currentExtent++; + vars->extentRemaining = vars->currentExtent->length; + vars->extentPosition = vars->position; + if (!vars->extentRemaining) + { + err = kIOReturnOverrun; + break; + } + } + + uint64_t length; + uint64_t lastReadLength = vars->lastRead; + uint64_t offset = (vars->position + - vars->extentPosition + vars->currentExtent->start); + if (vars->extentRemaining <= vars->bufferSize) + length = vars->extentRemaining; + else + length = 
vars->bufferSize; + if ((length + vars->position) > vars->readEnd) + length = vars->readEnd - vars->position; + + vars->lastRead = length; + if (length) + { +//if (length != vars->bufferSize) HIBLOG("short read of %qx ends@ %qx\n", length, offset + length); + err = IOStartPolledIO(vars->pollers, kIOPolledRead, vars->bufferHalf, offset, length); + if (kIOReturnSuccess != err) + break; + vars->pollers->io = true; + } + + vars->bufferHalf = vars->bufferHalf ? 0 : vars->bufferSize; + vars->bufferOffset = 0; + +#if CRYPTO + if (cryptvars) + { + uint8_t thisVector[AES_BLOCK_SIZE]; + AbsoluteTime startTime, endTime; + + // save initial vector for following decrypts + bcopy(&cryptvars->aes_iv[0], &thisVector[0], AES_BLOCK_SIZE); + bcopy(vars->buffer + vars->bufferHalf + lastReadLength - AES_BLOCK_SIZE, + &cryptvars->aes_iv[0], AES_BLOCK_SIZE); + + // decrypt the buffer + clock_get_uptime(&startTime); + + aes_decrypt_cbc(vars->buffer + vars->bufferHalf, + &thisVector[0], + lastReadLength / AES_BLOCK_SIZE, + vars->buffer + vars->bufferHalf, + &cryptvars->ctx.decrypt); + + clock_get_uptime(&endTime); + ADD_ABSOLUTETIME(&vars->cryptTime, &endTime); + SUB_ABSOLUTETIME(&vars->cryptTime, &startTime); + vars->cryptBytes += lastReadLength; + } +#endif /* CRYPTO */ + } + } + while (size); + + return (err); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + diff --git a/iokit/Kernel/IORegistryEntry.cpp b/iokit/Kernel/IORegistryEntry.cpp index c4bbb3513..1d9cf8f9d 100644 --- a/iokit/Kernel/IORegistryEntry.cpp +++ b/iokit/Kernel/IORegistryEntry.cpp @@ -95,7 +95,7 @@ class IORegistryPlane : public OSObject { int reserved[2]; public: - virtual bool serialize(OSSerialize *s) const; + virtual bool serialize(OSSerialize *s) const APPLE_KEXT_OVERRIDE; }; OSDefineMetaClassAndStructors(IORegistryPlane, OSObject) diff --git a/iokit/Kernel/IOReportLegend.cpp b/iokit/Kernel/IOReportLegend.cpp index 33e04ed5d..20ae27af8 100644 --- a/iokit/Kernel/IOReportLegend.cpp 
+++ b/iokit/Kernel/IOReportLegend.cpp @@ -33,14 +33,14 @@ //#define IORDEBUG_LEGEND 1 #ifdef IORDEBUG_LEGEND -#define IORLEGENDLOG(fmt, args...) \ -do { \ -IOLog("IOReportLegend | "); \ -IOLog(fmt, ##args); \ -IOLog("\n"); \ -} while(0) + #define IORLEGENDLOG(fmt, args...) \ + do { \ + IOLog("IOReportLegend | "); \ + IOLog(fmt, ##args); \ + IOLog("\n"); \ + } while(0) #else -#define IORLEGENDLOG(fmt, args...) + #define IORLEGENDLOG(fmt, args...) #endif @@ -101,25 +101,33 @@ IOReportLegend::addReporterLegend(IOService *reportingService, const char *subGroupName) { IOReturn res = kIOReturnError; - IOReportLegend *legend; + IOReportLegend *legend = NULL; + OSObject *curLegend = NULL; // No need to check groupName and subGroupName because optional params if (!reportingService || !reporter) { goto finish; } - legend = IOReportLegend::with(OSDynamicCast(OSArray, reportingService->getProperty(kIOReportLegendKey))); - - if (legend) - { - legend->addReporterLegend(reporter, groupName, subGroupName); - reportingService->setProperty(kIOReportLegendKey, legend->getLegend()); - reportingService->setProperty(kIOReportLegendPublicKey, true); - legend->free(); - res = kIOReturnSuccess; - } + // It's fine if the legend doesn't exist (IOReportLegend::with(NULL) + // is how you make an empty legend). If it's not an array, then + // we're just going to replace it. + curLegend = reportingService->copyProperty(kIOReportLegendKey); + legend = IOReportLegend::with(OSDynamicCast(OSArray, curLegend)); + if (!legend) goto finish; + + // Add the reporter's entries and update the service property. + // The overwrite triggers a release of the old legend array. 
+ legend->addReporterLegend(reporter, groupName, subGroupName); + reportingService->setProperty(kIOReportLegendKey, legend->getLegend()); + reportingService->setProperty(kIOReportLegendPublicKey, true); + + res = kIOReturnSuccess; finish: + if (legend) legend->release(); + if (curLegend) curLegend->release(); + return res; } diff --git a/iokit/Kernel/IOReporterDefs.h b/iokit/Kernel/IOReporterDefs.h index 1a79a2de6..ec94a3d24 100644 --- a/iokit/Kernel/IOReporterDefs.h +++ b/iokit/Kernel/IOReporterDefs.h @@ -59,11 +59,11 @@ do { \ #define PREFL_MEMOP_FAIL(__val, __type) do { \ if (__val <= 0) { \ - IORERROR("%s - %s <= 0!", __func__, #__val); \ + IORERROR("%s - %s <= 0!\n", __func__, #__val); \ res = kIOReturnUnderrun; \ goto finish; \ } else if (__val > INT_MAX / (int)sizeof(__type)) { \ - IORERROR("%s - %s > INT_MAX / sizeof(%s)!", __func__, #__val, #__type);\ + IORERROR("%s - %s > INT_MAX / sizeof(%s)!\n",__func__,#__val,#__type);\ res = kIOReturnOverrun; \ goto finish; \ } \ diff --git a/iokit/Kernel/IOService.cpp b/iokit/Kernel/IOService.cpp index 96bb4fc6c..bed5b5e4e 100644 --- a/iokit/Kernel/IOService.cpp +++ b/iokit/Kernel/IOService.cpp @@ -50,9 +50,13 @@ #include #include #include +#include +#include #include #include #include +#include +#include #include @@ -147,6 +151,11 @@ const OSSymbol * gIOAppPowerStateInterest; const OSSymbol * gIOPriorityPowerStateInterest; const OSSymbol * gIOConsoleSecurityInterest; +const OSSymbol * gAKSGetKey; +#if defined(__i386__) || defined(__x86_64__) +const OSSymbol * gIOCreateEFIDevicePathSymbol; +#endif + static OSDictionary * gNotifications; static IORecursiveLock * gNotificationLock; @@ -160,6 +169,7 @@ static int gOutstandingJobs; static int gNumConfigThreads; static int gNumWaitingThreads; static IOLock * gIOServiceBusyLock; +static bool gCPUsRunning; static thread_t gIOTerminateThread; static UInt32 gIOTerminateWork; @@ -173,12 +183,6 @@ static OSData * gIOConsoleUsersSeedValue; extern const OSSymbol * 
gIODTPHandleKey; -const OSSymbol * gIOPlatformSleepActionKey; -const OSSymbol * gIOPlatformWakeActionKey; -const OSSymbol * gIOPlatformQuiesceActionKey; -const OSSymbol * gIOPlatformActiveActionKey; -const OSSymbol * gIOPlatformHaltRestartActionKey; - const OSSymbol * gIOPlatformFunctionHandlerSet; static IOLock * gIOConsoleUsersLock; @@ -266,6 +270,7 @@ static IORecursiveLock *sCpuDelayLock = IORecursiveLockAlloc(); static OSArray *sCpuLatencyHandlers[kCpuNumDelayTypes]; const OSSymbol *sCPULatencyFunctionName[kCpuNumDelayTypes]; static OSNumber * sCPULatencyHolder[kCpuNumDelayTypes]; +static char sCPULatencyHolderName[kCpuNumDelayTypes][128]; static OSNumber * sCPULatencySet[kCpuNumDelayTypes]; static void @@ -346,12 +351,6 @@ void IOService::initialize( void ) gIOConsoleSessionScreenLockedTimeKey = OSSymbol::withCStringNoCopy(kIOConsoleSessionScreenLockedTimeKey); gIOConsoleUsersSeedValue = OSData::withBytesNoCopy(&gIOConsoleUsersSeed, sizeof(gIOConsoleUsersSeed)); - - gIOPlatformSleepActionKey = OSSymbol::withCStringNoCopy(kIOPlatformSleepActionKey); - gIOPlatformWakeActionKey = OSSymbol::withCStringNoCopy(kIOPlatformWakeActionKey); - gIOPlatformQuiesceActionKey = OSSymbol::withCStringNoCopy(kIOPlatformQuiesceActionKey); - gIOPlatformActiveActionKey = OSSymbol::withCStringNoCopy(kIOPlatformActiveActionKey); - gIOPlatformHaltRestartActionKey = OSSymbol::withCStringNoCopy(kIOPlatformHaltRestartActionKey); gIOPlatformFunctionHandlerSet = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerSet); #if defined(__i386__) || defined(__x86_64__) @@ -364,9 +363,12 @@ void IOService::initialize( void ) sCPULatencyHolder[idx] = OSNumber::withNumber(0ULL, 64); assert(sCPULatencySet[idx] && sCPULatencyHolder[idx]); } + gIOCreateEFIDevicePathSymbol = OSSymbol::withCString("CreateEFIDevicePath"); #endif gNotificationLock = IORecursiveLockAlloc(); + gAKSGetKey = OSSymbol::withCStringNoCopy(AKS_PLATFORM_FUNCTION_GETKEY); + assert( gIOServicePlane && gIODeviceMemoryKey && 
gIOInterruptControllersKey && gIOInterruptSpecifiersKey && gIOResourcesKey && gNotifications && gNotificationLock @@ -413,6 +415,16 @@ void IOService::initialize( void ) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#if defined(__i386__) || defined(__x86_64__) +extern "C" { + +const char *getCpuDelayBusStallHolderName(void) { + return sCPULatencyHolderName[kCpuDelayBusStall]; +} + +} +#endif + #if IOMATCHDEBUG static UInt64 getDebugFlags( OSDictionary * props ) { @@ -453,20 +465,14 @@ void IOService::stop( IOService * provider ) bool IOService::init( OSDictionary * dictionary ) { - bool ret = false; + bool ret; ret = super::init(dictionary); - - if (!ret) - goto done; + if (!ret) return (false); + if (reserved) return (true); reserved = IONew(ExpansionData, 1); - - if (!reserved) { - ret = false; - goto done; - } - + if (!reserved) return (false); bzero(reserved, sizeof(*reserved)); /* @@ -480,33 +486,22 @@ bool IOService::init( OSDictionary * dictionary ) * which should be unlikely). */ reserved->interruptStatisticsLock = IOLockAlloc(); + if (!reserved->interruptStatisticsLock) return (false); - if (!reserved->interruptStatisticsLock) { - ret = false; - goto done; - } - -done: - return ret; + return (true); } bool IOService::init( IORegistryEntry * from, const IORegistryPlane * inPlane ) { - bool ret = false; + bool ret; ret = super::init(from, inPlane); - - if (!ret) - goto done; + if (!ret) return (false); + if (reserved) return (true); reserved = IONew(ExpansionData, 1); - - if (!reserved) { - ret = false; - goto done; - } - + if (!reserved) return (false); bzero(reserved, sizeof(*reserved)); /* @@ -520,14 +515,9 @@ bool IOService::init( IORegistryEntry * from, * which should be unlikely). 
*/ reserved->interruptStatisticsLock = IOLockAlloc(); + if (!reserved->interruptStatisticsLock) return (false); - if (!reserved->interruptStatisticsLock) { - ret = false; - goto done; - } - -done: - return ret; + return (true); } void IOService::free( void ) @@ -554,6 +544,12 @@ void IOService::free( void ) IODelete(reserved, ExpansionData, 1); } + if (_numInterruptSources && _interruptSources) + { + IOFree(_interruptSources, _numInterruptSources * sizeof(IOInterruptSource)); + _interruptSources = 0; + } + super::free(); } @@ -603,6 +599,15 @@ void IOService::detach( IOService * provider ) lockForArbitration(); + uint64_t regID1 = provider->getRegistryEntryID(); + uint64_t regID2 = getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_DETACH, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) regID2, + (uintptr_t) (regID2 >> 32)); + adjParent = ((busy = (__state[1] & kIOServiceBusyStateMask)) && (provider == getProvider())); @@ -616,6 +621,7 @@ void IOService::detach( IOService * provider ) if (kIOServiceInactiveState & __state[0]) { getMetaClass()->removeInstance(this); + IORemoveServicePlatformActions(this); } unlockForArbitration(); @@ -669,6 +675,8 @@ void IOService::registerService( IOOptionBits options ) if( gIOPlatform && (!gIOPlatform->platformAdjustService(this))) return; + IOInstallServicePlatformActions(this); + if( (this != gIOResources) && (kIOLogRegister & gIOKitDebug)) { @@ -1861,7 +1869,7 @@ bool IOService::requestTerminate( IOService * provider, IOOptionBits options ) // -- compat if( ok) { provider->terminateClient( this, options | kIOServiceRecursing ); - ok = (0 != (__state[1] & kIOServiceRecursing)); + ok = (0 != (kIOServiceInactiveState & __state[0])); } // -- @@ -1874,8 +1882,9 @@ bool IOService::terminatePhase1( IOOptionBits options ) IOService * client; OSIterator * iter; OSArray * makeInactive; - int waitResult = THREAD_AWAKENED; - bool wait; + OSArray * waitingInactive; + int waitResult = THREAD_AWAKENED; + bool wait; bool 
ok; bool didInactive; bool startPhase2 = false; @@ -1893,70 +1902,86 @@ bool IOService::terminatePhase1( IOOptionBits options ) // -- compat if( options & kIOServiceRecursing) { lockForArbitration(); - __state[0] |= kIOServiceInactiveState; - __state[1] |= kIOServiceRecursing; + if (0 == (kIOServiceInactiveState & __state[0])) + { + __state[0] |= kIOServiceInactiveState; + __state[1] |= kIOServiceRecursing | kIOServiceTermPhase1State; + } unlockForArbitration(); return( true ); } // -- - makeInactive = OSArray::withCapacity( 16 ); - if( !makeInactive) - return( false ); + makeInactive = OSArray::withCapacity( 16 ); + waitingInactive = OSArray::withCapacity( 16 ); + if(!makeInactive || !waitingInactive) return( false ); victim = this; victim->retain(); - while( victim ) { - + while( victim ) + { didInactive = victim->lockForArbitration( true ); - if( didInactive) { - didInactive = (0 == (victim->__state[0] & kIOServiceInactiveState)) - || (victim->__state[1] & kIOServiceRecursing); - if( didInactive) { - victim->__state[0] |= kIOServiceInactiveState; - victim->__state[0] &= ~(kIOServiceRegisteredState | kIOServiceMatchedState - | kIOServiceFirstPublishState | kIOServiceFirstMatchState); - victim->__state[1] &= ~kIOServiceRecursing; + if( didInactive) + { + uint64_t regID1 = victim->getRegistryEntryID(); + IOServiceTrace(IOSERVICE_TERM_SET_INACTIVE, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) victim->__state[1], + (uintptr_t) 0); + enum { kRP1 = kIOServiceRecursing | kIOServiceTermPhase1State }; + didInactive = (kRP1 == (victim->__state[1] & kRP1)) + || (0 == (victim->__state[0] & kIOServiceInactiveState)); + + if (!didInactive) + { + // a multiply attached IOService can be visited twice + if (-1U == waitingInactive->getNextIndexOfObject(victim, 0)) do + { + IOLockLock(gIOServiceBusyLock); + wait = (victim->__state[1] & kIOServiceTermPhase1State); + if( wait) { + TLOG("%s[0x%qx]::waitPhase1(%s[0x%qx])\n", + getName(), getRegistryEntryID(), 
victim->getName(), victim->getRegistryEntryID()); + victim->__state[1] |= kIOServiceTerm1WaiterState; + victim->unlockForArbitration(); + assert_wait((event_t)&victim->__state[1], THREAD_UNINT); + } + IOLockUnlock(gIOServiceBusyLock); + if( wait) { + waitResult = thread_block(THREAD_CONTINUE_NULL); + TLOG("%s[0x%qx]::did waitPhase1(%s[0x%qx])\n", + getName(), getRegistryEntryID(), victim->getName(), victim->getRegistryEntryID()); + victim->lockForArbitration(); + } + } + while (wait && (waitResult != THREAD_TIMED_OUT)); + } + else + { + victim->__state[0] |= kIOServiceInactiveState; + victim->__state[0] &= ~(kIOServiceRegisteredState | kIOServiceMatchedState + | kIOServiceFirstPublishState | kIOServiceFirstMatchState); + victim->__state[1] &= ~kIOServiceRecursing; + victim->__state[1] |= kIOServiceTermPhase1State; + waitingInactive->headQ(victim); if (victim == this) { - victim->__state[1] |= kIOServiceTermPhase1State; if (kIOServiceTerminateNeedWillTerminate & options) { victim->__state[1] |= kIOServiceNeedWillTerminate; } } - - victim->_adjustBusy( 1 ); - - } else if (victim != this) do { - - IOLockLock(gIOServiceBusyLock); - wait = (victim->__state[1] & kIOServiceTermPhase1State); - if( wait) { - TLOG("%s[0x%qx]::waitPhase1(%s[0x%qx])\n", - getName(), getRegistryEntryID(), victim->getName(), victim->getRegistryEntryID()); - victim->__state[1] |= kIOServiceTerm1WaiterState; - victim->unlockForArbitration(); - assert_wait((event_t)&victim->__state[1], THREAD_UNINT); - } - IOLockUnlock(gIOServiceBusyLock); - if( wait) { - waitResult = thread_block(THREAD_CONTINUE_NULL); - TLOG("%s[0x%qx]::did waitPhase1(%s[0x%qx])\n", - getName(), getRegistryEntryID(), victim->getName(), victim->getRegistryEntryID()); - victim->lockForArbitration(); - } - } while( wait && (waitResult != THREAD_TIMED_OUT)); - + victim->_adjustBusy( 1 ); + } victim->unlockForArbitration(); } - if( victim == this) - startPhase2 = didInactive; - if( didInactive) { - + if( victim == this) startPhase2 = 
didInactive; + if (didInactive) + { victim->deliverNotification( gIOTerminatedNotification, 0, 0xffffffff ); IOUserClient::destroyUserReferences( victim ); @@ -1994,23 +2019,35 @@ bool IOService::terminatePhase1( IOOptionBits options ) makeInactive->removeObject(0); } } - makeInactive->release(); - if( startPhase2) + while ((victim = (IOService *) waitingInactive->getObject(0))) { - lockForArbitration(); - __state[1] &= ~kIOServiceTermPhase1State; - if (kIOServiceTerm1WaiterState & __state[1]) + victim->retain(); + waitingInactive->removeObject(0); + + victim->lockForArbitration(); + victim->__state[1] &= ~kIOServiceTermPhase1State; + if (kIOServiceTerm1WaiterState & victim->__state[1]) { - __state[1] &= ~kIOServiceTerm1WaiterState; - TLOG("%s[0x%qx]::wakePhase1\n", getName(), getRegistryEntryID()); + victim->__state[1] &= ~kIOServiceTerm1WaiterState; + TLOG("%s[0x%qx]::wakePhase1\n", victim->getName(), victim->getRegistryEntryID()); IOLockLock( gIOServiceBusyLock ); - thread_wakeup( (event_t) &__state[1]); + thread_wakeup( (event_t) &victim->__state[1]); IOLockUnlock( gIOServiceBusyLock ); } + victim->unlockForArbitration(); + victim->release(); + } + waitingInactive->release(); + + if( startPhase2) + { + retain(); + lockForArbitration(); + scheduleTerminatePhase2(options); unlockForArbitration(); - scheduleTerminatePhase2( options ); + release(); } return( true ); @@ -2035,16 +2072,25 @@ void IOService::setTerminateDefer(IOService * provider, bool defer) void IOService::scheduleTerminatePhase2( IOOptionBits options ) { AbsoluteTime deadline; + uint64_t regID1; int waitResult = THREAD_AWAKENED; bool wait, haveDeadline = false; - if (!(__state[0] & kIOServiceInactiveState) - || (__state[1] & kIOServiceTermPhase1State)) return; + if (!(__state[0] & kIOServiceInactiveState)) return; - options |= kIOServiceRequired; + regID1 = getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_TERM_SCHED_PHASE2, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) 
__state[1], + (uintptr_t) options); - retain(); + if (__state[1] & kIOServiceTermPhase1State) return; + retain(); + unlockForArbitration(); + options |= kIOServiceRequired; IOLockLock( gJobsLock ); if( (options & kIOServiceSynchronous) @@ -2096,7 +2142,7 @@ void IOService::scheduleTerminatePhase2( IOOptionBits options ) } IOLockUnlock( gJobsLock ); - + lockForArbitration(); release(); } @@ -2229,7 +2275,7 @@ void IOService::actionDidTerminate( IOService * victim, IOOptionBits options, { OSIterator * iter; IOService * client; - bool defer = false; + bool defer; uint64_t regID1, regID2 = victim->getRegistryEntryID(); victim->messageClients( kIOMessageServiceIsTerminated, (void *)(uintptr_t) options ); @@ -2242,6 +2288,7 @@ void IOService::actionDidTerminate( IOService * victim, IOOptionBits options, TLOG("%s[0x%qx]::didTerminate(%s[0x%qx], %08llx)\n", client->getName(), regID1, victim->getName(), regID2, (long long)options); + defer = false; client->didTerminate( victim, options, &defer ); IOServiceTrace( @@ -2397,22 +2444,48 @@ void IOService::terminateWorker( IOOptionBits options ) gIOTerminatePhase2List->removeObject(0); IOLockUnlock( gJobsLock ); + uint64_t regID1 = victim->getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_TERM_START_PHASE2, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) 0, + (uintptr_t) 0); + while( victim ) { doPhase2 = victim->lockForArbitration( true ); if( doPhase2) { doPhase2 = (0 != (kIOServiceInactiveState & victim->__state[0])); if( doPhase2) { + + uint64_t regID1 = victim->getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_TERM_TRY_PHASE2, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) victim->__state[1], + (uintptr_t) 0); + doPhase2 = (0 == (victim->__state[1] & kIOServiceTermPhase2State)) && (0 == (victim->__state[1] & kIOServiceConfigState)); if (doPhase2 && (iter = victim->getClientIterator())) { while (doPhase2 && (client = (IOService *) iter->getNextObject())) { doPhase2 = (0 == 
(client->__state[1] & kIOServiceStartState)); - - if (!doPhase2) TLOG("%s[0x%qx]::defer phase2(%s[0x%qx])\n", - victim->getName(), victim->getRegistryEntryID(), - client->getName(), client->getRegistryEntryID()); + if (!doPhase2) + { + uint64_t regID1 = client->getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_TERM_UC_DEFER, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) client->__state[1], + (uintptr_t) 0); + TLOG("%s[0x%qx]::defer phase2(%s[0x%qx])\n", + victim->getName(), victim->getRegistryEntryID(), + client->getName(), client->getRegistryEntryID()); + } } iter->release(); } @@ -3510,9 +3583,9 @@ UInt32 IOService::_adjustBusy( SInt32 delta ) next->unlockForArbitration(); if( (wasQuiet || nowQuiet) ) { - uint64_t regID = next->getRegistryEntryID(); - IOServiceTrace( + uint64_t regID = next->getRegistryEntryID(); + IOServiceTrace( ((wasQuiet/*nowBusy*/) ? IOSERVICE_BUSY : IOSERVICE_NONBUSY), (uintptr_t) regID, (uintptr_t) (regID >> 32), @@ -3635,13 +3708,54 @@ IOReturn IOService::waitForState( UInt32 mask, UInt32 value, IOReturn IOService::waitQuiet( uint64_t timeout ) { - IOReturn ret; + IOReturn ret; ret = waitForState( kIOServiceBusyStateMask, 0, timeout ); - if ((kIOReturnTimeout == ret) && (timeout >= 30000000000) && (kIOWaitQuietPanics & gIOKitDebug)) + if ((kIOReturnTimeout == ret) && (timeout >= 41000000000) && (kIOWaitQuietPanics & gIOKitDebug)) + { + IORegistryIterator * iter; + OSOrderedSet * set; + OSOrderedSet * leaves; + IOService * next; + IOService * nextParent; + char * string; + char * s; + size_t len, l; + + len = 256; + string = IONew(char, len); + set = NULL; + iter = IORegistryIterator::iterateOver(this, gIOServicePlane, kIORegistryIterateRecursively); + leaves = OSOrderedSet::withCapacity(4); + if (iter) set = iter->iterateAll(); + if (string && leaves && set) { - panic("IOService 0x%llx (%s) busy timeout", getRegistryEntryID(), getName()); + while ((next = (IOService *) set->getLastObject())) + { + if 
(next->getBusyState()) + { + leaves->setObject(next); + nextParent = next; + while ((nextParent = nextParent->getProvider())) + { + set->removeObject(nextParent); + leaves->removeObject(nextParent); + } + } + set->removeObject(next); + } + s = string; + while ((next = (IOService *) leaves->getLastObject())) + { + l = snprintf(s, len, "%s'%s'", ((s == string) ? "" : ", "), next->getName()); + if (l >= len) break; + s += l; + len -= l; + leaves->removeObject(next); + } } - return (ret); + panic("busy timeout(%llds): %s", timeout / 1000000000ULL, string ? string : ""); + } + return (ret); } IOReturn IOService::waitQuiet( mach_timespec_t * timeout ) @@ -3781,6 +3895,11 @@ IOReturn IOService::waitMatchIdle( UInt32 msToWait ) return( kIOReturnSuccess ); } +void IOService::cpusRunning(void) +{ + gCPUsRunning = true; +} + void _IOServiceJob::pingConfig( _IOServiceJob * job ) { int count; @@ -3797,7 +3916,9 @@ void _IOServiceJob::pingConfig( _IOServiceJob * job ) // if( gNumConfigThreads) count++;// assume we're called from a config thread create = ( (gOutstandingJobs > count) - && (gNumConfigThreads < kMaxConfigThreads) ); + && ((gNumConfigThreads < kMaxConfigThreads) + || (job->nub == gIOResources) + || !gCPUsRunning)); if( create) { gNumConfigThreads++; gNumWaitingThreads++; @@ -4644,6 +4765,13 @@ bool IOResources::init( OSDictionary * dictionary ) return true; } +IOReturn IOResources::newUserClient(task_t owningTask, void * securityID, + UInt32 type, OSDictionary * properties, + IOUserClient ** handler) +{ + return( kIOReturnUnsupported ); +} + IOWorkLoop * IOResources::getWorkLoop() const { // If we are the resource root @@ -5568,6 +5696,10 @@ requireMaxCpuDelay(IOService * service, UInt32 ns, UInt32 delayType) if (setCpuDelay) { + if (holder && debug_boot_arg) { + strlcpy(sCPULatencyHolderName[delayType], holder->getName(), sizeof(sCPULatencyHolderName[delayType])); + } + // Must be safe to call from locked context if (delayType == kCpuDelayBusStall) { @@ -5785,6 
+5917,7 @@ IOReturn IOService::addInterruptStatistics(IOInterruptAccountingData * statistic IOReportLegend * legend = NULL; IOInterruptAccountingData * oldValue = NULL; IOInterruptAccountingReporter * newArray = NULL; + char subgroupName[64]; int newArraySize = 0; int i = 0; @@ -5862,7 +5995,7 @@ IOReturn IOService::addInterruptStatistics(IOInterruptAccountingData * statistic * TODO: Some statistics do in fact have common units (time); should this be * split into separate reporters to communicate this? */ - reserved->interruptStatisticsArray[source].reporter = IOSimpleReporter::with(this, kIOReportCategoryInterrupt, kIOReportUnitNone); + reserved->interruptStatisticsArray[source].reporter = IOSimpleReporter::with(this, kIOReportCategoryPower, kIOReportUnitNone); /* * Each statistic is given an identifier based on the interrupt index (which @@ -5885,19 +6018,13 @@ IOReturn IOService::addInterruptStatistics(IOInterruptAccountingData * statistic */ legend = IOReportLegend::with(OSDynamicCast(OSArray, getProperty(kIOReportLegendKey))); - if ((source >= IA_MAX_SUBGROUP_NAME) || (source < 0)) { - /* - * Either we're using a nonsensical index (should never happen), or the - * index is larger than anticipated (may happen, almost certainly won't). - * This may move to live generation of the names in the future, but for - * now, point both cases to a generic subgroup name (this will confuse - * clients, unfortunately). - */ - legend->addReporterLegend(reserved->interruptStatisticsArray[source].reporter, kInterruptAccountingGroupName, kInterruptAccountingGenericSubgroupName); - } else { - legend->addReporterLegend(reserved->interruptStatisticsArray[source].reporter, kInterruptAccountingGroupName, kInterruptAccountingSubgroupNames[source]); - } - + /* + * Note that while we compose the subgroup name, we do not need to + * manage its lifecycle (the reporter will handle this). 
+ */ + snprintf(subgroupName, sizeof(subgroupName), "%s %d", getName(), source); + subgroupName[sizeof(subgroupName) - 1] = 0; + legend->addReporterLegend(reserved->interruptStatisticsArray[source].reporter, kInterruptAccountingGroupName, subgroupName); setProperty(kIOReportLegendKey, legend->getLegend()); legend->release(); diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp index 7c363a269..30612ce42 100644 --- a/iokit/Kernel/IOServicePM.cpp +++ b/iokit/Kernel/IOServicePM.cpp @@ -47,6 +47,7 @@ #include #include +#include #include #include @@ -81,18 +82,32 @@ OSDefineMetaClassAndStructors(IOPMprot, OSObject) //****************************************************************************** static bool gIOPMInitialized = false; -static uint32_t gIOPMBusyCount = 0; -static uint32_t gIOPMWorkCount = 0; +static uint32_t gIOPMBusyRequestCount = 0; +static uint32_t gIOPMWorkInvokeCount = 0; static uint32_t gIOPMTickleGeneration = 0; static IOWorkLoop * gIOPMWorkLoop = 0; static IOPMRequestQueue * gIOPMRequestQueue = 0; static IOPMRequestQueue * gIOPMReplyQueue = 0; static IOPMWorkQueue * gIOPMWorkQueue = 0; -static IOPMCompletionQueue * gIOPMFreeQueue = 0; +static IOPMCompletionQueue * gIOPMCompletionQueue = 0; static IOPMRequest * gIOPMRequest = 0; static IOService * gIOPMRootNode = 0; static IOPlatformExpert * gPlatform = 0; +static char gIOSpinDumpKextName[128]; +static char gIOSpinDumpDelayType[16]; +static uint32_t gIOSpinDumpDelayDuration = 0; + +static SYSCTL_STRING(_debug, OID_AUTO, swd_kext_name, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &gIOSpinDumpKextName, sizeof(gIOSpinDumpKextName), ""); +static SYSCTL_STRING(_debug, OID_AUTO, swd_delay_type, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &gIOSpinDumpDelayType, sizeof(gIOSpinDumpDelayType), ""); +static SYSCTL_INT(_debug, OID_AUTO, swd_delay_duration, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &gIOSpinDumpDelayDuration, 0, ""); + const OSSymbol * gIOPMPowerClientDevice = 0; 
const OSSymbol * gIOPMPowerClientDriver = 0; const OSSymbol * gIOPMPowerClientChildProxy = 0; @@ -102,6 +117,7 @@ const OSSymbol * gIOPMPowerClientRootDomain = 0; static const OSSymbol * gIOPMPowerClientAdvisoryTickle = 0; static bool gIOPMAdvisoryTickleEnabled = true; static thread_t gIOPMWatchDogThread = NULL; +uint32_t gCanSleepTimeout = 0; static uint32_t getPMRequestType( void ) { @@ -122,6 +138,8 @@ static IOPMRequestTag getPMRequestTag( void ) return tag; } +SYSCTL_UINT(_kern, OID_AUTO, pmtimeout, CTLFLAG_RW | CTLFLAG_LOCKED, &gCanSleepTimeout, 0, "Power Management Timeout"); + //****************************************************************************** // Macros //****************************************************************************** @@ -159,8 +177,12 @@ do { \ #define PM_LOCK_SLEEP(event, dl) IOLockSleepDeadline(fPMLock, event, dl, THREAD_UNINT) #define PM_LOCK_WAKEUP(event) IOLockWakeup(fPMLock, event, false) +#define us_per_s 1000000 #define ns_per_us 1000 -#define k30Seconds (30*1000000) +#define k30Seconds (30*us_per_s) +#define k5Seconds ( 5*us_per_s) +#define kCanSleepMaxTimeReq k30Seconds +#define kMaxTimeRequested k30Seconds #define kMinAckTimeoutTicks (10*1000000) #define kIOPMTardyAckSPSKey "IOPMTardyAckSetPowerState" #define kIOPMTardyAckPSCKey "IOPMTardyAckPowerStateChange" @@ -298,22 +320,21 @@ void IOService::PMinit( void ) { gIOPMRequestQueue = IOPMRequestQueue::create( this, OSMemberFunctionCast(IOPMRequestQueue::Action, - this, &IOService::servicePMRequestQueue)); + this, &IOService::actionPMRequestQueue)); gIOPMReplyQueue = IOPMRequestQueue::create( this, OSMemberFunctionCast(IOPMRequestQueue::Action, - this, &IOService::servicePMReplyQueue)); + this, &IOService::actionPMReplyQueue)); - gIOPMWorkQueue = IOPMWorkQueue::create( - this, + gIOPMWorkQueue = IOPMWorkQueue::create(this, OSMemberFunctionCast(IOPMWorkQueue::Action, this, - &IOService::servicePMRequest), + &IOService::actionPMWorkQueueInvoke), 
OSMemberFunctionCast(IOPMWorkQueue::Action, this, - &IOService::retirePMRequest)); + &IOService::actionPMWorkQueueRetire)); - gIOPMFreeQueue = IOPMCompletionQueue::create( + gIOPMCompletionQueue = IOPMCompletionQueue::create( this, OSMemberFunctionCast(IOPMCompletionQueue::Action, - this, &IOService::servicePMFreeQueue)); + this, &IOService::actionPMCompletionQueue)); if (gIOPMWorkLoop->addEventSource(gIOPMRequestQueue) != kIOReturnSuccess) @@ -336,11 +357,13 @@ void IOService::PMinit( void ) gIOPMWorkQueue = 0; } - if (gIOPMWorkLoop->addEventSource(gIOPMFreeQueue) != + // Must be added after the work queue, which pushes request + // to the completion queue without signaling the work loop. + if (gIOPMWorkLoop->addEventSource(gIOPMCompletionQueue) != kIOReturnSuccess) { - gIOPMFreeQueue->release(); - gIOPMFreeQueue = 0; + gIOPMCompletionQueue->release(); + gIOPMCompletionQueue = 0; } gIOPMPowerClientDevice = @@ -360,9 +383,12 @@ void IOService::PMinit( void ) gIOPMPowerClientRootDomain = OSSymbol::withCStringNoCopy( "RootDomainPower" ); + + gIOSpinDumpKextName[0] = '\0'; + gIOSpinDumpDelayType[0] = '\0'; } - if (gIOPMRequestQueue && gIOPMReplyQueue && gIOPMFreeQueue) + if (gIOPMRequestQueue && gIOPMReplyQueue && gIOPMCompletionQueue) gIOPMInitialized = true; } if (!gIOPMInitialized) @@ -432,6 +458,11 @@ void IOService::PMinit( void ) fDriverCallEntry = thread_call_allocate( (thread_call_func_t) &IOService::pmDriverCallout, this); assert(fDriverCallEntry); + if (kIOKextSpinDump & gIOKitDebug) + { + fSpinDumpTimer = thread_call_allocate( + &IOService::spindump_timer_expired, (thread_call_param_t)this); + } // Check for powerChangeDone override. 
if (OSMemberFunctionCast(void (*)(void), @@ -505,6 +536,11 @@ void IOService::PMfree( void ) thread_call_free(fDriverCallEntry); fDriverCallEntry = NULL; } + if ( fSpinDumpTimer ) { + thread_call_cancel(fSpinDumpTimer); + thread_call_free(fSpinDumpTimer); + fSpinDumpTimer = NULL; + } if ( fPMLock ) { IOLockFree(fPMLock); fPMLock = NULL; @@ -827,7 +863,7 @@ IOReturn IOService::addPowerChild( IOService * child ) requests[1]->fArg0 = connection; requests[2]->fArg0 = connection; - submitPMRequest( requests, 3 ); + submitPMRequests( requests, 3 ); return kIOReturnSuccess; } while (false); @@ -1018,8 +1054,12 @@ IOReturn IOService::removePowerChild( IOPowerConnection * theNub ) { stop_ack_timer(); - // Request unblocked, work queue - // should re-scan all busy requests. + // This parent may have a request in the work queue that is + // blocked on fHeadNotePendingAcks=0. And removePowerChild() + // is called while executing the child's PMstop request so they + // can occur simultaneously. IOPMWorkQueue::checkForWork() must + // restart and check all request queues again. 
+ gIOPMWorkQueue->incrementProducerCount(); } } @@ -1308,7 +1348,6 @@ IOPMPowerFlags IOService::registerInterestedDriver( IOService * driver ) IOReturn IOService::deRegisterInterestedDriver( IOService * driver ) { - IOPMinformeeList * list; IOPMinformee * item; IOPMRequest * request; bool signal; @@ -1319,18 +1358,25 @@ IOReturn IOService::deRegisterInterestedDriver( IOService * driver ) return IOPMNotPowerManaged; PM_LOCK(); + if (fInsertInterestSet) + { + fInsertInterestSet->removeObject(driver); + } + + item = fInterestedDrivers->findItem(driver); + if (!item) + { + PM_UNLOCK(); + return kIOReturnNotFound; + } + signal = (!fRemoveInterestSet && !fInsertInterestSet); if (fRemoveInterestSet == NULL) fRemoveInterestSet = OSSet::withCapacity(4); if (fRemoveInterestSet) { fRemoveInterestSet->setObject(driver); - if (fInsertInterestSet) - fInsertInterestSet->removeObject(driver); - - list = fInterestedDrivers; - item = list->findItem(driver); - if (item && item->active) + if (item->active) { item->active = false; waitForPMDriverCall( driver ); @@ -1740,6 +1786,13 @@ void IOService::handlePowerDomainWillChangeTo( IOPMRequest * request ) maxPowerState = fControllingDriver->maxCapabilityForDomainState( combinedPowerFlags); + if (parentChangeFlags & kIOPMDomainPowerDrop) + { + // fMaxPowerState set a limit on self-initiated power changes. + // Update it before a parent power drop. + fMaxPowerState = maxPowerState; + } + // Use kIOPMSynchronize below instead of kIOPMRootBroadcastFlags // to avoid propagating the root change flags if any service must // change power state due to root's will-change notification. @@ -1841,6 +1894,13 @@ void IOService::handlePowerDomainDidChangeTo( IOPMRequest * request ) maxPowerState = fControllingDriver->maxCapabilityForDomainState( fParentsCurrentPowerFlags); + if ((parentChangeFlags & kIOPMDomainPowerDrop) == 0) + { + // fMaxPowerState set a limit on self-initiated power changes. + // Update it after a parent power rise. 
+ fMaxPowerState = maxPowerState; + } + if (fInitialPowerChange) { computeDesire = true; @@ -2301,6 +2361,44 @@ IOReturn IOService::changePowerStateForRootDomain( IOPMPowerStateIndex ordinal ) return requestPowerState( gIOPMPowerClientRootDomain, ordinal ); } +//********************************************************************************* +// [public for PMRD] quiescePowerTree +// +// For root domain to issue a request to quiesce the power tree. +// Supplied callback invoked upon completion. +//********************************************************************************* + +IOReturn IOService::quiescePowerTree( + void * target, IOPMCompletionAction action, void * param ) +{ + IOPMRequest * request; + + if (!initialized) + return kIOPMNotYetInitialized; + if (!target || !action) + return kIOReturnBadArgument; + + OUR_PMLog(kPMLogQuiescePowerTree, 0, 0); + + // Target the root node instead of root domain. This is to avoid blocking + // the quiesce request behind an existing root domain request in the work + // queue. Root parent and root domain requests in the work queue must not + // block the completion of the quiesce request. + + request = acquirePMRequest(gIOPMRootNode, kIOPMRequestTypeQuiescePowerTree); + if (!request) + return kIOReturnNoMemory; + + request->installCompletionAction(target, action, param); + + // Submit through the normal request flow. This will make sure any request + // already in the request queue will get pushed over to the work queue for + // execution. Any request submitted after this request may not be serviced. 
+ + submitPMRequest( request ); + return kIOReturnSuccess; +} + //********************************************************************************* // [private] requestPowerState //********************************************************************************* @@ -3847,9 +3945,11 @@ void IOService::driverSetPowerState( void ) if (assertPMDriverCall(&callEntry)) { OUR_PMLog( kPMLogProgramHardware, (uintptr_t) this, powerState); + start_spindump_timer("SetState"); clock_get_uptime(&fDriverCallStartTime); result = fControllingDriver->setPowerState( powerState, this ); clock_get_uptime(&end); + stop_spindump_timer(); OUR_PMLog((UInt32) -kPMLogProgramHardware, (uintptr_t) this, (UInt32) result); deassertPMDriverCall(&callEntry); @@ -3926,17 +4026,21 @@ void IOService::driverInformPowerChange( void ) if (fDriverCallReason == kDriverCallInformPreChange) { OUR_PMLog(kPMLogInformDriverPreChange, (uintptr_t) this, powerState); + start_spindump_timer("WillChange"); clock_get_uptime(&informee->startTime); result = driver->powerStateWillChangeTo(powerFlags, powerState, this); clock_get_uptime(&end); + stop_spindump_timer(); OUR_PMLog((UInt32)-kPMLogInformDriverPreChange, (uintptr_t) this, result); } else { OUR_PMLog(kPMLogInformDriverPostChange, (uintptr_t) this, powerState); + start_spindump_timer("DidChange"); clock_get_uptime(&informee->startTime); result = driver->powerStateDidChangeTo(powerFlags, powerState, this); clock_get_uptime(&end); + stop_spindump_timer(); OUR_PMLog((UInt32)-kPMLogInformDriverPostChange, (uintptr_t) this, result); } @@ -4146,7 +4250,7 @@ void IOService::all_done( void ) const IOPMPSEntry * powerStatePtr; IOPMDriverCallEntry callEntry; uint32_t prevMachineState = fMachineState; - bool callAction = false; + bool actionCalled = false; uint64_t ts; fMachineState = kIOPM_Finished; @@ -4192,10 +4296,10 @@ void IOService::all_done( void ) } // our power change - if ( fHeadNoteChangeFlags & kIOPMSelfInitiated ) + if (fHeadNoteChangeFlags & 
kIOPMSelfInitiated) { - // could our driver switch to the new state? - if ( !( fHeadNoteChangeFlags & kIOPMNotDone) ) + // power state changed + if ((fHeadNoteChangeFlags & kIOPMNotDone) == 0) { trackSystemSleepPreventers( fCurrentPowerState, fHeadNotePowerState, fHeadNoteChangeFlags); @@ -4224,7 +4328,7 @@ void IOService::all_done( void ) OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, prevPowerState); PM_ACTION_2(actionPowerChangeDone, fHeadNotePowerState, fHeadNoteChangeFlags); - callAction = true; + actionCalled = true; powerStatePtr = &fPowerStates[fCurrentPowerState]; fCurrentCapabilityFlags = powerStatePtr->capabilityFlags; @@ -4252,16 +4356,14 @@ void IOService::all_done( void ) } } - // parent's power change - if ( fHeadNoteChangeFlags & kIOPMParentInitiated) + // parent-initiated power change + if (fHeadNoteChangeFlags & kIOPMParentInitiated) { if (fHeadNoteChangeFlags & kIOPMRootChangeDown) ParentChangeRootChangeDown(); - if (((fHeadNoteChangeFlags & kIOPMDomainWillChange) && - (StateOrder(fCurrentPowerState) >= StateOrder(fHeadNotePowerState))) || - ((fHeadNoteChangeFlags & kIOPMDomainDidChange) && - (StateOrder(fCurrentPowerState) < StateOrder(fHeadNotePowerState)))) + // power state changed + if ((fHeadNoteChangeFlags & kIOPMNotDone) == 0) { trackSystemSleepPreventers( fCurrentPowerState, fHeadNotePowerState, fHeadNoteChangeFlags); @@ -4284,12 +4386,11 @@ void IOService::all_done( void ) #if PM_VARS_SUPPORT fPMVars->myCurrentState = fCurrentPowerState; #endif - fMaxPowerState = fControllingDriver->maxCapabilityForDomainState(fHeadNoteDomainFlags); OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, prevPowerState); PM_ACTION_2(actionPowerChangeDone, fHeadNotePowerState, fHeadNoteChangeFlags); - callAction = true; + actionCalled = true; powerStatePtr = &fPowerStates[fCurrentPowerState]; fCurrentCapabilityFlags = powerStatePtr->capabilityFlags; @@ -4314,7 +4415,7 @@ void IOService::all_done( void ) fIdleTimerMinPowerState = kPowerStateZero; } - if 
(!callAction) + if (!actionCalled) { PM_ACTION_2(actionPowerChangeDone, fHeadNotePowerState, fHeadNoteChangeFlags); @@ -4779,11 +4880,15 @@ IOReturn IOService::ParentChangeStart( void ) // to our children. fMachineState = kIOPM_SyncNotifyDidChange; fDriverCallReason = kDriverCallInformPreChange; + fHeadNoteChangeFlags |= kIOPMNotDone; notifyChildren(); return IOPMWillAckLater; } } + // No power state change necessary + fHeadNoteChangeFlags |= kIOPMNotDone; + all_done(); return IOPMAckImplied; } @@ -4817,6 +4922,7 @@ void IOService::ParentChangeRootChangeDown( void ) { updatePowerClient(gIOPMPowerClientDevice, kPowerStateZero); computeDesiredState(kPowerStateZero, true); + requestDomainPower( fDesiredPowerState ); PM_LOG1("%s: tickle desire removed\n", fName); } @@ -5237,13 +5343,20 @@ void IOService::start_watchdog_timer( void ) { AbsoluteTime deadline; boolean_t pending; + static int timeout = -1; if (!fWatchdogTimer || (kIOSleepWakeWdogOff & gIOKitDebug)) return; if (thread_call_isactive(fWatchdogTimer)) return; + if (timeout == -1) { + PE_parse_boot_argn("swd_timeout", &timeout, sizeof(timeout)); + } + if (timeout < 60) { + timeout = WATCHDOG_TIMER_PERIOD; + } - clock_interval_to_deadline(WATCHDOG_TIMER_PERIOD, kSecondScale, &deadline); + clock_interval_to_deadline(timeout, kSecondScale, &deadline); retain(); pending = thread_call_enter_delayed(fWatchdogTimer, deadline); @@ -5388,6 +5501,103 @@ IOService::ack_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 me->release(); } +//********************************************************************************* +// [private] start_spindump_timer +//********************************************************************************* + +void IOService::start_spindump_timer( const char * delay_type ) +{ + AbsoluteTime deadline; + boolean_t pending; + + if (!fSpinDumpTimer || !(kIOKextSpinDump & gIOKitDebug)) + return; + + if (gIOSpinDumpKextName[0] == '\0' && + !(PE_parse_boot_argn("swd_kext_name", 
&gIOSpinDumpKextName, + sizeof(gIOSpinDumpKextName)))) + { + return; + } + + if (strncmp(gIOSpinDumpKextName, fName, sizeof(gIOSpinDumpKextName)) != 0) + return; + + if (gIOSpinDumpDelayType[0] == '\0' && + !(PE_parse_boot_argn("swd_delay_type", &gIOSpinDumpDelayType, + sizeof(gIOSpinDumpDelayType)))) + { + strncpy(gIOSpinDumpDelayType, "SetState", sizeof(gIOSpinDumpDelayType)); + } + + if (strncmp(delay_type, gIOSpinDumpDelayType, sizeof(gIOSpinDumpDelayType)) != 0) + return; + + if (gIOSpinDumpDelayDuration == 0 && + !(PE_parse_boot_argn("swd_delay_duration", &gIOSpinDumpDelayDuration, + sizeof(gIOSpinDumpDelayDuration)))) + { + gIOSpinDumpDelayDuration = 300; + } + + clock_interval_to_deadline(gIOSpinDumpDelayDuration, kMillisecondScale, &deadline); + + retain(); + pending = thread_call_enter_delayed(fSpinDumpTimer, deadline); + if (pending) release(); +} + +//********************************************************************************* +// [private] stop_spindump_timer +//********************************************************************************* + +void IOService::stop_spindump_timer( void ) +{ + boolean_t pending; + + if (!fSpinDumpTimer || !(kIOKextSpinDump & gIOKitDebug)) + return; + + pending = thread_call_cancel(fSpinDumpTimer); + if (pending) release(); +} + + +//********************************************************************************* +// [static] actionSpinDumpTimerExpired +// +// Inside PM work loop's gate. +//********************************************************************************* + +IOReturn +IOService::actionSpinDumpTimerExpired( + OSObject * target, + void * arg0, void * arg1, + void * arg2, void * arg3 ) +{ + getPMRootDomain()->takeStackshot(false, false, true); + + return kIOReturnSuccess; +} + +//********************************************************************************* +// spindump_timer_expired +// +// Thread call function. Holds a retain while the callout is in flight. 
+//********************************************************************************* + +void +IOService::spindump_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 ) +{ + IOService * me = (IOService *) arg0; + + if (gIOPMWorkLoop) + { + gIOPMWorkLoop->runAction(&actionSpinDumpTimerExpired, me); + } + me->release(); +} + // MARK: - // MARK: Client Messaging @@ -5589,6 +5799,7 @@ bool IOService::tellClientsWithResponse( int messageType ) { IOPMInterestContext context; bool isRootDomain = IS_ROOT_DOMAIN; + uint32_t maxTimeOut = kMaxTimeRequested; PM_ASSERT_IN_GATE(); assert( fResponseArray == NULL ); @@ -5646,8 +5857,15 @@ bool IOService::tellClientsWithResponse( int messageType ) context.notifyType = fOutOfBandParameter; context.messageType = messageType; } - context.maxTimeRequested = k30Seconds; - + if(context.messageType == kIOMessageCanSystemSleep) + { + maxTimeOut = kCanSleepMaxTimeReq; + if(gCanSleepTimeout) + { + maxTimeOut = (gCanSleepTimeout*us_per_s); + } + } + context.maxTimeRequested = maxTimeOut; applyToInterested( gIOGeneralInterest, pmTellClientWithResponse, (void *) &context ); @@ -5673,7 +5891,15 @@ bool IOService::tellClientsWithResponse( int messageType ) applyToInterested( gIOAppPowerStateInterest, pmTellCapabilityAppWithResponse, (void *) &context ); fNotifyClientArray = context.notifyClients; - context.maxTimeRequested = k30Seconds; + if(context.messageType == kIOMessageCanSystemSleep) + { + maxTimeOut = kCanSleepMaxTimeReq; + if(gCanSleepTimeout) + { + maxTimeOut = (gCanSleepTimeout*us_per_s); + } + } + context.maxTimeRequested = maxTimeOut; break; case kNotifyCapabilityChangePriority: @@ -6936,7 +7162,7 @@ void IOService::releasePMRequest( IOPMRequest * request ) } //********************************************************************************* -// [private] submitPMRequest +// [private static] submitPMRequest //********************************************************************************* void IOService::submitPMRequest( 
IOPMRequest * request ) @@ -6957,7 +7183,7 @@ void IOService::submitPMRequest( IOPMRequest * request ) gIOPMRequestQueue->queuePMRequest( request ); } -void IOService::submitPMRequest( IOPMRequest ** requests, IOItemCount count ) +void IOService::submitPMRequests( IOPMRequest ** requests, IOItemCount count ) { assert( requests ); assert( count > 0 ); @@ -6977,12 +7203,12 @@ void IOService::submitPMRequest( IOPMRequest ** requests, IOItemCount count ) } //********************************************************************************* -// [private] servicePMRequestQueue +// [private] actionPMRequestQueue // -// Called from IOPMRequestQueue::checkForWork(). +// IOPMRequestQueue::checkForWork() passing a new request to the request target. //********************************************************************************* -bool IOService::servicePMRequestQueue( +bool IOService::actionPMRequestQueue( IOPMRequest * request, IOPMRequestQueue * queue ) { @@ -6990,34 +7216,40 @@ bool IOService::servicePMRequestQueue( if (initialized) { - // Work queue will immediately execute the queue'd request if possible. - // If execution blocks, the work queue will wait for a producer signal. - // Only need to signal more when completing attached requests. + // Work queue will immediately execute the request if the per-service + // request queue is empty. Note pwrMgt is the target's IOServicePM. more = gIOPMWorkQueue->queuePMRequest(request, pwrMgt); - return more; } + else + { + // Calling PM without PMinit() is not allowed, fail the request. + // Need to signal more when completing attached requests. - // Calling PM without PMinit() is not allowed, fail the request. 
+ PM_LOG("%s: PM not initialized\n", getName()); + PM_LOG1("[- %02x] %p [%p %s] !initialized\n", + request->getType(), OBFUSCATE(request), + OBFUSCATE(this), getName()); + + more = gIOPMCompletionQueue->queuePMRequest(request); + if (more) gIOPMWorkQueue->incrementProducerCount(); + } - PM_LOG("%s: PM not initialized\n", getName()); - fAdjustPowerScheduled = false; - more = gIOPMFreeQueue->queuePMRequest(request); - if (more) gIOPMWorkQueue->incrementProducerCount(); return more; } //********************************************************************************* -// [private] servicePMFreeQueue +// [private] actionPMCompletionQueue // -// Called from IOPMCompletionQueue::checkForWork(). +// IOPMCompletionQueue::checkForWork() passing a completed request to the +// request target. //********************************************************************************* -bool IOService::servicePMFreeQueue( +bool IOService::actionPMCompletionQueue( IOPMRequest * request, IOPMCompletionQueue * queue ) { - bool more = request->getNextRequest(); + bool more = (request->getNextRequest() != 0); IOPMRequest * root = request->getRootRequest(); if (root && (root != request)) @@ -7030,22 +7262,21 @@ bool IOService::servicePMFreeQueue( } //********************************************************************************* -// [private] retirePMRequest +// [private] actionPMWorkQueueRetire // -// Called by IOPMWorkQueue to retire a completed request. +// IOPMWorkQueue::checkForWork() passing a retired request to the request target. 
//********************************************************************************* -bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) +bool IOService::actionPMWorkQueueRetire( IOPMRequest * request, IOPMWorkQueue * queue ) { assert(request && queue); PM_LOG1("[- %02x] %p [%p %s] state %d, busy %d\n", request->getType(), OBFUSCATE(request), OBFUSCATE(this), getName(), - fMachineState, gIOPMBusyCount); - - // Catch requests created by idleTimerExpired(). + fMachineState, gIOPMBusyRequestCount); + // Catch requests created by idleTimerExpired() if (request->getType() == kIOPMRequestTypeActivityTickle) { uint32_t tickleFlags = (uint32_t)(uintptr_t) request->fArg1; @@ -7061,11 +7292,11 @@ bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) fIdleTimerGeneration++; } } + + // When the completed request is linked, tell work queue there is + // more work pending. - // If the request is linked, then Work queue has already incremented its - // producer count. - - return (gIOPMFreeQueue->queuePMRequest( request )); + return (gIOPMCompletionQueue->queuePMRequest( request )); } //********************************************************************************* @@ -7137,12 +7368,13 @@ bool IOService::isPMBlocked( IOPMRequest * request, int count ) } //********************************************************************************* -// [private] servicePMRequest +// [private] actionPMWorkQueueInvoke // -// Service a request from our work queue. +// IOPMWorkQueue::checkForWork() passing a request to the +// request target for execution. 
//********************************************************************************* -bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) +bool IOService::actionPMWorkQueueInvoke( IOPMRequest * request, IOPMWorkQueue * queue ) { bool done = false; int loop = 0; @@ -7156,7 +7388,7 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) OBFUSCATE(this), getName(), fMachineState); gIOPMRequest = request; - gIOPMWorkCount++; + gIOPMWorkInvokeCount++; // Every PM machine states must be handled in one of the cases below. @@ -7427,7 +7659,7 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) break; default: - panic("servicePMWorkQueue: unknown machine state %x", + panic("PMWorkQueueInvoke: unknown machine state %x", fMachineState); } @@ -7518,16 +7750,23 @@ void IOService::executePMRequest( IOPMRequest * request ) fIdleTimerIgnored = request->fArg0 ? 1 : 0; break; + case kIOPMRequestTypeQuiescePowerTree: + gIOPMWorkQueue->finishQuiesceRequest(request); + break; + default: panic("executePMRequest: unknown request type %x", request->getType()); } } //********************************************************************************* -// [private] servicePMReplyQueue +// [private] actionPMReplyQueue +// +// IOPMRequestQueue::checkForWork() passing a reply-type request to the +// request target. //********************************************************************************* -bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * queue ) +bool IOService::actionPMReplyQueue( IOPMRequest * request, IOPMRequestQueue * queue ) { bool more = false; @@ -7639,7 +7878,8 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q // Stop waiting for app replys. 
if ((fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown) || (fMachineState == kIOPM_OurChangeTellUserPMPolicyPowerDown) || - (fMachineState == kIOPM_SyncTellPriorityClientsPowerDown)) + (fMachineState == kIOPM_SyncTellPriorityClientsPowerDown) || + (fMachineState == kIOPM_SyncTellClientsPowerDown) ) cleanClientResponses(false); more = true; } @@ -7654,11 +7894,10 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q break; default: - panic("servicePMReplyQueue: unknown reply type %x", - request->getType()); + panic("PMReplyQueue: unknown reply type %x", request->getType()); } - more |= gIOPMFreeQueue->queuePMRequest(request); + more |= gIOPMCompletionQueue->queuePMRequest(request); if (more) gIOPMWorkQueue->incrementProducerCount(); @@ -7841,15 +8080,18 @@ bool IOPMRequest::init( IOService * target, IOOptionBits type ) if (!IOCommand::init()) return false; - fType = type; - fTarget = target; -#if NOT_READY - fCompletionStatus = kIOReturnSuccess; -#endif + fRequestType = type; + fTarget = target; if (fTarget) fTarget->retain(); + // Root node and root domain requests does not prevent the power tree from + // becoming quiescent. 
+ + fIsQuiesceBlocker = ((fTarget != gIOPMRootNode) && + (fTarget != IOService::getPMRootDomain())); + return true; } @@ -7861,14 +8103,14 @@ void IOPMRequest::reset( void ) detachNextRequest(); detachRootRequest(); - fType = kIOPMRequestTypeInvalid; - -#if NOT_READY - if (fCompletionAction) + if (fCompletionAction && (fRequestType == kIOPMRequestTypeQuiescePowerTree)) { - fCompletionAction(fCompletionTarget, fCompletionParam, fCompletionStatus); + // Call the completion on PM work loop context + fCompletionAction(fCompletionTarget, fCompletionParam); + fCompletionAction = 0; } -#endif + + fRequestType = kIOPMRequestTypeInvalid; if (fTarget) { @@ -7889,8 +8131,8 @@ bool IOPMRequest::attachNextRequest( IOPMRequest * next ) fRequestNext->fWorkWaitCount++; #if LOG_REQUEST_ATTACH PM_LOG("Attached next: %p [0x%x] -> %p [0x%x, %u] %s\n", - OBFUSCATE(this), (uint32_t) fType, OBFUSCATE(fRequestNext), - (uint32_t) fRequestNext->fType, + OBFUSCATE(this), fRequestType, OBFUSCATE(fRequestNext), + fRequestNext->fRequestType, (uint32_t) fRequestNext->fWorkWaitCount, fTarget->getName()); #endif @@ -7910,8 +8152,8 @@ bool IOPMRequest::detachNextRequest( void ) fRequestNext->fWorkWaitCount--; #if LOG_REQUEST_ATTACH PM_LOG("Detached next: %p [0x%x] -> %p [0x%x, %u] %s\n", - OBFUSCATE(this), (uint32_t) fType, OBFUSCATE(fRequestNext), - (uint32_t) fRequestNext->fType, + OBFUSCATE(this), fRequestType, OBFUSCATE(fRequestNext), + fRequestNext->fRequestType, (uint32_t) fRequestNext->fWorkWaitCount, fTarget->getName()); #endif @@ -8011,7 +8253,7 @@ void IOPMRequestQueue::queuePMRequest( IOPMRequest * request ) { assert(request); IOLockLock(fLock); - queue_enter(&fQueue, request, IOPMRequest *, fCommandChain); + queue_enter(&fQueue, request, typeof(request), fCommandChain); IOLockUnlock(fLock); if (workLoop) signalWorkAvailable(); } @@ -8027,7 +8269,7 @@ IOPMRequestQueue::queuePMRequestChain( IOPMRequest ** requests, IOItemCount coun { next = *requests; requests++; - queue_enter(&fQueue, 
next, IOPMRequest *, fCommandChain); + queue_enter(&fQueue, next, typeof(next), fCommandChain); } IOLockUnlock(fLock); if (workLoop) signalWorkAvailable(); @@ -8038,14 +8280,22 @@ bool IOPMRequestQueue::checkForWork( void ) Action dqAction = (Action) action; IOPMRequest * request; IOService * target; + int dequeueCount = 0; bool more = false; IOLockLock( fLock ); while (!queue_empty(&fQueue)) { - queue_remove_first( &fQueue, request, IOPMRequest *, fCommandChain ); - IOLockUnlock( fLock ); + if (dequeueCount++ >= kMaxDequeueCount) + { + // Allow other queues a chance to work + more = true; + break; + } + + queue_remove_first(&fQueue, request, typeof(request), fCommandChain); + IOLockUnlock(fLock); target = request->getTarget(); assert(target); more |= (*dqAction)( target, request, this ); @@ -8062,16 +8312,17 @@ bool IOPMRequestQueue::checkForWork( void ) //********************************************************************************* // IOPMWorkQueue Class // -// Queue of IOServicePM objects with busy IOPMRequest(s). +// Queue of IOServicePM objects, each with a queue of IOPMRequest sharing the +// same target. 
//********************************************************************************* OSDefineMetaClassAndStructors( IOPMWorkQueue, IOEventSource ); IOPMWorkQueue * -IOPMWorkQueue::create( IOService * inOwner, Action work, Action retire ) +IOPMWorkQueue::create( IOService * inOwner, Action invoke, Action retire ) { IOPMWorkQueue * me = OSTypeAlloc(IOPMWorkQueue); - if (me && !me->init(inOwner, work, retire)) + if (me && !me->init(inOwner, invoke, retire)) { me->release(); me = 0; @@ -8079,15 +8330,15 @@ IOPMWorkQueue::create( IOService * inOwner, Action work, Action retire ) return me; } -bool IOPMWorkQueue::init( IOService * inOwner, Action work, Action retire ) +bool IOPMWorkQueue::init( IOService * inOwner, Action invoke, Action retire ) { - if (!work || !retire || + if (!invoke || !retire || !IOEventSource::init(inOwner, (IOEventSourceAction)0)) return false; queue_init(&fWorkQueue); - fWorkAction = work; + fInvokeAction = invoke; fRetireAction = retire; fConsumerCount = fProducerCount = 0; @@ -8096,8 +8347,9 @@ bool IOPMWorkQueue::init( IOService * inOwner, Action work, Action retire ) bool IOPMWorkQueue::queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt ) { - bool more = false; - bool empty; + queue_head_t * requestQueue; + bool more = false; + bool empty; assert( request ); assert( pwrMgt ); @@ -8105,24 +8357,42 @@ bool IOPMWorkQueue::queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt assert( queue_next(&request->fCommandChain) == queue_prev(&request->fCommandChain) ); - gIOPMBusyCount++; + gIOPMBusyRequestCount++; + + if (request->isQuiesceType()) + { + if ((request->getTarget() == gIOPMRootNode) && !fQuiesceStartTime) + { + // Attach new quiesce request to all quiesce blockers in the queue + fQuiesceStartTime = mach_absolute_time(); + attachQuiesceRequest(request); + fQuiesceRequest = request; + } + } + else if (fQuiesceRequest && request->isQuiesceBlocker()) + { + // Attach the new quiesce blocker to the blocked quiesce request + 
request->attachNextRequest(fQuiesceRequest); + } // Add new request to the tail of the per-service request queue. // Then immediately check the request queue to minimize latency // if the queue was empty. - empty = queue_empty(&pwrMgt->RequestHead); - queue_enter(&pwrMgt->RequestHead, request, IOPMRequest *, fCommandChain); + requestQueue = &pwrMgt->RequestHead; + empty = queue_empty(requestQueue); + queue_enter(requestQueue, request, typeof(request), fCommandChain); if (empty) { - more = checkRequestQueue(&pwrMgt->RequestHead, &empty); + more = checkRequestQueue(requestQueue, &empty); if (!empty) { - // New Request is blocked, add IOServicePM to work queue. + // Request just added is blocked, add its target IOServicePM + // to the work queue. assert( queue_next(&pwrMgt->WorkChain) == queue_prev(&pwrMgt->WorkChain) ); - queue_enter(&fWorkQueue, pwrMgt, IOServicePM *, WorkChain); + queue_enter(&fWorkQueue, pwrMgt, typeof(pwrMgt), WorkChain); fQueueLength++; PM_LOG3("IOPMWorkQueue: [%u] added %s@%p to queue\n", fQueueLength, pwrMgt->Name, OBFUSCATE(pwrMgt)); @@ -8132,40 +8402,53 @@ bool IOPMWorkQueue::queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt return more; } -bool IOPMWorkQueue::checkRequestQueue( queue_head_t * queue, bool * empty ) +bool IOPMWorkQueue::checkRequestQueue( queue_head_t * requestQueue, bool * empty ) { IOPMRequest * request; IOService * target; bool more = false; bool done = false; - assert(!queue_empty(queue)); + assert(!queue_empty(requestQueue)); do { - request = (IOPMRequest *) queue_first(queue); + request = (typeof(request)) queue_first(requestQueue); if (request->isWorkBlocked()) - break; // cannot start, blocked on attached request + break; // request dispatch blocked on attached request target = request->getTarget(); - done = (*fWorkAction)( target, request, this ); + if (fInvokeAction) + { + done = (*fInvokeAction)( target, request, this ); + } + else + { + PM_LOG("PM request 0x%x dropped\n", request->getType()); + done = 
true; + } if (!done) - break; // work started, blocked on PM state machine + break; // PM state machine blocked - assert(gIOPMBusyCount > 0); - if (gIOPMBusyCount) - gIOPMBusyCount--; + assert(gIOPMBusyRequestCount > 0); + if (gIOPMBusyRequestCount) + gIOPMBusyRequestCount--; - queue_remove_first(queue, request, IOPMRequest *, fCommandChain); + if (request == fQuiesceRequest) + { + fQuiesceRequest = 0; + } + + queue_remove_first(requestQueue, request, typeof(request), fCommandChain); more |= (*fRetireAction)( target, request, this ); - done = queue_empty(queue); + done = queue_empty(requestQueue); } while (!done); *empty = done; if (more) { - // Retired request blocks another request, since the - // blocked request may reside in the work queue, we - // must bump the producer count to avoid work stall. + // Retired a request that may unblock a previously visited request + // that is still waiting on the work queue. Must trigger another + // queue check. fProducerCount++; } @@ -8183,8 +8466,8 @@ bool IOPMWorkQueue::checkForWork( void ) fStatCheckForWork++; #endif - // Each producer signal triggers a full iteration over - // all IOServicePM entries in the work queue. + // Iterate over all IOServicePM entries in the work queue, + // and check each entry's request queue. while (fConsumerCount != fProducerCount) { @@ -8200,31 +8483,31 @@ bool IOPMWorkQueue::checkForWork( void ) break; } fStatScanEntries++; - uint32_t cachedWorkCount = gIOPMWorkCount; + uint32_t cachedWorkCount = gIOPMWorkInvokeCount; #endif - entry = (IOServicePM *) queue_first(&fWorkQueue); + __IGNORE_WCASTALIGN(entry = (typeof(entry)) queue_first(&fWorkQueue)); while (!queue_end(&fWorkQueue, (queue_entry_t) entry)) { more |= checkRequestQueue(&entry->RequestHead, &empty); // Get next entry, points to head if current entry is last. 
- next = (IOServicePM *) queue_next(&entry->WorkChain); + __IGNORE_WCASTALIGN(next = (typeof(next)) queue_next(&entry->WorkChain)); - // if request queue is empty, remove IOServicePM from queue. + // if request queue is empty, remove IOServicePM from work queue. if (empty) { assert(fQueueLength); if (fQueueLength) fQueueLength--; PM_LOG3("IOPMWorkQueue: [%u] removed %s@%p from queue\n", fQueueLength, entry->Name, OBFUSCATE(entry)); - queue_remove(&fWorkQueue, entry, IOServicePM *, WorkChain); + queue_remove(&fWorkQueue, entry, typeof(entry), WorkChain); } entry = next; } #if WORK_QUEUE_STATS - if (cachedWorkCount == gIOPMWorkCount) + if (cachedWorkCount == gIOPMWorkInvokeCount) fStatNoWorkDone++; #endif } @@ -8243,6 +8526,42 @@ void IOPMWorkQueue::incrementProducerCount( void ) fProducerCount++; } +void IOPMWorkQueue::attachQuiesceRequest( IOPMRequest * quiesceRequest ) +{ + IOServicePM * entry; + IOPMRequest * request; + + if (queue_empty(&fWorkQueue)) + { + return; + } + + queue_iterate(&fWorkQueue, entry, typeof(entry), WorkChain) + { + queue_iterate(&entry->RequestHead, request, typeof(request), fCommandChain) + { + // Attach the quiesce request to any request in the queue that + // is not linked to a next request. These requests will block + // the quiesce request. 
+ + if (request->isQuiesceBlocker()) + { + request->attachNextRequest(quiesceRequest); + } + } + } +} + +void IOPMWorkQueue::finishQuiesceRequest( IOPMRequest * quiesceRequest ) +{ + if (fQuiesceRequest && (quiesceRequest == fQuiesceRequest) && + (fQuiesceStartTime != 0)) + { + fInvokeAction = 0; + fQuiesceFinishTime = mach_absolute_time(); + } +} + // MARK: - // MARK: IOPMCompletionQueue @@ -8280,7 +8599,7 @@ bool IOPMCompletionQueue::queuePMRequest( IOPMRequest * request ) assert(request); // unblock dependent request more = request->detachNextRequest(); - queue_enter(&fQueue, request, IOPMRequest *, fCommandChain); + queue_enter(&fQueue, request, typeof(request), fCommandChain); return more; } @@ -8292,13 +8611,13 @@ bool IOPMCompletionQueue::checkForWork( void ) IOService * target; bool more = false; - request = (IOPMRequest *) queue_first(&fQueue); + request = (typeof(request)) queue_first(&fQueue); while (!queue_end(&fQueue, (queue_entry_t) request)) { - next = (IOPMRequest *) queue_next(&request->fCommandChain); + next = (typeof(next)) queue_next(&request->fCommandChain); if (!request->isFreeBlocked()) { - queue_remove(&fQueue, request, IOPMRequest *, fCommandChain); + queue_remove(&fQueue, request, typeof(request), fCommandChain); target = request->getTarget(); assert(target); more |= (*dqAction)( target, request, this ); diff --git a/iokit/Kernel/IOServicePMPrivate.h b/iokit/Kernel/IOServicePMPrivate.h index 313d8e3c5..ca91e9d46 100644 --- a/iokit/Kernel/IOServicePMPrivate.h +++ b/iokit/Kernel/IOServicePMPrivate.h @@ -55,6 +55,7 @@ enum { kIOPMRequestTypeRequestPowerStateOverride = 0x0E, kIOPMRequestTypeSetIdleTimerPeriod = 0x0F, kIOPMRequestTypeIgnoreIdleTimer = 0x10, + kIOPMRequestTypeQuiescePowerTree = 0x11, /* Reply Types */ kIOPMRequestTypeReplyStart = 0x80, @@ -183,6 +184,7 @@ class IOServicePM : public OSObject thread_call_t SettleTimer; thread_call_t IdleTimer; thread_call_t WatchdogTimer; + thread_call_t SpinDumpTimer; // Settle time after 
changing power state. uint32_t SettleTimeUS; @@ -343,7 +345,7 @@ class IOServicePM : public OSObject // Serialize IOServicePM state for debug output. IOReturn gatedSerialize( OSSerialize * s ) const; - virtual bool serialize( OSSerialize * s ) const; + virtual bool serialize( OSSerialize * s ) const APPLE_KEXT_OVERRIDE; // PM log and trace void pmPrint( uint32_t event, uintptr_t param1, uintptr_t param2 ) const; @@ -358,6 +360,7 @@ class IOServicePM : public OSObject #define fSettleTimer pwrMgt->SettleTimer #define fIdleTimer pwrMgt->IdleTimer #define fWatchdogTimer pwrMgt->WatchdogTimer +#define fSpinDumpTimer pwrMgt->SpinDumpTimer #define fSettleTimeUS pwrMgt->SettleTimeUS #define fIdleTimerGeneration pwrMgt->IdleTimerGeneration #define fHeadNoteChangeFlags pwrMgt->HeadNoteChangeFlags @@ -552,26 +555,22 @@ extern const OSSymbol *gIOPMStatsDriverPSChangeSlow; // IOPMRequest //****************************************************************************** -typedef void (*IOPMCompletionAction)(void * target, void * param, IOReturn status); - class IOPMRequest : public IOCommand { OSDeclareDefaultStructors( IOPMRequest ) protected: - IOService * fTarget; // request target - IOPMRequest * fRequestNext; // the next request in the chain - IOPMRequest * fRequestRoot; // the root request in the issue tree - IOItemCount fWorkWaitCount; // execution blocked if non-zero - IOItemCount fFreeWaitCount; // completion blocked if non-zero - uint32_t fType; // request type + IOService * fTarget; // request target + IOPMRequest * fRequestNext; // the next request in the chain + IOPMRequest * fRequestRoot; // the root request in the call tree + IOItemCount fWorkWaitCount; // execution blocked if non-zero + IOItemCount fFreeWaitCount; // completion blocked if non-zero + uint32_t fRequestType; // request type + bool fIsQuiesceBlocker; -#if NOT_READY IOPMCompletionAction fCompletionAction; void * fCompletionTarget; void * fCompletionParam; - IOReturn fCompletionStatus; -#endif public: 
uint32_t fRequestTag; @@ -605,12 +604,12 @@ class IOPMRequest : public IOCommand inline uint32_t getType( void ) const { - return fType; + return fRequestType; } inline bool isReplyType( void ) const { - return (fType > kIOPMRequestTypeReplyStart); + return (fRequestType > kIOPMRequestTypeReplyStart); } inline IOService * getTarget( void ) const @@ -618,22 +617,26 @@ class IOPMRequest : public IOCommand return fTarget; } -#if NOT_READY - inline bool isCompletionInstalled( void ) + inline bool isQuiesceBlocker( void ) const + { + return fIsQuiesceBlocker; + } + + inline bool isQuiesceType( void ) const { - return (fCompletionAction != 0); + return ((kIOPMRequestTypeQuiescePowerTree == fRequestType) && + (fCompletionAction != 0) && (fCompletionTarget != 0)); } inline void installCompletionAction( - IOPMCompletionAction action, void * target, + IOPMCompletionAction action, void * param ) { - fCompletionAction = action; fCompletionTarget = target; + fCompletionAction = action; fCompletionParam = param; } -#endif /* NOT_READY */ static IOPMRequest * create( void ); bool init( IOService * owner, IOOptionBits type ); @@ -659,8 +662,10 @@ class IOPMRequestQueue : public IOEventSource queue_head_t fQueue; IOLock * fLock; - virtual bool checkForWork( void ); - virtual void free( void ); + enum { kMaxDequeueCount = 256 }; + + virtual bool checkForWork( void ) APPLE_KEXT_OVERRIDE; + virtual void free( void ) APPLE_KEXT_OVERRIDE; virtual bool init( IOService * inOwner, Action inAction ); public: @@ -691,21 +696,26 @@ class IOPMWorkQueue : public IOEventSource protected: queue_head_t fWorkQueue; - Action fWorkAction; + Action fInvokeAction; Action fRetireAction; uint32_t fQueueLength; uint32_t fConsumerCount; volatile uint32_t fProducerCount; + IOPMRequest * fQuiesceRequest; + AbsoluteTime fQuiesceStartTime; + AbsoluteTime fQuiesceFinishTime; - virtual bool checkForWork( void ); - virtual bool init( IOService * inOwner, Action work, Action retire ); + virtual bool checkForWork( 
void ) APPLE_KEXT_OVERRIDE; + virtual bool init( IOService * inOwner, Action invoke, Action retire ); bool checkRequestQueue( queue_head_t * queue, bool * empty ); public: - static IOPMWorkQueue * create( IOService * inOwner, Action work, Action retire ); + static IOPMWorkQueue * create( IOService * inOwner, Action invoke, Action retire ); bool queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt ); void signalWorkAvailable( void ); void incrementProducerCount( void ); + void attachQuiesceRequest( IOPMRequest * quiesceRequest ); + void finishQuiesceRequest( IOPMRequest * quiesceRequest ); }; //****************************************************************************** @@ -722,7 +732,7 @@ class IOPMCompletionQueue : public IOEventSource protected: queue_head_t fQueue; - virtual bool checkForWork( void ); + virtual bool checkForWork( void ) APPLE_KEXT_OVERRIDE; virtual bool init( IOService * inOwner, Action inAction ); public: diff --git a/iokit/Kernel/IOServicePrivate.h b/iokit/Kernel/IOServicePrivate.h index 465b8261a..af6ca5636 100644 --- a/iokit/Kernel/IOServicePrivate.h +++ b/iokit/Kernel/IOServicePrivate.h @@ -95,10 +95,10 @@ class _IOServiceNotifier : public IONotifier queue_head_t handlerInvocations; IOOptionBits state; - virtual void free(); - virtual void remove(); - virtual bool disable(); - virtual void enable( bool was ); + virtual void free() APPLE_KEXT_OVERRIDE; + virtual void remove() APPLE_KEXT_OVERRIDE; + virtual bool disable() APPLE_KEXT_OVERRIDE; + virtual void enable( bool was ) APPLE_KEXT_OVERRIDE; virtual void wait(); }; @@ -117,12 +117,12 @@ class _IOServiceInterestNotifier : public IONotifier queue_head_t handlerInvocations; IOOptionBits state; - virtual void free(); - virtual void remove(); - virtual bool disable(); - virtual void enable( bool was ); + virtual void free() APPLE_KEXT_OVERRIDE; + virtual void remove() APPLE_KEXT_OVERRIDE; + virtual bool disable() APPLE_KEXT_OVERRIDE; + virtual void enable( bool was ) 
APPLE_KEXT_OVERRIDE; virtual void wait(); - virtual bool init(); + virtual bool init() APPLE_KEXT_OVERRIDE; }; class _IOConfigThread : public OSObject @@ -132,7 +132,7 @@ class _IOConfigThread : public OSObject OSDeclareDefaultStructors(_IOConfigThread) public: - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; static void configThread( void ); static void main( void * arg, wait_result_t result ); @@ -171,10 +171,13 @@ class IOResources : public IOService public: static IOService * resources( void ); - virtual bool init( OSDictionary * dictionary = 0 ); - virtual IOWorkLoop * getWorkLoop( ) const; - virtual bool matchPropertyTable( OSDictionary * table ); - virtual IOReturn setProperties( OSObject * properties ); + virtual bool init( OSDictionary * dictionary = 0 ) APPLE_KEXT_OVERRIDE; + virtual IOReturn newUserClient(task_t owningTask, void * securityID, + UInt32 type, OSDictionary * properties, + IOUserClient ** handler) APPLE_KEXT_OVERRIDE; + virtual IOWorkLoop * getWorkLoop( ) const APPLE_KEXT_OVERRIDE; + virtual bool matchPropertyTable( OSDictionary * table ) APPLE_KEXT_OVERRIDE; + virtual IOReturn setProperties( OSObject * properties ) APPLE_KEXT_OVERRIDE; }; class _IOOpenServiceIterator : public OSIterator @@ -192,10 +195,10 @@ class _IOOpenServiceIterator : public OSIterator static OSIterator * iterator( OSIterator * _iter, const IOService * client, const IOService * provider ); - virtual void free(); - virtual void reset(); - virtual bool isValid(); - virtual OSObject * getNextObject(); + virtual void free() APPLE_KEXT_OVERRIDE; + virtual void reset() APPLE_KEXT_OVERRIDE; + virtual bool isValid() APPLE_KEXT_OVERRIDE; + virtual OSObject * getNextObject() APPLE_KEXT_OVERRIDE; }; extern const OSSymbol * gIOConsoleUsersKey; diff --git a/iokit/Kernel/IOStartIOKit.cpp b/iokit/Kernel/IOStartIOKit.cpp index 787a69bf2..8177603cc 100644 --- a/iokit/Kernel/IOStartIOKit.cpp +++ b/iokit/Kernel/IOStartIOKit.cpp @@ -76,25 +76,12 @@ void 
IOKitInitializeTime( void ) clock_initialize_calendar(); } -void IOKitResetTime( void ) -{ - clock_sec_t secs; - clock_usec_t microsecs; - - clock_initialize_calendar(); - - clock_get_calendar_microtime(&secs, &microsecs); - gIOLastWakeTime.tv_sec = secs; - gIOLastWakeTime.tv_usec = microsecs; - - IOService::updateConsoleUsers(NULL, kIOMessageSystemHasPoweredOn); -} - void iokit_post_constructor_init(void) { IORegistryEntry * root; OSObject * obj; + IOCPUInitialize(); root = IORegistryEntry::initialize(); assert( root ); IOService::initialize(); @@ -135,18 +122,23 @@ void StartIOKit( void * p1, void * p2, void * p3, void * p4 ) int debugFlags; if( PE_parse_boot_argn( "io", &debugFlags, sizeof (debugFlags) )) - gIOKitDebug = debugFlags; + gIOKitDebug = debugFlags; +#if DEVELOPMENT || DEBUG + else gIOKitDebug |= kIOWaitQuietPanics; +#endif /* DEVELOPMENT || DEBUG */ if( PE_parse_boot_argn( "iotrace", &debugFlags, sizeof (debugFlags) )) - gIOKitTrace = debugFlags; + gIOKitTrace = debugFlags; - // Compat for boot-args - gIOKitTrace |= (gIOKitDebug & kIOTraceCompatBootArgs); + // Compat for boot-args + gIOKitTrace |= (gIOKitDebug & kIOTraceCompatBootArgs); // Check for the log synchronous bit set in io if (gIOKitDebug & kIOLogSynchronous) debug_mode = true; + if( PE_parse_boot_argn( "pmtimeout", &debugFlags, sizeof (debugFlags) )) + gCanSleepTimeout = debugFlags; // // Have to start IOKit environment before we attempt to start // the C++ runtime environment.
At some stage we have to clean up diff --git a/iokit/Kernel/IOSubMemoryDescriptor.cpp b/iokit/Kernel/IOSubMemoryDescriptor.cpp index c82a927ee..5b377141a 100644 --- a/iokit/Kernel/IOSubMemoryDescriptor.cpp +++ b/iokit/Kernel/IOSubMemoryDescriptor.cpp @@ -93,6 +93,8 @@ bool IOSubMemoryDescriptor::initSubRange( IOMemoryDescriptor * parent, _start = offset; _length = length; _flags = direction; + _flags |= kIOMemoryThreadSafe; + #ifndef __LP64__ _direction = (IODirection) (_flags & kIOMemoryDirectionMask); #endif /* !__LP64__ */ @@ -205,3 +207,9 @@ IOSubMemoryDescriptor::getPreparationID( void ) return (super::getPreparationID()); } +IOReturn +IOSubMemoryDescriptor::getPageCounts(IOByteCount * residentPageCount, + IOByteCount * dirtyPageCount) +{ + return (_parent->getPageCounts(residentPageCount, dirtyPageCount)); +} diff --git a/iokit/Kernel/IOUserClient.cpp b/iokit/Kernel/IOUserClient.cpp index 9f3587844..6c9ec5df7 100644 --- a/iokit/Kernel/IOUserClient.cpp +++ b/iokit/Kernel/IOUserClient.cpp @@ -166,7 +166,7 @@ class IOMachPort : public OSObject static mach_port_name_t makeSendRightForTask( task_t task, io_object_t obj, ipc_kobject_type_t type ); - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; }; #define super OSObject @@ -363,13 +363,13 @@ class IOUserNotification : public OSIterator public: - virtual bool init( void ); - virtual void free(); + virtual bool init( void ) APPLE_KEXT_OVERRIDE; + virtual void free() APPLE_KEXT_OVERRIDE; virtual void setNotification( IONotifier * obj ); - virtual void reset(); - virtual bool isValid(); + virtual void reset() APPLE_KEXT_OVERRIDE; + virtual bool isValid() APPLE_KEXT_OVERRIDE; }; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -466,13 +466,13 @@ class IOServiceUserNotification : public IOUserNotification virtual bool init( mach_port_t port, natural_t type, void * reference, vm_size_t referenceSize, bool clientIs64 ); - virtual void free(); + virtual void free() 
APPLE_KEXT_OVERRIDE; static bool _handler( void * target, void * ref, IOService * newService, IONotifier * notifier ); virtual bool handler( void * ref, IOService * newService ); - virtual OSObject * getNextObject(); + virtual OSObject * getNextObject() APPLE_KEXT_OVERRIDE; }; class IOServiceMessageUserNotification : public IOUserNotification @@ -498,7 +498,7 @@ class IOServiceMessageUserNotification : public IOUserNotification vm_size_t extraSize, bool clientIs64 ); - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; static IOReturn _handler( void * target, void * ref, UInt32 messageType, IOService * provider, @@ -507,7 +507,7 @@ class IOServiceMessageUserNotification : public IOUserNotification UInt32 messageType, IOService * provider, void * messageArgument, vm_size_t argSize ); - virtual OSObject * getNextObject(); + virtual OSObject * getNextObject() APPLE_KEXT_OVERRIDE; }; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -1505,6 +1505,27 @@ extern "C" { /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +// Create a vm_map_copy_t or kalloc'ed data for memory +// to be copied out. ipc will free after the copyout. 
+ +static kern_return_t copyoutkdata( const void * data, vm_size_t len, + io_buf_ptr_t * buf ) +{ + kern_return_t err; + vm_map_copy_t copy; + + err = vm_map_copyin( kernel_map, CAST_USER_ADDR_T(data), len, + false /* src_destroy */, &copy); + + assert( err == KERN_SUCCESS ); + if( err == KERN_SUCCESS ) + *buf = (char *) copy; + + return( err ); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + /* Routine io_server_version */ kern_return_t is_io_server_version( mach_port_t master_port, @@ -1516,20 +1537,26 @@ kern_return_t is_io_server_version( /* Routine io_object_get_class */ kern_return_t is_io_object_get_class( - io_object_t object, - io_name_t className ) + io_object_t object, + io_name_t className ) { - const OSMetaClass* my_obj = NULL; + const OSMetaClass* my_obj = NULL; + const char * my_class_name = NULL; - if( !object) - return( kIOReturnBadArgument ); + if( !object) + return( kIOReturnBadArgument ); - my_obj = object->getMetaClass(); - if (!my_obj) { - return (kIOReturnNotFound); - } + if ( !my_class_name ) { + my_obj = object->getMetaClass(); + if (!my_obj) { + return (kIOReturnNotFound); + } + + my_class_name = my_obj->getClassName(); + } - strlcpy( className, my_obj->getClassName(), sizeof(io_name_t)); + strlcpy( className, my_class_name, sizeof(io_name_t)); + return( kIOReturnSuccess ); } @@ -1623,6 +1650,7 @@ kern_return_t is_io_object_conforms_to( return( kIOReturnBadArgument ); *conforms = (0 != object->metaCast( className )); + return( kIOReturnSuccess ); } @@ -2312,6 +2340,58 @@ kern_return_t is_io_registry_entry_from_path( return( kIOReturnSuccess ); } + +/* Routine io_registry_entry_from_path */ +kern_return_t is_io_registry_entry_from_path_ool( + mach_port_t master_port, + io_string_inband_t path, + io_buf_ptr_t path_ool, + mach_msg_type_number_t path_oolCnt, + kern_return_t *result, + io_object_t *registry_entry) +{ + IORegistryEntry * entry; + vm_map_offset_t map_data; + const char * cpath; + IOReturn res; +
kern_return_t err; + + if (master_port != master_device_port) return(kIOReturnNotPrivileged); + + map_data = 0; + entry = 0; + res = err = KERN_SUCCESS; + if (path[0]) cpath = path; + else + { + if (!path_oolCnt) return(kIOReturnBadArgument); + if (path_oolCnt > (sizeof(io_struct_inband_t) * 1024)) return(kIOReturnMessageTooLarge); + + err = vm_map_copyout(kernel_map, &map_data, (vm_map_copy_t) path_ool); + if (KERN_SUCCESS == err) + { + // must return success to mig after vm_map_copyout() succeeds, so result is actual + cpath = CAST_DOWN(const char *, map_data); + if (cpath[path_oolCnt - 1]) res = kIOReturnBadArgument; + } + } + + if ((KERN_SUCCESS == err) && (KERN_SUCCESS == res)) + { + entry = IORegistryEntry::fromPath(cpath); + res = entry ? kIOReturnSuccess : kIOReturnNotFound; + } + + if (map_data) vm_deallocate(kernel_map, map_data, path_oolCnt); + + if (KERN_SUCCESS != err) res = err; + *registry_entry = entry; + *result = res; + + return (err); +} + + /* Routine io_registry_entry_in_plane */ kern_return_t is_io_registry_entry_in_plane( io_object_t registry_entry, @@ -2342,6 +2422,42 @@ kern_return_t is_io_registry_entry_get_path( return( kIOReturnBadArgument ); } +/* Routine io_registry_entry_get_path */ +kern_return_t is_io_registry_entry_get_path_ool( + io_object_t registry_entry, + io_name_t plane, + io_string_inband_t path, + io_buf_ptr_t *path_ool, + mach_msg_type_number_t *path_oolCnt) +{ + enum { kMaxPath = 16384 }; + IOReturn err; + int length; + char * buf; + + CHECK( IORegistryEntry, registry_entry, entry ); + + *path_ool = NULL; + *path_oolCnt = 0; + length = sizeof(io_string_inband_t); + if (entry->getPath(path, &length, IORegistryEntry::getPlane(plane))) err = kIOReturnSuccess; + else + { + length = kMaxPath; + buf = IONew(char, length); + if (!buf) err = kIOReturnNoMemory; + else if (!entry->getPath(buf, &length, IORegistryEntry::getPlane(plane))) err = kIOReturnError; + else + { + *path_oolCnt = length; + err = copyoutkdata(buf, length, 
path_ool); + } + if (buf) IODelete(buf, char, kMaxPath); + } + + return (err); +} + /* Routine io_registry_entry_get_name */ kern_return_t is_io_registry_entry_get_name( @@ -2409,25 +2525,6 @@ kern_return_t is_io_registry_entry_get_registry_entry_id( return (kIOReturnSuccess); } -// Create a vm_map_copy_t or kalloc'ed data for memory -// to be copied out. ipc will free after the copyout. - -static kern_return_t copyoutkdata( const void * data, vm_size_t len, - io_buf_ptr_t * buf ) -{ - kern_return_t err; - vm_map_copy_t copy; - - err = vm_map_copyin( kernel_map, CAST_USER_ADDR_T(data), len, - false /* src_destroy */, &copy); - - assert( err == KERN_SUCCESS ); - if( err == KERN_SUCCESS ) - *buf = (char *) copy; - - return( err ); -} - /* Routine io_registry_entry_get_property */ kern_return_t is_io_registry_entry_get_property_bytes( io_object_t registry_entry, @@ -2799,6 +2896,7 @@ kern_return_t is_io_registry_entry_get_property_bin( return( err ); } + /* Routine io_registry_entry_set_properties */ kern_return_t is_io_registry_entry_set_properties ( @@ -2981,6 +3079,8 @@ kern_return_t is_io_service_open_extended( CHECK( IOService, _service, service ); + if (!owningTask) return (kIOReturnBadArgument); + do { if (properties) @@ -3148,6 +3248,8 @@ kern_return_t is_io_connect_map_memory_into_task CHECK( IOUserClient, connection, client ); + if (!into_task) return (kIOReturnBadArgument); + IOStatisticsClientCall(); map = client->mapClientMemory64( memory_type, into_task, flags, *address ); @@ -3252,6 +3354,8 @@ kern_return_t is_io_connect_unmap_memory_from_task CHECK( IOUserClient, connection, client ); + if (!from_task) return (kIOReturnBadArgument); + IOStatisticsClientCall(); err = client->clientMemoryForType( (UInt32) memory_type, &options, &memory ); @@ -4148,8 +4252,7 @@ kern_return_t shim_io_connect_method_scalarI_structureI( do { - if( (kIOUCVariableStructureSize != method->count0) - && (inputCount != method->count0)) + if (inputCount != method->count0) { IOLog("%s:
IOUserClient inputCount count mismatch\n", object->getName()); continue; @@ -4225,8 +4328,7 @@ kern_return_t shim_io_async_method_scalarI_structureI( do { - if( (kIOUCVariableStructureSize != method->count0) - && (inputCount != method->count0)) + if (inputCount != method->count0) { IOLog("%s: IOUserClient inputCount count mismatch\n", object->getName()); continue; @@ -4815,8 +4917,8 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume if (args->asyncWakePort) { IOExternalAsyncMethod * method; - - if( !(method = getAsyncTargetAndMethodForIndex(&object, selector)) ) + object = 0; + if( !(method = getAsyncTargetAndMethodForIndex(&object, selector)) || !object ) return (kIOReturnUnsupported); if (kIOUCForegroundOnly & method->flags) @@ -4864,8 +4966,8 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume else { IOExternalMethod * method; - - if( !(method = getTargetAndMethodForIndex(&object, selector)) ) + object = 0; + if( !(method = getTargetAndMethodForIndex(&object, selector)) || !object ) return (kIOReturnUnsupported); if (kIOUCForegroundOnly & method->flags) diff --git a/iokit/Kernel/IOWorkLoop.cpp b/iokit/Kernel/IOWorkLoop.cpp index 0789f66bb..6207b1ea1 100644 --- a/iokit/Kernel/IOWorkLoop.cpp +++ b/iokit/Kernel/IOWorkLoop.cpp @@ -255,6 +255,7 @@ void IOWorkLoop::free() // Either way clean up all of our resources and return. 
if (controlG) { + controlG->workLoop = 0; controlG->release(); controlG = 0; } @@ -559,10 +560,10 @@ IOReturn IOWorkLoop::_maintRequest(void *inC, void *inD, void *, void *) if (eventChain == inEvent) eventChain = inEvent->getNext(); else { - IOEventSource *event, *next; + IOEventSource *event, *next = 0; event = eventChain; - while ((next = event->getNext()) && next != inEvent) + if (event) while ((next = event->getNext()) && (next != inEvent)) event = next; if (!next) { @@ -576,10 +577,10 @@ IOReturn IOWorkLoop::_maintRequest(void *inC, void *inD, void *, void *) if (passiveEventChain == inEvent) passiveEventChain = inEvent->getNext(); else { - IOEventSource *event, *next; + IOEventSource *event, *next = 0; event = passiveEventChain; - while ((next = event->getNext()) && next != inEvent) + if (event) while ((next = event->getNext()) && (next != inEvent)) event = next; if (!next) { diff --git a/iokit/Kernel/RootDomainUserClient.h b/iokit/Kernel/RootDomainUserClient.h index 9083d8772..ac09ffbf0 100644 --- a/iokit/Kernel/RootDomainUserClient.h +++ b/iokit/Kernel/RootDomainUserClient.h @@ -71,24 +71,24 @@ class RootDomainUserClient : public IOUserClient public: - virtual IOReturn clientClose( void ); + virtual IOReturn clientClose( void ) APPLE_KEXT_OVERRIDE; virtual IOReturn externalMethod( uint32_t selector, IOExternalMethodArguments * arguments, IOExternalMethodDispatch * dispatch, OSObject * target, - void * reference ); + void * reference ) APPLE_KEXT_OVERRIDE; - virtual bool start( IOService * provider ); + virtual bool start( IOService * provider ) APPLE_KEXT_OVERRIDE; virtual bool initWithTask(task_t owningTask, void *security_id, - UInt32 type, OSDictionary * properties); + UInt32 type, OSDictionary * properties) APPLE_KEXT_OVERRIDE; // Unused - retained for symbol compatibility void setPreventative(UInt32 on_off, UInt32 types_of_sleep); // Unused - retained for symbol compatibility - virtual IOExternalMethod * getTargetAndMethodForIndex( IOService ** 
targetP, UInt32 index ); + virtual IOExternalMethod * getTargetAndMethodForIndex( IOService ** targetP, UInt32 index ) APPLE_KEXT_OVERRIDE; }; diff --git a/iokit/Tests/TestIOMemoryDescriptor.cpp b/iokit/Tests/TestIOMemoryDescriptor.cpp new file mode 100644 index 000000000..926681a7e --- /dev/null +++ b/iokit/Tests/TestIOMemoryDescriptor.cpp @@ -0,0 +1,446 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifndef __LP64__ +#include +#endif /* !__LP64__ */ +#include +#include +#include + +#include +#include +#include + +__BEGIN_DECLS +#include +#include +#include +#include + +#include +#include +#include +#include +__END_DECLS + + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#if DEVELOPMENT || DEBUG + +static int IOMultMemoryDescriptorTest(int newValue) +{ + IOMemoryDescriptor * mds[3]; + IOMultiMemoryDescriptor * mmd; + IOMemoryMap * map; + void * addr; + uint8_t * data; + uint32_t i; + IOAddressRange ranges[2]; + + data = (typeof(data)) IOMallocAligned(ptoa(8), page_size); + for (i = 0; i < ptoa(8); i++) data[i] = atop(i) | 0xD0; + + ranges[0].address = (IOVirtualAddress)(data + ptoa(4)); + ranges[0].length = ptoa(4); + ranges[1].address = (IOVirtualAddress)(data + ptoa(0)); + ranges[1].length = ptoa(4); + + mds[0] = IOMemoryDescriptor::withAddressRanges(&ranges[0], 2, kIODirectionOutIn, kernel_task); + + mds[1] = IOSubMemoryDescriptor::withSubRange(mds[0], ptoa(3), ptoa(2), kIODirectionOutIn); + mds[2] = IOSubMemoryDescriptor::withSubRange(mds[0], ptoa(7), ptoa(1), kIODirectionOutIn); + + mmd = IOMultiMemoryDescriptor::withDescriptors(&mds[0], sizeof(mds)/sizeof(mds[0]), kIODirectionOutIn, false); + mds[2]->release(); + mds[1]->release(); + mds[0]->release(); + map = mmd->createMappingInTask(kernel_task, 0, kIOMapAnywhere, ptoa(7), mmd->getLength() - ptoa(7)); + mmd->release(); + assert(map); + + addr = (void *) map->getVirtualAddress(); + assert(ptoa(4) == map->getLength()); + assert(0xd3d3d3d3 == ((uint32_t *)addr)[ptoa(0) / sizeof(uint32_t)]); + assert(0xd7d7d7d7 == ((uint32_t *)addr)[ptoa(1) / sizeof(uint32_t)]); + assert(0xd0d0d0d0 == ((uint32_t *)addr)[ptoa(2) / sizeof(uint32_t)]); + assert(0xd3d3d3d3 == ((uint32_t *)addr)[ptoa(3) / sizeof(uint32_t)]); + map->release(); + 
IOFreeAligned(data, ptoa(8)); + + return (0); +} + + +int IOMemoryDescriptorTest(int newValue) +{ + int result; + +#if 0 + if (5 == newValue) + { + IOReturn ret; + IOMemoryDescriptor * md; + IODMACommand * dma; + IODMACommand::SegmentOptions segOptions = + { + .fStructSize = sizeof(segOptions), + .fNumAddressBits = 64, + .fMaxSegmentSize = 4096, + .fMaxTransferSize = 128*1024, + .fAlignment = 4, + .fAlignmentLength = 4, + .fAlignmentInternalSegments = 0x1000 + }; + + IOAddressRange ranges[3][2] = + { + { + { (uintptr_t) &IOMemoryDescriptorTest, 0x2ffc }, + { 0, 0 }, + }, + { + { ranges[0][0].address, 0x10 }, + { 0x3000 + ranges[0][0].address, 0xff0 }, + }, + { + { ranges[0][0].address, 0x2ffc }, + { trunc_page(ranges[0][0].address), 0x800 }, + }, + }; + static const uint32_t rangesCount[3] = { 1, 2, 2 }; + uint32_t test; + + for (test = 0; test < 3; test++) + { + kprintf("---[%d] address 0x%qx-0x%qx, 0x%qx-0x%qx\n", test, + ranges[test][0].address, ranges[test][0].length, + ranges[test][1].address, ranges[test][1].length); + + md = IOMemoryDescriptor::withAddressRanges((IOAddressRange*)&ranges[test][0], rangesCount[test], kIODirectionOut, kernel_task); + assert(md); + ret = md->prepare(); + assert(kIOReturnSuccess == ret); + dma = IODMACommand::withSpecification(kIODMACommandOutputHost64, &segOptions, + IODMACommand::kMapped, NULL, NULL); + assert(dma); + ret = dma->setMemoryDescriptor(md, true); + if (kIOReturnSuccess == ret) + { + IODMACommand::Segment64 segments[1]; + UInt32 numSegments; + UInt64 offset; + + offset = 0; + do + { + numSegments = 1; + ret = dma->gen64IOVMSegments(&offset, &segments[0], &numSegments); + assert(kIOReturnSuccess == ret); + assert(1 == numSegments); + kprintf("seg 0x%qx, 0x%qx\n", segments[0].fIOVMAddr, segments[0].fLength); + } + while (offset < md->getLength()); + + ret = dma->clearMemoryDescriptor(true); + assert(kIOReturnSuccess == ret); + dma->release(); + } + md->release(); + } + + return (kIOReturnSuccess); + } + else if (4 == 
newValue) + { + IOService * isp; + IOMapper * mapper; + IOBufferMemoryDescriptor * md1; + IODMACommand * dma; + IOReturn ret; + size_t bufSize = 8192 * 8192 * sizeof(uint32_t); + uint64_t start, time, nano; + + isp = IOService::copyMatchingService(IOService::nameMatching("isp")); + assert(isp); + mapper = IOMapper::copyMapperForDeviceWithIndex(isp, 0); + assert(mapper); + + md1 = IOBufferMemoryDescriptor::inTaskWithOptions(TASK_NULL, + kIODirectionOutIn | kIOMemoryPersistent | kIOMemoryPageable, + bufSize, page_size); + + ret = md1->prepare(); + assert(kIOReturnSuccess == ret); + + IODMAMapSpecification mapSpec; + bzero(&mapSpec, sizeof(mapSpec)); + uint64_t mapped; + uint64_t mappedLength; + + start = mach_absolute_time(); + + ret = md1->dmaMap(mapper, NULL, &mapSpec, 0, bufSize, &mapped, &mappedLength); + assert(kIOReturnSuccess == ret); + + time = mach_absolute_time() - start; + + absolutetime_to_nanoseconds(time, &nano); + kprintf("time %lld us\n", nano / 1000ULL); + kprintf("seg0 0x%qx, 0x%qx\n", mapped, mappedLength); + + assert(md1); + + dma = IODMACommand::withSpecification(kIODMACommandOutputHost32, + 32, 0, IODMACommand::kMapped, 0, 1, mapper, NULL); + + assert(dma); + + start = mach_absolute_time(); + ret = dma->setMemoryDescriptor(md1, true); + assert(kIOReturnSuccess == ret); + time = mach_absolute_time() - start; + + absolutetime_to_nanoseconds(time, &nano); + kprintf("time %lld us\n", nano / 1000ULL); + + + IODMACommand::Segment32 segments[1]; + UInt32 numSegments = 1; + UInt64 offset; + + offset = 0; + ret = dma->gen32IOVMSegments(&offset, &segments[0], &numSegments); + assert(kIOReturnSuccess == ret); + assert(1 == numSegments); + kprintf("seg0 0x%x, 0x%x\n", (int)segments[0].fIOVMAddr, (int)segments[0].fLength); + + ret = dma->clearMemoryDescriptor(true); + assert(kIOReturnSuccess == ret); + + md1->release(); + + return (kIOReturnSuccess); + } + + if (3 == newValue) + { + IOBufferMemoryDescriptor * md1; + IOBufferMemoryDescriptor * md2; + 
IOMemoryMap * map1; + IOMemoryMap * map2; + uint32_t * buf1; + uint32_t * buf2; + IOReturn err; + + md1 = IOBufferMemoryDescriptor::inTaskWithOptions(TASK_NULL, + kIODirectionOutIn | kIOMemoryPersistent | kIOMemoryPageable, + 64*1024, page_size); + assert(md1); + map1 = md1->createMappingInTask(kernel_task, 0, kIOMapAnywhere | kIOMapUnique); + assert(map1); + buf1 = (uint32_t *) map1->getVirtualAddress(); + + md2 = IOBufferMemoryDescriptor::inTaskWithOptions(TASK_NULL, + kIODirectionOutIn | kIOMemoryPersistent | kIOMemoryPageable, + 64*1024, page_size); + assert(md2); + map2 = md2->createMappingInTask(kernel_task, 0, kIOMapAnywhere | kIOMapUnique); + assert(map2); + buf2 = (uint32_t *) map2->getVirtualAddress(); + + memset(buf1, 0x11, 64*1024L); + memset(buf2, 0x22, 64*1024L); + + kprintf("md1 %p, map1 %p, buf2 %p; md2 %p, map2 %p, buf2 %p\n", md1, map1, buf1, md2, map2, buf2); + + kprintf("no redir 0x%08x, 0x%08x\n", buf1[0], buf2[0]); + assert(0x11111111 == buf1[0]); + assert(0x22222222 == buf2[0]); + err = map1->redirect(md2, 0, 0ULL); + kprintf("redir md2(0x%x) 0x%08x, 0x%08x\n", err, buf1[0], buf2[0]); + assert(0x11111111 == buf2[0]); + assert(0x22222222 == buf1[0]); + err = map1->redirect(md1, 0, 0ULL); + kprintf("redir md1(0x%x) 0x%08x, 0x%08x\n", err, buf1[0], buf2[0]); + assert(0x11111111 == buf1[0]); + assert(0x22222222 == buf2[0]); + map1->release(); + map2->release(); + md1->release(); + md2->release(); + } +#endif + + result = IOMultMemoryDescriptorTest(newValue); + if (result) return (result); + + IOGeneralMemoryDescriptor * md; + vm_offset_t data[2]; + vm_size_t bsize = 16*1024*1024; + vm_size_t srcsize, srcoffset, mapoffset, size; + kern_return_t kr; + + kr = vm_allocate(kernel_map, &data[0], bsize, VM_FLAGS_ANYWHERE); + vm_inherit(kernel_map, data[0] + ptoa(1), ptoa(1), VM_INHERIT_NONE); + vm_inherit(kernel_map, data[0] + ptoa(16), ptoa(4), VM_INHERIT_NONE); + + IOLog("data 0x%lx, 0x%lx\n", (long)data[0], (long)data[1]); + + uint32_t idx, offidx; + 
for (idx = 0; idx < (bsize / sizeof(uint32_t)); idx++) + { + ((uint32_t*)data[0])[idx] = idx; + } + + for (srcoffset = 0; srcoffset < bsize; srcoffset = ((srcoffset << 2) + 0x40c)) + { + for (srcsize = 4; srcsize < (bsize - srcoffset - 1); srcsize = ((srcsize << 2) + 0x3fc)) + { + IOAddressRange ranges[3]; + uint32_t rangeCount = 1; + + bzero(&ranges[0], sizeof(ranges)); + ranges[0].address = data[0] + srcoffset; + ranges[0].length = srcsize; + + if (srcsize > ptoa(5)) + { + ranges[0].length = 7634; + ranges[1].length = 9870; + ranges[2].length = srcsize - ranges[0].length - ranges[1].length; + ranges[1].address = ranges[0].address + ranges[0].length; + ranges[2].address = ranges[1].address + ranges[1].length; + rangeCount = 3; + } + else if ((srcsize > ptoa(2)) && !(page_mask & srcoffset)) + { + ranges[0].length = ptoa(1); + ranges[1].length = ptoa(1); + ranges[2].length = srcsize - ranges[0].length - ranges[1].length; + ranges[0].address = data[0] + srcoffset + ptoa(1); + ranges[1].address = data[0] + srcoffset; + ranges[2].address = ranges[0].address + ranges[0].length; + rangeCount = 3; + } + + md = OSDynamicCast(IOGeneralMemoryDescriptor, + IOMemoryDescriptor::withAddressRanges(&ranges[0], rangeCount, kIODirectionInOut, kernel_task)); + assert(md); + + IOLog("IOMemoryDescriptor::withAddressRanges [0x%lx @ 0x%lx]\n[0x%llx, 0x%llx],\n[0x%llx, 0x%llx],\n[0x%llx, 0x%llx]\n", + (long) srcsize, (long) srcoffset, + (long long) ranges[0].address - data[0], (long long) ranges[0].length, + (long long) ranges[1].address - data[0], (long long) ranges[1].length, + (long long) ranges[2].address - data[0], (long long) ranges[2].length); + + if (kIOReturnSuccess == kr) + { + for (mapoffset = 0; mapoffset < srcsize; mapoffset = ((mapoffset << 1) + 0xf00)) + { + for (size = 4; size < (srcsize - mapoffset - 1); size = ((size << 2) + 0x200)) + { + IOMemoryMap * map; + mach_vm_address_t addr = 0; + uint32_t data; + +// IOLog("createMappingInTask(kernel_task, 0, kIOMapAnywhere, 
mapoffset, size); + if (map) addr = map->getAddress(); + else kr = kIOReturnError; + +// IOLog(">mapRef 0x%x %llx\n", kr, addr); + + if (kIOReturnSuccess != kr) break; + kr = md->prepare(); + if (kIOReturnSuccess != kr) + { + panic("prepare() fail 0x%x\n", kr); + break; + } + for (idx = 0; idx < size; idx += sizeof(uint32_t)) + { + offidx = (idx + mapoffset + srcoffset); + if ((srcsize <= ptoa(5)) && (srcsize > ptoa(2)) && !(page_mask & srcoffset)) + { + if (offidx < ptoa(2)) offidx ^= ptoa(1); + } + offidx /= sizeof(uint32_t); + + if (offidx != ((uint32_t*)addr)[idx/sizeof(uint32_t)]) + { + panic("vm mismatch md %p map %p, @ 0x%x, 0x%lx, 0x%lx, \n", md, map, idx, (long) srcoffset, (long) mapoffset); + kr = kIOReturnBadMedia; + } + else + { + if (sizeof(data) != md->readBytes(mapoffset + idx, &data, sizeof(data))) data = 0; + if (offidx != data) + { + panic("phys mismatch md %p map %p, @ 0x%x, 0x%lx, 0x%lx, \n", md, map, idx, (long) srcoffset, (long) mapoffset); + kr = kIOReturnBadMedia; + } + } + } + md->complete(); + map->release(); +// IOLog("unmapRef %llx\n", addr); + } + if (kIOReturnSuccess != kr) break; + } + } + md->release(); + if (kIOReturnSuccess != kr) break; + } + if (kIOReturnSuccess != kr) break; + } + + if (kIOReturnSuccess != kr) IOLog("FAIL: src 0x%lx @ 0x%lx, map 0x%lx @ 0x%lx\n", + (long) srcsize, (long) srcoffset, (long) size, (long) mapoffset); + + assert(kr == kIOReturnSuccess); + + vm_deallocate(kernel_map, data[0], bsize); +// vm_deallocate(kernel_map, data[1], size); + + return (0); +} + +#endif /* DEVELOPMENT || DEBUG */ diff --git a/iokit/Tests/Tests.cpp b/iokit/Tests/Tests.cpp index 405b20352..bc2d05b69 100644 --- a/iokit/Tests/Tests.cpp +++ b/iokit/Tests/Tests.cpp @@ -29,137 +29,179 @@ * */ -#include -#include -#include - -#include +#define TEST_HEADERS 0 +#if TEST_HEADERS -extern "C" { -extern int debug_container_malloc_size; -extern int debug_ivars_size; -} - -static void DumpTree( void ) +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif /* TEST_HEADERS */ + +#include +#include +#include "Tests.h" + +static int +sysctl_iokittest(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { - IORegistryEntry * next; - IORegistryEntry * packages = 0; - IORegistryEntry * deblocker = 0; - IORegistryEntry * keyboard = 0; - IORegistryIterator * iter; - OSOrderedSet * all; - - IOLog("ivars %08x, containers %08x\n", - debug_ivars_size, debug_container_malloc_size); - - iter = IORegistryIterator::iterateOver( gIODTPlane ); - assert( iter ); - - all = iter->iterateAll(); - IOLog("\nCount %d\n", all->getCount() ); - all->release(); - - iter->reset(); - while( 
(next = iter->nextEntryRecursive())) { - if( 0 == strcmp( "packages", next->getName())) - packages = next; - if( 0 == strcmp( "deblocker", next->getName())) - deblocker = next; - if( 0 == strcmp( "keyboard", next->getName())) - keyboard = next; - } - - if( deblocker && keyboard) - deblocker->attachToParent( keyboard, gIODTPlane); - - iter->reset(); - while( (next = iter->nextEntryRecursive())) { - IOLog("%s=%d,", next->getName(), next->getDepth( gIODTPlane )); - if( 0 == strcmp( "gc", next->getName())) { - packages = next; - } + int error; + int newValue, changed; + + error = sysctl_io_number(req, 0, sizeof(int), &newValue, &changed); + if (error) return (error); + +#if DEVELOPMENT || DEBUG + if (changed && (999==newValue)) + { + OSData * data = OSData::withCapacity(16); + data->release(); + data->release(); } - IOLog("ivars %08x, containers %08x\n", - debug_ivars_size, debug_container_malloc_size); - - if( packages) - packages->detachAll( gIODTPlane); - all = iter->iterateAll(); - IOLog("del gc/, count now %d\n", all->getCount() ); - all->release(); - - iter->release(); - - IOLog("ivars %08x, containers %08x\n", - debug_ivars_size, debug_container_malloc_size); + if (changed && newValue) error = IOMemoryDescriptorTest(newValue); +#endif /* DEVELOPMENT || DEBUG */ + return (error); } -extern "C" { -void PathTests( void ) -{ - const char * tests[] = { - "IODeviceTree:/bandit", - "IODeviceTree:/", - "IODeviceTree:/xxxx", - "IODeviceTree:/bandit/xxx", - "IODeviceTree:/bandit@F2000000", - "IODeviceTree:/bandit/gc", - "IODeviceTree:/bandit/gc/mace:17.202.42.95,\\mach_kernel", - "IODeviceTree:/bandit/@10/mesh", - "IODeviceTree:enet:17.202", - "IODeviceTree:scsi/@0:0", - "IODeviceTree:scsi-int", - "IODeviceTree:/bandit/gc@10/mesh", - "IODeviceTree:/bandit/gc/53c94/disk@0:6,mach_kernel", - "IOService:/", - "IOService:/ApplePlatformExpert", - "IOService:/ApplePlatformExpert/hammerhead@F8000000", - "IOService:/ApplePlatformExpert/bandit/AppleMacRiscPCI" - }; - - 
IORegistryEntry * entry; - char str[256]; - int len; - - for( unsigned int i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { +SYSCTL_PROC(_kern, OID_AUTO, iokittest, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, + 0, 0, sysctl_iokittest, "I", ""); - len = sizeof( str ); - entry = IORegistryEntry::fromPath( tests[i], 0, str, &len ); - IOLog("\"%s\" ", tests[i] ); - if( entry) { - IOLog("found %s, tail = \"%s\"\n", entry->getName(), str ); - len = sizeof( str ); - if( entry->getPath( str, &len, - IORegistryEntry::getPlane("IODeviceTree"))) { - IOLog("path = \"%s\"\n", str); - } - entry->release(); - } else - IOLog("not found\n"); - } -} -} - -void TestsCpp( void * dtTop ) -{ - IORegistryEntry * dt; - - IOLog("\nivars %08x, containers %08x\n", - debug_ivars_size, debug_container_malloc_size); - - OSMetaClass::printInstanceCounts(); - dt = IODeviceTreeAlloc( dtTop ); - assert( dt ); - -// OSMetaClass::printInstanceCounts(); - DumpTree(); -// OSMetaClass::printInstanceCounts(); - dt->detachAll( gIODTPlane); - OSMetaClass::printInstanceCounts(); - IOLog("ivars %08x, containers %08x\n", - debug_ivars_size, debug_container_malloc_size); -} diff --git a/iokit/Tests/Tests.h b/iokit/Tests/Tests.h index 6c03b7a83..67abf6bf6 100644 --- a/iokit/Tests/Tests.h +++ b/iokit/Tests/Tests.h @@ -25,81 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include -#ifdef __cplusplus +extern int IOMemoryDescriptorTest(int x); -#define logPrintf(x) \ - do { \ - kprintf x; \ - } while (0) - -#define verPrintf(x) logPrintf(x) - -// Assumes 'bool res = true' in current scope -#define TEST_ASSERT(t, l, c) \ - do { \ - if ( !(c) ) { \ - verPrintf(("TEST (%c) test %s failed\n", t, l)); \ - res = false; \ - } \ - } while(0) - -#define logSpace() do { } while(0) -#define checkPointSpace() ((void *) 0) -#define checkSpace(l, ckp, d) ((int) 1) - -// In TestContainers.cc -extern const int numStrCache; -extern const char *strCache[]; - -extern void testString(); 
-extern void testSymbol(); -extern void testData(); - -// In TestCollections.cc -extern void testArray(); -extern void testSet(); -extern void testDictionary(); -extern void testIterator(); - -// In TestDevice.cc -extern void testWorkLoop(); - -#include - -class IOWorkLoop; -class IOCommandQueue; -class IOInterruptEventSource; - -class TestDevice; -typedef void (*TestDeviceAction)(TestDevice *, int, void *); - -class TestDevice : public OSObject -{ - OSDeclareDefaultStructors(TestDevice) - - IOWorkLoop *workLoop; - int intCount; - IOCommandQueue *commQ; - -public: - IOInterruptEventSource *intES; - - virtual bool init(); - virtual void free(); - - void rawCommandOccurred - (void *field0, void *field1, void *field2, void *field3); - kern_return_t enqueueCommand(bool sleep, - TestDeviceAction act, int tag, void *dataP); - - void interruptAction(IOInterruptEventSource *event, int count); - - void producer1Action(int tag); - void producer2Action(int tag, void *inCount); - - void alarm(); -}; - -#endif /* __cplusplus */ diff --git a/iokit/bsddev/IOKitBSDInit.cpp b/iokit/bsddev/IOKitBSDInit.cpp index 9b08bb834..3e45ff1ff 100644 --- a/iokit/bsddev/IOKitBSDInit.cpp +++ b/iokit/bsddev/IOKitBSDInit.cpp @@ -32,6 +32,7 @@ #include #include #include +#include extern "C" { @@ -39,6 +40,7 @@ extern "C" { #include #include #include +#include // how long to wait for matching root device, secs #if DEBUG @@ -47,11 +49,32 @@ extern "C" { #define ROOTDEVICETIMEOUT 60 #endif +int panic_on_exception_triage = 0; + extern dev_t mdevadd(int devid, uint64_t base, unsigned int size, int phys); extern dev_t mdevlookup(int devid); extern void mdevremoveall(void); extern void di_root_ramfile(IORegistryEntry * entry); + +#if DEVELOPMENT +#define IOPOLLED_COREFILE 1 +// no sizing +#define kIOCoreDumpSize 0ULL +#define kIOCoreDumpFreeSize 0ULL +#else +#define IOPOLLED_COREFILE 0 +#endif + + +#if IOPOLLED_COREFILE +static bool +NewKernelCoreMedia(void * target, void * refCon, + IOService * 
newService, + IONotifier * notifier); +#endif /* IOPOLLED_COREFILE */ + + kern_return_t IOKitBSDInit( void ) { @@ -763,3 +786,157 @@ int IOBSDIsMediaEjectable( const char *cdev_name ) } } /* extern "C" */ + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include +#include +#include +#include +#include +#include + +IOPolledFileIOVars * gIOPolledCoreFileVars; + +#if IOPOLLED_COREFILE + +static IOReturn +IOOpenPolledCoreFile(const char * filename) +{ + IOReturn err; + unsigned int debug; + + if (gIOPolledCoreFileVars) return (kIOReturnBusy); + if (!IOPolledInterface::gMetaClass.getInstanceCount()) return (kIOReturnUnsupported); + + debug = 0; + PE_parse_boot_argn("debug", &debug, sizeof (debug)); + if (DB_DISABLE_LOCAL_CORE & debug) return (kIOReturnUnsupported); + + err = IOPolledFileOpen(filename, kIOCoreDumpSize, kIOCoreDumpFreeSize, + NULL, 0, + &gIOPolledCoreFileVars, NULL, NULL, 0); + if (kIOReturnSuccess != err) return (err); + + err = IOPolledFilePollersSetup(gIOPolledCoreFileVars, kIOPolledPreflightCoreDumpState); + if (kIOReturnSuccess != err) + { + IOPolledFileClose(&gIOPolledCoreFileVars, NULL, NULL, 0, 0, 0); + } + + return (err); +} + +static void +IOClosePolledCoreFile(void) +{ + IOPolledFilePollersClose(gIOPolledCoreFileVars, kIOPolledPostflightState); + IOPolledFileClose(&gIOPolledCoreFileVars, NULL, NULL, 0, 0, 0); +} + +static thread_call_t gIOOpenPolledCoreFileTC; +static IONotifier * gIOPolledCoreFileNotifier; +static IONotifier * gIOPolledCoreFileInterestNotifier; + +static IOReturn +KernelCoreMediaInterest(void * target, void * refCon, + UInt32 messageType, IOService * provider, + void * messageArgument, vm_size_t argSize ) +{ + if (kIOMessageServiceIsTerminated == messageType) + { + gIOPolledCoreFileInterestNotifier->remove(); + gIOPolledCoreFileInterestNotifier = 0; + IOClosePolledCoreFile(); + } + + return (kIOReturnSuccess); +} + +static void +OpenKernelCoreMedia(thread_call_param_t p0, 
thread_call_param_t p1) +{ + IOService * newService; + OSString * string; + char filename[16]; + + newService = (IOService *) p1; + do + { + if (gIOPolledCoreFileVars) break; + string = OSDynamicCast(OSString, newService->getProperty(kIOBSDNameKey)); + if (!string) break; + snprintf(filename, sizeof(filename), "/dev/%s", string->getCStringNoCopy()); + if (kIOReturnSuccess != IOOpenPolledCoreFile(filename)) break; + gIOPolledCoreFileInterestNotifier = newService->registerInterest( + gIOGeneralInterest, &KernelCoreMediaInterest, NULL, 0); + } + while (false); + + newService->release(); +} + +static bool +NewKernelCoreMedia(void * target, void * refCon, + IOService * newService, + IONotifier * notifier) +{ + do + { + if (gIOPolledCoreFileVars) break; + if (!gIOOpenPolledCoreFileTC) break; + newService = newService->getProvider(); + if (!newService) break; + newService->retain(); + thread_call_enter1(gIOOpenPolledCoreFileTC, newService); + } + while (false); + + return (false); +} + +#endif /* IOPOLLED_COREFILE */ + +extern "C" void +IOBSDMountChange(struct mount * mp, uint32_t op) +{ +#if IOPOLLED_COREFILE + + OSDictionary * bsdMatching; + OSDictionary * mediaMatching; + OSString * string; + + if (!gIOPolledCoreFileNotifier) do + { + if (!gIOOpenPolledCoreFileTC) gIOOpenPolledCoreFileTC = thread_call_allocate(&OpenKernelCoreMedia, NULL); + bsdMatching = IOService::serviceMatching("IOMediaBSDClient"); + if (!bsdMatching) break; + mediaMatching = IOService::serviceMatching("IOMedia"); + string = OSString::withCStringNoCopy("5361644D-6163-11AA-AA11-00306543ECAC"); + if (!string || !mediaMatching) break; + mediaMatching->setObject("Content", string); + string->release(); + bsdMatching->setObject(gIOParentMatchKey, mediaMatching); + mediaMatching->release(); + + gIOPolledCoreFileNotifier = IOService::addMatchingNotification( + gIOFirstMatchNotification, bsdMatching, + &NewKernelCoreMedia, NULL, NULL, -1000); + } + while (false); + +#endif /* IOPOLLED_COREFILE */ +} + +/* * 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +extern "C" boolean_t +IOTaskHasEntitlement(task_t task, const char * entitlement) +{ + OSObject * obj; + obj = IOUserClient::copyClientEntitlement(task, entitlement); + if (!obj) return (false); + obj->release(); + return (obj != kOSBooleanFalse); +} + diff --git a/iokit/conf/Makefile.template b/iokit/conf/Makefile.template index ceffec084..4fe56b115 100644 --- a/iokit/conf/Makefile.template +++ b/iokit/conf/Makefile.template @@ -87,13 +87,13 @@ $(SOBJS): .SFLAGS $(COMPONENT).filelist: $(OBJS) $(_v)for hib_file in ${HIB_FILES}; \ do \ - $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} ; \ - mv $${hib_file}__ $${hib_file} ; \ + $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} || exit 1; \ + mv $${hib_file}__ $${hib_file} || exit 1; \ done @echo LDFILELIST $(COMPONENT) - $(_v)( for obj in ${OBJS}; do \ + $(_v)for obj in ${OBJS}; do \ echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ - done; ) > $(COMPONENT).filelist + done > $(COMPONENT).filelist do_all: $(COMPONENT).filelist diff --git a/iokit/conf/files b/iokit/conf/files index 31173f88c..0e883af2b 100644 --- a/iokit/conf/files +++ b/iokit/conf/files @@ -70,10 +70,11 @@ iokit/Kernel/IOUserClient.cpp optional iokitcpp iokit/Kernel/IOKitDebug.cpp optional iokitcpp iokit/Kernel/IODataQueue.cpp optional iokitcpp iokit/Kernel/IOSharedDataQueue.cpp optional iokitcpp -# iokit/Tests/Tests.cpp optional iokitcpp -# iokit/Tests/TestDevice.cpp optional iokitcpp -# iokit/Tests/TestContainers.cpp optional iokitcpp -# iokit/Tests/TestCollections.cpp optional iokitcpp +iokit/Tests/Tests.cpp optional iokitcpp +iokit/Tests/TestIOMemoryDescriptor.cpp optional iokitcpp +# iokit/Tests/TestDevice.cpp optional iokitcpp +# iokit/Tests/TestContainers.cpp optional iokitcpp +# iokit/Tests/TestCollections.cpp optional iokitcpp iokit/Kernel/IOStatistics.cpp optional iokitcpp iokit/Kernel/IOInterruptAccounting.cpp optional iokitcpp diff --git 
a/libkdd/kcdata/KCDBasicTypeDescription.h b/libkdd/kcdata/KCDBasicTypeDescription.h new file mode 100644 index 000000000..ebab25863 --- /dev/null +++ b/libkdd/kcdata/KCDBasicTypeDescription.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include "kdd.h" +#include +#import + +@interface KCDBasicTypeDescription : KCDataType + +- (id)initWithKCTypeDesc:(kcdata_subtype_descriptor_t)sub_type_desc; + +/* + * Restricted. 
Only for internal use + * the following interface creates a basic type_0x33 = [ array of uint8_t ] kind + */ +- (id)createDefaultForType:(uint32_t)typeID; + +@end diff --git a/libkdd/kcdata/KCDBasicTypeDescription.m b/libkdd/kcdata/KCDBasicTypeDescription.m new file mode 100644 index 000000000..151093b0e --- /dev/null +++ b/libkdd/kcdata/KCDBasicTypeDescription.m @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#import "KCDBasicTypeDescription.h" + +@interface +KCDBasicTypeDescription () { + int _typeID; + uint32_t _size; + uint32_t _count; + NSString * _name; + struct kcdata_subtype_descriptor _subtype_desc; +} + +@end + +@implementation KCDBasicTypeDescription + +- (id)initWithKCTypeDesc:(kcdata_subtype_descriptor_t)sub_type_desc +{ + _typeID = sub_type_desc->kcs_elem_type; + _count = kcs_get_elem_count(sub_type_desc); + _size = kcs_get_elem_size(sub_type_desc); + + memcpy(&_subtype_desc, sub_type_desc, sizeof(_subtype_desc)); + _name = [NSString stringWithFormat:@"%s", _subtype_desc.kcs_name]; + + return self; +} + +- (id)createDefaultForType:(uint32_t)typeID +{ + struct kcdata_subtype_descriptor subtype; + subtype.kcs_flags = KCS_SUBTYPE_FLAGS_ARRAY; + subtype.kcs_elem_type = KC_ST_UINT8; + subtype.kcs_elem_offset = 0; + subtype.kcs_elem_size = KCS_SUBTYPE_PACK_SIZE(UINT16_MAX, (uint16_t)sizeof(uint8_t)); + subtype.kcs_name[0] = '\0'; + (void)[self initWithKCTypeDesc:&subtype]; + _name = [NSString stringWithFormat:@"Type_0x%x", typeID]; + return self; +} + +- (NSObject *)objectForType:(kctype_subtype_t)elem_type withData:(uint8_t *)data +{ + NSObject * obj; + + switch (elem_type) { + case KC_ST_CHAR: obj = [NSString stringWithFormat:@"%c", *(char *)data]; break; + case KC_ST_INT8: obj = [NSNumber numberWithInt:*(int8_t *)data]; break; + case KC_ST_UINT8: obj = [NSNumber numberWithInt:*(uint8_t *)data]; break; + case KC_ST_INT16: obj = [NSNumber numberWithShort:*(int16_t *)data]; break; + case KC_ST_UINT16: obj = [NSNumber numberWithUnsignedShort:*(uint16_t *)data]; break; + case KC_ST_INT32: obj = [NSNumber numberWithInt:*(int32_t *)data]; break; + case KC_ST_UINT32: obj = [NSNumber numberWithUnsignedInt:*(uint32_t *)data]; break; + case KC_ST_INT64: obj = [NSNumber numberWithLongLong:*(int64_t *)data]; break; + case KC_ST_UINT64: obj = [NSNumber numberWithUnsignedLongLong:*(uint64_t *)data]; break; + + default: 
obj = @""; break; + } + + return obj; +} + +- (NSMutableDictionary *)parseData:(void *)dataBuffer ofLength:(uint32_t)length +{ + NSMutableDictionary * retval = [[NSMutableDictionary alloc] init]; + uint8_t * data = (uint8_t *)dataBuffer; + uint32_t elem_count = MIN(_count, length / (_size / _count)); + uint32_t elem_size = _size / _count; + if (_count == 1) { + retval[_name] = [self objectForType:_subtype_desc.kcs_elem_type withData:&data[_subtype_desc.kcs_elem_offset]]; + } else if (_subtype_desc.kcs_elem_type == KC_ST_CHAR) { + retval[_name] = [NSString stringWithFormat:@"%s", (char *)&data[_subtype_desc.kcs_elem_offset]]; + } else { + NSMutableArray * objArray = [NSMutableArray arrayWithCapacity:elem_count]; + for (unsigned int i = 0; i < elem_count; i++) { + [objArray addObject:[self objectForType:_subtype_desc.kcs_elem_type + withData:&data[(_subtype_desc.kcs_elem_offset + (elem_size * i))]]]; + } + retval[_name] = objArray; + } + return retval; +} + +- (NSString *)description +{ + return [NSString stringWithFormat:@"type: %d => \"%@\" ", [self typeID], [self name]]; +} + +- (NSString *)name +{ + return _name; +} + +- (uint32_t)count +{ + return _count; +} + +- (int)typeID +{ + return _typeID; +} + +@end diff --git a/libkdd/kcdata/KCDStructTypeDescription.h b/libkdd/kcdata/KCDStructTypeDescription.h new file mode 100644 index 000000000..68a200e60 --- /dev/null +++ b/libkdd/kcdata/KCDStructTypeDescription.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#import +#import "KCDBasicTypeDescription.h" + +@interface KCDStructTypeDescription : KCDataType + +- (id)initWithType:(int)typeID withName:(NSString *)name; + +- (void)addFieldBasicType:(KCDBasicTypeDescription *)fieldType; + +@end diff --git a/libkdd/kcdata/KCDStructTypeDescription.m b/libkdd/kcdata/KCDStructTypeDescription.m new file mode 100644 index 000000000..60f70b163 --- /dev/null +++ b/libkdd/kcdata/KCDStructTypeDescription.m @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#import "KCDStructTypeDescription.h" + +#ifndef KCDATA_TYPE_MAX_WITH_DESC +#define KCDATA_TYPE_MAX_WITH_DESC 0x6 +#endif + +@interface +KCDStructTypeDescription () { + int _typeID; + NSString * _name; + NSMutableArray * _fields; + BOOL _needDescriptionAsKey; +} + +@end + +@implementation KCDStructTypeDescription + +- (id)initWithType:(int)typeID withName:(NSString *)name +{ + if ((self = [super init])) { + _typeID = typeID; + _name = name; + _needDescriptionAsKey = NO; + if (typeID >= 0x1 && typeID <= KCDATA_TYPE_MAX_WITH_DESC) + _needDescriptionAsKey = YES; + + _fields = [[NSMutableArray alloc] init]; + return self; + } + return NULL; +} + +- (void)addFieldBasicType:(KCDBasicTypeDescription *)fieldType +{ + [_fields addObject:fieldType]; +} + +- (NSMutableDictionary *)parseData:(void *)dataBuffer ofLength:(uint32_t)length +{ + NSMutableDictionary * retval = [[NSMutableDictionary alloc] init]; + for (KCDataType * fi in _fields) { + NSMutableDictionary * _d = [fi parseData:dataBuffer 
ofLength:length]; + for (NSString * k in [_d keyEnumerator]) { + retval[k] = _d[k]; + } + } + if (_needDescriptionAsKey) { + NSString * desc = retval[@"desc"]; + NSObject * obj = retval[@"data"]; + retval[desc] = obj; + [retval removeObjectForKey:@"desc"]; + [retval removeObjectForKey:@"data"]; + } + return retval; +} + +- (NSString *)description +{ + return [NSString stringWithFormat:@"type: %d => \"%@\" ", _typeID, _name]; +} + +- (NSString *)name +{ + return _name; +} + +- (uint32_t)count +{ + return (uint32_t)[_fields count]; +} + +- (int)typeID +{ + return _typeID; +} + +@end diff --git a/libkdd/kcdata/kcdata_core.m b/libkdd/kcdata/kcdata_core.m new file mode 100644 index 000000000..90e61942c --- /dev/null +++ b/libkdd/kcdata/kcdata_core.m @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#import +#import "kdd.h" +#import "KCDBasicTypeDescription.h" +#import "KCDStructTypeDescription.h" + +#define MAX_KCDATATYPE_BUFFER_SIZE 2048 +extern struct kcdata_type_definition *kcdata_get_typedescription(unsigned type_id, uint8_t *buffer, uint32_t buffer_size); + + +/*! + * @function getTypeFromTypeDef + * + * @abstract + * Build a KCDataType from a type definition. + * + * @param typeDef + * A pointer to kcdata_type_definition_t that specifies the type fields and has subtype definitions + * in the memory immediately following the type_definition. + * + * @return KCDataType * type object which can be used to parse data into dictionaries. + * This may return nil if it finds the data to be invalid. + * + * @discussion + * This routine tries to decode the typeDef structure and create either a basic type (KCDBasicTypeDescription) + * or a struct type. 
+ */ +static KCDataType * getTypeFromTypeDef(struct kcdata_type_definition * typeDef); + +static KCDataType * +getTypeFromTypeDef(struct kcdata_type_definition * typeDef) +{ + if (typeDef == NULL) { + return nil; + } + NSString * kct_name = [NSString stringWithFormat:@"%s", typeDef->kct_name]; + if (typeDef->kct_num_elements == 1) { + KCDBasicTypeDescription * retval = [[KCDBasicTypeDescription alloc] initWithKCTypeDesc:&typeDef->kct_elements[0]]; + return retval; + } else { + KCDStructTypeDescription * retval = + [[KCDStructTypeDescription alloc] initWithType:typeDef->kct_type_identifier withName:kct_name]; + /* need to do work here to get the array of elements setup here */ + KCDBasicTypeDescription * curField = nil; + for (unsigned int i = 0; i < typeDef->kct_num_elements; i++) { + curField = [[KCDBasicTypeDescription alloc] initWithKCTypeDesc:&typeDef->kct_elements[i]]; + [retval addFieldBasicType:curField]; + } + return retval; + } + return nil; +} + +KCDataType * +getKCDataTypeForID(uint32_t typeID) +{ + static dispatch_once_t onceToken; + static NSMutableDictionary * knownTypes = nil; + dispatch_once(&onceToken, ^{ + if (!knownTypes) { + knownTypes = [[NSMutableDictionary alloc] init]; + } + }); + NSNumber * type = [NSNumber numberWithUnsignedInt:typeID]; + if (!knownTypes[type]) { + /* code to query system for type information */ + uint8_t buffer[MAX_KCDATATYPE_BUFFER_SIZE]; + struct kcdata_type_definition * sys_def = kcdata_get_typedescription(typeID, buffer, MAX_KCDATATYPE_BUFFER_SIZE); + if (sys_def == NULL) { + knownTypes[type] = [[KCDBasicTypeDescription alloc] createDefaultForType:typeID]; + } else { + knownTypes[type] = getTypeFromTypeDef(sys_def); + } + } + assert(knownTypes[type] != nil); + return knownTypes[type]; +} + +NSString * +KCDataTypeNameForID(uint32_t typeID) +{ + NSString * retval = [NSString stringWithFormat:@"%u", typeID]; + KCDataType * t = getKCDataTypeForID(typeID); + + if (![[t name] containsString:@"Type_"]) { + retval = [t name]; 
+ } + return retval; +} + +NSMutableDictionary * +parseKCDataArray(void * dataBuffer) +{ + uint32_t typeID = KCDATA_ITEM_ARRAY_GET_EL_TYPE(dataBuffer); + uint32_t count = KCDATA_ITEM_ARRAY_GET_EL_COUNT(dataBuffer); + uint32_t size = KCDATA_ITEM_ARRAY_GET_EL_SIZE(dataBuffer); + uint8_t * buffer = (uint8_t *)KCDATA_ITEM_DATA_PTR(dataBuffer); + KCDataType * datatype = getKCDataTypeForID(typeID); + NSMutableDictionary * retval = [[NSMutableDictionary alloc] initWithCapacity:1]; + NSMutableArray * arr = [[NSMutableArray alloc] initWithCapacity:count]; + retval[[datatype name]] = arr; + NSMutableDictionary * tmpdict = NULL; + for (uint32_t i = 0; i < count; i++) { + tmpdict = [datatype parseData:(void *)&buffer[i * size] ofLength:size]; + [arr addObject:tmpdict]; + } + return retval; +} + +NSMutableDictionary * +parseKCDataContainer(void * dataBuffer, uint32_t * bytesParsed) +{ + if (bytesParsed == NULL) + return nil; + assert(KCDATA_ITEM_TYPE(dataBuffer) == KCDATA_TYPE_CONTAINER_BEGIN); + uint64_t containerID = KCDATA_CONTAINER_ID(dataBuffer); + + /* setup collection object for sub containers */ + NSMutableDictionary * sub_containers = [[NSMutableDictionary alloc] init]; + NSMutableDictionary * retval = [[NSMutableDictionary alloc] init]; + NSMutableDictionary * container = [[NSMutableDictionary alloc] init]; + struct kcdata_item * buffer = (struct kcdata_item *)KCDATA_ITEM_NEXT_HEADER(dataBuffer); + KCDataType * tmptype; + uint32_t _t; + void * _d; + NSMutableDictionary * tmpdict; + retval[KCDataTypeNameForID(kcdata_get_container_type(dataBuffer))] = container; + + KCDATA_ITEM_FOREACH(buffer) + { + _t = KCDATA_ITEM_TYPE(buffer); + _d = KCDATA_ITEM_DATA_PTR(buffer); + if (_t == KCDATA_TYPE_CONTAINER_END) { + if (KCDATA_CONTAINER_ID(buffer) == containerID) { + break; + } + continue; + } + + if (_t == KCDATA_TYPE_ARRAY) { + tmpdict = parseKCDataArray(buffer); + [container addEntriesFromDictionary:tmpdict]; + continue; + } + + if (_t == KCDATA_TYPE_CONTAINER_BEGIN) { + 
uint32_t container_size = 0; + tmpdict = parseKCDataContainer(buffer, &container_size); + NSString * subcontainerID = [NSString stringWithFormat:@"%llu", KCDATA_CONTAINER_ID(buffer)]; + NSString * k_desc = nil; + assert([tmpdict count] == 1); + for (NSString * k in [tmpdict keyEnumerator]) { + k_desc = k; + if ([k intValue] != 0) + k_desc = KCDataTypeNameForID([k intValue]); + + if ([sub_containers objectForKey:k_desc] == nil) { + sub_containers[k_desc] = [[NSMutableDictionary alloc] init]; + } + sub_containers[k_desc][subcontainerID] = tmpdict[k]; + } + buffer = (struct kcdata_item *)((uintptr_t)buffer + container_size); + if (KCDATA_ITEM_TYPE(buffer) == KCDATA_TYPE_BUFFER_END) { + break; + } + continue; + } + + tmptype = getKCDataTypeForID(_t); + tmpdict = [tmptype parseData:_d ofLength:KCDATA_ITEM_SIZE(buffer)]; + if ([tmpdict count] == 1) + [container addEntriesFromDictionary:tmpdict]; + else + container[[tmptype name]] = tmpdict; + } + [container addEntriesFromDictionary:sub_containers]; + *bytesParsed = (uint32_t)((uintptr_t)buffer - (uintptr_t)dataBuffer); + return retval; +} diff --git a/libkdd/kcdata/kcdtypes.c b/libkdd/kcdata/kcdtypes.c new file mode 100644 index 000000000..82c97f74b --- /dev/null +++ b/libkdd/kcdata/kcdtypes.c @@ -0,0 +1,552 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/*! + * @function kcdata_get_typedescription + * + * @abstract + * Search the known type definitions for type with id type_id. + * + * @param type_id + * An unsigned int type specified by the KCDATA. + * + * @param buffer + * pointer to data area where type definition will be saved. + * + * @param buffer_size + * size of the buffer provided. + * + * @return struct kcdata_type_definition * + * pointer to a malloc'ed buffer holding the type definition and each subtype definition for its fields. + * It may return NULL if no type with id == type_id is found. + * Note: The caller is responsible to free() the memory when it's no longer used. + * + * @discussion + * This function queries the known type definitions table. If found, the definition data is returned + * else NULL is returned. It is advised to cache the return value from this function since the data + * is always going to be the same for same type_id. The definition setup requires memory on heap. + * The caller should make sure to free() the data once it is done using it.
+ * + */ +struct kcdata_type_definition *kcdata_get_typedescription(unsigned type_id, uint8_t *buffer, uint32_t buffer_size); + + + +/* forward declarations for helper routines */ +static uint32_t get_kctype_subtype_size(kctype_subtype_t type); +static void setup_subtype_description(kcdata_subtype_descriptor_t desc, kctype_subtype_t type, uint32_t offset, char *name); +static void setup_subtype_array_description(kcdata_subtype_descriptor_t desc, kctype_subtype_t type, uint32_t offset, uint32_t count, char *name); +static void setup_type_definition(struct kcdata_type_definition *d, uint32_t type, uint32_t num_elems, char *name); + +struct kcdata_type_definition *kcdata_get_typedescription(unsigned type_id, uint8_t *buffer, uint32_t buffer_size) +{ + int i = 0; +#define _STR_VALUE(x) #x +#define _SUBTYPE(t, s, f) setup_subtype_description(&subtypes[i++], (t), offsetof(s,f), _STR_VALUE(f)) +#define _SUBTYPE_ARRAY(t, s, f, c) setup_subtype_array_description(&subtypes[i++], (t), offsetof(s,f), (c), _STR_VALUE(f)) +#define _STRINGTYPE(f) setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, UINT16_MAX, f) + + + + if (buffer_size < sizeof(struct kcdata_type_definition) || buffer == NULL) + return NULL; + + struct kcdata_type_definition *retval = (struct kcdata_type_definition *)&buffer[0]; + kcdata_subtype_descriptor_t subtypes = (kcdata_subtype_descriptor_t)&buffer[sizeof(struct kcdata_type_definition)]; + switch (type_id) { + + case KCDATA_TYPE_STRING_DESC: { + i = 0; + setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, KCDATA_DESC_MAXLEN, "desc"); + setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, KCDATA_DESC_MAXLEN, UINT16_MAX, "data"); + setup_type_definition(retval, type_id, i, "string_desc"); + break; + } + + case KCDATA_TYPE_UINT32_DESC: { + i = 0; + setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, KCDATA_DESC_MAXLEN, "desc"); + setup_subtype_description(&subtypes[i++], KC_ST_UINT32, KCDATA_DESC_MAXLEN, "data"); + 
setup_type_definition(retval, type_id, i, "uint32_desc"); + break; + } + + case KCDATA_TYPE_UINT64_DESC: { + i = 0; + setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, KCDATA_DESC_MAXLEN, "desc"); + setup_subtype_description(&subtypes[i++], KC_ST_UINT64, KCDATA_DESC_MAXLEN, "data"); + setup_type_definition(retval, type_id, i, "uint64_desc"); + break; + } + + case KCDATA_TYPE_INT32_DESC: { + i = 0; + setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, KCDATA_DESC_MAXLEN, "desc"); + setup_subtype_description(&subtypes[i++], KC_ST_INT32, KCDATA_DESC_MAXLEN, "data"); + setup_type_definition(retval, type_id, i, "int32_desc"); + break; + } + + case KCDATA_TYPE_INT64_DESC: { + i = 0; + setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, KCDATA_DESC_MAXLEN, "desc"); + setup_subtype_description(&subtypes[i++], KC_ST_INT64, KCDATA_DESC_MAXLEN, "data"); + setup_type_definition(retval, type_id, i, "int64_desc"); + break; + } + + case KCDATA_TYPE_CONTAINER_BEGIN :{ + i = 0; + setup_subtype_description(&subtypes[i++], KC_ST_UINT32, 0, "kcContainerType"); + setup_type_definition(retval, type_id, i, "container_begin"); + break; + } + + case KCDATA_TYPE_LIBRARY_LOADINFO: { + i = 0; + _SUBTYPE(KC_ST_UINT32, struct dyld_uuid_info_32, imageLoadAddress); + _SUBTYPE_ARRAY(KC_ST_UINT8, struct dyld_uuid_info_32, imageUUID, 16); + setup_type_definition(retval, type_id, i, "dyld_load_info"); + break; + + } + + case KCDATA_TYPE_LIBRARY_LOADINFO64: /* fall through */ + case STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO: { + i = 0; + _SUBTYPE(KC_ST_UINT64, struct dyld_uuid_info_64, imageLoadAddress); + _SUBTYPE_ARRAY(KC_ST_UINT8, struct dyld_uuid_info_64, imageUUID, 16); + setup_type_definition(retval, type_id, i, "dyld_load_info"); + break; + } + + case KCDATA_TYPE_TIMEBASE: { + i = 0; + _SUBTYPE(KC_ST_UINT32, struct mach_timebase_info, numer); + _SUBTYPE(KC_ST_UINT32, struct mach_timebase_info, denom); + setup_type_definition(retval, type_id, i, 
"mach_timebase_info"); + } + + case KCDATA_TYPE_MACH_ABSOLUTE_TIME: + setup_type_definition(retval, type_id, 1, "mach_absolute_time"); + setup_subtype_description(&subtypes[0], KC_ST_UINT64, 0, "mach_absolute_time"); + break; + + case KCDATA_TYPE_TIMEVAL: { + i = 0; + _SUBTYPE(KC_ST_INT64, struct timeval64, tv_sec); + _SUBTYPE(KC_ST_INT64, struct timeval64, tv_usec); + setup_type_definition(retval, type_id, i, "timeval"); + } + + case KCDATA_TYPE_USECS_SINCE_EPOCH: + setup_type_definition(retval, type_id, 1, "usecs_since_epoch"); + setup_subtype_description(&subtypes[0], KC_ST_UINT64, 0, "usecs_since_epoch"); + break; + + + /* stackshot specific types */ + case STACKSHOT_KCTYPE_IOSTATS: { + i = 0; + _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_disk_reads_count); + _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_disk_reads_size); + _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_disk_writes_count); + _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_disk_writes_size); + _SUBTYPE_ARRAY(KC_ST_UINT64, struct io_stats_snapshot, ss_io_priority_count, STACKSHOT_IO_NUM_PRIORITIES); + _SUBTYPE_ARRAY(KC_ST_UINT64, struct io_stats_snapshot, ss_io_priority_size, STACKSHOT_IO_NUM_PRIORITIES); + _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_paging_count); + _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_paging_size); + _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_non_paging_count); + _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_non_paging_size); + _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_data_count); + _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_data_size); + _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_metadata_count); + _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_metadata_size); + + setup_type_definition(retval, type_id, i, "io_statistics"); + break; + } + + case STACKSHOT_KCTYPE_GLOBAL_MEM_STATS : + { i = 0; + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, snapshot_magic); + _SUBTYPE(KC_ST_UINT32, 
struct mem_and_io_snapshot, free_pages); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, active_pages); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, inactive_pages); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, purgeable_pages); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, wired_pages); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, speculative_pages); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, throttled_pages); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, filebacked_pages); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, compressions); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, decompressions); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, compressor_size); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, busy_buffer_count); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, pages_wanted); + _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, pages_reclaimed); + _SUBTYPE(KC_ST_UINT8, struct mem_and_io_snapshot, pages_wanted_reclaimed_valid); + setup_type_definition(retval, type_id, i, "mem_and_io_snapshot"); + break; + } + + case STACKSHOT_KCCONTAINER_TASK: + setup_type_definition(retval, type_id, 0, "task_snapshots"); + break; + + case STACKSHOT_KCCONTAINER_THREAD: + setup_type_definition(retval, type_id, 0, "thread_snapshots"); + break; + + + case STACKSHOT_KCTYPE_TASK_SNAPSHOT: { + i = 0; + _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_unique_pid); + _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_ss_flags); + _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_user_time_in_terminated_threads); + _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_system_time_in_terminated_threads); + _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_p_start_sec); + _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_task_size); + _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_max_resident_size); + _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_suspend_count); + _SUBTYPE(KC_ST_UINT32, 
struct task_snapshot_v2, ts_faults); + _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_pageins); + _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_cow_faults); + _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_was_throttled); + _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_did_throttle); + _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_latency_qos); + _SUBTYPE(KC_ST_INT32, struct task_snapshot_v2, ts_pid); + _SUBTYPE_ARRAY(KC_ST_CHAR, struct task_snapshot_v2, ts_p_comm, 32); + setup_type_definition(retval, type_id, i, "task_snapshot"); + break; + } + + case STACKSHOT_KCTYPE_THREAD_SNAPSHOT: { + i = 0; + + _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_thread_id); + _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_wait_event); + _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_continuation); + _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_total_syscalls); + _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_voucher_identifier); + _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_dqserialnum); + _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_user_time); + _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_sys_time); + _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_ss_flags); + _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_last_run_time); + _SUBTYPE(KC_ST_UINT32, struct thread_snapshot_v2, ths_state); + _SUBTYPE(KC_ST_UINT32, struct thread_snapshot_v2, ths_sched_flags); + _SUBTYPE(KC_ST_INT16, struct thread_snapshot_v2, ths_base_priority); + _SUBTYPE(KC_ST_INT16, struct thread_snapshot_v2, ths_sched_priority); + _SUBTYPE(KC_ST_UINT8, struct thread_snapshot_v2, ths_eqos); + _SUBTYPE(KC_ST_UINT8, struct thread_snapshot_v2, ths_rqos); + _SUBTYPE(KC_ST_UINT8, struct thread_snapshot_v2, ths_rqos_override); + _SUBTYPE(KC_ST_UINT8, struct thread_snapshot_v2, ths_io_tier); + + setup_type_definition(retval, type_id, i, "thread_snapshot"); + break; + } + + + case STASKSHOT_KCTYPE_DONATING_PIDS: + 
setup_type_definition(retval, type_id, 1, "donating_pids"); + setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "pid"); + break; + + case STACKSHOT_KCTYPE_THREAD_NAME:{ + i = 0; + setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, 64, "pth_name"); + setup_type_definition(retval, type_id, i, "pth_name"); + break; + } + + case STACKSHOT_KCTYPE_KERN_STACKFRAME : + setup_type_definition(retval, type_id, 2, "kernel_stack_frames"); + setup_subtype_description(&subtypes[0], KC_ST_UINT32, 0, "lr"); + setup_subtype_description(&subtypes[1], KC_ST_UINT32, sizeof(uint32_t), "sp"); + break; + + case STACKSHOT_KCTYPE_KERN_STACKFRAME64 : + setup_type_definition(retval, type_id, 2, "kernel_stack_frames"); + setup_subtype_description(&subtypes[0], KC_ST_UINT64, 0, "lr"); + setup_subtype_description(&subtypes[1], KC_ST_UINT64, sizeof(uint64_t), "sp"); + break; + + case STACKSHOT_KCTYPE_USER_STACKFRAME : + setup_type_definition(retval, type_id, 2, "user_stack_frames"); + setup_subtype_description(&subtypes[0], KC_ST_UINT32, 0, "lr"); + setup_subtype_description(&subtypes[1], KC_ST_UINT32, sizeof(uint32_t), "sp"); + break; + + case STACKSHOT_KCTYPE_USER_STACKFRAME64 : + setup_type_definition(retval, type_id, 2, "user_stack_frames"); + setup_subtype_description(&subtypes[0], KC_ST_UINT64, 0, "lr"); + setup_subtype_description(&subtypes[1], KC_ST_UINT64, sizeof(uint64_t), "sp"); + break; + + case STACKSHOT_KCTYPE_BOOTARGS: { + i = 0; + _STRINGTYPE("boot_args"); + setup_type_definition(retval, type_id, i, "boot_args"); + break; + } + + case STACKSHOT_KCTYPE_OSVERSION: { + i = 0; + _STRINGTYPE("osversion"); + setup_type_definition(retval, type_id, i, "osversion"); + break; + } + + case STACKSHOT_KCTYPE_KERN_PAGE_SIZE: { + i = 0; + setup_subtype_description(&subtypes[i++], KC_ST_UINT32, 0, "kernel_page_size"); + setup_type_definition(retval, type_id, i, "kernel_page_size"); + break; + } + + case STACKSHOT_KCTYPE_JETSAM_LEVEL: { + i = 0; + 
setup_subtype_description(&subtypes[i++], KC_ST_UINT32, 0, "jetsam_level"); + setup_type_definition(retval, type_id, i, "jetsam_level"); + break; + } + + /* crashinfo types */ + case TASK_CRASHINFO_BSDINFOWITHUNIQID: + { i = 0; + _SUBTYPE_ARRAY(KC_ST_UINT8, struct proc_uniqidentifierinfo, p_uuid, 16); + _SUBTYPE(KC_ST_UINT64, struct proc_uniqidentifierinfo, p_uniqueid); + _SUBTYPE(KC_ST_UINT64, struct proc_uniqidentifierinfo, p_puniqueid); + /* Ignore the p_reserve fields */ + setup_type_definition(retval, type_id, i, "proc_uniqidentifierinfo"); + break; + } + + case TASK_CRASHINFO_PID:{ + setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "pid"); + setup_type_definition(retval, type_id, 1, "pid"); + break; + } + + case TASK_CRASHINFO_PPID:{ + setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "ppid"); + setup_type_definition(retval, type_id, 1, "ppid"); + break; + } + + case TASK_CRASHINFO_RUSAGE_INFO: { + i = 0; + _SUBTYPE_ARRAY(KC_ST_UINT8, struct rusage_info_v3, ri_uuid, 16); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_user_time); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_system_time); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_pkg_idle_wkups); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_interrupt_wkups); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_pageins); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_wired_size); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_resident_size); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_phys_footprint); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_proc_start_abstime); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_proc_exit_abstime); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_child_user_time); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_child_system_time); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_child_pkg_idle_wkups); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_child_interrupt_wkups); + _SUBTYPE(KC_ST_UINT64, struct 
rusage_info_v3, ri_child_pageins); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_child_elapsed_abstime); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_diskio_bytesread); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_diskio_byteswritten); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_default); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_maintenance); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_background); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_utility); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_legacy); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_user_initiated); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_user_interactive); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_billed_system_time); + _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_serviced_system_time); + setup_type_definition(retval, type_id, i, "rusage_info"); + } + + case TASK_CRASHINFO_PROC_NAME: { + i = 0; + _STRINGTYPE("p_comm"); + setup_type_definition(retval, type_id, i, "p_comm"); + } + + case TASK_CRASHINFO_USERSTACK: { + i = 0; + setup_subtype_description(&subtypes[0], KC_ST_UINT64, 0, "userstack_ptr"); + setup_type_definition(retval, type_id, 1, "userstack_ptr"); + break; + } + + case TASK_CRASHINFO_ARGSLEN: { + i = 0; + setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "p_argslen"); + setup_type_definition(retval, type_id, 1, "p_argslen"); + break; + } + + case TASK_CRASHINFO_PROC_PATH: { + i = 0; + _STRINGTYPE("p_path"); + setup_type_definition(retval, type_id, i, "p_path"); + } + + case TASK_CRASHINFO_PROC_CSFLAGS:{ + setup_subtype_description(&subtypes[0], KC_ST_UINT32, 0, "p_csflags"); + setup_type_definition(retval, type_id, 1, "p_csflags"); + break; + } + + case TASK_CRASHINFO_PROC_STATUS: { + setup_subtype_description(&subtypes[0], KC_ST_UINT8, 0, "p_status"); + setup_type_definition(retval, type_id, 1, "p_status"); + 
break; + } + + case TASK_CRASHINFO_UID:{ + setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "uid"); + setup_type_definition(retval, type_id, 1, "uid"); + break; + } + + case TASK_CRASHINFO_GID:{ + setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "gid"); + setup_type_definition(retval, type_id, 1, "gid"); + break; + } + + case TASK_CRASHINFO_PROC_ARGC:{ + setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "argc"); + setup_type_definition(retval, type_id, 1, "argc"); + break; + } + + case TASK_CRASHINFO_PROC_FLAGS:{ + setup_subtype_description(&subtypes[0], KC_ST_UINT32, 0, "p_flags"); + setup_type_definition(retval, type_id, 1, "p_flags"); + break; + } + + case TASK_CRASHINFO_CPUTYPE:{ + setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "cputype"); + setup_type_definition(retval, type_id, 1, "cputype"); + break; + } + + case TASK_CRASHINFO_RESPONSIBLE_PID:{ + setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "responsible_pid"); + setup_type_definition(retval, type_id, 1, "responsible_pid"); + break; + } + + case TASK_CRASHINFO_DIRTY_FLAGS:{ + setup_subtype_description(&subtypes[0], KC_ST_UINT32, 0, "dirty_flags"); + setup_type_definition(retval, type_id, 1, "dirty_flags"); + break; + } + + case TASK_CRASHINFO_CRASHED_THREADID: { + setup_subtype_description(&subtypes[0], KC_ST_UINT64, 0, "crashed_threadid"); + setup_type_definition(retval, type_id, 1, "crashed_threadid"); + break; + } + + default: + retval = NULL; + break; + } + + assert(retval == NULL || (buffer_size > sizeof(struct kcdata_type_definition) + (retval->kct_num_elements * sizeof(struct kcdata_subtype_descriptor)))); + return retval; +} + + +static void setup_type_definition(struct kcdata_type_definition *d, uint32_t type, uint32_t num_elems, char *name) +{ + d->kct_type_identifier = type; + d->kct_num_elements = num_elems; + memcpy(d->kct_name, name, sizeof(d->kct_name)); + d->kct_name[sizeof(d->kct_name) - 1] = '\0'; +} + +static uint32_t 
get_kctype_subtype_size(kctype_subtype_t type){ + switch (type) { + case KC_ST_CHAR: + case KC_ST_INT8: + case KC_ST_UINT8: + return sizeof(uint8_t); + break; + case KC_ST_INT16: + case KC_ST_UINT16: + return sizeof(uint16_t); + break; + case KC_ST_INT32: + case KC_ST_UINT32: + return sizeof(uint32_t); + break; + case KC_ST_INT64: + case KC_ST_UINT64: + return sizeof(uint64_t); + break; + + default: + assert(0); + break; + } + return 0; +} + +static void setup_subtype_array_description(kcdata_subtype_descriptor_t desc, kctype_subtype_t type, uint32_t offset, uint32_t count, char *name) +{ + desc->kcs_flags = KCS_SUBTYPE_FLAGS_ARRAY; + desc->kcs_elem_type = type; + desc->kcs_elem_offset = offset; + desc->kcs_elem_size = KCS_SUBTYPE_PACK_SIZE(count, get_kctype_subtype_size(type)); + memcpy(desc->kcs_name, name, sizeof(desc->kcs_name)); + desc->kcs_name[sizeof(desc->kcs_name) - 1] = '\0'; +} + +static void setup_subtype_description(kcdata_subtype_descriptor_t desc, kctype_subtype_t type, uint32_t offset, char *name) +{ + desc->kcs_flags = KCS_SUBTYPE_FLAGS_NONE; + desc->kcs_elem_type = type; + desc->kcs_elem_offset = offset; + desc->kcs_elem_size = get_kctype_subtype_size(type); + memcpy(desc->kcs_name, name, sizeof(desc->kcs_name)); + desc->kcs_name[sizeof(desc->kcs_name) - 1] = '\0'; +} + diff --git a/libkdd/kcdata/kdd.h b/libkdd/kcdata/kdd.h new file mode 100644 index 000000000..ba9106d73 --- /dev/null +++ b/libkdd/kcdata/kdd.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _KDD_H_ +#define _KDD_H_ + +#import + +/*! + * @class KCDataType + * A basic abstraction that allows for parsing data provided by kernel chunked + * data library. + * + * @discussion + * Each type object has a name and a method to parse and populate data in memory to + * a dictionary. The dictionary will have keys as NSStrings and values could be NSObject + * + */ +@interface KCDataType : NSObject +- (NSMutableDictionary *)parseData:(void *)dataBuffer ofLength:(uint32_t)length; +- (NSString *)name; +@end + +/*! + * @function getKCDataTypeForID + * + * @abstract + * Find a type description for the given TypeID + * + * @param typeID + * An unsigned int type specified by the KCDATA. + * + * @discussion + * This routine queries the system for a given type. If a known type description is found it will be used to + * initialize a KCDataType object. If no known type is found it assumes the data is uint8_t[]. + */ +KCDataType * getKCDataTypeForID(uint32_t typeID); + +/*!
+ * @function KCDataTypeNameForID + * + * @abstract + * Get a name for the type. + * + * @param typeID + * An unsigned int type specified by the KCDATA. + * + * @return NSString * + * Returns name of the type. If a type is not found the return + * value will be string object of the passed value. + */ +NSString * KCDataTypeNameForID(uint32_t typeID); + +/*! + * @function parseKCDataArray + * + * @abstract + * Parse the given KCDATA buffer as an Array of elements. The buffer should begin with header + * of type KCDATA_TYPE_ARRAY. + * + * @param dataBuffer + * A pointer in memory where KCDATA is allocated. + * + * @return + * A dictionary with key specifying name of the type of each element and value is an Array of data. + * + */ + +NSMutableDictionary * parseKCDataArray(void * dataBuffer); + +/*! + * @function parseKCDataContainer + * + * @abstract + * Parse the given KCDATA buffer as a container and convert each sub structure into a field in a dictionary. + * + * @param dataBuffer + * A pointer in memory where KCDATA is allocated. The data should be pointing to + * kcdata_item_t of type KCDATA_TYPE_CONTAINER_BEGIN + * + * @param bytesParsed + * A pointer to uint32_t field where the routine will save the number of bytes parsed for this container. + * + * @return NSMutableDictionary * + * containing each field and potentially sub containers within the provided container. + * + * @discussion + * This function tries to parse one container. If it encounters sub containers + * they will be parsed and collected within the same dictionary. + * Other data type fields will also be parsed based on their type. The bytesParsed + * param is populated with the number of bytes processed. With this return value the caller can + * advance its buffer_read position as + * buffer = (kcdata_item_t)((uintptr_t)buffer + bytesParsed); //advance to next KCDATA_HEADER. + * Note: Keep in mind that the next header may be KCDATA_TYPE_BUFFER_END.
+ * + * A sample usage call can be: + * KCDATA_ITEM_FOREACH(buffer) { + * if(KCDATA_ITEM_TYPE(buffer) == KCDATA_TYPE_CONTAINER_BEGIN) { + * uint32_t container_size = 0; + * NSMutableDictionary *parsedContainer = parseKCDataContainer(buffer, &container_size); + * NSLog(@"Parsed container has : %@", parsedContainer); + * buffer = (kcdata_item_t) ((uintptr_t)buffer + container_size); + * if(KCDATA_ITEM_TYPE(buffer) == KCDATA_TYPE_BUFFER_END) + * break; + * } + * } + * + */ +NSMutableDictionary * parseKCDataContainer(void * dataBuffer, uint32_t * bytesParsed); + +#endif /* _KDD_H_ */ diff --git a/libkdd/kcdata/kdd.m b/libkdd/kcdata/kdd.m new file mode 100644 index 000000000..599cea923 --- /dev/null +++ b/libkdd/kcdata/kdd.m @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#import "kdd.h" + +@implementation KCDataType /* Base implementation: both methods below only trip an assertion. Presumably concrete type-description subclasses override them - TODO confirm. */ + +- (NSMutableDictionary *)parseData:(void *)dataBuffer ofLength:(uint32_t)length +{ + assert(0); /* placeholder: always fails the assertion and returns no value. NOTE(review): if assertions are compiled out, control falls off the end of this non-void method. */ +} + +- (NSString *)name +{ + assert(0); /* placeholder: always fails the assertion and returns no value; same caveat as parseData:ofLength: above. */ +} + +@end diff --git a/libkdd/kdd.xcodeproj/project.pbxproj b/libkdd/kdd.xcodeproj/project.pbxproj new file mode 100644 index 000000000..fb6753025 --- /dev/null +++ b/libkdd/kdd.xcodeproj/project.pbxproj @@ -0,0 +1,269 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 46; + objects = { + +/* Begin PBXBuildFile section */ + C91C93CB1ACB58B700119B60 /* kdd.h in Headers */ = {isa = PBXBuildFile; fileRef = C91C93CA1ACB58B700119B60 /* kdd.h */; settings = {ATTRIBUTES = (Private, ); }; }; + C91C93CD1ACB58B700119B60 /* kdd.m in Sources */ = {isa = PBXBuildFile; fileRef = C91C93CC1ACB58B700119B60 /* kdd.m */; }; + C91C93E41ACB598700119B60 /* KCDBasicTypeDescription.h in Headers */ = {isa = PBXBuildFile; fileRef = C91C93E01ACB598700119B60 /* KCDBasicTypeDescription.h */; }; + C91C93E51ACB598700119B60 /* KCDBasicTypeDescription.m in Sources */ = {isa = PBXBuildFile; fileRef = C91C93E11ACB598700119B60 /* KCDBasicTypeDescription.m */; }; + C91C93E61ACB598700119B60 /* KCDStructTypeDescription.h in Headers */ = {isa = PBXBuildFile; fileRef = C91C93E21ACB598700119B60 /* KCDStructTypeDescription.h */; }; + C91C93E71ACB598700119B60 /* KCDStructTypeDescription.m in Sources */ = {isa = PBXBuildFile; fileRef = C91C93E31ACB598700119B60 /* KCDStructTypeDescription.m */; }; + C9C5C68C1ACDAFDB00BE0E5E /* kcdtypes.c in Sources */ = {isa = PBXBuildFile; fileRef = C9C5C68B1ACDAFDB00BE0E5E /* kcdtypes.c */; }; + C9DE39141ACB5A540020F4A3 /* kcdata_core.m in Sources */ = {isa = PBXBuildFile; fileRef = C9DE39131ACB5A540020F4A3 /* kcdata_core.m */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */
+ C91C93C71ACB58B700119B60 /* libkdd.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libkdd.a; sourceTree = BUILT_PRODUCTS_DIR; }; + C91C93CA1ACB58B700119B60 /* kdd.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = kdd.h; sourceTree = ""; }; + C91C93CC1ACB58B700119B60 /* kdd.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = kdd.m; sourceTree = ""; }; + C91C93E01ACB598700119B60 /* KCDBasicTypeDescription.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KCDBasicTypeDescription.h; sourceTree = ""; }; + C91C93E11ACB598700119B60 /* KCDBasicTypeDescription.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = KCDBasicTypeDescription.m; sourceTree = ""; }; + C91C93E21ACB598700119B60 /* KCDStructTypeDescription.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KCDStructTypeDescription.h; sourceTree = ""; }; + C91C93E31ACB598700119B60 /* KCDStructTypeDescription.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = KCDStructTypeDescription.m; sourceTree = ""; }; + C9C5C68B1ACDAFDB00BE0E5E /* kcdtypes.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kcdtypes.c; sourceTree = ""; }; + C9DE39131ACB5A540020F4A3 /* kcdata_core.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = kcdata_core.m; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + C91C93C41ACB58B700119B60 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + C91C93BE1ACB58B700119B60 = { + isa = PBXGroup; + children = ( + C91C93C91ACB58B700119B60 /* 
kcdata */, + C91C93C81ACB58B700119B60 /* Products */, + ); + sourceTree = ""; + }; + C91C93C81ACB58B700119B60 /* Products */ = { + isa = PBXGroup; + children = ( + C91C93C71ACB58B700119B60 /* libkdd.a */, + ); + name = Products; + sourceTree = ""; + }; + C91C93C91ACB58B700119B60 /* kcdata */ = { + isa = PBXGroup; + children = ( + C9C5C68B1ACDAFDB00BE0E5E /* kcdtypes.c */, + C9DE39131ACB5A540020F4A3 /* kcdata_core.m */, + C91C93E01ACB598700119B60 /* KCDBasicTypeDescription.h */, + C91C93E11ACB598700119B60 /* KCDBasicTypeDescription.m */, + C91C93E21ACB598700119B60 /* KCDStructTypeDescription.h */, + C91C93E31ACB598700119B60 /* KCDStructTypeDescription.m */, + C91C93CA1ACB58B700119B60 /* kdd.h */, + C91C93CC1ACB58B700119B60 /* kdd.m */, + ); + path = kcdata; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + C91C93C51ACB58B700119B60 /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + C91C93CB1ACB58B700119B60 /* kdd.h in Headers */, + C91C93E41ACB598700119B60 /* KCDBasicTypeDescription.h in Headers */, + C91C93E61ACB598700119B60 /* KCDStructTypeDescription.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + C91C93C61ACB58B700119B60 /* kdd */ = { + isa = PBXNativeTarget; + buildConfigurationList = C91C93DA1ACB58B700119B60 /* Build configuration list for PBXNativeTarget "kdd" */; + buildPhases = ( + C91C93C31ACB58B700119B60 /* Sources */, + C91C93C41ACB58B700119B60 /* Frameworks */, + C91C93C51ACB58B700119B60 /* Headers */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = kdd; + productName = kdd; + productReference = C91C93C71ACB58B700119B60 /* libkdd.a */; + productType = "com.apple.product-type.library.static"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + C91C93BF1ACB58B700119B60 /* Project object */ = { + isa = PBXProject; + attributes = { + 
LastUpgradeCheck = 0700; + ORGANIZATIONNAME = "Vishal Patel"; + TargetAttributes = { + C91C93C61ACB58B700119B60 = { + CreatedOnToolsVersion = 7.0; + }; + }; + }; + buildConfigurationList = C91C93C21ACB58B700119B60 /* Build configuration list for PBXProject "kdd" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + ); + mainGroup = C91C93BE1ACB58B700119B60; + productRefGroup = C91C93C81ACB58B700119B60 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + C91C93C61ACB58B700119B60 /* kdd */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + C91C93C31ACB58B700119B60 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + C9DE39141ACB5A540020F4A3 /* kcdata_core.m in Sources */, + C91C93E71ACB598700119B60 /* KCDStructTypeDescription.m in Sources */, + C91C93E51ACB598700119B60 /* KCDBasicTypeDescription.m in Sources */, + C91C93CD1ACB58B700119B60 /* kdd.m in Sources */, + C9C5C68C1ACDAFDB00BE0E5E /* kcdtypes.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + C91C93D81ACB58B700119B60 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + 
GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_SYMBOLS_PRIVATE_EXTERN = NO; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MTL_ENABLE_DEBUG_INFO = YES; + ONLY_ACTIVE_ARCH = YES; + OTHER_CFLAGS = ""; + }; + name = Debug; + }; + C91C93D91ACB58B700119B60 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MTL_ENABLE_DEBUG_INFO = NO; + OTHER_CFLAGS = ""; + }; + name = Release; + }; + C91C93DB1ACB58B700119B60 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + EXECUTABLE_PREFIX = lib; + OTHER_CFLAGS = "-I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders"; + PRODUCT_NAME = kdd; + }; + name = Debug; + }; + C91C93DC1ACB58B700119B60 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + EXECUTABLE_PREFIX = lib; + 
OTHER_CFLAGS = "-I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders"; + PRODUCT_NAME = kdd; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + C91C93C21ACB58B700119B60 /* Build configuration list for PBXProject "kdd" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + C91C93D81ACB58B700119B60 /* Debug */, + C91C93D91ACB58B700119B60 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + C91C93DA1ACB58B700119B60 /* Build configuration list for PBXNativeTarget "kdd" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + C91C93DB1ACB58B700119B60 /* Debug */, + C91C93DC1ACB58B700119B60 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = C91C93BF1ACB58B700119B60 /* Project object */; +} diff --git a/libkern/.clang-format b/libkern/.clang-format new file mode 100644 index 000000000..cd99c24e5 --- /dev/null +++ b/libkern/.clang-format @@ -0,0 +1,30 @@ +# See top level .clang-format for explanation of options +AlignEscapedNewlinesLeft: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: false +AlwaysBreakBeforeMultilineStrings: true +BinPackArguments: true +BinPackParameters: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Allman +ColumnLimit: 132 +IndentCaseLabels: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: false +PointerAlignment: Middle +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false 
+SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +TabWidth: 4 +UseTab: Never diff --git a/libkern/Makefile b/libkern/Makefile index ad77bbce2..806567fc4 100644 --- a/libkern/Makefile +++ b/libkern/Makefile @@ -11,12 +11,14 @@ INSTINC_SUBDIRS = \ INSTINC_SUBDIRS_X86_64 = libkern INSTINC_SUBDIRS_X86_64H = libkern INSTINC_SUBDIRS_ARM = libkern +INSTINC_SUBDIRS_ARM64 = libkern EXPINC_SUBDIRS = \ libkern EXPINC_SUBDIRS_X86_64 = libkern EXPINC_SUBDIRS_X86_64H = libkern EXPINC_SUBDIRS_ARM = libkern +EXPINC_SUBDIRS_ARM64 = libkern COMP_SUBDIRS = conf diff --git a/libkern/OSKextVersion.c b/libkern/OSKextVersion.c index f9013853c..3f94e0d02 100644 --- a/libkern/OSKextVersion.c +++ b/libkern/OSKextVersion.c @@ -31,7 +31,7 @@ #include #else #include -#include +#include #include #endif /* KERNEL */ diff --git a/libkern/c++/OSArray.cpp b/libkern/c++/OSArray.cpp index 61ed05f97..fcdca78d1 100644 --- a/libkern/c++/OSArray.cpp +++ b/libkern/c++/OSArray.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #define super OSCollection @@ -46,14 +47,6 @@ OSMetaClassDefineReservedUnused(OSArray, 5); OSMetaClassDefineReservedUnused(OSArray, 6); OSMetaClassDefineReservedUnused(OSArray, 7); -#if OSALLOCDEBUG -extern "C" { - extern int debug_container_malloc_size; -}; -#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0) -#else -#define ACCUMSIZE(s) -#endif #define EXT_CAST(obj) \ reinterpret_cast(const_cast(obj)) @@ -70,7 +63,7 @@ bool OSArray::initWithCapacity(unsigned int inCapacity) return false; size = sizeof(const OSMetaClassBase *) * inCapacity; - array = (const OSMetaClassBase **) kalloc(size); + array = (const OSMetaClassBase **) kalloc_container(size); if (!array) return false; @@ -79,7 +72,7 @@ bool OSArray::initWithCapacity(unsigned int inCapacity) capacityIncrement = (inCapacity)? 
inCapacity : 16; bzero(array, size); - ACCUMSIZE(size); + OSCONTAINER_ACCUMSIZE(size); return true; } @@ -171,7 +164,7 @@ void OSArray::free() if (array) { kfree(array, sizeof(const OSMetaClassBase *) * capacity); - ACCUMSIZE( -(sizeof(const OSMetaClassBase *) * capacity) ); + OSCONTAINER_ACCUMSIZE( -(sizeof(const OSMetaClassBase *) * capacity) ); } super::free(); @@ -207,11 +200,11 @@ unsigned int OSArray::ensureCapacity(unsigned int newCapacity) newSize = sizeof(const OSMetaClassBase *) * finalCapacity; - newArray = (const OSMetaClassBase **) kalloc(newSize); + newArray = (const OSMetaClassBase **) kalloc_container(newSize); if (newArray) { oldSize = sizeof(const OSMetaClassBase *) * capacity; - ACCUMSIZE(newSize - oldSize); + OSCONTAINER_ACCUMSIZE(((size_t)newSize) - ((size_t)oldSize)); bcopy(array, newArray, oldSize); bzero(&newArray[capacity], newSize - oldSize); diff --git a/libkern/c++/OSCollectionIterator.cpp b/libkern/c++/OSCollectionIterator.cpp index d44e0d500..e623b6492 100644 --- a/libkern/c++/OSCollectionIterator.cpp +++ b/libkern/c++/OSCollectionIterator.cpp @@ -36,15 +36,6 @@ OSDefineMetaClassAndStructors(OSCollectionIterator, OSIterator) -#if OSALLOCDEBUG -extern "C" { - extern int debug_container_malloc_size; -}; -#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0) -#else -#define ACCUMSIZE(s) -#endif - bool OSCollectionIterator::initWithCollection(const OSCollection *inColl) { if ( !super::init() || !inColl) @@ -56,7 +47,7 @@ bool OSCollectionIterator::initWithCollection(const OSCollection *inColl) initialUpdateStamp = 0; valid = false; - return this; + return true; } OSCollectionIterator * @@ -77,7 +68,7 @@ void OSCollectionIterator::free() { if (collIterator) { kfree(collIterator, collection->iteratorSize()); - ACCUMSIZE(-(collection->iteratorSize())); + OSCONTAINER_ACCUMSIZE(-((size_t) collection->iteratorSize())); collIterator = 0; } @@ -94,8 +85,8 @@ void OSCollectionIterator::reset() valid = false; if (!collIterator) { 
- collIterator = (void *)kalloc(collection->iteratorSize()); - ACCUMSIZE(collection->iteratorSize()); + collIterator = (void *)kalloc_container(collection->iteratorSize()); + OSCONTAINER_ACCUMSIZE(collection->iteratorSize()); if (!collIterator) return; } @@ -110,8 +101,8 @@ void OSCollectionIterator::reset() bool OSCollectionIterator::isValid() { if (!collIterator) { - collIterator = (void *)kalloc(collection->iteratorSize()); - ACCUMSIZE(collection->iteratorSize()); + collIterator = (void *)kalloc_container(collection->iteratorSize()); + OSCONTAINER_ACCUMSIZE(collection->iteratorSize()); if (!collection->initIterator(collIterator)) return false; initialUpdateStamp = collection->updateStamp; diff --git a/libkern/c++/OSData.cpp b/libkern/c++/OSData.cpp index d43f67a13..a48142d2c 100644 --- a/libkern/c++/OSData.cpp +++ b/libkern/c++/OSData.cpp @@ -49,32 +49,30 @@ OSMetaClassDefineReservedUnused(OSData, 7); #define EXTERNAL ((unsigned int) -1) -#if OSALLOCDEBUG -extern int debug_container_malloc_size; -#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0) -#else -#define ACCUMSIZE(s) -#endif - bool OSData::initWithCapacity(unsigned int inCapacity) { + if (data) + { + OSCONTAINER_ACCUMSIZE(-((size_t)capacity)); + if (!inCapacity || (capacity < inCapacity)) + { + // clean out old data's storage if it isn't big enough + kfree(data, capacity); + data = 0; + capacity = 0; + } + } + if (!super::init()) return false; - if (data && (!inCapacity || capacity < inCapacity) ) { - // clean out old data's storage if it isn't big enough - kfree(data, capacity); - data = 0; - ACCUMSIZE(-capacity); - } - if (inCapacity && !data) { - data = (void *) kalloc(inCapacity); + data = (void *) kalloc_container(inCapacity); if (!data) return false; capacity = inCapacity; - ACCUMSIZE(inCapacity); } + OSCONTAINER_ACCUMSIZE(capacity); length = 0; if (inCapacity < 16) @@ -189,7 +187,7 @@ void OSData::free() { if (capacity != EXTERNAL && data && capacity) { kfree(data, capacity); 
- ACCUMSIZE( -capacity ); + OSCONTAINER_ACCUMSIZE( -((size_t)capacity) ); } else if (capacity == EXTERNAL) { DeallocFunction freemem = reserved ? reserved->deallocFunction : NULL; if (freemem && data && length) { @@ -230,7 +228,7 @@ unsigned int OSData::ensureCapacity(unsigned int newCapacity) if (finalCapacity < newCapacity) return capacity; - newData = (unsigned char *) kalloc(finalCapacity); + newData = (unsigned char *) kalloc_container(finalCapacity); if ( newData ) { bzero(newData + capacity, finalCapacity - capacity); @@ -238,7 +236,7 @@ unsigned int OSData::ensureCapacity(unsigned int newCapacity) bcopy(data, newData, capacity); kfree(data, capacity); } - ACCUMSIZE( finalCapacity - capacity ); + OSCONTAINER_ACCUMSIZE( ((size_t)finalCapacity) - ((size_t)capacity) ); data = (void *) newData; capacity = finalCapacity; } @@ -445,7 +443,7 @@ void OSData::setDeallocFunction(DeallocFunction func) { if (!reserved) { - reserved = (typeof(reserved)) kalloc(sizeof(ExpansionData)); + reserved = (typeof(reserved)) kalloc_container(sizeof(ExpansionData)); if (!reserved) return; bzero(reserved, sizeof(ExpansionData)); } @@ -456,7 +454,7 @@ void OSData::setSerializable(bool serializable) { if (!reserved) { - reserved = (typeof(reserved)) kalloc(sizeof(ExpansionData)); + reserved = (typeof(reserved)) kalloc_container(sizeof(ExpansionData)); if (!reserved) return; bzero(reserved, sizeof(ExpansionData)); } diff --git a/libkern/c++/OSDictionary.cpp b/libkern/c++/OSDictionary.cpp index 2f86e9a1d..c511e9d14 100644 --- a/libkern/c++/OSDictionary.cpp +++ b/libkern/c++/OSDictionary.cpp @@ -49,15 +49,6 @@ OSMetaClassDefineReservedUnused(OSDictionary, 5); OSMetaClassDefineReservedUnused(OSDictionary, 6); OSMetaClassDefineReservedUnused(OSDictionary, 7); -#if OSALLOCDEBUG -extern "C" { - extern int debug_container_malloc_size; -}; -#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0) -#else -#define ACCUMSIZE(s) -#endif - #define EXT_CAST(obj) \ 
reinterpret_cast(const_cast(obj)) @@ -72,12 +63,12 @@ bool OSDictionary::initWithCapacity(unsigned int inCapacity) unsigned int size = inCapacity * sizeof(dictEntry); //fOptions |= kSort; - dictionary = (dictEntry *) kalloc(size); + dictionary = (dictEntry *) kalloc_container(size); if (!dictionary) return false; bzero(dictionary, size); - ACCUMSIZE(size); + OSCONTAINER_ACCUMSIZE(size); count = 0; capacity = inCapacity; @@ -254,7 +245,7 @@ void OSDictionary::free() flushCollection(); if (dictionary) { kfree(dictionary, capacity * sizeof(dictEntry)); - ACCUMSIZE( -(capacity * sizeof(dictEntry)) ); + OSCONTAINER_ACCUMSIZE( -(capacity * sizeof(dictEntry)) ); } super::free(); @@ -293,14 +284,14 @@ unsigned int OSDictionary::ensureCapacity(unsigned int newCapacity) newSize = sizeof(dictEntry) * finalCapacity; - newDict = (dictEntry *) kalloc(newSize); + newDict = (dictEntry *) kalloc_container(newSize); if (newDict) { oldSize = sizeof(dictEntry) * capacity; bcopy(dictionary, newDict, oldSize); bzero(&newDict[capacity], newSize - oldSize); - ACCUMSIZE(newSize - oldSize); + OSCONTAINER_ACCUMSIZE(((size_t)newSize) - ((size_t)oldSize)); kfree(dictionary, oldSize); dictionary = newDict; diff --git a/libkern/c++/OSKext.cpp b/libkern/c++/OSKext.cpp index f1e613344..5f77f045b 100644 --- a/libkern/c++/OSKext.cpp +++ b/libkern/c++/OSKext.cpp @@ -30,6 +30,7 @@ extern "C" { #include #include #include +#include #include #include #include @@ -46,6 +47,8 @@ extern "C" { // 04/18/11 - gab: #include +#include + #if CONFIG_MACF #include #include @@ -62,6 +65,7 @@ extern "C" { #include #include +#include #if PRAGMA_MARK #pragma mark External & Internal Function Protos @@ -354,10 +358,15 @@ static AbsoluteTime sLastWakeTime; // last time we * to automatically parse the list of loaded kexts. 
**********/ static IOLock * sKextSummariesLock = NULL; +extern "C" lck_spin_t vm_allocation_sites_lock; +static IOSimpleLock * sKextAccountsLock = &vm_allocation_sites_lock; void (*sLoadedKextSummariesUpdated)(void) = OSKextLoadedKextSummariesUpdated; OSKextLoadedKextSummaryHeader * gLoadedKextSummaries __attribute__((used)) = NULL; static size_t sLoadedKextSummariesAllocSize = 0; + +static OSKextActiveAccount * sKextAccounts; +static uint32_t sKextAccountsCount; }; /********************************************************************* @@ -380,6 +389,22 @@ static OSArray * sUserSpaceLogMessageArray = NULL; * End scope for sKextInnerLock-protected variables. *********************************************************************/ + +/********************************************************************* + helper function used for collecting PGO data upon unload of a kext + */ + +static int OSKextGrabPgoDataLocked(OSKext *kext, + bool metadata, + uuid_t instance_uuid, + uint64_t *pSize, + char *pBuffer, + uint64_t bufferSize); + +/**********************************************************************/ + + + #if PRAGMA_MARK #pragma mark OSData callbacks (need to move to OSData) #endif @@ -852,7 +877,7 @@ OSKext::removeKextBootstrap(void) /* Allocate space for the LINKEDIT copy. 
*/ mem_result = kmem_alloc(kernel_map, (vm_offset_t *) &seg_copy, - seg_length); + seg_length, VM_KERN_MEMORY_KEXT); if (mem_result != KERN_SUCCESS) { OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | @@ -1518,6 +1543,17 @@ OSKext::initWithPrelinkedInfoDict( } } + result = slidePrelinkedExecutable(); + if (result != kOSReturnSuccess) { + goto finish; + } + + /* set VM protections now, wire later at kext load */ + result = setVMAttributes(true, false); + if (result != KERN_SUCCESS) { + goto finish; + } + flags.prelinked = true; /* If we created a kext from prelink info, @@ -1532,7 +1568,6 @@ OSKext::initWithPrelinkedInfoDict( return result; } - /********************************************************************* *********************************************************************/ OSKext * @@ -2643,7 +2678,7 @@ z_alloc(void * notused __unused, u_int num_items, u_int size) } uint32_t allocSize = (uint32_t)allocSize64; - zmem = (z_mem *)kalloc(allocSize); + zmem = (z_mem *)kalloc_tag(allocSize, VM_KERN_MEMORY_OSKEXT); if (!zmem) { goto finish; } @@ -2691,7 +2726,7 @@ OSKext::extractMkext2FileData( } if (KERN_SUCCESS != kmem_alloc(kernel_map, - (vm_offset_t*)&uncompressedDataBuffer, fullSize)) { + (vm_offset_t*)&uncompressedDataBuffer, fullSize, VM_KERN_MEMORY_OSKEXT)) { /* How's this for cheesy? The kernel is only asked to extract * kext plists so we tailor the log messages. 
@@ -3092,7 +3127,7 @@ OSKext::serializeLogInfo( logInfo = serializer->text(); logInfoLength = serializer->getLength(); - kmem_result = kmem_alloc(kernel_map, (vm_offset_t *)&buffer, round_page(logInfoLength)); + kmem_result = kmem_alloc(kernel_map, (vm_offset_t *)&buffer, round_page(logInfoLength), VM_KERN_MEMORY_OSKEXT); if (kmem_result != KERN_SUCCESS) { OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | @@ -3203,6 +3238,53 @@ OSKext::lookupKextWithAddress(vm_address_t address) return foundKext; } + +/********************************************************************* +*********************************************************************/ +OSKext * +OSKext::lookupKextWithUUID(uuid_t wanted) +{ + OSKext * foundKext = NULL; // returned + uint32_t count, i; + + IORecursiveLockLock(sKextLock); + + count = sLoadedKexts->getCount(); + + for (i = 0; i < count; i++) { + OSKext * thisKext = NULL; + + thisKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i)); + if (!thisKext) { + continue; + } + + OSData *uuid_data = thisKext->copyUUID(); + if (!uuid_data) { + continue; + } + + uuid_t uuid; + memcpy(&uuid, uuid_data->getBytesNoCopy(), sizeof(uuid)); + uuid_data->release(); + + if (0 == uuid_compare(wanted, uuid)) { + foundKext = thisKext; + foundKext->retain(); + goto finish; + } + + } + +finish: + IORecursiveLockUnlock(sKextLock); + + return foundKext; +} + + + + /********************************************************************* *********************************************************************/ /* static */ @@ -3276,6 +3358,11 @@ OSKext::removeKext( } #endif + /* make sure there are no resource requests in flight - 17187548 */ + if (aKext->countRequestCallbacks()) { + goto finish; + } + /* If we are terminating, send the request to the IOCatalogue * (which will actually call us right back but that's ok we have * a recursive lock don't you know) but do not ask the IOCatalogue @@ -3923,7 +4010,7 @@ static char * makeHostArchKey(const char * key, uint32_t * 
keySizeOut) /* Add 1 for the ARCH_SEPARATOR_CHAR, and 1 for the '\0'. */ keySize = 1 + 1 + strlen(key) + strlen(ARCHNAME); - result = (char *)kalloc(keySize); + result = (char *)kalloc_tag(keySize, VM_KERN_MEMORY_OSKEXT); if (!result) { goto finish; } @@ -4018,7 +4105,7 @@ OSKext::isInExcludeList(void) * string (or strings) that we will not allow to load */ versionString = OSDynamicCast(OSString, sExcludeListByID->getObject(bundleID)); - if (!versionString) { + if (versionString == NULL || versionString->getLength() > (sizeof(myBuffer) - 1)) { return(false); } @@ -4397,7 +4484,7 @@ OSKext::load( if (!sKxldContext) { kxldResult = kxld_create_context(&sKxldContext, &kern_allocate, &kxld_log_callback, /* Flags */ (KXLDFlags) 0, - /* cputype */ 0, /* cpusubtype */ 0); + /* cputype */ 0, /* cpusubtype */ 0, /* page size */ 0); if (kxldResult) { OSKextLog(this, kOSKextLogErrorLevel | @@ -4479,6 +4566,19 @@ OSKext::load( goto finish; } + pendingPgoHead.next = &pendingPgoHead; + pendingPgoHead.prev = &pendingPgoHead; + + uuid_generate(instance_uuid); + account = IONew(OSKextAccount, 1); + if (!account) { + result = KERN_MEMORY_ERROR; + goto finish; + } + bzero(account, sizeof(*account)); + account->loadTag = kmod_info->id; + account->site.flags = VM_TAG_KMOD; + flags.loaded = true; /* Add the kext to the list of loaded kexts and update the kmod_info @@ -4524,6 +4624,14 @@ OSKext::load( #else jettisonLinkeditSegment(); #endif /* CONFIG_DTRACE */ + +#if !VM_MAPPED_KEXTS + /* If there is a page (or more) worth of padding after the end + * of the last data section but before the end of the data segment + * then free it in the same manner the LinkeditSegment is freed + */ + jettisonDATASegmentPadding(); +#endif } loaded: @@ -4601,7 +4709,7 @@ static char * strdup(const char * string) } size = 1 + strlen(string); - result = (char *)kalloc(size); + result = (char *)kalloc_tag(size, VM_KERN_MEMORY_OSKEXT); if (!result) { goto finish; } @@ -4615,6 +4723,40 @@ static char * 
strdup(const char * string) /********************************************************************* * *********************************************************************/ + +kernel_section_t * +OSKext::lookupSection(const char *segname, const char *secname) +{ + kernel_section_t * found_section = NULL; + kernel_mach_header_t * mh = NULL; + kernel_segment_command_t * seg = NULL; + kernel_section_t * sec = NULL; + + mh = (kernel_mach_header_t *)linkedExecutable->getBytesNoCopy(); + + for (seg = firstsegfromheader(mh); seg != NULL; seg = nextsegfromheader(mh, seg)) { + + if (0 != strcmp(seg->segname, segname)) { + continue; + } + + for (sec = firstsect(seg); sec != NULL; sec = nextsect(seg, sec)) { + + if (0 == strcmp(sec->sectname, secname)) { + found_section = sec; + goto out; + } + } + } + + out: + return found_section; +} + +/********************************************************************* +* +*********************************************************************/ + OSReturn OSKext::slidePrelinkedExecutable() { @@ -4891,13 +5033,19 @@ OSKext::loadExecutable() } if (isPrelinked()) { - result = slidePrelinkedExecutable(); - if (result != kOSReturnSuccess) { - goto finish; - } goto register_kmod; } + /* all callers must be entitled */ + if (FALSE == IOTaskHasEntitlement(current_task(), "com.apple.rootless.kext-management")) { + OSKextLog(this, + kOSKextLogErrorLevel | kOSKextLogLoadFlag, + "Not entitled to link kext '%s'", + getIdentifierCString()); + result = kOSKextReturnNotPrivileged; + goto finish; + } + theExecutable = getExecutable(); if (!theExecutable) { if (declaresExecutable()) { @@ -4952,7 +5100,7 @@ OSKext::loadExecutable() goto finish; } - kxlddeps = (KXLDDependency *)kalloc(num_kxlddeps * sizeof(*kxlddeps)); + kxlddeps = (KXLDDependency *)kalloc_tag(num_kxlddeps * sizeof(*kxlddeps), VM_KERN_MEMORY_OSKEXT); if (!kxlddeps) { OSKextLog(this, kOSKextLogErrorLevel | @@ -5057,7 +5205,7 @@ OSKext::loadExecutable() /* Whip up a fake kmod_info entry for the 
interface kext. */ - kmod_info = (kmod_info_t *)kalloc(sizeof(kmod_info_t)); + kmod_info = (kmod_info_t *)kalloc_tag(sizeof(kmod_info_t), VM_KERN_MEMORY_OSKEXT); if (!kmod_info) { result = KERN_MEMORY_ERROR; goto finish; @@ -5092,8 +5240,8 @@ OSKext::loadExecutable() */ num_kmod_refs = getNumDependencies(); if (num_kmod_refs) { - kmod_info->reference_list = (kmod_reference_t *)kalloc( - num_kmod_refs * sizeof(kmod_reference_t)); + kmod_info->reference_list = (kmod_reference_t *)kalloc_tag( + num_kmod_refs * sizeof(kmod_reference_t), VM_KERN_MEMORY_OSKEXT); if (!kmod_info->reference_list) { result = KERN_MEMORY_ERROR; goto finish; @@ -5123,7 +5271,8 @@ OSKext::loadExecutable() (unsigned)kmod_info->id); } - result = setVMProtections(); + /* if prelinked, VM protections are already set */ + result = setVMAttributes(!isPrelinked(), true); if (result != KERN_SUCCESS) { goto finish; } @@ -5193,14 +5342,6 @@ OSKext::jettisonLinkeditSegment(void) vm_size_t linkeditsize, kextsize; OSData * data = NULL; - /* 16K_XXX: To Remove */ - /* We don't currently guarantee alignment greater than 4KB for kext - * segments, so we cannot always jettison __LINKEDIT cleanly, so let - * it be for now. - */ - if (!TEST_PAGE_SIZE_4K) - return; - #if NO_KEXTD /* We can free symbol tables for all embedded kexts because we don't * support runtime kext linking. 
@@ -5264,6 +5405,61 @@ OSKext::jettisonLinkeditSegment(void) return; } +/********************************************************************* +* If there are whole pages that are unused betweem the last section +* of the DATA segment and the end of the DATA segment then we can free +* them +*********************************************************************/ +void +OSKext::jettisonDATASegmentPadding(void) +{ + kernel_mach_header_t * mh; + kernel_segment_command_t * dataSeg; + kernel_section_t * sec, * lastSec; + vm_offset_t dataSegEnd, lastSecEnd; + vm_size_t padSize; + + mh = (kernel_mach_header_t *)kmod_info->address; + + dataSeg = getsegbynamefromheader(mh, SEG_DATA); + if (dataSeg == NULL) { + return; + } + + lastSec = NULL; + sec = firstsect(dataSeg); + while (sec != NULL) { + lastSec = sec; + sec = nextsect(dataSeg, sec); + } + + if (lastSec == NULL) { + return; + } + + if ((dataSeg->vmaddr != round_page(dataSeg->vmaddr)) || + (dataSeg->vmsize != round_page(dataSeg->vmsize))) { + return; + } + + dataSegEnd = dataSeg->vmaddr + dataSeg->vmsize; + lastSecEnd = round_page(lastSec->addr + lastSec->size); + + if (dataSegEnd <= lastSecEnd) { + return; + } + + padSize = dataSegEnd - lastSecEnd; + + if (padSize >= PAGE_SIZE) { +#if VM_MAPPED_KEXTS + kext_free(lastSecEnd, padSize); +#else + ml_static_mfree(lastSecEnd, padSize); +#endif + } +} + /********************************************************************* *********************************************************************/ void @@ -5380,12 +5576,12 @@ OSKext_wire( vm_prot_t access_type, boolean_t user_wire) { - return vm_map_wire(map, start, end, access_type, user_wire); + return vm_map_wire(map, start, end, access_type | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_KEXT), user_wire); } #endif OSReturn -OSKext::setVMProtections(void) +OSKext::setVMAttributes(bool protect, bool wire) { vm_map_t kext_map = NULL; kernel_segment_command_t * seg = NULL; @@ -5393,7 +5589,7 @@ OSKext::setVMProtections(void) 
vm_map_offset_t end = 0; OSReturn result = kOSReturnError; - if (!kmod_info->address && !kmod_info->size) { + if (isInterface() || !declaresExecutable()) { result = kOSReturnSuccess; goto finish; } @@ -5406,8 +5602,9 @@ OSKext::setVMProtections(void) } /* Protect the headers as read-only; they do not need to be wired */ - result = OSKext_protect(kext_map, kmod_info->address, - kmod_info->address + kmod_info->hdr_size, VM_PROT_READ, TRUE); + result = (protect) ? OSKext_protect(kext_map, kmod_info->address, + kmod_info->address + kmod_info->hdr_size, VM_PROT_READ, TRUE) + : KERN_SUCCESS; if (result != KERN_SUCCESS) { goto finish; } @@ -5415,32 +5612,36 @@ OSKext::setVMProtections(void) /* Set the VM protections and wire down each of the segments */ seg = firstsegfromheader((kernel_mach_header_t *)kmod_info->address); while (seg) { + + start = round_page(seg->vmaddr); end = trunc_page(seg->vmaddr + seg->vmsize); - result = OSKext_protect(kext_map, start, end, seg->maxprot, TRUE); - if (result != KERN_SUCCESS) { - OSKextLog(this, - kOSKextLogErrorLevel | - kOSKextLogLoadFlag, - "Kext %s failed to set maximum VM protections " - "for segment %s - 0x%x.", - getIdentifierCString(), seg->segname, (int)result); - goto finish; - } + if (protect) { + result = OSKext_protect(kext_map, start, end, seg->maxprot, TRUE); + if (result != KERN_SUCCESS) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s failed to set maximum VM protections " + "for segment %s - 0x%x.", + getIdentifierCString(), seg->segname, (int)result); + goto finish; + } - result = OSKext_protect(kext_map, start, end, seg->initprot, FALSE); - if (result != KERN_SUCCESS) { - OSKextLog(this, - kOSKextLogErrorLevel | - kOSKextLogLoadFlag, - "Kext %s failed to set initial VM protections " - "for segment %s - 0x%x.", - getIdentifierCString(), seg->segname, (int)result); - goto finish; + result = OSKext_protect(kext_map, start, end, seg->initprot, FALSE); + if (result != KERN_SUCCESS) { + 
OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s failed to set initial VM protections " + "for segment %s - 0x%x.", + getIdentifierCString(), seg->segname, (int)result); + goto finish; + } } - if (segmentShouldBeWired(seg)) { + if (segmentShouldBeWired(seg) && wire) { result = OSKext_wire(kext_map, start, end, seg->initprot, FALSE); if (result != KERN_SUCCESS) { goto finish; @@ -5897,9 +6098,10 @@ OSKext::stop(void) OSReturn OSKext::unload(void) { - OSReturn result = kOSReturnError; - unsigned int index; - uint32_t num_kmod_refs = 0; + OSReturn result = kOSReturnError; + unsigned int index; + uint32_t num_kmod_refs = 0; + OSKextAccount * freeAccount; if (!sUnloadEnabled) { OSKextLog(this, @@ -5978,6 +6180,24 @@ OSKext::unload(void) "Kext %s unloading.", getIdentifierCString()); + { + struct list_head *p; + struct list_head *prev; + struct list_head *next; + for (p = pendingPgoHead.next; p != &pendingPgoHead; p = next) { + OSKextGrabPgoStruct *s = container_of(p, OSKextGrabPgoStruct, list_head); + s->err = OSKextGrabPgoDataLocked(this, s->metadata, instance_uuid, s->pSize, s->pBuffer, s->bufferSize); + prev = p->prev; + next = p->next; + prev->next = next; + next->prev = prev; + p->prev = p; + p->next = p; + IORecursiveLockWakeup(sKextLock, s, false); + } + } + + /* Even if we don't call the stop function, we want to be sure we * have no OSMetaClass references before unloading the kext executable * from memory. OSMetaClasses may have pointers into the kext executable @@ -6039,6 +6259,13 @@ OSKext::unload(void) notifyKextUnloadObservers(this); + freeAccount = NULL; + IOSimpleLockLock(sKextAccountsLock); + if (account->site.tag) account->site.flags |= VM_TAG_UNLOAD; + else freeAccount = account; + IOSimpleLockUnlock(sKextAccountsLock); + if (freeAccount) IODelete(freeAccount, OSKextAccount, 1); + /* Unwire and free the linked executable. 
*/ if (linkedExecutable) { @@ -7620,7 +7847,7 @@ OSKext::handleRequest( /* This kmem_alloc sets the return value of the function. */ kmem_result = kmem_alloc(kernel_map, (vm_offset_t *)&buffer, - round_page(responseLength)); + round_page(responseLength), VM_KERN_MEMORY_OSKEXT); if (kmem_result != KERN_SUCCESS) { OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | @@ -7664,6 +7891,274 @@ OSKext::handleRequest( return result; } + +// #include +extern "C" { + + uint64_t __llvm_profile_get_size_for_buffer_internal(const char *DataBegin, + const char *DataEnd, + const char *CountersBegin, + const char *CountersEnd , + const char *NamesBegin, + const char *NamesEnd); + int __llvm_profile_write_buffer_internal(char *Buffer, + const char *DataBegin, + const char *DataEnd, + const char *CountersBegin, + const char *CountersEnd , + const char *NamesBegin, + const char *NamesEnd); +} + + +static +void OSKextPgoMetadataPut(char *pBuffer, + size_t *position, + size_t bufferSize, + uint32_t *num_pairs, + const char *key, + const char *value) +{ + size_t strlen_key = strlen(key); + size_t strlen_value = strlen(value); + size_t len = strlen(key) + 1 + strlen(value) + 1; + char *pos = pBuffer + *position; + *position += len; + if (pBuffer && bufferSize && *position <= bufferSize) { + memcpy(pos, key, strlen_key); pos += strlen_key; + *(pos++) = '='; + memcpy(pos, value, strlen_value); pos += strlen_value; + *(pos++) = 0; + if (num_pairs) { + (*num_pairs)++; + } + } +} + + +static +void OSKextPgoMetadataPutMax(size_t *position, const char *key, size_t value_max) +{ + *position += strlen(key) + 1 + value_max + 1; +} + + +static +void OSKextPgoMetadataPutAll(OSKext *kext, + uuid_t instance_uuid, + char *pBuffer, + size_t *position, + size_t bufferSize, + uint32_t *num_pairs) +{ + assert_static(sizeof(clock_sec_t) % 2 == 0); + //log_10 2^16 ≈ 4.82 + const size_t max_secs_string_size = 5 * sizeof(clock_sec_t)/2; + const size_t max_timestamp_string_size = max_secs_string_size + 1 + 6; + + 
if (!pBuffer) { + OSKextPgoMetadataPutMax(position, "INSTANCE", 36); + OSKextPgoMetadataPutMax(position, "UUID", 36); + OSKextPgoMetadataPutMax(position, "TIMESTAMP", max_timestamp_string_size); + } else { + uuid_string_t instance_uuid_string; + uuid_unparse(instance_uuid, instance_uuid_string); + OSKextPgoMetadataPut(pBuffer, position, bufferSize, num_pairs, + "INSTANCE", instance_uuid_string); + + OSData *uuid_data; + uuid_t uuid; + uuid_string_t uuid_string; + uuid_data = kext->copyUUID(); + if (uuid_data) { + memcpy(uuid, uuid_data->getBytesNoCopy(), sizeof(uuid)); + OSSafeRelease(uuid_data); + uuid_unparse(uuid, uuid_string); + OSKextPgoMetadataPut(pBuffer, position, bufferSize, num_pairs, + "UUID", uuid_string); + } + + clock_sec_t secs; + clock_usec_t usecs; + clock_get_calendar_microtime(&secs, &usecs); + assert(usecs < 1000000); + char timestamp[max_timestamp_string_size + 1]; + assert_static(sizeof(long) >= sizeof(clock_sec_t)); + snprintf(timestamp, sizeof(timestamp), "%lu.%06d", (unsigned long)secs, (int)usecs); + OSKextPgoMetadataPut(pBuffer, position, bufferSize, num_pairs, + "TIMESTAMP", timestamp); + } + + OSKextPgoMetadataPut(pBuffer, position, bufferSize, num_pairs, + "NAME", kext->getIdentifierCString()); + + char versionCString[kOSKextVersionMaxLength]; + OSKextVersionGetString(kext->getVersion(), versionCString, kOSKextVersionMaxLength); + OSKextPgoMetadataPut(pBuffer, position, bufferSize, num_pairs, + "VERSION", versionCString); + +} + +static +size_t OSKextPgoMetadataSize(OSKext *kext) +{ + size_t position = 0; + uuid_t fakeuuid = {}; + OSKextPgoMetadataPutAll(kext, fakeuuid, NULL, &position, 0, NULL); + return position; +} + + +int OSKextGrabPgoDataLocked(OSKext *kext, + bool metadata, + uuid_t instance_uuid, + uint64_t *pSize, + char *pBuffer, + uint64_t bufferSize) +{ + + int err = 0; + + kernel_section_t *sect_prf_data = NULL; + kernel_section_t *sect_prf_name = NULL; + kernel_section_t *sect_prf_cnts = NULL; + uint64_t size; + size_t 
metadata_size = 0; + + sect_prf_data = kext->lookupSection("__DATA", "__llvm_prf_data"); + sect_prf_name = kext->lookupSection("__DATA", "__llvm_prf_name"); + sect_prf_cnts = kext->lookupSection("__DATA", "__llvm_prf_cnts"); + + if (!sect_prf_data || !sect_prf_name || !sect_prf_cnts) { + err = ENOTSUP; + goto out; + } + + size = __llvm_profile_get_size_for_buffer_internal( + (const char*) sect_prf_data->addr, (const char*) sect_prf_data->addr + sect_prf_data->size, + (const char*) sect_prf_cnts->addr, (const char*) sect_prf_cnts->addr + sect_prf_cnts->size, + (const char*) sect_prf_name->addr, (const char*) sect_prf_name->addr + sect_prf_name->size); + + if (metadata) { + metadata_size = OSKextPgoMetadataSize(kext); + size += metadata_size; + size += sizeof(pgo_metadata_footer); + } + + + if (pSize) { + *pSize = size; + } + + if (pBuffer && bufferSize) { + if (bufferSize < size) { + err = ERANGE; + goto out; + } + + err = __llvm_profile_write_buffer_internal( + pBuffer, + (const char*) sect_prf_data->addr, (const char*) sect_prf_data->addr + sect_prf_data->size, + (const char*) sect_prf_cnts->addr, (const char*) sect_prf_cnts->addr + sect_prf_cnts->size, + (const char*) sect_prf_name->addr, (const char*) sect_prf_name->addr + sect_prf_name->size); + + if (err) { + err = EIO; + goto out; + } + + if (metadata) { + char *end_of_buffer = pBuffer + size; + struct pgo_metadata_footer *footerp = (struct pgo_metadata_footer *) (end_of_buffer - sizeof(struct pgo_metadata_footer)); + char *metadata_buffer = end_of_buffer - (sizeof(struct pgo_metadata_footer) + metadata_size); + + size_t metadata_position = 0; + uint32_t num_pairs = 0; + OSKextPgoMetadataPutAll(kext, instance_uuid, metadata_buffer, &metadata_position, metadata_size, &num_pairs); + while (metadata_position < metadata_size) { + metadata_buffer[metadata_position++] = 0; + } + + struct pgo_metadata_footer footer; + footer.magic = htonl(0x6d657461); + footer.number_of_pairs = htonl( num_pairs ); + 
footer.offset_to_pairs = htonl( sizeof(struct pgo_metadata_footer) + metadata_size ); + memcpy(footerp, &footer, sizeof(footer)); + } + + } + +out: + return err; +} + + +int +OSKextGrabPgoData(uuid_t uuid, + uint64_t *pSize, + char *pBuffer, + uint64_t bufferSize, + int wait_for_unload, + int metadata) +{ + int err = 0; + OSKext *kext = NULL; + + + IORecursiveLockLock(sKextLock); + + kext = OSKext::lookupKextWithUUID(uuid); + if (!kext) { + err = ENOENT; + goto out; + } + + if (wait_for_unload) { + OSKextGrabPgoStruct s; + + s.metadata = metadata; + s.pSize = pSize; + s.pBuffer = pBuffer; + s.bufferSize = bufferSize; + s.err = EINTR; + + struct list_head *prev = &kext->pendingPgoHead; + struct list_head *next = kext->pendingPgoHead.next; + + s.list_head.prev = prev; + s.list_head.next = next; + + prev->next = &s.list_head; + next->prev = &s.list_head; + + kext->release(); + kext = NULL; + + IORecursiveLockSleep(sKextLock, &s, THREAD_ABORTSAFE); + + prev = s.list_head.prev; + next = s.list_head.next; + + prev->next = next; + next->prev = prev; + + err = s.err; + + } else { + err = OSKextGrabPgoDataLocked(kext, metadata, kext->instance_uuid, pSize, pBuffer, bufferSize); + } + + out: + if (kext) { + kext->release(); + } + + IORecursiveLockUnlock(sKextLock); + + return err; +} + + /********************************************************************* *********************************************************************/ /* static */ @@ -7680,6 +8175,26 @@ OSKext::copyLoadedKextInfo( IORecursiveLockLock(sKextLock); +#if CONFIG_MACF + /* Is the calling process allowed to query kext info? 
*/ + if (current_task() != kernel_task) { + int macCheckResult = 0; + kauth_cred_t cred = NULL; + + cred = kauth_cred_get_with_ref(); + macCheckResult = mac_kext_check_query(cred); + kauth_cred_unref(&cred); + + if (macCheckResult != 0) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogLoadFlag, + "Failed to query kext info (MAC policy error 0x%x).", + macCheckResult); + goto finish; + } + } +#endif + /* Empty list of bundle ids is equivalent to no list (get all). */ if (kextIdentifiers && !kextIdentifiers->getCount()) { @@ -7699,6 +8214,45 @@ OSKext::copyLoadedKextInfo( if (!result) { goto finish; } + +#if 0 + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "kaslr: vm_kernel_slide 0x%lx \n", + vm_kernel_slide); + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "kaslr: vm_kernel_stext 0x%lx vm_kernel_etext 0x%lx \n", + vm_kernel_stext, vm_kernel_etext); + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "kaslr: vm_kernel_base 0x%lx vm_kernel_top 0x%lx \n", + vm_kernel_base, vm_kernel_top); + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "kaslr: vm_kext_base 0x%lx vm_kext_top 0x%lx \n", + vm_kext_base, vm_kext_top); + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "kaslr: vm_prelink_stext 0x%lx vm_prelink_etext 0x%lx \n", + vm_prelink_stext, vm_prelink_etext); + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "kaslr: vm_prelink_sinfo 0x%lx vm_prelink_einfo 0x%lx \n", + vm_prelink_sinfo, vm_prelink_einfo); + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "kaslr: vm_slinkedit 0x%lx vm_elinkedit 0x%lx \n", + vm_slinkedit, vm_elinkedit); +#endif + for (i = 0; i < count; i++) { OSKext * thisKext = NULL; // do not release Boolean includeThis = true; @@ -7804,6 +8358,7 @@ OSKext::copyInfo(OSArray * infoKeys) 
linkedExecutable->getBytesNoCopy(); #if !SECURE_KERNEL + // do not return macho header info on shipping iOS - 19095897 if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleMachOHeadersKey)) { kernel_mach_header_t * temp_kext_mach_hdr; struct load_command * lcp; @@ -7845,10 +8400,10 @@ OSKext::copyInfo(OSArray * infoKeys) VM_KERNEL_UNSLIDE(segp->vmaddr), segp->vmsize, segp->nsects); if ( (VM_KERNEL_IS_SLID(segp->vmaddr) == false) && - (VM_KERNEL_IS_KEXT(segp->vmaddr) == false) && - (VM_KERNEL_IS_PRELINKTEXT(segp->vmaddr) == false) && - (VM_KERNEL_IS_PRELINKINFO(segp->vmaddr) == false) && - (VM_KERNEL_IS_KEXT_LINKEDIT(segp->vmaddr) == false) ) { + (VM_KERNEL_IS_KEXT(segp->vmaddr) == false) && + (VM_KERNEL_IS_PRELINKTEXT(segp->vmaddr) == false) && + (VM_KERNEL_IS_PRELINKINFO(segp->vmaddr) == false) && + (VM_KERNEL_IS_KEXT_LINKEDIT(segp->vmaddr) == false) ) { OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | kOSKextLogGeneralFlag, @@ -7861,7 +8416,7 @@ OSKext::copyInfo(OSArray * infoKeys) for (secp = firstsect(segp); secp != NULL; secp = nextsect(segp, secp)) { secp->addr = VM_KERNEL_UNSLIDE(secp->addr); } - } + } lcp = (struct load_command *)((caddr_t)lcp + lcp->cmdsize); } result->setObject(kOSBundleMachOHeadersKey, headerData); @@ -7933,8 +8488,8 @@ OSKext::copyInfo(OSArray * infoKeys) // +1 for slash, +1 for \0 executablePathCStringSize = pathLength + executableRelPath->getLength() + 2; - executablePathCString = (char *)kalloc((executablePathCStringSize) * - sizeof(char)); // +1 for \0 + executablePathCString = (char *)kalloc_tag((executablePathCStringSize) * + sizeof(char), VM_KERN_MEMORY_OSKEXT); // +1 for \0 if (!executablePathCString) { goto finish; } @@ -9473,7 +10028,7 @@ OSKextVLog( va_end(argList); if (length + 1 >= sizeof(stackBuffer)) { - allocBuffer = (char *)kalloc((length + 1) * sizeof(char)); + allocBuffer = (char *)kalloc_tag((length + 1) * sizeof(char), VM_KERN_MEMORY_OSKEXT); if (!allocBuffer) { goto finish; } @@ -9739,6 +10294,7 @@ 
OSKext::printKextsInBacktrace( u_int i = 0; if (lockFlag) { + if (!sKextSummariesLock) return; IOLockLock(sKextSummariesLock); } @@ -10172,7 +10728,7 @@ OSKext::saveLoadedKextPanicList(void) uint32_t newlist_size = 0; newlist_size = KEXT_PANICLIST_SIZE; - newlist = (char *)kalloc(newlist_size); + newlist = (char *)kalloc_tag(newlist_size, VM_KERN_MEMORY_OSKEXT); if (!newlist) { OSKextLog(/* kext */ NULL, @@ -10303,7 +10859,13 @@ OSKext::updateLoadedKextSummaries(void) u_int count; u_int maxKexts; u_int i, j; + OSKextActiveAccount * accountingList; + OSKextActiveAccount * prevAccountingList; + uint32_t idx, accountingListAlloc, accountingListCount, prevAccountingListCount; + prevAccountingList = NULL; + prevAccountingListCount = 0; + #if DEVELOPMENT || DEBUG if (IORecursiveLockHaveLock(sKextLock) == false) { panic("sKextLock must be held"); @@ -10338,7 +10900,7 @@ OSKext::updateLoadedKextSummaries(void) } result = kmem_alloc(kernel_map, (vm_offset_t*)&summaryHeaderAlloc, - size); + size, VM_KERN_MEMORY_OSKEXT); if (result != KERN_SUCCESS) goto finish; summaryHeader = summaryHeaderAlloc; summarySize = size; @@ -10363,11 +10925,12 @@ OSKext::updateLoadedKextSummaries(void) bzero(summaryHeader, summarySize); summaryHeader->version = kOSKextLoadedKextSummaryVersion; summaryHeader->entry_size = sizeof(OSKextLoadedKextSummary); - + /* Populate each kext summary. 
*/ count = sLoadedKexts->getCount(); + accountingListAlloc = 0; for (i = 0, j = 0; i < count && j < maxKexts; ++i) { aKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i)); if (!aKext || !aKext->isExecutable()) { @@ -10376,8 +10939,29 @@ OSKext::updateLoadedKextSummaries(void) aKext->updateLoadedKextSummary(&summaryHeader->summaries[j++]); summaryHeader->numSummaries++; + accountingListAlloc++; } - + + accountingList = IONew(typeof(accountingList[0]), accountingListAlloc); + accountingListCount = 0; + for (i = 0, j = 0; i < count && j < maxKexts; ++i) { + aKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i)); + if (!aKext || !aKext->isExecutable()) { + continue; + } + + OSKextActiveAccount activeAccount; + aKext->updateActiveAccount(&activeAccount); + // order by address + for (idx = 0; idx < accountingListCount; idx++) + { + if (activeAccount.address < accountingList[idx].address) break; + } + bcopy(&accountingList[idx], &accountingList[idx + 1], (accountingListCount - idx) * sizeof(accountingList[0])); + accountingList[idx] = activeAccount; + accountingListCount++; + } + assert(accountingListCount == accountingListAlloc); /* Write protect the buffer and move it into place. 
*/ @@ -10396,6 +10980,13 @@ OSKext::updateLoadedKextSummaries(void) */ if (sLoadedKextSummariesUpdated) (*sLoadedKextSummariesUpdated)(); + IOSimpleLockLock(sKextAccountsLock); + prevAccountingList = sKextAccounts; + prevAccountingListCount = sKextAccountsCount; + sKextAccounts = accountingList; + sKextAccountsCount = accountingListCount; + IOSimpleLockUnlock(sKextAccountsLock); + finish: IOLockUnlock(sKextSummariesLock); @@ -10405,6 +10996,9 @@ OSKext::updateLoadedKextSummaries(void) if (summaryHeaderAlloc) { kmem_free(kernel_map, (vm_offset_t)summaryHeaderAlloc, summarySize); } + if (prevAccountingList) { + IODelete(prevAccountingList, typeof(accountingList[0]), prevAccountingListCount); + } return; } @@ -10435,6 +11029,67 @@ OSKext::updateLoadedKextSummary(OSKextLoadedKextSummary *summary) return; } +/********************************************************************* +*********************************************************************/ + +void +OSKext::updateActiveAccount(OSKextActiveAccount *account) +{ + bzero(account, sizeof(*account)); + account->address = kmod_info->address; + if (account->address) { + account->address_end = kmod_info->address + kmod_info->size; + } + account->account = this->account; +} + +extern "C" const vm_allocation_site_t * +OSKextGetAllocationSiteForCaller(uintptr_t address) +{ + OSKextActiveAccount * active; + vm_allocation_site_t * site; + uint32_t baseIdx; + uint32_t lim; + + IOSimpleLockLock(sKextAccountsLock); + site = NULL; + // bsearch sKextAccounts list + for (baseIdx = 0, lim = sKextAccountsCount; lim; lim >>= 1) + { + active = &sKextAccounts[baseIdx + (lim >> 1)]; + if ((address >= active->address) && (address < active->address_end)) + { + site = &active->account->site; + if (!site->tag) vm_tag_alloc_locked(site); + break; + } + else if (address > active->address) + { + // move right + baseIdx += (lim >> 1) + 1; + lim--; + } + // else move left + } + IOSimpleLockUnlock(sKextAccountsLock); + + return (site); +} + 
+extern "C" uint32_t +OSKextGetKmodIDForSite(vm_allocation_site_t * site) +{ + OSKextAccount * account = (typeof(account)) site; + return (account->loadTag); +} + +extern "C" void +OSKextFreeSite(vm_allocation_site_t * site) +{ + OSKextAccount * freeAccount = (typeof(freeAccount)) site; + IODelete(freeAccount, OSKextAccount, 1); +} + /********************************************************************* *********************************************************************/ @@ -10473,7 +11128,7 @@ GetAppleTEXTHashForKext(OSKext * theKext, OSDictionary *theInfoDict) // KEC_FIPS type kexts never unload so we don't have to clean up our // AppleTEXTHash_t if (kmem_alloc(kernel_map, (vm_offset_t *) &my_athp, - sizeof(AppleTEXTHash_t)) != KERN_SUCCESS) { + sizeof(AppleTEXTHash_t), VM_KERN_MEMORY_OSKEXT) != KERN_SUCCESS) { return(NULL); } diff --git a/libkern/c++/OSMetaClass.cpp b/libkern/c++/OSMetaClass.cpp index 807eb4598..b32ab8c9b 100644 --- a/libkern/c++/OSMetaClass.cpp +++ b/libkern/c++/OSMetaClass.cpp @@ -48,6 +48,9 @@ #include +#include + + __BEGIN_DECLS #include @@ -64,13 +67,6 @@ __BEGIN_DECLS /********************************************************************* * Macros *********************************************************************/ -#if OSALLOCDEBUG -extern int debug_container_malloc_size; -#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while (0) -#else -#define ACCUMSIZE(s) -#endif /* OSALLOCDEBUG */ - __END_DECLS #if PRAGMA_MARK @@ -111,10 +107,12 @@ static struct StalledData { } * sStalled; IOLock * sStalledClassesLock = NULL; - struct ExpansionData { - OSOrderedSet * instances; - OSKext * kext; + OSOrderedSet * instances; + OSKext * kext; +#if IOTRACKING + IOTrackingQueue * tracking; +#endif }; @@ -393,6 +391,9 @@ OSMetaClass::OSMetaClass( reserved = IONew(ExpansionData, 1); bzero(reserved, sizeof(ExpansionData)); +#if IOTRACKING + reserved->tracking = IOTrackingQueueAlloc(inClassName, inClassSize, 0, true); +#endif /* Hack alert: 
We are just casting inClassName and storing it in * an OSString * instance variable. This may be because you can't @@ -416,7 +417,7 @@ OSMetaClass::OSMetaClass( int newSize = oldSize + kKModCapacityIncrement * sizeof(OSMetaClass *); - sStalled->classes = (OSMetaClass **)kalloc(newSize); + sStalled->classes = (OSMetaClass **)kalloc_tag(newSize, VM_KERN_MEMORY_OSKEXT); if (!sStalled->classes) { sStalled->classes = oldStalled; sStalled->result = kOSMetaClassNoTempData; @@ -426,7 +427,7 @@ OSMetaClass::OSMetaClass( sStalled->capacity += kKModCapacityIncrement; memmove(sStalled->classes, oldStalled, oldSize); kfree(oldStalled, oldSize); - ACCUMSIZE(newSize - oldSize); + OSMETA_ACCUMSIZE(((size_t)newSize) - ((size_t)oldSize)); } sStalled->classes[sStalled->count++] = this; @@ -489,6 +490,10 @@ OSMetaClass::~OSMetaClass() } } } +#if IOTRACKING + IOTrackingQueueFree(reserved->tracking); +#endif + IODelete(reserved, ExpansionData, 1); } /********************************************************************* @@ -533,15 +538,15 @@ OSMetaClass::preModLoad(const char * kextIdentifier) IOLockLock(sStalledClassesLock); assert (sStalled == NULL); - sStalled = (StalledData *)kalloc(sizeof(* sStalled)); + sStalled = (StalledData *)kalloc_tag(sizeof(* sStalled), VM_KERN_MEMORY_OSKEXT); if (sStalled) { sStalled->classes = (OSMetaClass **) - kalloc(kKModCapacityIncrement * sizeof(OSMetaClass *)); + kalloc_tag(kKModCapacityIncrement * sizeof(OSMetaClass *), VM_KERN_MEMORY_OSKEXT); if (!sStalled->classes) { kfree(sStalled, sizeof(*sStalled)); return 0; } - ACCUMSIZE((kKModCapacityIncrement * sizeof(OSMetaClass *)) + + OSMETA_ACCUMSIZE((kKModCapacityIncrement * sizeof(OSMetaClass *)) + sizeof(*sStalled)); sStalled->result = kOSReturnSuccess; @@ -710,7 +715,7 @@ OSMetaClass::postModLoad(void * loadHandle) OSSafeRelease(myKext); if (sStalled) { - ACCUMSIZE(-(sStalled->capacity * sizeof(OSMetaClass *) + + OSMETA_ACCUMSIZE(-(sStalled->capacity * sizeof(OSMetaClass *) + sizeof(*sStalled))); 
kfree(sStalled->classes, sStalled->capacity * sizeof(OSMetaClass *)); kfree(sStalled, sizeof(*sStalled)); @@ -1192,3 +1197,61 @@ OSMetaClass::serializeClassDictionary(OSDictionary * serializeDictionary) return; } + + +/********************************************************************* +*********************************************************************/ + +#if IOTRACKING + +void *OSMetaClass::trackedNew(size_t size) +{ + IOTracking * mem; + + mem = (typeof(mem)) kalloc_tag_bt(size + sizeof(IOTracking), VM_KERN_MEMORY_LIBKERN); + assert(mem); + if (!mem) return (mem); + + memset(mem, 0, size + sizeof(IOTracking)); + mem++; + + OSIVAR_ACCUMSIZE(size); + + return (mem); +} + +void OSMetaClass::trackedDelete(void * instance, size_t size) +{ + IOTracking * mem = (typeof(mem)) instance; mem--; + + kfree(mem, size + sizeof(IOTracking)); + OSIVAR_ACCUMSIZE(-size); +} + +void OSMetaClass::trackedInstance(OSObject * instance) const +{ + IOTracking * mem = (typeof(mem)) instance; mem--; + + return (IOTrackingAdd(reserved->tracking, mem, classSize, false)); +} + +void OSMetaClass::trackedFree(OSObject * instance) const +{ + IOTracking * mem = (typeof(mem)) instance; mem--; + + return (IOTrackingRemove(reserved->tracking, mem, classSize)); +} + +void OSMetaClass::trackedAccumSize(OSObject * instance, size_t size) const +{ + IOTracking * mem = (typeof(mem)) instance; mem--; + + return (IOTrackingAccumSize(reserved->tracking, mem, size)); +} + +IOTrackingQueue * OSMetaClass::getTracking() const +{ + return (reserved->tracking); +} + +#endif /* IOTRACKING */ \ No newline at end of file diff --git a/libkern/c++/OSObject.cpp b/libkern/c++/OSObject.cpp index 0dc95ed55..45652a1ca 100644 --- a/libkern/c++/OSObject.cpp +++ b/libkern/c++/OSObject.cpp @@ -45,11 +45,6 @@ __BEGIN_DECLS int debug_ivars_size; __END_DECLS -#if OSALLOCDEBUG -#define ACCUMSIZE(s) do { debug_ivars_size += (s); } while(0) -#else -#define ACCUMSIZE(s) -#endif // OSDefineMetaClassAndAbstractStructors(OSObject, 
0); /* Class global data */ @@ -58,8 +53,6 @@ const OSMetaClass * const OSObject::metaClass = &OSObject::gMetaClass; const OSMetaClass * const OSObject::superClass = 0; /* Class member functions - Can't use defaults */ -OSObject::OSObject() { retainCount = 1; } -OSObject::OSObject(const OSMetaClass *) { retainCount = 1; } OSObject::~OSObject() { } const OSMetaClass * OSObject::getMetaClass() const { return &gMetaClass; } @@ -94,18 +87,6 @@ static const char *getClassName(const OSObject *obj) return (meta) ? meta->getClassName() : "unknown class?"; } -bool OSObject::init() - { return true; } - -void OSObject::free() -{ - const OSMetaClass *meta = getMetaClass(); - - if (meta) - meta->instanceDestructed(); - delete this; -} - int OSObject::getRetainCount() const { return (int) ((UInt16) retainCount); @@ -257,91 +238,72 @@ bool OSObject::serialize(OSSerialize *s) const return (ok); } +void *OSObject::operator new(size_t size) +{ +#if IOTRACKING + if (kIOTracking & gIOKitDebug) return (OSMetaClass::trackedNew(size)); +#endif -thread_t gOSObjectTrackThread; - -queue_head_t gOSObjectTrackList = - { (queue_t) &gOSObjectTrackList, (queue_t) &gOSObjectTrackList }; + void * mem = kalloc_tag_bt(size, VM_KERN_MEMORY_LIBKERN); + assert(mem); + bzero(mem, size); + OSIVAR_ACCUMSIZE(size); -lck_spin_t gOSObjectTrackLock; + return (void *) mem; +} -OSArray * OSFlushObjectTrackList(void) +void OSObject::operator delete(void * mem, size_t size) { - OSArray * array; - queue_entry_t next; - - array = OSArray::withCapacity(16); + if (!mem) return; - lck_spin_lock(&gOSObjectTrackLock); - while (!queue_empty(&gOSObjectTrackList)) - { - next = queue_first(&gOSObjectTrackList); - remque(next); - lck_spin_unlock(&gOSObjectTrackLock); - array->setObject((OSObject *) (next + 1)); - lck_spin_lock(&gOSObjectTrackLock); - } - lck_spin_unlock(&gOSObjectTrackLock); +#if IOTRACKING + if (kIOTracking & gIOKitDebug) return (OSMetaClass::trackedDelete(mem, size)); +#endif - return (array); + kfree(mem, 
size); + OSIVAR_ACCUMSIZE(-size); } -struct OSObjectTracking +bool OSObject::init() { - queue_chain_t link; - void * bt[14]; -}; +#if IOTRACKING + if (kIOTracking & gIOKitDebug) getMetaClass()->trackedInstance(this); +#endif + return true; +} -void *OSObject::operator new(size_t size) +void OSObject::free() { - size_t tracking = (gIOKitDebug & kOSTraceObjectAlloc) - ? sizeof(OSObjectTracking) : 0; - OSObjectTracking * mem = (OSObjectTracking *) kalloc(size + tracking); - - assert(mem); + const OSMetaClass *meta = getMetaClass(); - if (tracking) + if (meta) { - if ((((thread_t) 1) == gOSObjectTrackThread) || (current_thread() == gOSObjectTrackThread)) - { - (void) OSBacktrace(&mem->bt[0], sizeof(mem->bt) / sizeof(mem->bt[0])); - lck_spin_lock(&gOSObjectTrackLock); - enqueue_tail(&gOSObjectTrackList, &mem->link); - lck_spin_unlock(&gOSObjectTrackLock); - } - else - mem->link.next = 0; - mem++; + meta->instanceDestructed(); +#if IOTRACKING + if (kIOTracking & gIOKitDebug) getMetaClass()->trackedFree(this); +#endif } - - bzero(mem, size); - - ACCUMSIZE(size); - - return (void *) mem; + delete this; } -void OSObject::operator delete(void *_mem, size_t size) +#if IOTRACKING +void OSObject::trackingAccumSize(size_t size) { - size_t tracking = (gIOKitDebug & kOSTraceObjectAlloc) - ? 
sizeof(OSObjectTracking) : 0; - OSObjectTracking * mem = (OSObjectTracking *) _mem; - - if (!mem) - return; + if (kIOTracking & gIOKitDebug) getMetaClass()->trackedAccumSize(this, size); +} +#endif - if (tracking) - { - mem--; - if (mem->link.next) - { - lck_spin_lock(&gOSObjectTrackLock); - remque(&mem->link); - lck_spin_unlock(&gOSObjectTrackLock); - } - } +/* Class member functions - Can't use defaults */ +/* During constructor vtable is always OSObject's - can't call any subclass */ - kfree(mem, size + tracking); +OSObject::OSObject() +{ + retainCount = 1; +// if (kIOTracking & gIOKitDebug) getMetaClass()->trackedInstance(this); +} - ACCUMSIZE(-size); +OSObject::OSObject(const OSMetaClass *) +{ + retainCount = 1; +// if (kIOTracking & gIOKitDebug) getMetaClass()->trackedInstance(this); } diff --git a/libkern/c++/OSOrderedSet.cpp b/libkern/c++/OSOrderedSet.cpp index 1ba5e04dd..cd9e4477a 100644 --- a/libkern/c++/OSOrderedSet.cpp +++ b/libkern/c++/OSOrderedSet.cpp @@ -42,14 +42,6 @@ OSMetaClassDefineReservedUnused(OSOrderedSet, 5); OSMetaClassDefineReservedUnused(OSOrderedSet, 6); OSMetaClassDefineReservedUnused(OSOrderedSet, 7); -#if OSALLOCDEBUG -extern "C" { - extern int debug_container_malloc_size; -}; -#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0) -#else -#define ACCUMSIZE(s) -#endif struct _Element { const OSMetaClassBase * obj; @@ -72,7 +64,7 @@ initWithCapacity(unsigned int inCapacity, return false; size = sizeof(_Element) * inCapacity; - array = (_Element *) kalloc(size); + array = (_Element *) kalloc_container(size); if (!array) return false; @@ -83,9 +75,9 @@ initWithCapacity(unsigned int inCapacity, orderingRef = inOrderingRef; bzero(array, size); - ACCUMSIZE(size); + OSCONTAINER_ACCUMSIZE(size); - return this; + return true; } OSOrderedSet * OSOrderedSet:: @@ -109,7 +101,7 @@ void OSOrderedSet::free() if (array) { kfree(array, sizeof(_Element) * capacity); - ACCUMSIZE( -(sizeof(_Element) * capacity) ); + 
OSCONTAINER_ACCUMSIZE( -(sizeof(_Element) * capacity) ); } super::free(); @@ -142,11 +134,11 @@ unsigned int OSOrderedSet::ensureCapacity(unsigned int newCapacity) } newSize = sizeof(_Element) * finalCapacity; - newArray = (_Element *) kalloc(newSize); + newArray = (_Element *) kalloc_container(newSize); if (newArray) { oldSize = sizeof(_Element) * capacity; - ACCUMSIZE(newSize - oldSize); + OSCONTAINER_ACCUMSIZE(((size_t)newSize) - ((size_t)oldSize)); bcopy(array, newArray, oldSize); bzero(&newArray[capacity], newSize - oldSize); diff --git a/libkern/c++/OSRuntime.cpp b/libkern/c++/OSRuntime.cpp index d16fa34ce..d8841a9ee 100644 --- a/libkern/c++/OSRuntime.cpp +++ b/libkern/c++/OSRuntime.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -73,7 +74,6 @@ static bool gKernelCPPInitialized = false; } \ } while (0) - #if PRAGMA_MARK #pragma mark kern_os Allocator Package #endif /* PRAGMA_MARK */ @@ -104,7 +104,7 @@ kern_os_malloc(size_t size) return (0); } - mem = (struct _mhead *)kalloc(memsize); + mem = (struct _mhead *)kalloc_tag_bt(memsize, VM_KERN_MEMORY_LIBKERN); if (!mem) { return (0); } @@ -172,7 +172,7 @@ kern_os_realloc( } nmemsize = sizeof (*nmem) + nsize ; - nmem = (struct _mhead *) kalloc(nmemsize); + nmem = (struct _mhead *) kalloc_tag_bt(nmemsize, VM_KERN_MEMORY_LIBKERN); if (!nmem){ kern_os_free(addr); return (0); @@ -412,7 +412,7 @@ OSRuntimeInitializeCPP( kernel_segment_command_t * segment = NULL; // do not free kernel_segment_command_t * failure_segment = NULL; // do not free - if (!kmodInfo || !kmodInfo->address || !kmodInfo->name) { + if (!kmodInfo || !kmodInfo->address) { result = kOSKextReturnInvalidArgument; goto finish; } @@ -537,14 +537,11 @@ OSRuntimeInitializeCPP( /********************************************************************* *********************************************************************/ -extern lck_spin_t gOSObjectTrackLock; extern lck_grp_t * IOLockGroup; extern kmod_info_t g_kernel_kmod_info; void 
OSlibkernInit(void) { - lck_spin_init(&gOSObjectTrackLock, IOLockGroup, LCK_ATTR_NULL); - // This must be called before calling OSRuntimeInitializeCPP. OSMetaClassBase::initialize(); @@ -568,6 +565,9 @@ __END_DECLS *********************************************************************/ void * operator new(size_t size) +#if __cplusplus >= 201103L + noexcept +#endif { void * result; @@ -577,6 +577,9 @@ operator new(size_t size) void operator delete(void * addr) +#if __cplusplus >= 201103L + noexcept +#endif { kern_os_free(addr); return; @@ -584,6 +587,9 @@ operator delete(void * addr) void * operator new[](unsigned long sz) +#if __cplusplus >= 201103L + noexcept +#endif { if (sz == 0) sz = 1; return kern_os_malloc(sz); @@ -591,6 +597,9 @@ operator new[](unsigned long sz) void operator delete[](void * ptr) +#if __cplusplus >= 201103L + noexcept +#endif { if (ptr) { kern_os_free(ptr); diff --git a/libkern/c++/OSSerialize.cpp b/libkern/c++/OSSerialize.cpp index 909bc0a4c..38696bc24 100644 --- a/libkern/c++/OSSerialize.cpp +++ b/libkern/c++/OSSerialize.cpp @@ -37,6 +37,7 @@ __END_DECLS #include #include #include +#include #define super OSObject @@ -50,14 +51,6 @@ OSMetaClassDefineReservedUnused(OSSerialize, 5); OSMetaClassDefineReservedUnused(OSSerialize, 6); OSMetaClassDefineReservedUnused(OSSerialize, 7); -#if OSALLOCDEBUG -extern "C" { - extern int debug_container_malloc_size; -}; -#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0) -#else -#define ACCUMSIZE(s) -#endif char * OSSerialize::text() const { @@ -184,7 +177,7 @@ bool OSSerialize::initWithCapacity(unsigned int inCapacity) // allocate from the kernel map so that we can safely map this data // into user space (the primary use of the OSSerialize object) - kern_return_t rc = kmem_alloc(kernel_map, (vm_offset_t *)&data, capacity); + kern_return_t rc = kmem_alloc(kernel_map, (vm_offset_t *)&data, capacity, IOMemoryTag(kernel_map)); if (rc) { tags->release(); tags = 0; @@ -193,7 +186,7 @@ bool 
OSSerialize::initWithCapacity(unsigned int inCapacity) bzero((void *)data, capacity); - ACCUMSIZE(capacity); + OSCONTAINER_ACCUMSIZE(capacity); return true; } @@ -233,13 +226,14 @@ unsigned int OSSerialize::ensureCapacity(unsigned int newCapacity) (vm_offset_t)data, capacity, (vm_offset_t *)&newData, - newCapacity); + newCapacity, + VM_KERN_MEMORY_IOKIT); if (!rc) { - ACCUMSIZE(newCapacity); + OSCONTAINER_ACCUMSIZE(newCapacity); // kmem realloc does not free the old address range kmem_free(kernel_map, (vm_offset_t)data, capacity); - ACCUMSIZE(-capacity); + OSCONTAINER_ACCUMSIZE(-((size_t)capacity)); // kmem realloc does not zero out the new memory // and this could end up going to user land @@ -259,7 +253,7 @@ void OSSerialize::free() if (data) { kmem_free(kernel_map, (vm_offset_t)data, capacity); - ACCUMSIZE( -capacity ); + OSCONTAINER_ACCUMSIZE( -((size_t)capacity) ); } super::free(); } diff --git a/libkern/c++/OSSerializeBinary.cpp b/libkern/c++/OSSerializeBinary.cpp index e939f0558..accbbf224 100644 --- a/libkern/c++/OSSerializeBinary.cpp +++ b/libkern/c++/OSSerializeBinary.cpp @@ -241,7 +241,7 @@ bool OSSerialize::binarySerialize(const OSMetaClassBase *o) if (idx >= v##Capacity) \ { \ uint32_t ncap = v##Capacity + 64; \ - typeof(v##Array) nbuf = (typeof(v##Array)) kalloc(ncap * sizeof(o)); \ + typeof(v##Array) nbuf = (typeof(v##Array)) kalloc_container(ncap * sizeof(o)); \ if (!nbuf) ok = false; \ if (v##Array) \ { \ @@ -276,6 +276,7 @@ OSUnserializeBinary(const char *buffer, size_t bufferSize, OSString **errorStrin OSSet * newSet; OSObject * o; OSSymbol * sym; + OSString * str; size_t bufferPos; const uint32_t * next; @@ -403,6 +404,12 @@ OSUnserializeBinary(const char *buffer, size_t bufferSize, OSString **errorStrin else { sym = OSDynamicCast(OSSymbol, o); + if (!sym && (str = OSDynamicCast(OSString, o))) + { + sym = (OSSymbol *) OSSymbol::withString(str); + o->release(); + o = 0; + } ok = (sym != 0); } } diff --git a/libkern/c++/OSString.cpp 
b/libkern/c++/OSString.cpp index 65032f5eb..2bd875ee6 100644 --- a/libkern/c++/OSString.cpp +++ b/libkern/c++/OSString.cpp @@ -56,15 +56,6 @@ OSMetaClassDefineReservedUnused(OSString, 13); OSMetaClassDefineReservedUnused(OSString, 14); OSMetaClassDefineReservedUnused(OSString, 15); -#if OSALLOCDEBUG -extern "C" { - extern int debug_container_malloc_size; -}; -#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0) -#else -#define ACCUMSIZE(s) -#endif - bool OSString::initWithString(const OSString *aString) { return initWithCString(aString->string); @@ -72,35 +63,54 @@ bool OSString::initWithString(const OSString *aString) bool OSString::initWithCString(const char *cString) { - if (!cString || !super::init()) - return false; + unsigned int newLength; + char * newString; - length = strlen(cString) + 1; - string = (char *) kalloc(length); - if (!string) - return false; + if (!cString || !super::init()) return false; + + newLength = strlen(cString) + 1; + newString = (char *) kalloc_container(newLength); + if (!newString) return false; - bcopy(cString, string, length); + bcopy(cString, newString, newLength); - ACCUMSIZE(length); + if ( !(flags & kOSStringNoCopy) && string) { + kfree(string, (vm_size_t)length); + OSCONTAINER_ACCUMSIZE(-((size_t)length)); + } + string = newString; + length = newLength; + flags &= ~kOSStringNoCopy; + + OSCONTAINER_ACCUMSIZE(length); return true; } bool OSString::initWithStringOfLength(const char *cString, size_t inlength) { - if (!cString || !super::init()) - return false; + unsigned int newLength; + char * newString; - length = inlength + 1; - string = (char *) kalloc(length); - if (!string) - return false; + if (!cString || !super::init()) return false; + + newLength = inlength + 1; + newString = (char *) kalloc_container(newLength); + if (!newString) return false; + + bcopy(cString, newString, inlength); + newString[inlength] = 0; + + if ( !(flags & kOSStringNoCopy) && string) { + kfree(string, (vm_size_t)length); + 
OSCONTAINER_ACCUMSIZE(-((size_t)length)); + } - bcopy(cString, string, inlength); - string[inlength] = 0; + string = newString; + length = newLength; + flags &= ~kOSStringNoCopy; - ACCUMSIZE(length); + OSCONTAINER_ACCUMSIZE(length); return true; } @@ -197,7 +207,7 @@ void OSString::free() { if ( !(flags & kOSStringNoCopy) && string) { kfree(string, (vm_size_t)length); - ACCUMSIZE(-length); + OSCONTAINER_ACCUMSIZE(-((size_t)length)); } super::free(); diff --git a/libkern/c++/OSSymbol.cpp b/libkern/c++/OSSymbol.cpp index b2f5f94bd..7b3f21408 100644 --- a/libkern/c++/OSSymbol.cpp +++ b/libkern/c++/OSSymbol.cpp @@ -40,15 +40,6 @@ typedef struct { unsigned int i, j; } OSSymbolPoolState; -#if OSALLOCDEBUG -extern "C" { - extern int debug_container_malloc_size; -}; -#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0) -#else -#define ACCUMSIZE(s) -#endif - #define INITIAL_POOL_SIZE (exp2ml(1 + log2(kInitBucketCount))) #define GROW_FACTOR (1) @@ -126,8 +117,8 @@ class OSSymbolPool void * OSSymbolPool::operator new(size_t size) { - void *mem = (void *)kalloc(size); - ACCUMSIZE(size); + void *mem = (void *)kalloc_tag(size, VM_KERN_MEMORY_LIBKERN); + OSMETA_ACCUMSIZE(size); assert(mem); bzero(mem, size); @@ -137,7 +128,7 @@ void * OSSymbolPool::operator new(size_t size) void OSSymbolPool::operator delete(void *mem, size_t size) { kfree(mem, size); - ACCUMSIZE(-size); + OSMETA_ACCUMSIZE(-size); } extern lck_grp_t *IOLockGroup; @@ -146,8 +137,8 @@ bool OSSymbolPool::init() { count = 0; nBuckets = INITIAL_POOL_SIZE; - buckets = (Bucket *) kalloc(nBuckets * sizeof(Bucket)); - ACCUMSIZE(nBuckets * sizeof(Bucket)); + buckets = (Bucket *) kalloc_tag(nBuckets * sizeof(Bucket), VM_KERN_MEMORY_LIBKERN); + OSMETA_ACCUMSIZE(nBuckets * sizeof(Bucket)); if (!buckets) return false; @@ -174,11 +165,11 @@ OSSymbolPool::~OSSymbolPool() for (thisBucket = &buckets[0]; thisBucket < &buckets[nBuckets]; thisBucket++) { if (thisBucket->count > 1) { kfree(thisBucket->symbolP, 
thisBucket->count * sizeof(OSSymbol *)); - ACCUMSIZE(-(thisBucket->count * sizeof(OSSymbol *))); + OSMETA_ACCUMSIZE(-(thisBucket->count * sizeof(OSSymbol *))); } } kfree(buckets, nBuckets * sizeof(Bucket)); - ACCUMSIZE(-(nBuckets * sizeof(Bucket))); + OSMETA_ACCUMSIZE(-(nBuckets * sizeof(Bucket))); } if (poolGate) @@ -253,8 +244,8 @@ void OSSymbolPool::reconstructSymbols(bool grow) count = 0; nBuckets = new_nBuckets; - buckets = (Bucket *) kalloc(nBuckets * sizeof(Bucket)); - ACCUMSIZE(nBuckets * sizeof(Bucket)); + buckets = (Bucket *) kalloc_tag(nBuckets * sizeof(Bucket), VM_KERN_MEMORY_LIBKERN); + OSMETA_ACCUMSIZE(nBuckets * sizeof(Bucket)); /* @@@ gvdl: Zero test and panic if can't set up pool */ bzero(buckets, nBuckets * sizeof(Bucket)); @@ -320,8 +311,8 @@ OSSymbol *OSSymbolPool::insertSymbol(OSSymbol *sym) && strncmp(probeSymbol->string, cString, probeSymbol->length) == 0) return probeSymbol; - list = (OSSymbol **) kalloc(2 * sizeof(OSSymbol *)); - ACCUMSIZE(2 * sizeof(OSSymbol *)); + list = (OSSymbol **) kalloc_tag(2 * sizeof(OSSymbol *), VM_KERN_MEMORY_LIBKERN); + OSMETA_ACCUMSIZE(2 * sizeof(OSSymbol *)); /* @@@ gvdl: Zero test and panic if can't set up pool */ list[0] = sym; list[1] = probeSymbol; @@ -342,13 +333,13 @@ OSSymbol *OSSymbolPool::insertSymbol(OSSymbol *sym) j = thisBucket->count++; count++; - list = (OSSymbol **) kalloc(thisBucket->count * sizeof(OSSymbol *)); - ACCUMSIZE(thisBucket->count * sizeof(OSSymbol *)); + list = (OSSymbol **) kalloc_tag(thisBucket->count * sizeof(OSSymbol *), VM_KERN_MEMORY_LIBKERN); + OSMETA_ACCUMSIZE(thisBucket->count * sizeof(OSSymbol *)); /* @@@ gvdl: Zero test and panic if can't set up pool */ list[0] = sym; bcopy(thisBucket->symbolP, list + 1, j * sizeof(OSSymbol *)); kfree(thisBucket->symbolP, j * sizeof(OSSymbol *)); - ACCUMSIZE(-(j * sizeof(OSSymbol *))); + OSMETA_ACCUMSIZE(-(j * sizeof(OSSymbol *))); thisBucket->symbolP = list; GROW_POOL(); @@ -392,7 +383,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym) 
if (probeSymbol == sym) { thisBucket->symbolP = (OSSymbol **) list[1]; kfree(list, 2 * sizeof(OSSymbol *)); - ACCUMSIZE(-(2 * sizeof(OSSymbol *))); + OSMETA_ACCUMSIZE(-(2 * sizeof(OSSymbol *))); count--; thisBucket->count--; SHRINK_POOL(); @@ -403,7 +394,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym) if (probeSymbol == sym) { thisBucket->symbolP = (OSSymbol **) list[0]; kfree(list, 2 * sizeof(OSSymbol *)); - ACCUMSIZE(-(2 * sizeof(OSSymbol *))); + OSMETA_ACCUMSIZE(-(2 * sizeof(OSSymbol *))); count--; thisBucket->count--; SHRINK_POOL(); @@ -419,8 +410,8 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym) if (probeSymbol == sym) { list = (OSSymbol **) - kalloc((thisBucket->count-1) * sizeof(OSSymbol *)); - ACCUMSIZE((thisBucket->count-1) * sizeof(OSSymbol *)); + kalloc_tag((thisBucket->count-1) * sizeof(OSSymbol *), VM_KERN_MEMORY_LIBKERN); + OSMETA_ACCUMSIZE((thisBucket->count-1) * sizeof(OSSymbol *)); if (thisBucket->count-1 != j) bcopy(thisBucket->symbolP, list, (thisBucket->count-1-j) * sizeof(OSSymbol *)); @@ -429,7 +420,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym) list + thisBucket->count-1-j, j * sizeof(OSSymbol *)); kfree(thisBucket->symbolP, thisBucket->count * sizeof(OSSymbol *)); - ACCUMSIZE(-(thisBucket->count * sizeof(OSSymbol *))); + OSMETA_ACCUMSIZE(-(thisBucket->count * sizeof(OSSymbol *))); thisBucket->symbolP = list; count--; thisBucket->count--; @@ -555,12 +546,7 @@ void OSSymbol::checkForPageUnload(void *startAddr, void *endAddr) state = pool->initHashState(); while ( (probeSymbol = pool->nextHashState(&state)) ) { if (probeSymbol->string >= startAddr && probeSymbol->string < endAddr) { - const char *oldString = probeSymbol->string; - - probeSymbol->string = (char *) kalloc(probeSymbol->length); - ACCUMSIZE(probeSymbol->length); - bcopy(oldString, probeSymbol->string, probeSymbol->length); - probeSymbol->flags &= ~kOSStringNoCopy; + probeSymbol->OSString::initWithCString(probeSymbol->string); } } pool->openGate(); diff --git 
a/libkern/conf/Makefile.template b/libkern/conf/Makefile.template index 5885a434c..23bbd4f6a 100644 --- a/libkern/conf/Makefile.template +++ b/libkern/conf/Makefile.template @@ -107,13 +107,13 @@ $(SOBJS): .SFLAGS $(COMPONENT).filelist: $(OBJS) $(_v)for hib_file in ${HIB_FILES}; \ do \ - $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} ; \ - mv $${hib_file}__ $${hib_file} ; \ + $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} || exit 1; \ + mv $${hib_file}__ $${hib_file} || exit 1; \ done @echo LDFILELIST $(COMPONENT) - $(_v)( for obj in ${OBJS}; do \ + $(_v)for obj in ${OBJS}; do \ echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ - done; ) > $(COMPONENT).filelist + done > $(COMPONENT).filelist do_all: $(COMPONENT).filelist diff --git a/libkern/conf/files b/libkern/conf/files index bdf3cb6e0..c91df14cb 100644 --- a/libkern/conf/files +++ b/libkern/conf/files @@ -5,6 +5,7 @@ OPTIONS/kdebug optional kdebug OPTIONS/gprof optional gprof OPTIONS/config_dtrace optional config_dtrace OPTIONS/hibernation optional hibernation +OPTIONS/iotracking optional iotracking OPTIONS/networking optional networking OPTIONS/crypto optional crypto OPTIONS/zlib optional zlib diff --git a/libkern/conf/files.x86_64 b/libkern/conf/files.x86_64 index 51d4530fe..8b1378917 100644 --- a/libkern/conf/files.x86_64 +++ b/libkern/conf/files.x86_64 @@ -1,2 +1 @@ -libkern/x86_64/OSAtomic.s standard diff --git a/libkern/crypto/corecrypto_aesxts.c b/libkern/crypto/corecrypto_aesxts.c index dc0d6f40e..ef33084cf 100644 --- a/libkern/crypto/corecrypto_aesxts.c +++ b/libkern/crypto/corecrypto_aesxts.c @@ -101,5 +101,6 @@ int xts_decrypt(const uint8_t *ct, unsigned long ptlen, void xts_done(symmetric_xts *xts __unused) { - + cc_clear(sizeof(xts->enc), xts->enc); + cc_clear(sizeof(xts->dec), xts->dec); } diff --git a/libkern/crypto/corecrypto_sha2.c b/libkern/crypto/corecrypto_sha2.c index e85479d3b..3143c0161 100644 --- a/libkern/crypto/corecrypto_sha2.c +++ b/libkern/crypto/corecrypto_sha2.c @@ 
-83,7 +83,7 @@ void SHA384_Update(SHA384_CTX *ctx, const void *data, size_t len) void SHA384_Final(void *digest, SHA384_CTX *ctx) { const struct ccdigest_info *di; - di=g_crypto_funcs->ccsha512_di; + di=g_crypto_funcs->ccsha384_di; ccdigest_final(di, ctx->ctx, digest); } diff --git a/libkern/gen/OSAtomicOperations.c b/libkern/gen/OSAtomicOperations.c index 25ff477ff..5affc1eef 100644 --- a/libkern/gen/OSAtomicOperations.c +++ b/libkern/gen/OSAtomicOperations.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -27,6 +27,8 @@ */ #include +#include +#include enum { false = 0, @@ -37,28 +39,109 @@ enum { #define NULL ((void *)0) #endif +#define ATOMIC_DEBUG DEBUG + +#if ATOMIC_DEBUG +#define ALIGN_TEST(p,t) do{if((uintptr_t)p&(sizeof(t)-1)) panic("Unaligned atomic pointer %p\n",p);}while(0) +#else +#define ALIGN_TEST(p,t) do{}while(0) +#endif + +// 19831745 - start of big hammer! +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-qual" + /* * atomic operations - * these are _the_ atomic operations, currently cast atop CompareAndSwap, - * which is implemented in assembler. if we are worried about the cost of - * this layering (we shouldn't be), then all this stuff could be - * implemented in assembler, as it is in MacOS8/9 - * (derived from SuperMario/NativeLibs/IO/DriverServices/Synchronization.s, - * which I wrote for NuKernel in a previous life with a different last name...) - * - * native Boolean CompareAndSwap(UInt32 oldValue, UInt32 newValue, UInt32 * oldValuePtr); - * - * We've since implemented a few more of these -- OSAddAtomic, OSDequeueAtomic, - * OSEnqueueAtomic etc -- in assembler, either for speed or correctness. See also the - * commpage atomic operations, and the platform specific versions. - * Like standards, there are a lot of atomic ops to choose from! 
+ * These are _the_ atomic operations, now implemented via compiler built-ins. + * It is expected that this C implementation is a candidate for Link-Time- + * Optimization inlining, whereas the assembler implementations they replace + * were not. */ -#if defined(__i386__) || defined(__x86_64__) -/* Implemented in assembly for i386 and x86_64 */ +#undef OSCompareAndSwap8 +Boolean OSCompareAndSwap8(UInt8 oldValue, UInt8 newValue, volatile UInt8 *address) +{ + return __c11_atomic_compare_exchange_strong((_Atomic UInt8 *)address, &oldValue, newValue, + memory_order_acq_rel_smp, memory_order_relaxed); +} + +#undef OSCompareAndSwap16 +Boolean OSCompareAndSwap16(UInt16 oldValue, UInt16 newValue, volatile UInt16 *address) +{ + return __c11_atomic_compare_exchange_strong((_Atomic UInt16 *)address, &oldValue, newValue, + memory_order_acq_rel_smp, memory_order_relaxed); +} + +#undef OSCompareAndSwap +Boolean OSCompareAndSwap(UInt32 oldValue, UInt32 newValue, volatile UInt32 *address) +{ + ALIGN_TEST(address, UInt32); + return __c11_atomic_compare_exchange_strong((_Atomic UInt32 *)address, &oldValue, newValue, + memory_order_acq_rel_smp, memory_order_relaxed); +} + +#undef OSCompareAndSwap64 +Boolean OSCompareAndSwap64(UInt64 oldValue, UInt64 newValue, volatile UInt64 *address) +{ + /* + * _Atomic uint64 requires 8-byte alignment on all architectures. + * This silences the compiler cast warning. ALIGN_TEST() verifies + * that the cast was legal, if defined. 
+ */ + _Atomic UInt64 *aligned_addr = (_Atomic UInt64 *)(uintptr_t)address; + + ALIGN_TEST(address, UInt64); + return __c11_atomic_compare_exchange_strong(aligned_addr, &oldValue, newValue, + memory_order_acq_rel_smp, memory_order_relaxed); +} + +#undef OSCompareAndSwapPtr +Boolean OSCompareAndSwapPtr(void *oldValue, void *newValue, void * volatile *address) +{ +#if __LP64__ + return OSCompareAndSwap64((UInt64)oldValue, (UInt64)newValue, (volatile UInt64 *)address); #else -#error Unsupported arch + return OSCompareAndSwap((UInt32)oldValue, (UInt32)newValue, (volatile UInt32 *)address); #endif +} + +SInt8 OSAddAtomic8(SInt32 amount, volatile SInt8 *address) +{ + return __c11_atomic_fetch_add((_Atomic SInt8*)address, amount, memory_order_relaxed); +} + +SInt16 OSAddAtomic16(SInt32 amount, volatile SInt16 *address) +{ + return __c11_atomic_fetch_add((_Atomic SInt16*)address, amount, memory_order_relaxed); +} + +#undef OSAddAtomic +SInt32 OSAddAtomic(SInt32 amount, volatile SInt32 *address) +{ + ALIGN_TEST(address, UInt32); + return __c11_atomic_fetch_add((_Atomic SInt32*)address, amount, memory_order_relaxed); +} + +#undef OSAddAtomic64 +SInt64 OSAddAtomic64(SInt64 amount, volatile SInt64 *address) +{ + _Atomic SInt64* aligned_address = (_Atomic SInt64*)(uintptr_t)address; + + ALIGN_TEST(address, SInt64); + return __c11_atomic_fetch_add(aligned_address, amount, memory_order_relaxed); +} + +#undef OSAddAtomicLong +long +OSAddAtomicLong(long theAmount, volatile long *address) +{ +#ifdef __LP64__ + return (long)OSAddAtomic64((SInt64)theAmount, (SInt64*)address); +#else + return (long)OSAddAtomic((SInt32)theAmount, address); +#endif +} #undef OSIncrementAtomic SInt32 OSIncrementAtomic(volatile SInt32 * value) @@ -72,58 +155,24 @@ SInt32 OSDecrementAtomic(volatile SInt32 * value) return OSAddAtomic(-1, value); } -static UInt32 OSBitwiseAtomic(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, volatile UInt32 * value) -{ - UInt32 oldValue; - UInt32 newValue; - - do { - 
oldValue = *value; - newValue = ((oldValue & and_mask) | or_mask) ^ xor_mask; - } while (! OSCompareAndSwap(oldValue, newValue, value)); - - return oldValue; -} - #undef OSBitAndAtomic UInt32 OSBitAndAtomic(UInt32 mask, volatile UInt32 * value) { - return OSBitwiseAtomic(mask, 0, 0, value); + return __c11_atomic_fetch_and((_Atomic UInt32*)value, mask, memory_order_relaxed); } #undef OSBitOrAtomic UInt32 OSBitOrAtomic(UInt32 mask, volatile UInt32 * value) { - return OSBitwiseAtomic((UInt32) -1, mask, 0, value); + return __c11_atomic_fetch_or((_Atomic UInt32*)value, mask, memory_order_relaxed); } #undef OSBitXorAtomic UInt32 OSBitXorAtomic(UInt32 mask, volatile UInt32 * value) { - return OSBitwiseAtomic((UInt32) -1, 0, mask, value); + return __c11_atomic_fetch_xor((_Atomic UInt32*)value, mask, memory_order_relaxed); } -#if defined(__i386__) || defined(__x86_64__) -static Boolean OSCompareAndSwap8(UInt8 oldValue8, UInt8 newValue8, volatile UInt8 * value8) -{ - UInt32 mask = 0x000000ff; - UInt32 alignment = (UInt32)((unsigned long) value8) & (sizeof(UInt32) - 1); - UInt32 shiftValues = (24 << 24) | (16 << 16) | (8 << 8); - int shift = (UInt32) *(((UInt8 *) &shiftValues) + alignment); - volatile UInt32 * value32 = (volatile UInt32 *) ((uintptr_t)value8 - alignment); - UInt32 oldValue; - UInt32 newValue; - - mask <<= shift; - - oldValue = *value32; - oldValue = (oldValue & ~mask) | (oldValue8 << shift); - newValue = (oldValue & ~mask) | (newValue8 << shift); - - return OSCompareAndSwap(oldValue, newValue, value32); -} -#endif - static Boolean OSTestAndSetClear(UInt32 bit, Boolean wantSet, volatile UInt8 * startAddress) { UInt8 mask = 1; @@ -139,7 +188,8 @@ static Boolean OSTestAndSetClear(UInt32 bit, Boolean wantSet, volatile UInt8 * s if ((oldValue & mask) == wantValue) { break; } - } while (! OSCompareAndSwap8(oldValue, (oldValue & ~mask) | wantValue, startAddress)); + } while (! 
__c11_atomic_compare_exchange_strong((_Atomic UInt8 *)startAddress, + &oldValue, (oldValue & ~mask) | wantValue, memory_order_relaxed, memory_order_relaxed)); return (oldValue & mask) == wantValue; } @@ -168,70 +218,21 @@ SInt8 OSDecrementAtomic8(volatile SInt8 * value) return OSAddAtomic8(-1, value); } -#if defined(__i386__) || defined(__x86_64__) -SInt8 OSAddAtomic8(SInt32 amount, volatile SInt8 * value) -{ - SInt8 oldValue; - SInt8 newValue; - - do { - oldValue = *value; - newValue = oldValue + amount; - } while (! OSCompareAndSwap8((UInt8) oldValue, (UInt8) newValue, (volatile UInt8 *) value)); - - return oldValue; -} -#endif - -static UInt8 OSBitwiseAtomic8(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, volatile UInt8 * value) -{ - UInt8 oldValue; - UInt8 newValue; - - do { - oldValue = *value; - newValue = ((oldValue & and_mask) | or_mask) ^ xor_mask; - } while (! OSCompareAndSwap8(oldValue, newValue, value)); - - return oldValue; -} - UInt8 OSBitAndAtomic8(UInt32 mask, volatile UInt8 * value) { - return OSBitwiseAtomic8(mask, 0, 0, value); + return __c11_atomic_fetch_and((_Atomic UInt8 *)value, mask, memory_order_relaxed); } UInt8 OSBitOrAtomic8(UInt32 mask, volatile UInt8 * value) { - return OSBitwiseAtomic8((UInt32) -1, mask, 0, value); + return __c11_atomic_fetch_or((_Atomic UInt8 *)value, mask, memory_order_relaxed); } UInt8 OSBitXorAtomic8(UInt32 mask, volatile UInt8 * value) { - return OSBitwiseAtomic8((UInt32) -1, 0, mask, value); + return __c11_atomic_fetch_xor((_Atomic UInt8 *)value, mask, memory_order_relaxed); } -#if defined(__i386__) || defined(__x86_64__) -static Boolean OSCompareAndSwap16(UInt16 oldValue16, UInt16 newValue16, volatile UInt16 * value16) -{ - UInt32 mask = 0x0000ffff; - UInt32 alignment = (UInt32)((unsigned long) value16) & (sizeof(UInt32) - 1); - UInt32 shiftValues = (16 << 24) | (16 << 16); - UInt32 shift = (UInt32) *(((UInt8 *) &shiftValues) + alignment); - volatile UInt32 * value32 = (volatile UInt32 *) (((unsigned long) 
value16) - alignment); - UInt32 oldValue; - UInt32 newValue; - - mask <<= shift; - - oldValue = *value32; - oldValue = (oldValue & ~mask) | (oldValue16 << shift); - newValue = (oldValue & ~mask) | (newValue16 << shift); - - return OSCompareAndSwap(oldValue, newValue, value32); -} -#endif - SInt16 OSIncrementAtomic16(volatile SInt16 * value) { return OSAddAtomic16(1, value); @@ -242,46 +243,21 @@ SInt16 OSDecrementAtomic16(volatile SInt16 * value) return OSAddAtomic16(-1, value); } -#if defined(__i386__) || defined(__x86_64__) -SInt16 OSAddAtomic16(SInt32 amount, volatile SInt16 * value) -{ - SInt16 oldValue; - SInt16 newValue; - - do { - oldValue = *value; - newValue = oldValue + amount; - } while (! OSCompareAndSwap16((UInt16) oldValue, (UInt16) newValue, (volatile UInt16 *) value)); - - return oldValue; -} -#endif - -static UInt16 OSBitwiseAtomic16(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, volatile UInt16 * value) -{ - UInt16 oldValue; - UInt16 newValue; - - do { - oldValue = *value; - newValue = ((oldValue & and_mask) | or_mask) ^ xor_mask; - } while (! OSCompareAndSwap16(oldValue, newValue, value)); - - return oldValue; -} - UInt16 OSBitAndAtomic16(UInt32 mask, volatile UInt16 * value) { - return OSBitwiseAtomic16(mask, 0, 0, value); + return __c11_atomic_fetch_and((_Atomic UInt16 *)value, mask, memory_order_relaxed); } UInt16 OSBitOrAtomic16(UInt32 mask, volatile UInt16 * value) { - return OSBitwiseAtomic16((UInt32) -1, mask, 0, value); + return __c11_atomic_fetch_or((_Atomic UInt16 *)value, mask, memory_order_relaxed); } UInt16 OSBitXorAtomic16(UInt32 mask, volatile UInt16 * value) { - return OSBitwiseAtomic16((UInt32) -1, 0, mask, value); + return __c11_atomic_fetch_xor((_Atomic UInt16 *)value, mask, memory_order_relaxed); } +// 19831745 - end of big hammer! 
+#pragma clang diagnostic pop + diff --git a/libkern/gen/OSDebug.cpp b/libkern/gen/OSDebug.cpp index 7cb847108..305cfc3cb 100644 --- a/libkern/gen/OSDebug.cpp +++ b/libkern/gen/OSDebug.cpp @@ -49,6 +49,9 @@ __BEGIN_DECLS extern vm_offset_t min_valid_stack_address(void); extern vm_offset_t max_valid_stack_address(void); +// From osfmk/kern/printf.c +extern boolean_t doprnt_hide_pointers; + // From osfmk/kmod.c extern void kmod_dump_log(vm_offset_t *addr, unsigned int cnt, boolean_t doUnslide); @@ -106,12 +109,15 @@ OSReportWithBacktrace(const char *str, ...) lck_mtx_lock(sOSReportLock); { + boolean_t old_doprnt_hide_pointers = doprnt_hide_pointers; + doprnt_hide_pointers = FALSE; printf("%s\nBacktrace 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", buf, (unsigned long) VM_KERNEL_UNSLIDE(bt[2]), (unsigned long) VM_KERNEL_UNSLIDE(bt[3]), (unsigned long) VM_KERNEL_UNSLIDE(bt[4]), (unsigned long) VM_KERNEL_UNSLIDE(bt[5]), (unsigned long) VM_KERNEL_UNSLIDE(bt[6]), (unsigned long) VM_KERNEL_UNSLIDE(bt[7]), (unsigned long) VM_KERNEL_UNSLIDE(bt[8])); kmod_dump_log((vm_offset_t *) &bt[2], cnt - 2, TRUE); + doprnt_hide_pointers = old_doprnt_hide_pointers; } lck_mtx_unlock(sOSReportLock); } @@ -166,6 +172,7 @@ OSPrintBacktrace(void) unsigned OSBacktrace(void **bt, unsigned maxAddrs) { unsigned frame; + if (!current_thread()) return 0; #if __x86_64__ #define SANE_x86_64_FRAME_SIZE (kernel_stack_size >> 1) diff --git a/libkern/kxld/Makefile b/libkern/kxld/Makefile index 59f788571..cc20497a1 100644 --- a/libkern/kxld/Makefile +++ b/libkern/kxld/Makefile @@ -106,7 +106,7 @@ $(shell [ -d $(OBJROOT) ] || mkdir -p $(OBJROOT)) $(OBJROOT)/%.o : $(OBJSRC)/%.c $(CC) $(RC_CFLAGS) $(CFLAGS) $(DEFINES) $(OPTIM) $(INCLUDES) -c $< -o $@ $(OBJROOT)/%.o : $(TESTSRC)/%.c - $(CC) $(RC_CFLAGS) $(CFLAGS) $(DEFINES) -O0 -DDEBUG $(INCLUDES) -I $(SRCROOT) -c $< -o $@ + $(CC) $(RC_CFLAGS) $(CFLAGS) $(DEFINES) -O0 -DDEBUG $(INCLUDES) -I$(SRCROOT) -c $< -o $@ SRCROOTESC=$(subst /,\/,$(SRCROOT)) 
OBJROOTESC=$(subst /,\/,$(OBJROOT)) @@ -114,7 +114,7 @@ SEDOBJS=sed -E 's/(^[a-z_]+)\.o/$(OBJROOTESC)\/\1\.o $(OBJROOTESC)\/\1\.d/' SEDSRCS=sed -E 's/ ([a-z_]+\.[ch])/ $(SRCROOTESC)\/\1/g' $(OBJROOT)/%.d: $(OBJSRC)/%.c @set -e; rm -f $@; \ - $(CC) $(INCLUDES) -MM $< | $(SEDOBJS) | $(SEDSRCS) > $@; + $(CC) $(CFLAGS) $(DEFINES) $(INCLUDES) -MM $< | $(SEDOBJS) | $(SEDSRCS) > $@; # Rules release: OPTIM=-Os -dynamic @@ -194,8 +194,8 @@ $(TESTDST)/copyrighttest: $(COPYTESTOBJS) install -c -m 755 $(OBJROOT)/copyrighttest $@ analyze: - @$(CLANG_ANALYZER) *.c - @$(CLANG_ANALYZER) -I. tests/*.c + @$(CLANG_ANALYZER) $(CFLAGS) $(INCLUDES) $(filter-out WKdm%.c,$(wildcard *.c)) + @$(CLANG_ANALYZER) $(CFLAGS) $(INCLUDES) -I$(SRCROOT) tests/*.c @rm -f *.plist clean: diff --git a/libkern/kxld/kxld.c b/libkern/kxld/kxld.c index da3fbec7d..728774c37 100644 --- a/libkern/kxld/kxld.c +++ b/libkern/kxld/kxld.c @@ -32,6 +32,12 @@ #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" #include +#if KERNEL +#define __KXLD_KERNEL_UNUSED __unused +#else +#define __KXLD_KERNEL_UNUSED +#endif + #if !KERNEL #include "kxld.h" #include "kxld_types.h" @@ -104,7 +110,8 @@ static void clear_context(KXLDContext *context); kern_return_t kxld_create_context(KXLDContext **_context, KXLDAllocateCallback allocate_callback, KXLDLoggingCallback logging_callback, - KXLDFlags flags, cpu_type_t cputype, cpu_subtype_t cpusubtype) + KXLDFlags flags, cpu_type_t cputype, cpu_subtype_t cpusubtype, + vm_size_t pagesize __KXLD_KERNEL_UNUSED) { kern_return_t rval = KERN_FAILURE; KXLDContext * context = NULL; @@ -127,6 +134,12 @@ kxld_create_context(KXLDContext **_context, context->cputype = cputype; context->cpusubtype = cpusubtype; +#if !KERNEL + if (pagesize) { + kxld_set_cross_link_page_size(pagesize); + } +#endif /* !KERNEL */ + kxld_set_logging_callback(logging_callback); context->kext = kxld_alloc(kxld_kext_sizeof()); @@ -467,7 +480,7 @@ allocate_kext(KXLDContext *context, void *callback_data, 
kxld_kext_get_vmsize(context->kext, &header_size, &vmsize); vmaddr = context->allocate_callback(vmsize, &flags, callback_data); - require_action(!(vmaddr & (PAGE_SIZE-1)), finish, + require_action(!(vmaddr & (kxld_get_effective_page_size()-1)), finish, kxld_log(kKxldLogLinking, kKxldLogErr, "Load address %p is not page-aligned.", (void *) (uintptr_t) vmaddr)); diff --git a/libkern/kxld/kxld_demangle.c b/libkern/kxld/kxld_demangle.c index c0bb5e276..3b0dffe5e 100644 --- a/libkern/kxld/kxld_demangle.c +++ b/libkern/kxld/kxld_demangle.c @@ -53,8 +53,6 @@ kxld_demangle(const char *str, char **buffer __unused, size_t *length __unused) char *demangled = NULL; int status; - if (!str) goto finish; - rval = str; if (!buffer || !length) goto finish; diff --git a/libkern/kxld/kxld_demangle.h b/libkern/kxld/kxld_demangle.h index 5c38abc8f..a5250ab49 100644 --- a/libkern/kxld/kxld_demangle.h +++ b/libkern/kxld/kxld_demangle.h @@ -47,6 +47,6 @@ * */ const char * kxld_demangle(const char *str, char **buffer, size_t *length) - __attribute__((pure, nonnull, visibility("hidden"))); + __attribute__((pure, nonnull(1), visibility("hidden"))); #endif /* !_KXLD_DEMANGLE_H_ */ diff --git a/libkern/kxld/kxld_object.c b/libkern/kxld/kxld_object.c index d936c7853..36383e41e 100644 --- a/libkern/kxld/kxld_object.c +++ b/libkern/kxld/kxld_object.c @@ -634,6 +634,7 @@ init_from_final_linked_image(KXLDObject *object, u_int *filetype_out, break; case LC_VERSION_MIN_MACOSX: case LC_VERSION_MIN_IPHONEOS: + case LC_VERSION_MIN_WATCHOS: versionmin_hdr = (struct version_min_command *) cmd_hdr; kxld_versionmin_init_from_macho(&object->versionmin, versionmin_hdr); break; @@ -676,7 +677,7 @@ init_from_final_linked_image(KXLDObject *object, u_int *filetype_out, default: rval=KERN_FAILURE; kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO - "Invalid segment type in MH_KEXT_BUNDLE kext: %u.", cmd_hdr->cmd); + "Invalid load command type in MH_KEXT_BUNDLE kext: %u.", cmd_hdr->cmd); goto finish; } 
@@ -960,12 +961,13 @@ init_from_object(KXLDObject *object) break; case LC_VERSION_MIN_MACOSX: case LC_VERSION_MIN_IPHONEOS: + case LC_VERSION_MIN_WATCHOS: case LC_SOURCE_VERSION: /* Not supported for object files, fall through */ default: rval = KERN_FAILURE; kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO - "Invalid segment type in MH_OBJECT kext: %u.", cmd_hdr->cmd); + "Invalid load command type in MH_OBJECT kext: %u.", cmd_hdr->cmd); goto finish; } } @@ -1141,7 +1143,7 @@ get_macho_data_size(const KXLDObject *object) */ if ((symtab_size + reloc_size) > seg_vmsize) { u_long overflow = (symtab_size + reloc_size) - seg_vmsize; - data_size += round_page(overflow); + data_size += kxld_round_page_cross_safe(overflow); } } #endif // KXLD_PIC_KEXTS @@ -1710,7 +1712,7 @@ kxld_object_get_vmsize(const KXLDObject *object, u_long *header_size, /* vmsize is the padded header page(s) + segment vmsizes */ *header_size = (object->is_final_image) ? - 0 : round_page(get_macho_header_size(object)); + 0 : (u_long)kxld_round_page_cross_safe(get_macho_header_size(object)); *vmsize = *header_size + get_macho_data_size(object); } @@ -1746,7 +1748,6 @@ kxld_object_export_linked_object(const KXLDObject *object, /* Calculate the size of the headers and data */ header_size = get_macho_header_size(object); - data_offset = (object->is_final_image) ? 
header_size : round_page(header_size); size = object->output_buffer_size; /* Copy data to the file */ diff --git a/libkern/kxld/kxld_reloc.c b/libkern/kxld/kxld_reloc.c index 41e899eac..bb93003a8 100644 --- a/libkern/kxld/kxld_reloc.c +++ b/libkern/kxld/kxld_reloc.c @@ -659,7 +659,7 @@ get_pointer_at_addr_32(const KXLDRelocator *relocator, check(relocator); - addr = *(const uint32_t *) ((void *) (data + offset)); + addr = *(const uint32_t *) ((const void *) (data + offset)); #if !KERNEL if (relocator->swap) { addr = OSSwapInt32(addr); @@ -681,7 +681,7 @@ get_pointer_at_addr_64(const KXLDRelocator *relocator, check(relocator); - addr = *(const uint64_t *) ((void *) (data + offset)); + addr = *(const uint64_t *) ((const void *) (data + offset)); #if !KERNEL if (relocator->swap) { addr = OSSwapInt64(addr); diff --git a/libkern/kxld/kxld_seg.c b/libkern/kxld/kxld_seg.c index 4ea424356..00ef81333 100644 --- a/libkern/kxld/kxld_seg.c +++ b/libkern/kxld/kxld_seg.c @@ -195,7 +195,7 @@ kxld_seg_finalize_object_segment(KXLDArray *segarray, KXLDArray *section_order, /* Set the initial link address at the end of the header pages */ - seg->link_addr = round_page(hdrsize); + seg->link_addr = kxld_round_page_cross_safe(hdrsize); /* Fix up all of the section addresses */ @@ -209,7 +209,7 @@ kxld_seg_finalize_object_segment(KXLDArray *segarray, KXLDArray *section_order, /* Finish initializing the segment */ - seg->vmsize = round_page(sect_offset) - seg->link_addr; + seg->vmsize = kxld_round_page_cross_safe(sect_offset) - seg->link_addr; rval = KERN_SUCCESS; finish: @@ -420,7 +420,7 @@ kxld_seg_init_linkedit(KXLDArray *segs) le = kxld_array_get_item(segs, 1); strlcpy(le->segname, SEG_LINKEDIT, sizeof(le->segname)); - le->link_addr = round_page(seg->link_addr + seg->vmsize); + le->link_addr = kxld_round_page_cross_safe(seg->link_addr + seg->vmsize); le->maxprot = VM_PROT_ALL; le->initprot = VM_PROT_DEFAULT; @@ -511,7 +511,7 @@ kxld_seg_get_macho_data_size(const KXLDSeg *seg) size += 
kxld_sect_get_macho_data_size(sect); } - return round_page(size); + return kxld_round_page_cross_safe(size); } #endif @@ -572,7 +572,7 @@ kxld_seg_export_macho_to_file_buffer(const KXLDSeg *seg, u_char *buf, hdr64->filesize = (uint64_t) (*data_offset - base_data_offset); } - *data_offset = round_page(*data_offset); + *data_offset = (u_long)kxld_round_page_cross_safe(*data_offset); rval = KERN_SUCCESS; @@ -743,8 +743,15 @@ kxld_seg_finish_init(KXLDSeg *seg) KXLDSect *sect = NULL; kxld_addr_t maxaddr = 0; kxld_size_t maxsize = 0; - - if (seg->sects.nitems) { + + /* If we already have a size for this segment (e.g. from the mach-o load + * command) then don't recalculate the segment size. This is safer since + * when we recalculate we are making assumptions about page alignment and + * padding that the kext mach-o file was built with. Better to trust the + * macho-o info, if we have it. If we don't (i.e. vmsize == 0) then add up + * the section sizes and take a best guess at page padding. + */ + if ((seg->vmsize == 0) && (seg->sects.nitems)) { for (i = 0; i < seg->sects.nitems; ++i) { sect = get_sect_by_index(seg, i); require_action(sect, finish, rval=KERN_FAILURE); @@ -754,11 +761,8 @@ kxld_seg_finish_init(KXLDSeg *seg) } } - /* XXX Cross architecture linking will fail if the page size ever differs - * from 4096. (As of this writing, we're fine on i386, x86_64, arm, and - * arm64.) - */ - seg->vmsize = round_page(maxaddr + maxsize - seg->base_addr); + seg->vmsize = kxld_round_page_cross_safe(maxaddr + + maxsize - seg->base_addr); } rval = KERN_SUCCESS; @@ -772,14 +776,8 @@ kxld_seg_finish_init(KXLDSeg *seg) void kxld_seg_set_vm_protections(KXLDSeg *seg, boolean_t strict_protections) { - /* This is unnecessary except to make the clang analyzer happy. When - * the analyzer no longer ignores nonnull attributes for if statements, - * we can remove this line. 
- */ - if (!seg) return; - if (strict_protections) { - if (streq_safe(seg->segname, SEG_TEXT, const_strlen(SEG_TEXT))) { + if (!strncmp(seg->segname, SEG_TEXT, const_strlen(SEG_TEXT))) { seg->initprot = TEXT_SEG_PROT; seg->maxprot = TEXT_SEG_PROT; } else { @@ -828,6 +826,6 @@ kxld_seg_populate_linkedit(KXLDSeg *seg, const KXLDSymtab *symtab, boolean_t is_ } #endif /* KXLD_PIC_KEXTS */ - seg->vmsize = round_page(size); + seg->vmsize = kxld_round_page_cross_safe(size); } diff --git a/libkern/kxld/kxld_stubs.c b/libkern/kxld/kxld_stubs.c index 2b10ce687..31fcf7b51 100644 --- a/libkern/kxld/kxld_stubs.c +++ b/libkern/kxld/kxld_stubs.c @@ -44,7 +44,7 @@ kxld_create_context(KXLDContext **_context __unused, KXLDAllocateCallback allocate_callback __unused, KXLDLoggingCallback logging_callback __unused, KXLDFlags flags __unused, cpu_type_t cputype __unused, - cpu_subtype_t cpusubtype __unused) + cpu_subtype_t cpusubtype __unused, vm_size_t pagesize __unused) { return KERN_SUCCESS; } diff --git a/libkern/kxld/kxld_sym.c b/libkern/kxld/kxld_sym.c index d82cd5cce..252d39e3b 100644 --- a/libkern/kxld/kxld_sym.c +++ b/libkern/kxld/kxld_sym.c @@ -261,7 +261,7 @@ init_predicates(KXLDSym *sym, u_char n_type, u_short n_desc) } /* Set the C++-specific fields */ - if ((streq_safe(CXX_PREFIX, sym->name, const_strlen(CXX_PREFIX)))) { + if (!strncmp(CXX_PREFIX, sym->name, const_strlen(CXX_PREFIX))) { sym->is_cxx = 1; if (streq_safe(sym->name, METACLASS_VTABLE_PREFIX, diff --git a/libkern/kxld/kxld_util.c b/libkern/kxld/kxld_util.c index 67d838fe8..af9f16e4e 100644 --- a/libkern/kxld/kxld_util.c +++ b/libkern/kxld/kxld_util.c @@ -65,6 +65,12 @@ static KXLDLoggingCallback s_logging_callback = NULL; static const char *s_callback_name = NULL; static void *s_callback_data = NULL; +#if !KERNEL +static boolean_t s_cross_link_enabled = FALSE; +static kxld_size_t s_cross_link_page_size = PAGE_SIZE; +#endif + + /******************************************************************************* 
*******************************************************************************/ void @@ -165,7 +171,7 @@ kxld_page_alloc_untracked(size_t size) if (size < KALLOC_MAX) { ptr = kalloc(size); } else { - rval = kmem_alloc(kernel_map, &addr, size); + rval = kmem_alloc(kernel_map, &addr, size, VM_KERN_MEMORY_OSKEXT); if (!rval) ptr = (void *) addr; } #else /* !KERNEL */ @@ -204,7 +210,7 @@ kxld_alloc_pageable(size_t size) kern_return_t rval = 0; vm_offset_t ptr = 0; - rval = kmem_alloc_pageable(kernel_map, &ptr, size); + rval = kmem_alloc_pageable(kernel_map, &ptr, size, VM_KERN_MEMORY_OSKEXT); if (rval) ptr = 0; return (void *) ptr; @@ -804,3 +810,53 @@ kxld_print_memory_report(void) #endif } +/********************************************************************* +*********************************************************************/ +#if !KERNEL +boolean_t kxld_set_cross_link_page_size(kxld_size_t target_page_size) +{ + // verify radix 2 + if ((target_page_size != 0) && + ((target_page_size & (target_page_size - 1)) == 0)) { + + s_cross_link_enabled = TRUE; + s_cross_link_page_size = target_page_size; + + return TRUE; + } else { + return FALSE; + } +} +#endif /* !KERNEL */ + +/********************************************************************* +*********************************************************************/ +kxld_size_t kxld_get_effective_page_size(void) +{ +#if KERNEL + return PAGE_SIZE; +#else + if (s_cross_link_enabled) { + return s_cross_link_page_size; + } else { + return PAGE_SIZE; + } +#endif /* KERNEL */ +} + +/********************************************************************* +*********************************************************************/ +kxld_addr_t kxld_round_page_cross_safe(kxld_addr_t offset) +{ +#if KERNEL + return round_page(offset); +#else + // assume s_cross_link_page_size is power of 2 + if (s_cross_link_enabled) { + return (offset + (s_cross_link_page_size - 1)) & + (~(s_cross_link_page_size - 1)); + } else { + return 
round_page(offset); + } +#endif /* KERNEL */ +} diff --git a/libkern/kxld/kxld_util.h b/libkern/kxld/kxld_util.h index 0eb0f2f7a..f20bc18e2 100644 --- a/libkern/kxld/kxld_util.h +++ b/libkern/kxld/kxld_util.h @@ -205,4 +205,14 @@ const char * kxld_strstr(const char *s, const char *find) void kxld_print_memory_report(void) __attribute__((visibility("hidden"))); +/******************************************************************************* +* Cross Linking +*******************************************************************************/ +#if !KERNEL +boolean_t kxld_set_cross_link_page_size(kxld_size_t target_page_size); +#endif /* !KERNEL */ +kxld_size_t kxld_get_effective_page_size(void); +kxld_addr_t kxld_round_page_cross_safe(kxld_addr_t addr); + + #endif /* _KXLD_UTIL_H_ */ diff --git a/libkern/kxld/kxld_versionmin.c b/libkern/kxld/kxld_versionmin.c index 9b4753c4b..e422495e5 100644 --- a/libkern/kxld/kxld_versionmin.c +++ b/libkern/kxld/kxld_versionmin.c @@ -42,7 +42,7 @@ kxld_versionmin_init_from_macho(KXLDversionmin *versionmin, struct version_min_c { check(versionmin); check(src); - check((src->cmd == LC_VERSION_MIN_MACOSX) || (src->cmd == LC_VERSION_MIN_IPHONEOS)); + check((src->cmd == LC_VERSION_MIN_MACOSX) || (src->cmd == LC_VERSION_MIN_IPHONEOS) || (src->cmd == LC_VERSION_MIN_WATCHOS)); switch (src->cmd) { case LC_VERSION_MIN_MACOSX: @@ -51,6 +51,9 @@ kxld_versionmin_init_from_macho(KXLDversionmin *versionmin, struct version_min_c case LC_VERSION_MIN_IPHONEOS: versionmin->platform = kKxldVersionMiniPhoneOS; break; + case LC_VERSION_MIN_WATCHOS: + versionmin->platform = kKxldVersionMinWatchOS; + break; } versionmin->version = src->version; @@ -99,6 +102,9 @@ kxld_versionmin_export_macho(const KXLDversionmin *versionmin, u_char *buf, case kKxldVersionMiniPhoneOS: versionminhdr->cmd = LC_VERSION_MIN_IPHONEOS; break; + case kKxldVersionMinWatchOS: + versionminhdr->cmd = LC_VERSION_MIN_WATCHOS; + break; } versionminhdr->cmdsize = (uint32_t) 
sizeof(*versionminhdr); versionminhdr->version = versionmin->version; diff --git a/libkern/kxld/kxld_versionmin.h b/libkern/kxld/kxld_versionmin.h index 3ebcac665..d4ce76b21 100644 --- a/libkern/kxld/kxld_versionmin.h +++ b/libkern/kxld/kxld_versionmin.h @@ -40,7 +40,8 @@ typedef struct kxld_versionmin KXLDversionmin; enum kxld_versionmin_platforms { kKxldVersionMinMacOSX, - kKxldVersionMiniPhoneOS + kKxldVersionMiniPhoneOS, + kKxldVersionMinWatchOS }; struct kxld_versionmin { diff --git a/libkern/kxld/tests/kextcopyright.c b/libkern/kxld/tests/kextcopyright.c index 7e545d328..b99feebf8 100644 --- a/libkern/kxld/tests/kextcopyright.c +++ b/libkern/kxld/tests/kextcopyright.c @@ -112,7 +112,7 @@ convert_cfstring(CFStringRef the_string) result = converted_string; finish: - CFRelease(the_data); + if (the_data) CFRelease(the_data); return result; } diff --git a/libkern/libkern/Makefile b/libkern/libkern/Makefile index 8b2f0606a..ee046b925 100644 --- a/libkern/libkern/Makefile +++ b/libkern/libkern/Makefile @@ -14,61 +14,62 @@ INSTINC_SUBDIRS_X86_64 = \ i386 INSTINC_SUBDIRS_X86_64H = \ i386 +INSTINC_SUBDIRS_ARM = \ + arm +INSTINC_SUBDIRS_ARM64 = \ + arm EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} EXPINC_SUBDIRS_X86_64H = ${INSTINC_SUBDIRS_X86_64H} +EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} +EXPINC_SUBDIRS_ARM64 = ${INSTINC_SUBDIRS_ARM64} DATAFILES = \ - OSAtomic.h \ - OSBase.h \ - OSByteOrder.h \ - _OSByteOrder.h \ - OSDebug.h \ - OSKextLib.h \ - OSMalloc.h \ - OSReturn.h \ - OSTypes.h \ - locks.h \ - sysctl.h \ - tree.h \ - zconf.h \ + OSByteOrder.h \ + _OSByteOrder.h \ + OSDebug.h \ + OSKextLib.h \ + OSReturn.h \ + OSTypes.h + +KERNELFILES = \ + ${DATAFILES} \ + OSAtomic.h \ + OSBase.h \ + OSMalloc.h \ + locks.h \ + sysctl.h \ + tree.h \ + zconf.h \ zlib.h -PRIVATE_DATAFILES = \ - OSKextLibPrivate.h \ +PRIVATE_KERNELFILES = \ + OSKextLibPrivate.h \ + OSSerializeBinary.h \ kext_request_keys.h \ - mkext.h \ - prelink.h \ - 
OSSerializeBinary.h - -INSTALL_MI_LIST = \ - OSByteOrder.h \ - _OSByteOrder.h \ - OSDebug.h \ - OSKextLib.h \ - OSReturn.h \ - OSTypes.h + mkext.h \ + prelink.h + +PRIVATE_DATAFILES = \ + ${PRIVATE_KERNELFILES} \ + tree.h + +INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = libkern INSTALL_MI_LCL_LIST = \ - ${INSTALL_MI_LIST} \ ${PRIVATE_DATAFILES} \ - tree.h \ kext_panic_report.h \ OSCrossEndian.h -INSTALL_KF_MI_LIST = \ - ${DATAFILES} +INSTALL_KF_MI_LIST = ${KERNELFILES} -INSTALL_KF_MI_LCL_LIST = \ - ${DATAFILES} \ - ${PRIVATE_DATAFILES} +INSTALL_KF_MI_LCL_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} -EXPORT_MI_LIST = \ - ${DATAFILES} \ - ${PRIVATE_DATAFILES} \ +EXPORT_MI_LIST = \ + $(sort ${KERNELFILES} ${PRIVATE_DATAFILES}) \ kernel_mach_header.h \ kxld.h \ kxld_types.h \ diff --git a/libkern/libkern/OSAtomic.h b/libkern/libkern/OSAtomic.h index c802adeff..656a2cd2a 100644 --- a/libkern/libkern/OSAtomic.h +++ b/libkern/libkern/OSAtomic.h @@ -172,6 +172,52 @@ inline static long OSDecrementAtomicLong(volatile long * address) #endif /* XNU_KERNEL_PRIVATE */ #if XNU_KERNEL_PRIVATE +/*! + * @function OSCompareAndSwap8 + * + * @abstract + * Compare and swap operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSCompareAndSwap8 function compares the value at the specified address with oldVal. The value of newValue is written to the address only if oldValue and the value at the address are equal. OSCompareAndSwap returns true if newValue is written to the address; otherwise, it returns false. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. 
+ * + * @param oldValue The value to compare at address. + * @param newValue The value to write to address if oldValue compares true. + * @param address The byte aligned address of the data to update atomically. + * @result true if newValue was written to the address. + */ +extern Boolean OSCompareAndSwap8( + UInt8 oldValue, + UInt8 newValue, + volatile UInt8 * address); +#define OSCompareAndSwap8(a, b, c) \ + (OSCompareAndSwap8(a, b, __SAFE_CAST_PTR(volatile UInt8*,c))) + +/*! + * @function OSCompareAndSwap16 + * + * @abstract + * Compare and swap operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSCompareAndSwap16 function compares the value at the specified address with oldVal. The value of newValue is written to the address only if oldValue and the value at the address are equal. OSCompareAndSwap returns true if newValue is written to the address; otherwise, it returns false. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * + * @param oldValue The value to compare at address. + * @param newValue The value to write to address if oldValue compares true. + * @param address The 2-byte aligned address of the data to update atomically. + * @result true if newValue was written to the address. + */ +extern Boolean OSCompareAndSwap16( + UInt16 oldValue, + UInt16 newValue, + volatile UInt16 * address); +#define OSCompareAndSwap16(a, b, c) \ + (OSCompareAndSwap16(a, b, __SAFE_CAST_PTR(volatile UInt16*,c))) + #endif /* XNU_KERNEL_PRIVATE */ /*! 
@@ -546,7 +592,7 @@ extern UInt8 OSBitXorAtomic8( * * @discussion * The OSTestAndSet function sets a single bit in a byte at a specified address. It returns true if the bit was already set, false otherwise. - * @param bit The bit number in the range 0 through 7. + * @param bit The bit number in the range 0 through 7. Bit 0 is the most significant. * @param startAddress The address of the byte to update atomically. * @result true if the bit was already set, false otherwise. */ @@ -564,7 +610,7 @@ extern Boolean OSTestAndSet( * The OSTestAndClear function clears a single bit in a byte at a specified address. It returns true if the bit was already clear, false otherwise. * * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. - * @param bit The bit number in the range 0 through 7. + * @param bit The bit number in the range 0 through 7. Bit 0 is the most significant. * @param startAddress The address of the byte to update atomically. * @result true if the bit was already clear, false otherwise. */ diff --git a/libkern/libkern/OSKextLib.h b/libkern/libkern/OSKextLib.h index 8d752baf6..8435d0c49 100644 --- a/libkern/libkern/OSKextLib.h +++ b/libkern/libkern/OSKextLib.h @@ -35,13 +35,14 @@ __BEGIN_DECLS #include #include #include +#include #ifdef KERNEL #include #include #else #include -#include +#include #endif /* KERNEL */ /*! @@ -878,6 +879,36 @@ OSReturn OSKextCancelRequest( void ** contextOut); +/*! + * @function OSKextGrabPgoData + * + * @abstract + * Grab a LLVM profile data buffer from a loaded kext. + * + * @param uuid the uuid identifying the kext to retrieve data from + * @param pSize pointer of where to store the size of the buffer. May be NULL. 
+ * @param pBuffer pointer to the output buffer. May be NULL. + * @param bufferSize size of the buffer pointed to by pBuffer + * @param wait_for_unload (boolean) sleep until the kext is unloaded + * @param metadata (boolean) include metadata footer + * + * @result + * 0 on success + * ENOTSUP if the kext does not have profile data to retrieve. + * ENOTSUP if no kext with the given UUID is found + * ERRORS if the provided buffer is too small + * EIO internal error, such as if __llvm_profile_write_buffer_internal fails + */ +int +OSKextGrabPgoData(uuid_t uuid, + uint64_t *pSize, + char *pBuffer, + uint64_t bufferSize, + int wait_for_unload, + int metadata); + + + #if PRAGMA_MARK #pragma mark - /********************************************************************/ diff --git a/libkern/libkern/OSKextLibPrivate.h b/libkern/libkern/OSKextLibPrivate.h index 06a7fe8d3..4ae4b9806 100644 --- a/libkern/libkern/OSKextLibPrivate.h +++ b/libkern/libkern/OSKextLibPrivate.h @@ -39,7 +39,7 @@ __BEGIN_DECLS #include #else #include -#include +#include #endif /* KERNEL */ __END_DECLS @@ -910,6 +910,14 @@ extern OSKextLoadedKextSummaryHeader * gLoadedKextSummaries; */ void OSKextLoadedKextSummariesUpdated(void); +#ifdef XNU_KERNEL_PRIVATE + +extern const vm_allocation_site_t * OSKextGetAllocationSiteForCaller(uintptr_t address); +extern uint32_t OSKextGetKmodIDForSite(vm_allocation_site_t * site); +extern void OSKextFreeSite(vm_allocation_site_t * site); + +#endif /* XNU_KERNEL_PRIVATE */ + __END_DECLS #endif /* ! _LIBKERN_OSKEXTLIBPRIVATE_H */ diff --git a/libkern/libkern/c++/OSArray.h b/libkern/libkern/c++/OSArray.h index 91cbd81bf..67da96ff4 100644 --- a/libkern/libkern/c++/OSArray.h +++ b/libkern/libkern/c++/OSArray.h @@ -104,9 +104,9 @@ class OSArray : public OSCollection ExpansionData * reserved; /* OSCollectionIterator interfaces. 
*/ - virtual unsigned int iteratorSize() const; - virtual bool initIterator(void * iterator) const; - virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const; + virtual unsigned int iteratorSize() const APPLE_KEXT_OVERRIDE; + virtual bool initIterator(void * iterator) const APPLE_KEXT_OVERRIDE; + virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const APPLE_KEXT_OVERRIDE; public: @@ -312,7 +312,7 @@ class OSArray : public OSCollection * release@/link * instead. */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! @@ -324,7 +324,7 @@ class OSArray : public OSCollection * @result * The current number of objects within the array. */ - virtual unsigned int getCount() const; + virtual unsigned int getCount() const APPLE_KEXT_OVERRIDE; /*! @@ -349,7 +349,7 @@ class OSArray : public OSCollection * //apple_ref/cpp/instm/OSArray/ensureCapacity/virtualunsignedint/(unsignedint) * ensureCapacity.@/link */ - virtual unsigned int getCapacity() const; + virtual unsigned int getCapacity() const APPLE_KEXT_OVERRIDE; /*! @@ -365,7 +365,7 @@ class OSArray : public OSCollection * An OSArray allocates storage for objects in multiples * of the capacity increment. */ - virtual unsigned int getCapacityIncrement() const; + virtual unsigned int getCapacityIncrement() const APPLE_KEXT_OVERRIDE; /*! @@ -383,7 +383,7 @@ class OSArray : public OSCollection * of the capacity increment. * Calling this function does not immediately reallocate storage. */ - virtual unsigned int setCapacityIncrement(unsigned increment); + virtual unsigned int setCapacityIncrement(unsigned increment) APPLE_KEXT_OVERRIDE; /*! @@ -409,7 +409,7 @@ class OSArray : public OSCollection * * There is no way to reduce the capacity of an OSArray. */ - virtual unsigned int ensureCapacity(unsigned int newCapacity); + virtual unsigned int ensureCapacity(unsigned int newCapacity) APPLE_KEXT_OVERRIDE; /*! 
@@ -422,7 +422,7 @@ class OSArray : public OSCollection * The array's capacity (and therefore direct memory consumption) * is not reduced by this function. */ - virtual void flushCollection(); + virtual void flushCollection() APPLE_KEXT_OVERRIDE; /*! @@ -575,7 +575,7 @@ class OSArray : public OSCollection * if that object is derived from OSArray * and contains the same or equivalent objects. */ - virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE; /*! @@ -657,7 +657,7 @@ class OSArray : public OSCollection * @result * true if serialization succeeds, false if not. */ - virtual bool serialize(OSSerialize * serializer) const; + virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE; /*! @@ -686,7 +686,7 @@ class OSArray : public OSCollection virtual unsigned setOptions( unsigned options, unsigned mask, - void * context = 0); + void * context = 0) APPLE_KEXT_OVERRIDE; /*! @@ -711,7 +711,7 @@ class OSArray : public OSCollection * Objects that are not derived from OSCollection are retained * rather than copied. */ - OSCollection * copyCollection(OSDictionary * cycleDict = 0); + OSCollection * copyCollection(OSDictionary * cycleDict = 0) APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(OSArray, 0); OSMetaClassDeclareReservedUnused(OSArray, 1); diff --git a/libkern/libkern/c++/OSBoolean.h b/libkern/libkern/c++/OSBoolean.h index 4eb533ccf..8821a1864 100644 --- a/libkern/libkern/c++/OSBoolean.h +++ b/libkern/libkern/c++/OSBoolean.h @@ -80,7 +80,7 @@ class OSBoolean : public OSObject */ virtual void taggedRelease( const void * tag, - const int when) const; + const int when) const APPLE_KEXT_OVERRIDE; public: static void initialize(); @@ -113,7 +113,7 @@ class OSBoolean : public OSObject * @discussion * This function should never be called. */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! 
@@ -124,7 +124,7 @@ class OSBoolean : public OSObject * * @param tag Unused. */ - virtual void taggedRetain(const void * tag) const; + virtual void taggedRetain(const void * tag) const APPLE_KEXT_OVERRIDE; /*! @@ -210,7 +210,7 @@ class OSBoolean : public OSObject * if that object is derived from OSBoolean * and represents the same C++ bool value. */ - virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE; /*! @@ -225,7 +225,7 @@ class OSBoolean : public OSObject * @result * true if serialization succeeds, false if not. */ - virtual bool serialize(OSSerialize * serializer) const; + virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(OSBoolean, 0); OSMetaClassDeclareReservedUnused(OSBoolean, 1); diff --git a/libkern/libkern/c++/OSCollection.h b/libkern/libkern/c++/OSCollection.h index adb7cbf8b..91deba1fa 100644 --- a/libkern/libkern/c++/OSCollection.h +++ b/libkern/libkern/c++/OSCollection.h @@ -212,7 +212,7 @@ class OSCollection : public OSObject * This function is used to initialize state * within a newly created OSCollection object. */ - virtual bool init(); + virtual bool init() APPLE_KEXT_OVERRIDE; public: @@ -458,3 +458,5 @@ class OSCollection : public OSObject }; #endif /* !_OS_OSCOLLECTION_H */ + + diff --git a/libkern/libkern/c++/OSCollectionIterator.h b/libkern/libkern/c++/OSCollectionIterator.h index 72e8e9792..235877add 100644 --- a/libkern/libkern/c++/OSCollectionIterator.h +++ b/libkern/libkern/c++/OSCollectionIterator.h @@ -149,7 +149,7 @@ class OSCollectionIterator : public OSIterator * release@/link * instead. */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! @@ -159,7 +159,7 @@ class OSCollectionIterator : public OSIterator * Resets the iterator to the beginning of the collection, * as if it had just been created. 
*/ - virtual void reset(); + virtual void reset() APPLE_KEXT_OVERRIDE; /*! @@ -173,7 +173,7 @@ class OSCollectionIterator : public OSIterator * false otherwise * (typically because the iteration context has been modified). */ - virtual bool isValid(); + virtual bool isValid() APPLE_KEXT_OVERRIDE; /*! @@ -202,7 +202,7 @@ class OSCollectionIterator : public OSIterator * and then to advance the iteration context to the next object (if any) * and return that next object, or NULL if there is none. */ - virtual OSObject * getNextObject(); + virtual OSObject * getNextObject() APPLE_KEXT_OVERRIDE; }; #endif /* !_OS_OSCOLLECTIONITERATOR_H */ diff --git a/libkern/libkern/c++/OSData.h b/libkern/libkern/c++/OSData.h index 5c499cc8c..b3fcd5732 100644 --- a/libkern/libkern/c++/OSData.h +++ b/libkern/libkern/c++/OSData.h @@ -388,7 +388,7 @@ class OSData : public OSObject * release@/link * instead. */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! @@ -657,7 +657,7 @@ class OSData : public OSObject * if that object is derived from OSData * and contains the equivalent bytes of the same length. */ - virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE; /*! @@ -698,7 +698,7 @@ class OSData : public OSObject * @result * true if serialization succeeds, false if not. */ - virtual bool serialize(OSSerialize * serializer) const; + virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE; /*! diff --git a/libkern/libkern/c++/OSDictionary.h b/libkern/libkern/c++/OSDictionary.h index 9bdba7ac2..5bc256278 100644 --- a/libkern/libkern/c++/OSDictionary.h +++ b/libkern/libkern/c++/OSDictionary.h @@ -131,9 +131,9 @@ class OSDictionary : public OSCollection ExpansionData * reserved; // Member functions used by the OSCollectionIterator class. 
- virtual unsigned int iteratorSize() const; - virtual bool initIterator(void * iterator) const; - virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const; + virtual unsigned int iteratorSize() const APPLE_KEXT_OVERRIDE; + virtual bool initIterator(void * iterator) const APPLE_KEXT_OVERRIDE; + virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const APPLE_KEXT_OVERRIDE; public: @@ -438,7 +438,7 @@ class OSDictionary : public OSCollection * release@/link * instead. */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! @@ -452,7 +452,7 @@ class OSDictionary : public OSCollection * The current number of key/object pairs * contained within the dictionary. */ - virtual unsigned int getCount() const; + virtual unsigned int getCount() const APPLE_KEXT_OVERRIDE; /*! @@ -476,7 +476,7 @@ class OSDictionary : public OSCollection * //apple_ref/cpp/instm/OSDictionary/ensureCapacity/virtualunsignedint/(unsignedint) * ensureCapacity@/link. */ - virtual unsigned int getCapacity() const; + virtual unsigned int getCapacity() const APPLE_KEXT_OVERRIDE; /*! @@ -492,7 +492,7 @@ class OSDictionary : public OSCollection * An OSDictionary allocates storage for key/object pairs in multiples * of the capacity increment. */ - virtual unsigned int getCapacityIncrement() const; + virtual unsigned int getCapacityIncrement() const APPLE_KEXT_OVERRIDE; /*! @@ -510,7 +510,7 @@ class OSDictionary : public OSCollection * of the capacity increment. * Calling this function does not immediately reallocate storage. */ - virtual unsigned int setCapacityIncrement(unsigned increment); + virtual unsigned int setCapacityIncrement(unsigned increment) APPLE_KEXT_OVERRIDE; /*! @@ -536,7 +536,7 @@ class OSDictionary : public OSCollection * * There is no way to reduce the capacity of an OSDictionary. 
*/ - virtual unsigned int ensureCapacity(unsigned int newCapacity); + virtual unsigned int ensureCapacity(unsigned int newCapacity) APPLE_KEXT_OVERRIDE; /*! @@ -549,7 +549,7 @@ class OSDictionary : public OSCollection * The dictionary's capacity (and therefore direct memory consumption) * is not reduced by this function. */ - virtual void flushCollection(); + virtual void flushCollection() APPLE_KEXT_OVERRIDE; /*! @@ -831,7 +831,7 @@ class OSDictionary : public OSCollection * if that object is derived from OSDictionary * and contains the same or equivalent objects. */ - virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE; /*! @@ -846,7 +846,7 @@ class OSDictionary : public OSCollection * @result * true if serialization succeeds, false if not. */ - virtual bool serialize(OSSerialize * serializer) const; + virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE; /*! @@ -875,7 +875,7 @@ class OSDictionary : public OSCollection virtual unsigned setOptions( unsigned options, unsigned mask, - void * context = 0); + void * context = 0) APPLE_KEXT_OVERRIDE; /*! @@ -901,7 +901,7 @@ class OSDictionary : public OSCollection * Objects that are not derived from OSCollection are retained * rather than copied. 
*/ - OSCollection * copyCollection(OSDictionary * cycleDict = 0); + OSCollection * copyCollection(OSDictionary * cycleDict = 0) APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(OSDictionary, 0); diff --git a/libkern/libkern/c++/OSKext.h b/libkern/libkern/c++/OSKext.h index 815d85501..abc3db0e4 100644 --- a/libkern/libkern/c++/OSKext.h +++ b/libkern/libkern/c++/OSKext.h @@ -105,6 +105,44 @@ void kmod_dump_log(vm_offset_t*, unsigned int, boolean_t); #if PRAGMA_MARK #pragma mark - #endif + +struct list_head { + struct list_head *prev; + struct list_head *next; +}; + +struct OSKextGrabPgoStruct { + bool metadata; + uint64_t *pSize; + char *pBuffer; + uint64_t bufferSize; + int err; + struct list_head list_head; +}; + +#ifndef container_of +#define container_of(ptr,type,member) ((type*)(((uintptr_t)ptr) - offsetof(type, member))) +#endif +/********************************************************************/ + +#if XNU_KERNEL_PRIVATE + +struct OSKextAccount +{ + vm_allocation_site_t site; + uint32_t loadTag; +}; + +struct OSKextActiveAccount +{ + uintptr_t address; + uintptr_t address_end; + OSKextAccount * account; +}; +typedef struct OSKextActiveAccount OSKextActiveAccount; + +#endif /* XNU_KERNEL_PRIVATE */ + /* * @class OSKext */ @@ -122,6 +160,13 @@ class OSKext : public OSObject friend class KLDBootstrap; friend class OSMetaClass; + friend int OSKextGrabPgoData(uuid_t uuid, + uint64_t *pSize, + char *pBuffer, + uint64_t bufferSize, + int wait_for_unload, + int metadata); + #ifdef XNU_KERNEL_PRIVATE friend void OSKextVLog( OSKext * aKext, @@ -238,6 +283,10 @@ class OSKext : public OSObject unsigned int jettisonLinkeditSeg:1; } flags; + struct list_head pendingPgoHead; + uuid_t instance_uuid; + OSKextAccount * account; + #if PRAGMA_MARK /**************************************/ #pragma mark Private Functions @@ -298,7 +347,7 @@ class OSKext : public OSObject bool externalDataIsMkext = false); virtual bool registerIdentifier(void); - virtual void free(void); + 
virtual void free(void) APPLE_KEXT_OVERRIDE; static OSReturn removeKext( OSKext * aKext, @@ -373,6 +422,7 @@ class OSKext : public OSObject virtual OSReturn slidePrelinkedExecutable(void); virtual OSReturn loadExecutable(void); virtual void jettisonLinkeditSegment(void); + virtual void jettisonDATASegmentPadding(void); static void considerDestroyingLinkContext(void); virtual OSData * getExecutable(void); virtual void setLinkedExecutable(OSData * anExecutable); @@ -386,7 +436,7 @@ class OSKext : public OSObject virtual OSReturn start(bool startDependenciesFlag = true); virtual OSReturn stop(void); - virtual OSReturn setVMProtections(void); + virtual OSReturn setVMAttributes(bool protect, bool wire); virtual boolean_t segmentShouldBeWired(kernel_segment_command_t *seg); virtual OSReturn validateKextMapping(bool startFlag); virtual boolean_t verifySegmentMapping(kernel_segment_command_t *seg); @@ -491,6 +541,7 @@ class OSKext : public OSObject */ static void updateLoadedKextSummaries(void); void updateLoadedKextSummary(OSKextLoadedKextSummary *summary); + void updateActiveAccount(OSKextActiveAccount *account); /* C++ Initialization. 
*/ @@ -509,6 +560,9 @@ class OSKext : public OSObject static OSKext * lookupKextWithIdentifier(OSString * kextIdentifier); static OSKext * lookupKextWithLoadTag(OSKextLoadTag aTag); static OSKext * lookupKextWithAddress(vm_address_t address); + static OSKext * lookupKextWithUUID(uuid_t uuid); + + kernel_section_t *lookupSection(const char *segname, const char*secname); static bool isKextWithIdentifierLoaded(const char * kextIdentifier); diff --git a/libkern/libkern/c++/OSLib.h b/libkern/libkern/c++/OSLib.h index 90034109b..80bc292da 100644 --- a/libkern/libkern/c++/OSLib.h +++ b/libkern/libkern/c++/OSLib.h @@ -46,6 +46,33 @@ __BEGIN_DECLS __END_DECLS + +#if XNU_KERNEL_PRIVATE +#include + +#define kalloc_container(size) \ + kalloc_tag_bt(size, VM_KERN_MEMORY_LIBKERN) + +#if OSALLOCDEBUG +extern "C" int debug_container_malloc_size; +extern "C" int debug_ivars_size; +#if IOTRACKING +#define OSCONTAINER_ACCUMSIZE(s) do { OSAddAtomic((SInt32)(s), &debug_container_malloc_size); trackingAccumSize(s); } while(0) +#else +#define OSCONTAINER_ACCUMSIZE(s) do { OSAddAtomic((SInt32)(s), &debug_container_malloc_size); } while(0) +#endif +#define OSMETA_ACCUMSIZE(s) do { OSAddAtomic((SInt32)(s), &debug_container_malloc_size); } while(0) +#define OSIVAR_ACCUMSIZE(s) do { OSAddAtomic((SInt32)(s), &debug_ivars_size); } while(0) + +#else /* OSALLOCDEBUG */ + +#define OSCONTAINER_ACCUMSIZE(s) +#define OSMETA_ACCUMSIZE(s) +#define OSIVAR_ACCUMSIZE(s) + +#endif /* !OSALLOCDEBUG */ +#endif /* XNU_KERNEL_PRIVATE */ + #ifndef NULL #if defined (__cplusplus) #define NULL 0 diff --git a/libkern/libkern/c++/OSMetaClass.h b/libkern/libkern/c++/OSMetaClass.h index 84c30e6ab..2d2267ab1 100644 --- a/libkern/libkern/c++/OSMetaClass.h +++ b/libkern/libkern/c++/OSMetaClass.h @@ -88,6 +88,20 @@ class OSOrderedSet; /*! 
@parseOnly */ #define APPLE_KEXT_DEPRECATED __attribute__((deprecated)) + +#if __cplusplus >= 201103L +#define APPLE_KEXT_OVERRIDE override +#if defined(__LP64__) +#define APPLE_KEXT_COMPATIBILITY_OVERRIDE +#else +#define APPLE_KEXT_COMPATIBILITY_OVERRIDE APPLE_KEXT_OVERRIDE +#endif +#else +#define APPLE_KEXT_OVERRIDE +#define APPLE_KEXT_COMPATIBILITY_OVERRIDE +#endif + + /*! * @class OSMetaClassBase * @@ -1582,7 +1596,7 @@ class OSMetaClass : private OSMetaClassBase virtual OSObject *alloc() const; \ } gMetaClass; \ friend class className ::MetaClass; \ - virtual const OSMetaClass * getMetaClass() const; \ + virtual const OSMetaClass * getMetaClass() const APPLE_KEXT_OVERRIDE; \ protected: \ className (const OSMetaClass *); \ virtual ~ className () @@ -2065,6 +2079,17 @@ void className ::_RESERVED ## className ## index () \ // I/O Kit debug internal routines. static void printInstanceCounts(); static void serializeClassDictionary(OSDictionary * dict); +#ifdef XNU_KERNEL_PRIVATE +#if IOTRACKING +public: + static void * trackedNew(size_t size); + static void trackedDelete(void * mem, size_t size); + void trackedInstance(OSObject * instance) const; + void trackedFree(OSObject * instance) const; + void trackedAccumSize(OSObject * instance, size_t size) const; + struct IOTrackingQueue * getTracking() const; +#endif +#endif private: // Obsolete APIs diff --git a/libkern/libkern/c++/OSNumber.h b/libkern/libkern/c++/OSNumber.h index 6502a3039..c54c3a3c6 100644 --- a/libkern/libkern/c++/OSNumber.h +++ b/libkern/libkern/c++/OSNumber.h @@ -215,7 +215,7 @@ class OSNumber : public OSObject * release@/link * instead. */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! @@ -397,7 +397,7 @@ class OSNumber : public OSObject * An OSNumber is considered equal to another object if that object is * derived from OSNumber and represents the same C integer value. 
*/ - virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE; /*! @@ -412,7 +412,7 @@ class OSNumber : public OSObject * @result * true if serialization succeeds, false if not. */ - virtual bool serialize(OSSerialize * serializer) const; + virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(OSNumber, 0); diff --git a/libkern/libkern/c++/OSObject.h b/libkern/libkern/c++/OSObject.h index a24f30e98..01a480f19 100644 --- a/libkern/libkern/c++/OSObject.h +++ b/libkern/libkern/c++/OSObject.h @@ -203,7 +203,7 @@ class OSObject : public OSMetaClassBase * can be used to break certain retain cycles in object graphs. * In general, however, it should be avoided. */ - virtual void release(int freeWhen) const; + virtual void release(int freeWhen) const APPLE_KEXT_OVERRIDE; /*! * @function taggedRelease @@ -230,7 +230,7 @@ class OSObject : public OSMetaClassBase * can be used to break certain retain cycles in object graphs. * In general, however, it should be avoided. */ - virtual void taggedRelease(const void * tag, const int freeWhen) const; + virtual void taggedRelease(const void * tag, const int freeWhen) const APPLE_KEXT_OVERRIDE; /*! @@ -324,7 +324,7 @@ class OSObject : public OSMetaClassBase * @result * The reference count of the object. */ - virtual int getRetainCount() const; + virtual int getRetainCount() const APPLE_KEXT_OVERRIDE; /*! @@ -339,7 +339,7 @@ class OSObject : public OSMetaClassBase * outside the context in which you received it, * you should always retain it immediately. */ - virtual void retain() const; + virtual void retain() const APPLE_KEXT_OVERRIDE; /*! @@ -357,7 +357,7 @@ class OSObject : public OSMetaClassBase * //apple_ref/cpp/instm/OSObject/free/virtualvoid/() * free@/link. */ - virtual void release() const; + virtual void release() const APPLE_KEXT_OVERRIDE; /*! 
@@ -378,7 +378,7 @@ class OSObject : public OSMetaClassBase * outside the context in which you received it, * you should always retain it immediately. */ - virtual void taggedRetain(const void * tag = 0) const; + virtual void taggedRetain(const void * tag = 0) const APPLE_KEXT_OVERRIDE; /*! @@ -396,7 +396,7 @@ class OSObject : public OSMetaClassBase * It is for use by OSCollection and subclasses to track * inclusion in collections. */ - virtual void taggedRelease(const void * tag = 0) const; + virtual void taggedRelease(const void * tag = 0) const APPLE_KEXT_OVERRIDE; // xx-review: used to say, "Remove a reference on this object with this tag, if an attempt is made to remove a reference that isn't associated with this tag the kernel will panic immediately", but I don't see that in the implementation @@ -422,7 +422,13 @@ class OSObject : public OSMetaClassBase * @link //apple_ref/doc/class/OSSerialize OSSerialize@/link * for more information. */ - virtual bool serialize(OSSerialize * serializer) const; + virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE; + +#ifdef XNU_KERNEL_PRIVATE +#if IOTRACKING + void trackingAccumSize(size_t size); +#endif +#endif // Unused Padding OSMetaClassDeclareReservedUnused(OSObject, 0); diff --git a/libkern/libkern/c++/OSOrderedSet.h b/libkern/libkern/c++/OSOrderedSet.h index 8819f9332..4f94b889a 100644 --- a/libkern/libkern/c++/OSOrderedSet.h +++ b/libkern/libkern/c++/OSOrderedSet.h @@ -135,9 +135,9 @@ class OSOrderedSet : public OSCollection protected: /* OSCollectionIterator interfaces. 
*/ - virtual unsigned int iteratorSize() const; - virtual bool initIterator(void *iterator) const; - virtual bool getNextObjectForIterator(void *iterator, OSObject **ret) const; + virtual unsigned int iteratorSize() const APPLE_KEXT_OVERRIDE; + virtual bool initIterator(void *iterator) const APPLE_KEXT_OVERRIDE; + virtual bool getNextObjectForIterator(void *iterator, OSObject **ret) const APPLE_KEXT_OVERRIDE; public: @@ -250,7 +250,7 @@ class OSOrderedSet : public OSCollection * release@/link * instead. */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! @@ -262,7 +262,7 @@ class OSOrderedSet : public OSCollection * @result * The current number of objects within the ordered set. */ - virtual unsigned int getCount() const; + virtual unsigned int getCount() const APPLE_KEXT_OVERRIDE; /*! @@ -287,7 +287,7 @@ class OSOrderedSet : public OSCollection * //apple_ref/cpp/instm/OSOrderedSet/ensureCapacity/virtualunsignedint/(unsignedint) * ensureCapacity@/link. */ - virtual unsigned int getCapacity() const; + virtual unsigned int getCapacity() const APPLE_KEXT_OVERRIDE; /*! @@ -303,7 +303,7 @@ class OSOrderedSet : public OSCollection * An OSOrderedSet allocates storage for objects in multiples * of the capacity increment. */ - virtual unsigned int getCapacityIncrement() const; + virtual unsigned int getCapacityIncrement() const APPLE_KEXT_OVERRIDE; /*! @@ -321,7 +321,7 @@ class OSOrderedSet : public OSCollection * of the capacity increment. * Calling this function does not immediately reallocate storage. */ - virtual unsigned int setCapacityIncrement(unsigned increment); + virtual unsigned int setCapacityIncrement(unsigned increment) APPLE_KEXT_OVERRIDE; /*! @@ -347,7 +347,7 @@ class OSOrderedSet : public OSCollection * * There is no way to reduce the capacity of an OSOrderedSet. */ - virtual unsigned int ensureCapacity(unsigned int newCapacity); + virtual unsigned int ensureCapacity(unsigned int newCapacity) APPLE_KEXT_OVERRIDE; /*! 
@@ -360,7 +360,7 @@ class OSOrderedSet : public OSCollection * The ordered set's capacity (and therefore direct memory consumption) * is not reduced by this function. */ - virtual void flushCollection(); + virtual void flushCollection() APPLE_KEXT_OVERRIDE; /*! @@ -706,7 +706,7 @@ class OSOrderedSet : public OSCollection * if the other object is derived from OSOrderedSet * and compares equal as an OSOrderedSet. */ - virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE; /*! @@ -734,7 +734,7 @@ class OSOrderedSet : public OSCollection virtual unsigned setOptions( unsigned options, unsigned mask, - void * context = 0); + void * context = 0) APPLE_KEXT_OVERRIDE; /*! @@ -759,7 +759,7 @@ class OSOrderedSet : public OSCollection * Objects that are not derived from OSCollection are retained * rather than copied. */ - OSCollection *copyCollection(OSDictionary * cycleDict = 0); + OSCollection *copyCollection(OSDictionary * cycleDict = 0) APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(OSOrderedSet, 0); OSMetaClassDeclareReservedUnused(OSOrderedSet, 1); diff --git a/libkern/libkern/c++/OSSerialize.h b/libkern/libkern/c++/OSSerialize.h index 4d3d56fb6..0ffb861d9 100644 --- a/libkern/libkern/c++/OSSerialize.h +++ b/libkern/libkern/c++/OSSerialize.h @@ -305,7 +305,7 @@ class OSSerialize : public OSObject virtual unsigned int getCapacityIncrement() const; virtual unsigned int setCapacityIncrement(unsigned increment); virtual unsigned int ensureCapacity(unsigned int newCapacity); - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(OSSerialize, 0); OSMetaClassDeclareReservedUnused(OSSerialize, 1); @@ -337,7 +337,7 @@ class OSSerializer : public OSObject OSSerializerCallback callback, void * ref = 0); - virtual bool serialize(OSSerialize * serializer) const; + virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE; 
}; #endif /* _OS_OSSERIALIZE_H */ diff --git a/libkern/libkern/c++/OSSet.h b/libkern/libkern/c++/OSSet.h index 558f4d1da..6637fa2a8 100644 --- a/libkern/libkern/c++/OSSet.h +++ b/libkern/libkern/c++/OSSet.h @@ -94,9 +94,9 @@ class OSSet : public OSCollection /* * OSCollectionIterator interfaces. */ - virtual unsigned int iteratorSize() const; - virtual bool initIterator(void * iterator) const; - virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const; + virtual unsigned int iteratorSize() const APPLE_KEXT_OVERRIDE; + virtual bool initIterator(void * iterator) const APPLE_KEXT_OVERRIDE; + virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const APPLE_KEXT_OVERRIDE; struct ExpansionData { }; @@ -390,7 +390,7 @@ class OSSet : public OSCollection * release@/link * instead. */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! @@ -402,7 +402,7 @@ class OSSet : public OSCollection * @result * The current number of objects within the set. */ - virtual unsigned int getCount() const; + virtual unsigned int getCount() const APPLE_KEXT_OVERRIDE; /*! @@ -427,7 +427,7 @@ class OSSet : public OSCollection * //apple_ref/cpp/instm/OSSet/ensureCapacity/virtualunsignedint/(unsignedint) * ensureCapacity@/link. */ - virtual unsigned int getCapacity() const; + virtual unsigned int getCapacity() const APPLE_KEXT_OVERRIDE; /*! @@ -443,7 +443,7 @@ class OSSet : public OSCollection * An OSSet allocates storage for objects in multiples * of the capacity increment. */ - virtual unsigned int getCapacityIncrement() const; + virtual unsigned int getCapacityIncrement() const APPLE_KEXT_OVERRIDE; /*! @@ -461,7 +461,7 @@ class OSSet : public OSCollection * of the capacity increment. * Calling this function does not immediately reallocate storage. */ - virtual unsigned int setCapacityIncrement(unsigned increment); + virtual unsigned int setCapacityIncrement(unsigned increment) APPLE_KEXT_OVERRIDE; /*! 
@@ -486,7 +486,7 @@ class OSSet : public OSCollection * * There is no way to reduce the capacity of an OSSet. */ - virtual unsigned int ensureCapacity(unsigned int newCapacity); + virtual unsigned int ensureCapacity(unsigned int newCapacity) APPLE_KEXT_OVERRIDE; /*! @@ -499,7 +499,7 @@ class OSSet : public OSCollection * The set's capacity (and therefore direct memory consumption) * is not reduced by this function. */ - virtual void flushCollection(); + virtual void flushCollection() APPLE_KEXT_OVERRIDE; /*! @@ -698,7 +698,7 @@ class OSSet : public OSCollection * An OSSet object is considered equal to another object if the other object * is derived from OSSet and compares equal as a set. */ - virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE; /*! @@ -713,7 +713,7 @@ class OSSet : public OSCollection * @result * true if serialization succeeds, false if not. */ - virtual bool serialize(OSSerialize * serializer) const; + virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE; /*! @@ -739,7 +739,7 @@ class OSSet : public OSCollection * Child collections' options are changed only if the receiving set's * options actually change. */ - virtual unsigned setOptions(unsigned options, unsigned mask, void * context = 0); + virtual unsigned setOptions(unsigned options, unsigned mask, void * context = 0) APPLE_KEXT_OVERRIDE; /*! @@ -764,7 +764,7 @@ class OSSet : public OSCollection * Objects that are not derived from OSCollection are retained * rather than copied. 
*/ - OSCollection *copyCollection(OSDictionary *cycleDict = 0); + OSCollection *copyCollection(OSDictionary *cycleDict = 0) APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(OSSet, 0); OSMetaClassDeclareReservedUnused(OSSet, 1); diff --git a/libkern/libkern/c++/OSString.h b/libkern/libkern/c++/OSString.h index 29c8be084..5ce0e5f6e 100644 --- a/libkern/libkern/c++/OSString.h +++ b/libkern/libkern/c++/OSString.h @@ -180,7 +180,9 @@ class OSString : public OSObject */ static OSString * withCStringNoCopy(const char * cString); +#if XNU_KERNEL_PRIVATE static OSString * withStringOfLength(const char *cString, size_t length); +#endif /* XNU_KERNEL_PRIVATE */ /*! * @function initWithString @@ -264,7 +266,7 @@ class OSString : public OSObject * release@/link * instead. */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; /*! @@ -375,7 +377,7 @@ class OSString : public OSObject * if that object is derived from OSString * and contains the equivalent bytes of the same length. */ - virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE; /*! @@ -415,7 +417,7 @@ class OSString : public OSObject * @result * true if serialization succeeds, false if not. */ - virtual bool serialize(OSSerialize * serializer) const; + virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE; OSMetaClassDeclareReservedUnused(OSString, 0); OSMetaClassDeclareReservedUnused(OSString, 1); diff --git a/libkern/libkern/c++/OSSymbol.h b/libkern/libkern/c++/OSSymbol.h index d3ae9e1e1..5fe2f46f7 100644 --- a/libkern/libkern/c++/OSSymbol.h +++ b/libkern/libkern/c++/OSSymbol.h @@ -112,7 +112,7 @@ class OSSymbol : public OSString * Overrides OSString's implementation to prevent creation * of distinct OSSymbols with the same string value. */ - virtual bool initWithString(const OSString * aString); + virtual bool initWithString(const OSString * aString) APPLE_KEXT_OVERRIDE; /*! 
@@ -130,7 +130,7 @@ class OSSymbol : public OSString * Overrides OSString's implementation to prevent creation * of distinct OSSymbols with the same string value. */ - virtual bool initWithCString(const char * cString); + virtual bool initWithCString(const char * cString) APPLE_KEXT_OVERRIDE; /*! @@ -148,7 +148,7 @@ class OSSymbol : public OSString * Overrides OSString's implementation to prevent creation * of distinct OSSymbols with the same string value. */ - virtual bool initWithCStringNoCopy(const char *cString); + virtual bool initWithCStringNoCopy(const char *cString) APPLE_KEXT_OVERRIDE; protected: @@ -174,7 +174,7 @@ class OSSymbol : public OSString */ virtual void taggedRelease( const void * tag, - const int freeWhen) const; + const int freeWhen) const APPLE_KEXT_OVERRIDE; // xx-review: should we just omit this from headerdoc? @@ -193,7 +193,7 @@ class OSSymbol : public OSString * must synchronize access to the class-internal tables * used to track those instances. */ - virtual void free(); + virtual void free() APPLE_KEXT_OVERRIDE; public: @@ -227,7 +227,7 @@ class OSSymbol : public OSString * //apple_ref/cpp/instm/OSObject/taggedRelease/virtualvoid/(constvoid*,constint) * OSObject::taggedRelease(const void *, const int)@/link. */ - virtual void taggedRelease(const void * tag) const; + virtual void taggedRelease(const void * tag) const APPLE_KEXT_OVERRIDE; /*! @@ -345,7 +345,7 @@ class OSSymbol : public OSString * are equivalent to the C string's, * false otherwise. */ - virtual bool isEqualTo(const char * cString) const; + virtual bool isEqualTo(const char * cString) const APPLE_KEXT_OVERRIDE; /*! @@ -363,7 +363,7 @@ class OSSymbol : public OSString * @link //apple_ref/doc/class/OSMetaClassBase OSString@/link * and contains the equivalent bytes of the same length. 
*/ - virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE; #ifdef XNU_KERNEL_PRIVATE diff --git a/libkern/libkern/crypto/sha2.h b/libkern/libkern/crypto/sha2.h index af660ec9e..8fe2a54dc 100644 --- a/libkern/libkern/crypto/sha2.h +++ b/libkern/libkern/crypto/sha2.h @@ -40,7 +40,7 @@ extern "C" { #define SHA256_DIGEST_LENGTH CCSHA256_OUTPUT_SIZE #define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1) #define SHA384_BLOCK_LENGTH CCSHA512_BLOCK_SIZE -#define SHA384_DIGEST_LENGTH CCSHA512_OUTPUT_SIZE +#define SHA384_DIGEST_LENGTH CCSHA384_OUTPUT_SIZE #define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1) #define SHA512_BLOCK_LENGTH CCSHA512_BLOCK_SIZE #define SHA512_DIGEST_LENGTH CCSHA512_OUTPUT_SIZE diff --git a/libkern/libkern/kxld.h b/libkern/libkern/kxld.h index 4fa1e9021..6b3ef392b 100644 --- a/libkern/libkern/kxld.h +++ b/libkern/libkern/kxld.h @@ -50,6 +50,7 @@ * flags Flags to control the behavior of kxld * cputype The target arch's CPU type (0 for host arch) * cpusubtype The target arch's CPU subtype (0 for host subtype) +* pagesize The target page size (0 for host page size) *******************************************************************************/ kern_return_t kxld_create_context( KXLDContext **context, @@ -57,7 +58,8 @@ kern_return_t kxld_create_context( KXLDLoggingCallback log_callback, KXLDFlags flags, cpu_type_t cputype, - cpu_subtype_t cpusubtype) + cpu_subtype_t cpusubtype, + vm_size_t pagesize) __attribute__((nonnull(1,2),visibility("default"))); /******************************************************************************* diff --git a/libkern/libkern/zlib.h b/libkern/libkern/zlib.h index 3f5a5c2c3..c859769d1 100644 --- a/libkern/libkern/zlib.h +++ b/libkern/libkern/zlib.h @@ -621,6 +621,15 @@ ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, destination. 
*/ +#if XNU_KERNEL_PRIVATE + +typedef int (*z_input_func) (z_streamp strm, Bytef *buf, unsigned size); +typedef int (*z_output_func)(z_streamp strm, Bytef *buf, unsigned size); + +ZEXTERN int ZEXPORT deflateResetWithIO(z_streamp strm, z_input_func zinput, z_output_func zoutput); + +#endif /* XNU_KERNEL_PRIVATE */ + ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); /* This function is equivalent to deflateEnd followed by deflateInit, @@ -1091,6 +1100,12 @@ ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. */ +#if XNU_KERNEL_PRIVATE + +ZEXTERN uLong zlib_deflate_memory_size(int wbits, int memlevel); + +#endif /* XNU_KERNEL_PRIVATE */ + #if !KERNEL typedef voidp gzFile; diff --git a/libkern/x86_64/OSAtomic.s b/libkern/x86_64/OSAtomic.s deleted file mode 100644 index f3a7e617c..000000000 --- a/libkern/x86_64/OSAtomic.s +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#;*************************************************************************** -#;* Boolean OSCompareAndSwap(SInt32 oldValue, SInt32 newValue, SInt32 *ptr) * -#;*************************************************************************** - - .globl _OSCompareAndSwap -_OSCompareAndSwap: #;oldValue, newValue, ptr -#if DEBUG - test $3, %rdx - jz 1f - ud2 -1: -#endif - movl %edi, %eax - lock - cmpxchgl %esi, (%rdx) #; CAS (eax is an implicit operand) - sete %al #; did CAS succeed? (TZ=1) - movzbq %al, %rax #; clear out the high bytes - ret - -#;***************************************************************************** -#;* Boolean OSCompareAndSwap64(SInt64 oldValue, SInt64 newValue, SInt64 *ptr) * -#;***************************************************************************** - - .globl _OSCompareAndSwap64 - .globl _OSCompareAndSwapPtr - -_OSCompareAndSwap64: -_OSCompareAndSwapPtr: #;oldValue, newValue, ptr -#if DEBUG - test $7, %rdx - jz 1f - ud2 -1: -#endif - movq %rdi, %rax - lock - cmpxchgq %rsi, (%rdx) #; CAS (rax is an implicit operand) - sete %al #; did CAS succeed? 
(TZ=1) - movzbq %al, %rax #; clear out the high bytes - ret - -#;******************************************************* -#;* SInt64 OSAddAtomic64(SInt64 theAmount, SInt64 *ptr) * -#;******************************************************* - - .globl _OSAddAtomicLong - .globl _OSAddAtomic64 -_OSAddAtomic64: -_OSAddAtomicLong: -#if DEBUG - test $7, %rsi - jz 1f - ud2 -1: -#endif - lock - xaddq %rdi, (%rsi) #; Atomic exchange and add - movq %rdi, %rax; - ret - - -#;******************************************************* -#; SInt32 OSAddAtomic(SInt32 delta, SInt32 *address) -#;******************************************************* - - .globl _OSAddAtomic -_OSAddAtomic: -#if DEBUG - test $3, %rsi - jz 1f - ud2 -1: -#endif - lock - xaddl %edi, (%rsi) #; Atomic exchange and add - movl %edi, %eax; - ret diff --git a/libkern/zlib/deflate.c b/libkern/zlib/deflate.c index 069331e7f..6323a0e18 100644 --- a/libkern/zlib/deflate.c +++ b/libkern/zlib/deflate.c @@ -383,6 +383,20 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) } /* ========================================================================= */ + +ZEXTERN int ZEXPORT deflateResetWithIO(z_streamp strm, z_input_func zinput, z_output_func zoutput) +{ + int zerr; + + zerr = deflateReset(strm); + if (Z_OK != zerr) return (zerr); + strm->state->zinput = zinput; + strm->state->zoutput = zoutput; + return Z_OK; +} + +/* ========================================================================= */ + int ZEXPORT deflateReset (strm) z_streamp strm; { @@ -400,6 +414,8 @@ int ZEXPORT deflateReset (strm) s = (deflate_state *)strm->state; s->pending = 0; s->pending_out = s->pending_buf; + s->zinput = &read_buf; + s->zoutput = NULL; if (s->wrap < 0) { s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ @@ -563,14 +579,18 @@ local void flush_pending(strm) { unsigned len = strm->state->pending; - if (len > strm->avail_out) len = strm->avail_out; - if (len == 0) return; + if (strm->state->zoutput) { 
+ len = (*strm->state->zoutput)(strm, strm->state->pending_out, len); + } else { + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + zmemcpy(strm->next_out, strm->state->pending_out, len); + strm->next_out += len; + strm->avail_out -= len; + } - zmemcpy(strm->next_out, strm->state->pending_out, len); - strm->next_out += len; strm->state->pending_out += len; strm->total_out += len; - strm->avail_out -= len; strm->state->pending -= len; if (strm->state->pending == 0) { strm->state->pending_out = strm->state->pending_buf; @@ -1368,7 +1388,7 @@ local void fill_window(s) */ Assert(more >= 2, "more < 2"); - n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + n = (*s->zinput)(s->strm, s->window + s->strstart + s->lookahead, more); s->lookahead += n; /* Initialize the hash value now that we have some input: */ @@ -1763,3 +1783,12 @@ local block_state deflate_rle(s, flush) return flush == Z_FINISH ? finish_done : block_done; } #endif + +#if XNU_KERNEL_PRIVATE + +uLong zlib_deflate_memory_size(int wbits, int memlevel) +{ + return (31 + sizeof(deflate_state) + (1 << (wbits + 2)) + (1 << (memlevel + 9))); +} + +#endif /* XNU_KERNEL_PRIVATE */ diff --git a/libkern/zlib/deflate.h b/libkern/zlib/deflate.h index 6378b20ab..a2c347a31 100644 --- a/libkern/zlib/deflate.h +++ b/libkern/zlib/deflate.h @@ -120,6 +120,8 @@ typedef unsigned IPos; typedef struct internal_state { z_streamp strm; /* pointer back to this zlib stream */ + z_input_func zinput; + z_output_func zoutput; int status; /* as the name implies */ Bytef *pending_buf; /* output still pending */ ulg pending_buf_size; /* size of pending_buf */ diff --git a/libsa/conf/Makefile.template b/libsa/conf/Makefile.template index 940446104..657ce25e7 100644 --- a/libsa/conf/Makefile.template +++ b/libsa/conf/Makefile.template @@ -61,15 +61,17 @@ $(SOBJS): .SFLAGS .SFLAGS: ALWAYS $(_v)$(REPLACECONTENTS) $@ $(S_KCC) $(SFLAGS) $(INCFLAGS) +KLD_FILES = $(OBJS) + $(COMPONENT).filelist: 
$(OBJS) - $(_v)for kld_file in ${OBJS}; do \ - $(SEG_HACK) -n __KLD -o $${kld_file}__ $${kld_file} ; \ - mv $${kld_file}__ $${kld_file} ; \ + $(_v)for kld_file in ${KLD_FILES}; do \ + $(SEG_HACK) -n __KLD -o $${kld_file}__ $${kld_file} || exit 1; \ + mv $${kld_file}__ $${kld_file} || exit 1; \ done @echo LDFILELIST $(COMPONENT) - $(_v)( for obj in ${OBJS}; do \ + $(_v)for obj in ${OBJS}; do \ echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ - done; ) > $(COMPONENT).filelist + done > $(COMPONENT).filelist do_all: $(COMPONENT).filelist @@ -80,3 +82,10 @@ do_build_all:: do_all include $(MakeInc_rule) include $(MakeInc_dir) + +# the KLD segment is mapped read-only on arm, so if we include llvm profiling +# here it will segfault the kernel. (see arm_vm_init.c) We don't currently have +# a way of retrieving these counters from KLD anyway, so there's no harm in just +# disabling them. +CXXFLAGS_GEN:=$(filter-out -fprofile-instr-generate,$(CXXFLAGS_GEN)) +CFLAGS_GEN:=$(filter-out -fprofile-instr-generate,$(CFLAGS_GEN)) diff --git a/libsa/lastkerneldataconst.c b/libsa/lastkerneldataconst.c new file mode 100644 index 000000000..9b8db0b51 --- /dev/null +++ b/libsa/lastkerneldataconst.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include <mach/vm_param.h> + +/* + * This file is compiled and linked to be the last .o of the __const section + * of the __DATA segment (see MakeInc.kernel, lastkernelconstructor is placed + * in the __LAST segment.) + * + * This blank page allows us to safely map the const section RO while the rest + * of __DATA is RW. This is needed since ld has no way of specifying section size + * alignment and no straight forward way to specify section ordering. + */ + +#define PAD_SIZE PAGE_SIZE + +static const uint8_t __attribute__((section("__DATA,__const"))) data_const_padding[PAD_SIZE] = {[0 ... 
PAD_SIZE-1] = 0xFF}; +const vm_offset_t __attribute__((section("__DATA,__data"))) _lastkerneldataconst = (vm_offset_t)&data_const_padding[0]; +const vm_size_t __attribute__((section("__DATA,__data"))) _lastkerneldataconst_padsize = sizeof(data_const_padding); diff --git a/libsyscall/Libsyscall.xcconfig b/libsyscall/Libsyscall.xcconfig index ddebd34a2..a1c2fc8d4 100644 --- a/libsyscall/Libsyscall.xcconfig +++ b/libsyscall/Libsyscall.xcconfig @@ -1,18 +1,14 @@ #include "<DEVELOPER_DIR>/Makefiles/CoreOS/Xcode/BSD.xcconfig" -#include "<DEVELOPER_DIR>/AppleInternal/XcodeConfig/SimulatorSupport.xcconfig" -// Set INSTALL_PATH[sdk=macosx*] when SimulatorSupport.xcconfig is unavailable -INSTALL_PATH[sdk=macosx*] = $(INSTALL_PATH_ACTUAL) - BUILD_VARIANTS = normal -SUPPORTED_PLATFORMS = macosx iphoneos iphoneosnano +SUPPORTED_PLATFORMS = macosx iphoneos iphoneosnano tvos appletvos watchos ONLY_ACTIVE_ARCH = NO DEAD_CODE_STRIPPING = YES DEBUG_INFORMATION_FORMAT = dwarf-with-dsym -INSTALL_PATH_ACTUAL = /usr/lib/system -PUBLIC_HEADERS_FOLDER_PATH = $(INSTALL_PATH_PREFIX)/usr/include -PRIVATE_HEADERS_FOLDER_PATH = $(INSTALL_PATH_PREFIX)/usr/local/include -OS_PRIVATE_HEADERS_FOLDER_PATH = $(INSTALL_PATH_PREFIX)/usr/local/include/os +INSTALL_PATH = /usr/lib/system +PUBLIC_HEADERS_FOLDER_PATH = /usr/include +PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include +OS_PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include/os EXECUTABLE_PREFIX = libsystem_ PRODUCT_NAME = kernel ALWAYS_SEARCH_USER_PATHS = NO @@ -21,6 +17,9 @@ OTHER_CFLAGS = -fdollars-in-identifiers -no-cpp-precomp -fno-common -fno-stack-p OTHER_CFLAGS[sdk=macosx*] = $(inherited) -DSYSCALL_PRE1050 OTHER_CFLAGS[sdk=macosx*][arch=x86_64*] = $(inherited) -DNO_SYSCALL_LEGACY OTHER_CFLAGS[sdk=iphoneos*] = $(inherited) -DNO_SYSCALL_LEGACY +OTHER_CFLAGS[sdk=watchos*] = $(inherited) -DNO_SYSCALL_LEGACY +OTHER_CFLAGS[sdk=tvos*] = $(inherited) -DNO_SYSCALL_LEGACY +OTHER_CFLAGS[sdk=appletvos*] = $(inherited) -DNO_SYSCALL_LEGACY GCC_PREPROCESSOR_DEFINITIONS = CF_OPEN_SOURCE 
CF_EXCLUDE_CSTD_HEADERS DEBUG _FORTIFY_SOURCE=0 HEADER_SEARCH_PATHS = $(PROJECT_DIR)/mach $(PROJECT_DIR)/os $(PROJECT_DIR)/wrappers $(PROJECT_DIR)/wrappers/string $(PROJECT_DIR)/wrappers/libproc $(PROJECT_DIR)/wrappers/libproc/spawn $(BUILT_PRODUCTS_DIR)/internal_hdr/include $(BUILT_PRODUCTS_DIR)/mig_hdr/local/include $(BUILT_PRODUCTS_DIR)/mig_hdr/include $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders WARNING_CFLAGS = -Wmost @@ -30,6 +29,9 @@ CODE_SIGN_IDENTITY = - DYLIB_CURRENT_VERSION = $(RC_ProjectSourceVersion) DYLIB_LDFLAGS = -umbrella System -all_load -Wl,-alias_list,$(SRCROOT)/Libsyscall.aliases DYLIB_LDFLAGS[sdk=iphoneos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000 +DYLIB_LDFLAGS[sdk=watchos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000 +DYLIB_LDFLAGS[sdk=tvos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000 +DYLIB_LDFLAGS[sdk=appletvos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000 OTHER_LDFLAGS = INSTALLHDRS_SCRIPT_PHASE = YES INSTALLHDRS_COPY_PHASE = YES diff --git a/libsyscall/Libsyscall.xcodeproj/project.pbxproj b/libsyscall/Libsyscall.xcodeproj/project.pbxproj index ccd84ed54..824393376 100644 --- a/libsyscall/Libsyscall.xcodeproj/project.pbxproj +++ b/libsyscall/Libsyscall.xcodeproj/project.pbxproj @@ -45,6 +45,7 @@ /* Begin PBXBuildFile section */ 030B179B135377B400DAD1F0 /* open_dprotected_np.c in Sources */ = {isa = PBXBuildFile; fileRef = 030B179A135377B400DAD1F0 /* open_dprotected_np.c */; }; + 13B598941A142F6400DB2D5A /* stackshot.c in Sources */ = {isa = PBXBuildFile; fileRef = 13B598931A142F5900DB2D5A /* stackshot.c */; }; 240BAC4C1214770F000A1719 /* memcpy.c in Sources */ = {isa = PBXBuildFile; fileRef = 24B028D511FF4FBB00CA64A9 /* memcpy.c */; }; 2419382B12135FF6003CDE41 /* chmod.c in Sources */ = {isa = PBXBuildFile; fileRef = 2419382A12135FF6003CDE41 /* chmod.c */; }; 242AB66611EBDC1200107336 /* errno.c in Sources */ = {isa = PBXBuildFile; fileRef = 242AB66511EBDC1200107336 /* errno.c */; }; @@ 
-104,6 +105,7 @@ 29A59AE6183B110C00E8B896 /* unlinkat.c in Sources */ = {isa = PBXBuildFile; fileRef = 29A59AE5183B110C00E8B896 /* unlinkat.c */; }; 2BA88DCC1810A3CE00EB63F6 /* coalition.c in Sources */ = {isa = PBXBuildFile; fileRef = 2BA88DCB1810A3CE00EB63F6 /* coalition.c */; }; 374A36E314748F1300AAF39D /* varargs_wrappers.s in Sources */ = {isa = PBXBuildFile; fileRef = 374A36E214748EE400AAF39D /* varargs_wrappers.s */; }; + 435F3CAA1B06B7BA005ED9EF /* work_interval.c in Sources */ = {isa = PBXBuildFile; fileRef = 435F3CA91B06B7BA005ED9EF /* work_interval.c */; }; 467DAFD4157E8AF200CE68F0 /* guarded_open_np.c in Sources */ = {isa = PBXBuildFile; fileRef = 467DAFD3157E8AF200CE68F0 /* guarded_open_np.c */; }; 4BDD5F1D1891AB2F004BF300 /* mach_approximate_time.c in Sources */ = {isa = PBXBuildFile; fileRef = 4BDD5F1B1891AB2F004BF300 /* mach_approximate_time.c */; }; 4BDD5F1E1891AB2F004BF300 /* mach_approximate_time.s in Sources */ = {isa = PBXBuildFile; fileRef = 4BDD5F1C1891AB2F004BF300 /* mach_approximate_time.s */; }; @@ -111,7 +113,6 @@ 72B1E6ED190723DB00FB3FA2 /* guarded_open_dprotected_np.c in Sources */ = {isa = PBXBuildFile; fileRef = 72B1E6EC190723DB00FB3FA2 /* guarded_open_dprotected_np.c */; }; 74119F46188F3B6A00C6F48F /* vm_page_size.h in Headers */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; }; 7466C924170CBA53004557CC /* vm_page_size.h in Headers */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; }; - 746C7FEA18E48791008639D7 /* vm_page_size.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; }; 74F3290B18EB269400B2B70E /* vm_page_size.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; }; 7AE28FDF18AC41B1006A5626 /* csr.c in Sources */ = {isa = PBXBuildFile; fileRef = 7AE28FDE18AC41B1006A5626 /* csr.c */; }; 9002401118FC9A7F00D73BFA /* rename_ext.c in Sources */ = {isa = PBXBuildFile; fileRef 
= 906AA2D018F74CD1001C681A /* rename_ext.c */; }; @@ -124,6 +125,7 @@ BA4414B518336E3600AAE813 /* mach in Copy Files */ = {isa = PBXBuildFile; fileRef = BA4414A51833697C00AAE813 /* mach */; }; BA4414B618336E3A00AAE813 /* servers in Copy Files */ = {isa = PBXBuildFile; fileRef = BA4414A6183369A100AAE813 /* servers */; }; BA4414B818336E6F00AAE813 /* mach in CopyFiles */ = {isa = PBXBuildFile; fileRef = BA4414A7183369C100AAE813 /* mach */; }; + BABA36CB1A856C4700BBBCF7 /* host.c in Sources */ = {isa = PBXBuildFile; fileRef = BABA36CA1A856C4700BBBCF7 /* host.c */; }; C639F0E51741C25800A39F47 /* gethostuuid.h in Headers */ = {isa = PBXBuildFile; fileRef = C639F0E41741C09A00A39F47 /* gethostuuid.h */; settings = {ATTRIBUTES = (Public, ); }; }; C6460B7C182025DF00F73CCA /* sfi.c in Sources */ = {isa = PBXBuildFile; fileRef = C6460B7B182025DF00F73CCA /* sfi.c */; }; C6AB38DB174202C10036DD9F /* gethostuuid.h in Headers */ = {isa = PBXBuildFile; fileRef = C639F0E41741C09A00A39F47 /* gethostuuid.h */; settings = {ATTRIBUTES = (Public, ); }; }; @@ -292,7 +294,6 @@ dstSubfolderSpec = 0; files = ( BA4414AD18336A9300AAE813 /* mach in CopyFiles */, - 746C7FEA18E48791008639D7 /* vm_page_size.h in CopyFiles */, ); runOnlyForDeploymentPostprocessing = 1; }; @@ -343,6 +344,7 @@ /* Begin PBXFileReference section */ 030B179A135377B400DAD1F0 /* open_dprotected_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = open_dprotected_np.c; sourceTree = "<group>"; }; + 13B598931A142F5900DB2D5A /* stackshot.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = stackshot.c; sourceTree = "<group>"; }; 240D716711933ED300556E97 /* mach_install_mig.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = mach_install_mig.sh; sourceTree = "<group>"; }; 2419382A12135FF6003CDE41 /* chmod.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = chmod.c; sourceTree = "<group>"; }; 
242AB66511EBDC1200107336 /* errno.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = errno.c; sourceTree = ""; }; @@ -428,6 +430,7 @@ 2BA88DCB1810A3CE00EB63F6 /* coalition.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = coalition.c; sourceTree = ""; }; 374A36E214748EE400AAF39D /* varargs_wrappers.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = varargs_wrappers.s; sourceTree = ""; }; 37DDFB7614748713009D3355 /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = ""; }; + 435F3CA91B06B7BA005ED9EF /* work_interval.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = work_interval.c; sourceTree = ""; }; 467DAFD3157E8AF200CE68F0 /* guarded_open_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = guarded_open_np.c; sourceTree = ""; }; 4BDD5F1B1891AB2F004BF300 /* mach_approximate_time.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_approximate_time.c; sourceTree = ""; }; 4BDD5F1C1891AB2F004BF300 /* mach_approximate_time.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = mach_approximate_time.s; sourceTree = ""; }; @@ -442,6 +445,7 @@ BA4414A6183369A100AAE813 /* servers */ = {isa = PBXFileReference; lastKnownFileType = text; name = servers; path = mig_hdr/include/servers; sourceTree = BUILT_PRODUCTS_DIR; }; BA4414A7183369C100AAE813 /* mach */ = {isa = PBXFileReference; lastKnownFileType = text; name = mach; path = mig_hdr/local/include/mach; sourceTree = BUILT_PRODUCTS_DIR; }; BA5CDB4018AEBAD500E37982 /* __thread_selfusage.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __thread_selfusage.s; sourceTree = ""; }; + BABA36CA1A856C4700BBBCF7 /* host.c */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = host.c; sourceTree = ""; }; C639F0E41741C09A00A39F47 /* gethostuuid.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = gethostuuid.h; sourceTree = ""; }; C6460B7B182025DF00F73CCA /* sfi.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = sfi.c; sourceTree = ""; }; C6BEE9171806840200D25AAB /* posix_sem_obsolete.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = posix_sem_obsolete.c; sourceTree = ""; }; @@ -606,6 +610,7 @@ C9D9BCD7114B00600000D8B9 /* fprintf_stderr.c */, C9D9BCD8114B00600000D8B9 /* mach */, C9D9BCE4114B00600000D8B9 /* host_priv.defs */, + BABA36CA1A856C4700BBBCF7 /* host.c */, C9D9BCE5114B00600000D8B9 /* host_security.defs */, C9D9BCEA114B00600000D8B9 /* lock_set.defs */, C9D9BCEB114B00600000D8B9 /* mach_error_string.c */, @@ -722,10 +727,12 @@ C962B16B18DBA2C80031244A /* setpriority.c */, C6460B7B182025DF00F73CCA /* sfi.c */, 24B223B3121DFF12007DAEDE /* sigsuspend-base.c */, + 13B598931A142F5900DB2D5A /* stackshot.c */, 248AA962122C7B2A0085F5B1 /* unlink.c */, 29A59AE5183B110C00E8B896 /* unlinkat.c */, 374A36E214748EE400AAF39D /* varargs_wrappers.s */, BA0D9FB0199031AD007E8A73 /* kdebug_trace.c */, + 435F3CA91B06B7BA005ED9EF /* work_interval.c */, ); path = wrappers; sourceTree = ""; @@ -1223,6 +1230,7 @@ C9D9BD21114B00600000D8B9 /* exc_catcher.c in Sources */, C9D9BD24114B00600000D8B9 /* fprintf_stderr.c in Sources */, 72B1E6ED190723DB00FB3FA2 /* guarded_open_dprotected_np.c in Sources */, + BABA36CB1A856C4700BBBCF7 /* host.c in Sources */, C9D9BD36114B00600000D8B9 /* mach_error_string.c in Sources */, C9D9BD37114B00600000D8B9 /* mach_error.c in Sources */, C9D9BD3B114B00600000D8B9 /* mach_init.c in Sources */, @@ -1268,6 +1276,7 @@ 248BA01D121C56BF008C073F /* connect.c in Sources */, 248BA01F121C607E008C073F /* fchmod.c in Sources */, E4D45C3616F86BD80002AF25 /* 
posix_spawn.c in Sources */, + 13B598941A142F6400DB2D5A /* stackshot.c in Sources */, C962B16C18DBA2C80031244A /* setpriority.c in Sources */, 248BA04F121C8F06008C073F /* fcntl.c in Sources */, 248BA05C121C9649008C073F /* fcntl-cancel.c in Sources */, @@ -1297,6 +1306,7 @@ 24B223B5121DFF29007DAEDE /* sigsuspend.c in Sources */, 248AA963122C7B2A0085F5B1 /* unlink.c in Sources */, 248AA965122C7C330085F5B1 /* rmdir.c in Sources */, + 435F3CAA1B06B7BA005ED9EF /* work_interval.c in Sources */, 248AA967122C7CDA0085F5B1 /* rename.c in Sources */, 24B8C2621237F53900D36CC3 /* remove-counter.c in Sources */, C99A4F501305B2BD0054B7B7 /* __get_cpu_capabilities.s in Sources */, @@ -1350,7 +1360,7 @@ baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */; buildSettings = { COPY_PHASE_STRIP = NO; - INSTALL_PATH_ACTUAL = /usr/local/lib/dyld; + INSTALL_PATH = /usr/local/lib/dyld; STRIP_INSTALLED_PRODUCT = NO; }; name = Release; @@ -1377,9 +1387,12 @@ COPY_PHASE_STRIP = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; MAP_PLATFORM = "$(MAP_PLATFORM_$(PLATFORM_NAME))"; + MAP_PLATFORM_appletvos = iPhoneOS; MAP_PLATFORM_iphoneos = iPhoneOS; MAP_PLATFORM_iphoneosnano = iPhoneOS; MAP_PLATFORM_macosx = MacOSX; + MAP_PLATFORM_tvos = iPhoneOS; + MAP_PLATFORM_watchos = iPhoneOS; PRODUCT_NAME = Syscalls; STRIP_STYLE = debugging; }; diff --git a/libsyscall/mach/host.c b/libsyscall/mach/host.c new file mode 100644 index 000000000..335038434 --- /dev/null +++ b/libsyscall/mach/host.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include + +kern_return_t +host_get_atm_diagnostic_flag(host_t host __unused, + uint32_t *diagnostic_flag) +{ + volatile uint32_t *diagnostic_flag_address = (volatile uint32_t *)(uintptr_t)(_COMM_PAGE_ATM_DIAGNOSTIC_CONFIG); + *diagnostic_flag = *diagnostic_flag_address; + return KERN_SUCCESS; +} + diff --git a/libsyscall/mach/mach/mach.h b/libsyscall/mach/mach/mach.h index 1d1db7ab3..00abb7216 100644 --- a/libsyscall/mach/mach/mach.h +++ b/libsyscall/mach/mach/mach.h @@ -97,18 +97,24 @@ extern void slot_name(cpu_type_t, extern void mig_reply_setup(mach_msg_header_t *, mach_msg_header_t *); +__WATCHOS_PROHIBITED __TVOS_PROHIBITED extern void mach_msg_destroy(mach_msg_header_t *); +__WATCHOS_PROHIBITED __TVOS_PROHIBITED extern mach_msg_return_t mach_msg_receive(mach_msg_header_t *); +__WATCHOS_PROHIBITED __TVOS_PROHIBITED extern mach_msg_return_t mach_msg_send(mach_msg_header_t *); +__WATCHOS_PROHIBITED __TVOS_PROHIBITED extern mach_msg_return_t 
mach_msg_server_once(boolean_t (*) (mach_msg_header_t *, mach_msg_header_t *), mach_msg_size_t, mach_port_t, mach_msg_options_t); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED extern mach_msg_return_t mach_msg_server(boolean_t (*) (mach_msg_header_t *, mach_msg_header_t *), @@ -116,6 +122,7 @@ extern mach_msg_return_t mach_msg_server(boolean_t (*) mach_port_t, mach_msg_options_t); +__WATCHOS_PROHIBITED __TVOS_PROHIBITED extern mach_msg_return_t mach_msg_server_importance(boolean_t (*) (mach_msg_header_t *, mach_msg_header_t *), diff --git a/libsyscall/mach/mach_init.c b/libsyscall/mach/mach_init.c index 893a06e75..90a42ceb5 100644 --- a/libsyscall/mach/mach_init.c +++ b/libsyscall/mach/mach_init.c @@ -115,6 +115,7 @@ _mach_fork_child(void) return 0; } + void mach_init_doit(void) { diff --git a/libsyscall/mach/mach_msg.c b/libsyscall/mach/mach_msg.c index 676a5392c..bdb446c33 100644 --- a/libsyscall/mach/mach_msg.c +++ b/libsyscall/mach/mach_msg.c @@ -682,141 +682,7 @@ mach_msg_server_importance( mach_port_t rcv_name, mach_msg_options_t options) { - mig_reply_error_t *bufRequest, *bufReply; - mach_msg_size_t request_size; - mach_msg_size_t new_request_alloc; - mach_msg_size_t request_alloc; - mach_msg_size_t trailer_alloc; - mach_msg_size_t reply_alloc; - mach_msg_return_t mr; - kern_return_t kr; - mach_port_t self = mach_task_self_; - int retval = 1; - uint64_t token; - voucher_mach_msg_state_t old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED; - - options &= ~(MACH_SEND_MSG|MACH_RCV_MSG|MACH_RCV_VOUCHER|MACH_RCV_OVERWRITE); - - reply_alloc = (mach_msg_size_t)round_page((options & MACH_SEND_TRAILER) ? 
- (max_size + MAX_TRAILER_SIZE) : max_size); - - kr = vm_allocate(self, - (vm_address_t *)&bufReply, - reply_alloc, - VM_MAKE_TAG(VM_MEMORY_MACH_MSG)|TRUE); - if (kr != KERN_SUCCESS) - return kr; - - request_alloc = 0; - trailer_alloc = REQUESTED_TRAILER_SIZE(options); - new_request_alloc = (mach_msg_size_t)round_page(max_size + trailer_alloc); - - request_size = (options & MACH_RCV_LARGE) ? - new_request_alloc : max_size + trailer_alloc; - - for (;;) { - if (request_alloc < new_request_alloc) { - request_alloc = new_request_alloc; - kr = vm_allocate(self, - (vm_address_t *)&bufRequest, - request_alloc, - VM_MAKE_TAG(VM_MEMORY_MACH_MSG)|TRUE); - if (kr != KERN_SUCCESS) { - vm_deallocate(self, - (vm_address_t)bufReply, - reply_alloc); - return kr; - } - } - - mr = mach_msg(&bufRequest->Head, MACH_RCV_MSG|MACH_RCV_VOUCHER|options, - 0, request_size, rcv_name, - MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); - - if (mr == MACH_MSG_SUCCESS) { - /* we have another request message */ - - old_state = voucher_mach_msg_adopt(&bufRequest->Head); - - retval = proc_importance_assertion_begin_with_msg(&bufRequest->Head, NULL, &token); - - (void) (*demux)(&bufRequest->Head, &bufReply->Head); - - if (!(bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)) { - if (bufReply->RetCode == MIG_NO_REPLY) - bufReply->Head.msgh_remote_port = MACH_PORT_NULL; - else if ((bufReply->RetCode != KERN_SUCCESS) && - (bufRequest->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)) { - /* destroy the request - but not the reply port */ - bufRequest->Head.msgh_remote_port = MACH_PORT_NULL; - mach_msg_destroy(&bufRequest->Head); - } - } - - /* - * We don't want to block indefinitely because the client - * isn't receiving messages from the reply port. - * If we have a send-once right for the reply port, then - * this isn't a concern because the send won't block. - * If we have a send right, we need to use MACH_SEND_TIMEOUT. 
- * To avoid falling off the kernel's fast RPC path, - * we only supply MACH_SEND_TIMEOUT when absolutely necessary. - */ - if (bufReply->Head.msgh_remote_port != MACH_PORT_NULL) { - - mr = mach_msg( - &bufReply->Head, - (MACH_MSGH_BITS_REMOTE(bufReply->Head.msgh_bits) == - MACH_MSG_TYPE_MOVE_SEND_ONCE) ? - MACH_SEND_MSG|options : - MACH_SEND_MSG|MACH_SEND_TIMEOUT|options, - bufReply->Head.msgh_size, 0, MACH_PORT_NULL, - MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); - - if ((mr != MACH_SEND_INVALID_DEST) && - (mr != MACH_SEND_TIMED_OUT)) { - if (retval == 0) - proc_importance_assertion_complete(token); - - voucher_mach_msg_revert(old_state); - old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED; - - continue; - } - mr = MACH_MSG_SUCCESS; - } - if (bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX) - mach_msg_destroy(&bufReply->Head); - if (retval == 0) - proc_importance_assertion_complete(token); - - voucher_mach_msg_revert(old_state); - old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED; - - } /* if (mr == MACH_MSG_SUCCESS) */ - - if ((mr == MACH_RCV_TOO_LARGE) && (options & MACH_RCV_LARGE)) { - new_request_alloc = (mach_msg_size_t)round_page(bufRequest->Head.msgh_size + - trailer_alloc); - request_size = new_request_alloc; - vm_deallocate(self, - (vm_address_t) bufRequest, - request_alloc); - continue; - } else if (mr == MACH_MSG_SUCCESS) - continue; - else - break; - - } /* for(;;) */ - - (void)vm_deallocate(self, - (vm_address_t) bufRequest, - request_alloc); - (void)vm_deallocate(self, - (vm_address_t) bufReply, - reply_alloc); - return mr; + return mach_msg_server(demux, max_size, rcv_name, options); } kern_return_t diff --git a/libsyscall/mach/watchos_prohibited_mig.txt b/libsyscall/mach/watchos_prohibited_mig.txt new file mode 100644 index 000000000..4d27c6243 --- /dev/null +++ b/libsyscall/mach/watchos_prohibited_mig.txt @@ -0,0 +1,53 @@ +__WATCHOS_PROHIBITED __TVOS_PROHIBITED +thread_terminate +act_get_state +act_set_state +thread_get_state +thread_set_state +thread_suspend 
+thread_resume +thread_abort +thread_abort_safely +thread_depress_abort +thread_get_special_port +thread_set_special_port +thread_set_exception_ports +thread_get_exception_ports +thread_swap_exception_ports +thread_get_mach_voucher +thread_set_mach_voucher +thread_swap_mach_voucher +mach_ports_register +mach_ports_lookup +task_suspend +task_resume +task_set_info +task_get_special_port +task_set_special_port +thread_create +thread_create_running +task_set_exception_ports +task_get_exception_ports +task_swap_exception_ports +task_policy_set +task_policy_get +task_zone_info +task_get_state +task_set_state +task_set_phys_footprint_limit +task_suspend2 +task_resume2 +task_get_mach_voucher +task_set_mach_voucher +task_swap_mach_voucher +task_set_port_space +host_request_notification +host_info +task_wire +mach_port_allocate_name +host_create_mach_voucher +host_register_mach_voucher_attr_manager +host_register_well_known_mach_voucher_attr_manager +host_set_atm_diagnostic_flag +host_get_atm_diagnostic_flag + diff --git a/libsyscall/wrappers/cancelable/fcntl-base.c b/libsyscall/wrappers/cancelable/fcntl-base.c index c8808f3ae..e421e0af4 100644 --- a/libsyscall/wrappers/cancelable/fcntl-base.c +++ b/libsyscall/wrappers/cancelable/fcntl-base.c @@ -40,9 +40,15 @@ fcntl(int fd, int cmd, ...) va_start(ap, cmd); switch(cmd) { case F_GETLK: + case F_GETLKPID: case F_SETLK: case F_SETLKW: case F_SETLKWTIMEOUT: + case F_OFD_GETLK: + case F_OFD_GETLKPID: + case F_OFD_SETLK: + case F_OFD_SETLKW: + case F_OFD_SETLKWTIMEOUT: case F_PREALLOCATE: case F_SETSIZE: case F_RDADVISE: @@ -57,6 +63,7 @@ fcntl(int fd, int cmd, ...) 
case F_ADDSIGS: case F_ADDFILESIGS: case F_ADDFILESIGS_FOR_DYLD_SIM: + case F_ADDFILESIGS_RETURN: case F_FINDSIGS: case F_TRANSCODEKEY: arg = va_arg(ap, void *); diff --git a/libsyscall/wrappers/csr.c b/libsyscall/wrappers/csr.c index 2870bf97f..c5944c507 100644 --- a/libsyscall/wrappers/csr.c +++ b/libsyscall/wrappers/csr.c @@ -29,15 +29,10 @@ int __csrctl(csr_op_t op, void *buffer, size_t size); int csr_check(csr_config_t mask) { - return __csrctl(CSR_OP_CHECK, &mask, sizeof(csr_config_t)); + return __csrctl(CSR_SYSCALL_CHECK, &mask, sizeof(csr_config_t)); } int csr_get_active_config(csr_config_t *config) { - return __csrctl(CSR_OP_GET_ACTIVE_CONFIG, config, sizeof(csr_config_t)); -} - -int csr_get_pending_config(csr_config_t *config) -{ - return __csrctl(CSR_OP_GET_PENDING_CONFIG, config, sizeof(csr_config_t)); + return __csrctl(CSR_SYSCALL_GET_ACTIVE_CONFIG, config, sizeof(csr_config_t)); } diff --git a/libsyscall/wrappers/kdebug_trace.c b/libsyscall/wrappers/kdebug_trace.c index 4867f9b51..02f074cab 100644 --- a/libsyscall/wrappers/kdebug_trace.c +++ b/libsyscall/wrappers/kdebug_trace.c @@ -22,40 +22,90 @@ */ #include +#include #include #include #include -#define CLASS_MASK 0xff000000 -#define CLASS_OFFSET 24 -#define SUBCLASS_MASK 0x00ff0000 -#define SUBCLASS_OFFSET 16 +extern int __kdebug_trace64(uint32_t code, uint64_t arg1, uint64_t arg2, + uint64_t arg3, uint64_t arg4); +extern uint64_t __kdebug_trace_string(uint32_t debugid, uint64_t str_id, + const char *str); -#define EXTRACT_CLASS(debugid) ((uint8_t)(((debugid) & CLASS_MASK) >> CLASS_OFFSET)) -#define EXTRACT_SUBCLASS(debugid) ( (uint8_t) ( ((debugid) & SUBCLASS_MASK) >> SUBCLASS_OFFSET ) ) +/* Returns non-zero if tracing is enabled. 
*/ +static int +kdebug_enabled(void) +{ + volatile uint32_t *kdebug_enable_address = + (volatile uint32_t *)(uintptr_t)(_COMM_PAGE_KDEBUG_ENABLE); + + if (*kdebug_enable_address == 0) { + return 0; + } -extern int __kdebug_trace64(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4); + return 1; +} -int -kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) +static int +kdebug_validate_debugid(uint32_t debugid) { - uint8_t code_class; - volatile uint32_t *kdebug_enable_address = (volatile uint32_t *)(uintptr_t)(_COMM_PAGE_KDEBUG_ENABLE); + uint8_t debugid_class; /* - * This filtering is also done in the kernel, but we also do it here so that errors - * are returned in all cases, not just when the system call is actually performed. + * This filtering is also done in the kernel, but we also do it here so + * that errors are returned in all cases, not just when the system call + * is actually performed. */ - code_class = EXTRACT_CLASS(code); - switch (code_class) { + debugid_class = KDBG_EXTRACT_CLASS(debugid); + switch (debugid_class) { case DBG_TRACE: - errno = EPERM; - return -1; + return EPERM; } - if (*kdebug_enable_address == 0) { + return 0; +} + +int +kdebug_trace(uint32_t debugid, uint64_t arg1, uint64_t arg2, uint64_t arg3, + uint64_t arg4) +{ + int err; + + if (!kdebug_enabled()) { return 0; } - - return __kdebug_trace64(code, arg1, arg2, arg3, arg4); + + if ((err = kdebug_validate_debugid(debugid)) != 0) { + errno = err; + return -1; + } + + return __kdebug_trace64(debugid, arg1, arg2, arg3, arg4); +} + +uint64_t +kdebug_trace_string(uint32_t debugid, uint64_t str_id, const char *str) +{ + int err; + + if (!kdebug_enabled()) { + return 0; + } + + if ((int64_t)str_id == -1) { + errno = EINVAL; + return (uint64_t)-1; + } + + if (str_id == 0 && str == NULL) { + errno = EINVAL; + return (uint64_t)-1; + } + + if ((err = kdebug_validate_debugid(debugid)) != 0) { + errno = err; + return (uint64_t)-1; + } + + 
return __kdebug_trace_string(debugid, str_id, str); } diff --git a/libsyscall/wrappers/libproc/libproc.c b/libsyscall/wrappers/libproc/libproc.c index 2f47dcaf4..730a15e41 100644 --- a/libsyscall/wrappers/libproc/libproc.c +++ b/libsyscall/wrappers/libproc/libproc.c @@ -26,10 +26,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include "libproc_internal.h" @@ -113,6 +115,17 @@ proc_pidoriginatorinfo(int flavor, void *buffer, int buffersize) return(retval); } +int +proc_listcoalitions(int flavor, int coaltype, void *buffer, int buffersize) +{ + int retval; + + if ((retval = __proc_info(PROC_INFO_CALL_LISTCOALITIONS, flavor, coaltype, 0, buffer, buffersize)) == -1) + return 0; + + return retval; +} + int proc_pid_rusage(int pid, int flavor, rusage_info_t *buffer) { @@ -517,6 +530,78 @@ proc_disable_wakemon(pid_t pid) return (proc_rlimit_control(pid, RLIMIT_WAKEUPS_MONITOR, &params)); } +int +proc_list_uptrs(int pid, uint64_t *buf, uint32_t bufsz) +{ + int i, j; + int nfds, nkns; + int count = 0; + int knote_max = 4096; /* arbitrary starting point */ + + /* if buffer is empty, this call simply counts the knotes */ + if (bufsz > 0 && buf == NULL) { + errno = EFAULT; + return -1; + } + + struct proc_fdinfo fdlist[OPEN_MAX]; + nfds = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, fdlist, OPEN_MAX*sizeof(struct proc_fdinfo)); + if (nfds <= 0 || nfds > OPEN_MAX) { + return -1; + } + + struct kevent_extinfo *kqext = malloc(knote_max * sizeof(struct kevent_extinfo)); + if (!kqext) { + errno = ENOMEM; + return -1; + } + + for (i = 0; i < nfds; i++) { + if (fdlist[i].proc_fdtype != PROX_FDTYPE_KQUEUE) { + continue; + } + + again: + nkns = __proc_info(PROC_INFO_CALL_PIDFDINFO, pid, PROC_PIDFDKQUEUE_EXTINFO, + (uint64_t)fdlist[i].proc_fd, kqext, knote_max * sizeof(struct kevent_extinfo)); + if (nkns < 0) { + if (errno == EBADF) { + /* the FD table can change after enumerating the FDs */ + errno = EAGAIN; + } + free(kqext); + return -1; + } + 
+ if (nkns > knote_max) { + /* there are more knotes than we requested - try again with a + * larger buffer */ + free(kqext); + knote_max = nkns + 32; /* small margin in case of extra knotes */ + kqext = malloc(knote_max * sizeof(struct kevent_extinfo)); + if (!kqext) { + errno = ENOMEM; + return -1; + } + goto again; + } + + for (j = 0; j < nkns; j++) { + if (kqext[j].kqext_kev.udata == 0) { + continue; + } + + if (bufsz >= sizeof(uint64_t)) { + *buf++ = kqext[j].kqext_kev.udata; + bufsz -= sizeof(uint64_t); + } + count++; + } + } + + free(kqext); + return count; +} diff --git a/libsyscall/wrappers/libproc/libproc.h b/libsyscall/wrappers/libproc/libproc.h index 9e98f1760..27633ffa4 100644 --- a/libsyscall/wrappers/libproc/libproc.h +++ b/libsyscall/wrappers/libproc/libproc.h @@ -126,6 +126,20 @@ int proc_clear_dirty(pid_t pid, uint32_t flags); int proc_terminate(pid_t pid, int *sig); +#ifdef PRIVATE +/* + * Enumerate potential userspace pointers embedded in kernel data structures. + * Currently inspects kqueues only. + * + * NOTE: returned "pointers" are opaque user-supplied values and thus not + * guaranteed to address valid objects or be pointers at all. + * + * Returns the number of pointers found (which may exceed buffersize), or -1 on + * failure and errno set appropriately. 
+ */ +int proc_list_uptrs(pid_t pid, uint64_t *buffer, uint32_t buffersize); +#endif /* PRIVATE */ + __END_DECLS #endif /*_LIBPROC_H_ */ diff --git a/libsyscall/wrappers/libproc/libproc_internal.h b/libsyscall/wrappers/libproc/libproc_internal.h index a39de570f..182cf886f 100644 --- a/libsyscall/wrappers/libproc/libproc_internal.h +++ b/libsyscall/wrappers/libproc/libproc_internal.h @@ -95,6 +95,8 @@ int proc_trace_log(pid_t pid, uint64_t uniqueid) __OSX_AVAILABLE_STARTING(__MAC_ /* proc_info call to get the originator information */ int proc_pidoriginatorinfo(int flavor, void *buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +int proc_listcoalitions(int flavor, int coaltype, void *buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_8_3); + #if !TARGET_IPHONE_SIMULATOR #define PROC_SUPPRESS_SUCCESS (0) diff --git a/libsyscall/wrappers/spawn/posix_spawn.c b/libsyscall/wrappers/spawn/posix_spawn.c index d3bec4ede..88b6cabf3 100644 --- a/libsyscall/wrappers/spawn/posix_spawn.c +++ b/libsyscall/wrappers/spawn/posix_spawn.c @@ -38,6 +38,7 @@ #include #include #include +#include /* for COALITION_TYPE_MAX */ /* @@ -112,7 +113,6 @@ posix_spawnattr_init(posix_spawnattr_t *attr) (*psattrp)->short_padding = 0; (*psattrp)->flags_padding = 0; - (*psattrp)->int_padding = 0; /* Default is no new apptype requested */ (*psattrp)->psa_apptype = POSIX_SPAWN_PROCESS_TYPE_DEFAULT; @@ -120,7 +120,8 @@ posix_spawnattr_init(posix_spawnattr_t *attr) /* Jetsam related */ (*psattrp)->psa_jetsam_flags = 0; (*psattrp)->psa_priority = -1; - (*psattrp)->psa_high_water_mark = -1; + (*psattrp)->psa_memlimit_active = -1; + (*psattrp)->psa_memlimit_inactive = -1; /* Default is no CPU usage monitor active. */ (*psattrp)->psa_cpumonitor_percent = 0; @@ -129,11 +130,26 @@ posix_spawnattr_init(posix_spawnattr_t *attr) /* Default is no MAC policy extensions. 
*/ (*psattrp)->psa_mac_extensions = NULL; - /* Default is to inherit parent's coalition */ - (*psattrp)->psa_coalitionid = 0; + /* Default is to inherit parent's coalition(s) */ + (*psattrp)->psa_coalition_info = NULL; + + (*psattrp)->reserved = NULL; + + /* + * old coalition field + * For backwards compatibility reasons, we set this to 1 + * which is the first valid coalition id. This will allow + * newer user space code to properly spawn processes on + * older kernels + * (they will just all end up in the same coalition). + */ + (*psattrp)->psa_reserved = 1; /* Default is no new clamp */ (*psattrp)->psa_qos_clamp = POSIX_SPAWN_PROC_CLAMP_NONE; + + /* Default is no change to role */ + (*psattrp)->psa_darwin_role = POSIX_SPAWN_DARWIN_ROLE_NONE; } return (err); @@ -161,6 +177,8 @@ posix_spawnattr_init(posix_spawnattr_t *attr) * EINVAL The value specified by attr is invalid. */ static int posix_spawn_destroyportactions_np(posix_spawnattr_t *); +static int posix_spawn_destroycoalition_info_np(posix_spawnattr_t *); + int posix_spawnattr_destroy(posix_spawnattr_t *attr) @@ -172,6 +190,7 @@ posix_spawnattr_destroy(posix_spawnattr_t *attr) psattr = *(_posix_spawnattr_t *)attr; posix_spawn_destroyportactions_np(attr); + posix_spawn_destroycoalition_info_np(attr); free(psattr); *attr = NULL; @@ -736,6 +755,29 @@ posix_spawn_destroyportactions_np(posix_spawnattr_t *attr) return 0; } +/* + * posix_spawn_destroycoalition_info_np + * Description: clean up coalition_info struct in posix_spawnattr_t attr + */ +static int +posix_spawn_destroycoalition_info_np(posix_spawnattr_t *attr) +{ + _posix_spawnattr_t psattr; + struct _posix_spawn_coalition_info *coal_info; + + if (attr == NULL || *attr == NULL) + return EINVAL; + + psattr = *(_posix_spawnattr_t *)attr; + coal_info = psattr->psa_coalition_info; + if (coal_info == NULL) + return EINVAL; + + psattr->psa_coalition_info = NULL; + free(coal_info); + return 0; +} + /* * posix_spawn_appendportaction_np * Description: append a port 
action, grow the array if necessary @@ -1390,16 +1432,31 @@ posix_spawnattr_setmacpolicyinfo_np(posix_spawnattr_t * __restrict attr, return 0; } -int posix_spawnattr_setcoalition_np(const posix_spawnattr_t * __restrict attr, uint64_t coalitionid) +int posix_spawnattr_setcoalition_np(const posix_spawnattr_t * __restrict attr, + uint64_t coalitionid, int type, int role) { _posix_spawnattr_t psattr; + struct _posix_spawn_coalition_info *coal_info; if (attr == NULL || *attr == NULL) { return EINVAL; } + if (type < 0 || type > COALITION_TYPE_MAX) + return EINVAL; psattr = *(_posix_spawnattr_t *)attr; - psattr->psa_coalitionid = coalitionid; + + coal_info = psattr->psa_coalition_info; + if (!coal_info) { + coal_info = (struct _posix_spawn_coalition_info *)malloc(sizeof(*coal_info)); + if (!coal_info) + return ENOMEM; + memset(coal_info, 0, sizeof(*coal_info)); + psattr->psa_coalition_info = coal_info; + } + + coal_info->psci_info[type].psci_id = coalitionid; + coal_info->psci_info[type].psci_role = role; return 0; } @@ -1437,6 +1494,34 @@ posix_spawnattr_get_qos_clamp_np(const posix_spawnattr_t * __restrict attr, uint return (0); } +int posix_spawnattr_set_darwin_role_np(const posix_spawnattr_t * __restrict attr, uint64_t darwin_role) +{ + _posix_spawnattr_t psattr; + + if (attr == NULL || *attr == NULL) { + return EINVAL; + } + + psattr = *(_posix_spawnattr_t *)attr; + psattr->psa_darwin_role = darwin_role; + + return 0; +} + +int +posix_spawnattr_get_darwin_role_np(const posix_spawnattr_t * __restrict attr, uint64_t * __restrict darwin_rolep) +{ + _posix_spawnattr_t psattr; + + if (attr == NULL || *attr == NULL) { + return EINVAL; + } + + psattr = *(_posix_spawnattr_t *)attr; + *darwin_rolep = psattr->psa_darwin_role; + + return (0); +} /* * posix_spawn @@ -1511,6 +1596,10 @@ posix_spawn(pid_t * __restrict pid, const char * __restrict path, ad.mac_extensions_size = PS_MAC_EXTENSIONS_SIZE( ad.mac_extensions->psmx_count); } + if (psattr->psa_coalition_info != NULL) { + 
ad.coal_info_size = sizeof(struct _posix_spawn_coalition_info); + ad.coal_info = psattr->psa_coalition_info; + } } if (file_actions != NULL && *file_actions != NULL) { _posix_spawn_file_actions_t psactsp = diff --git a/libsyscall/wrappers/spawn/spawn.h b/libsyscall/wrappers/spawn/spawn.h index 53b4ecb86..663dd3ca5 100644 --- a/libsyscall/wrappers/spawn/spawn.h +++ b/libsyscall/wrappers/spawn/spawn.h @@ -29,9 +29,9 @@ * [SPN] Support for _POSIX_SPAWN */ -#include +#include #include <_types.h> -#include /* shared types */ +#include /* shared types */ #include @@ -56,38 +56,72 @@ __BEGIN_DECLS * gcc under c99 mode won't compile "[ __restrict]" by itself. As a workaround, * a dummy argument name is added. */ + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawn(pid_t * __restrict, const char * __restrict, const posix_spawn_file_actions_t *, const posix_spawnattr_t * __restrict, char *const __argv[ __restrict], char *const __envp[ __restrict]) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnp(pid_t * __restrict, const char * __restrict, const posix_spawn_file_actions_t *, const posix_spawnattr_t * __restrict, char *const __argv[ __restrict], char *const __envp[ __restrict]) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawn_file_actions_addclose(posix_spawn_file_actions_t *, int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t *, int, int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawn_file_actions_addopen( posix_spawn_file_actions_t * __restrict, int, const char * __restrict, int, mode_t) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawn_file_actions_destroy(posix_spawn_file_actions_t *) 
__OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawn_file_actions_init(posix_spawn_file_actions_t *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_destroy(posix_spawnattr_t *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_getsigdefault(const posix_spawnattr_t * __restrict, sigset_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_getflags(const posix_spawnattr_t * __restrict, short * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_getpgroup(const posix_spawnattr_t * __restrict, pid_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_getsigmask(const posix_spawnattr_t * __restrict, sigset_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_init(posix_spawnattr_t *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_setsigdefault(posix_spawnattr_t * __restrict, const sigset_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_setflags(posix_spawnattr_t *, short) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_setpgroup(posix_spawnattr_t *, pid_t) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_setsigmask(posix_spawnattr_t * __restrict, const sigset_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); @@ -115,17 +149,28 @@ __END_DECLS __BEGIN_DECLS +__WATCHOS_PROHIBITED __TVOS_PROHIBITED 
int posix_spawnattr_getbinpref_np(const posix_spawnattr_t * __restrict, size_t, cpu_type_t *__restrict, size_t *__restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_setauditsessionport_np(posix_spawnattr_t *__restrict, mach_port_t) __OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_3_2); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_setbinpref_np(posix_spawnattr_t * __restrict, size_t, cpu_type_t *__restrict, size_t *__restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_setexceptionports_np(posix_spawnattr_t *__restrict, exception_mask_t, mach_port_t, exception_behavior_t, thread_state_flavor_t) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawnattr_setspecialport_np(posix_spawnattr_t *__restrict, mach_port_t, int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +__WATCHOS_PROHIBITED __TVOS_PROHIBITED int posix_spawn_file_actions_addinherit_np(posix_spawn_file_actions_t *, int) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_3); diff --git a/libsyscall/wrappers/spawn/spawn_private.h b/libsyscall/wrappers/spawn/spawn_private.h index 10a1b544c..f98d2d2bd 100644 --- a/libsyscall/wrappers/spawn/spawn_private.h +++ b/libsyscall/wrappers/spawn/spawn_private.h @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -48,9 +49,12 @@ int posix_spawnattr_set_importancewatch_port_np(posix_spawnattr_t * __restrict a int posix_spawnattr_getmacpolicyinfo_np(const posix_spawnattr_t * __restrict, const char *, void **, size_t *) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); int posix_spawnattr_setmacpolicyinfo_np(posix_spawnattr_t * __restrict, const char *, void *, size_t) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); -int posix_spawnattr_setcoalition_np(const posix_spawnattr_t * __restrict, uint64_t) __OSX_AVAILABLE_STARTING(__MAC_10_10, 
__IPHONE_8_0); +int posix_spawnattr_setcoalition_np(const posix_spawnattr_t * __restrict, uint64_t, int, int) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); int posix_spawnattr_set_qos_clamp_np(const posix_spawnattr_t * __restrict, uint64_t) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); int posix_spawnattr_get_qos_clamp_np(const posix_spawnattr_t * __restrict, uint64_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +int posix_spawnattr_set_darwin_role_np(const posix_spawnattr_t * __restrict, uint64_t) __OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_9_0); +int posix_spawnattr_get_darwin_role_np(const posix_spawnattr_t * __restrict, uint64_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_9_0); + #endif /* !defined _SPAWN_PRIVATE_H_*/ diff --git a/libsyscall/wrappers/stackshot.c b/libsyscall/wrappers/stackshot.c new file mode 100644 index 000000000..c5633120b --- /dev/null +++ b/libsyscall/wrappers/stackshot.c @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include + +/* + * System call entry point + */ +int __stack_snapshot_with_config(int stackshot_config_version, user_addr_t stackshot_config, size_t stackshot_config_size); + +/* + * stackshot_config_create: create and initialize the arguments for a stackshot + * + * Outputs: NULL if malloc fails + * a pointer to a new stackshot_config_t on success + */ +stackshot_config_t * +stackshot_config_create(void) +{ + stackshot_config_t *s_config; + + s_config = malloc(sizeof(stackshot_config_t)); + if (s_config == NULL) { + return NULL; + } + + s_config->sc_pid = -1; + s_config->sc_flags = 0; + s_config->sc_since_timestamp = 0; + s_config->sc_buffer = 0; + s_config->sc_size = 0; + + return s_config; +} + +/* + * stackshot_config_set_pid: set the PID to be traced + * + * Inputs: stackshot_config - a pointer to the stackshot_config_t we want to update + * pid - process id of process to be traced, or -1 for the entire system + * + * Outputs: EINVAL if the passed stackshot_config pointer is NULL + * 0 on success + */ +int +stackshot_config_set_pid(stackshot_config_t *stackshot_config, int pid) +{ + stackshot_config_t *s_config; + + if (stackshot_config == NULL) { + return EINVAL; + } + + s_config = (stackshot_config_t *) stackshot_config; + s_config->sc_pid = pid; + + return 0; +} + +/* + * stackshot_config_set_flags: set the flags to be passed for the stackshot + * + * Inputs: stackshot_config - a pointer to the stackshot_config_t we want to update + * flags - flags to pass to stackshot + * + * Outputs: EINVAL if the passed stackshot_config pointer is NULL + * 0 on success + */ +int +stackshot_config_set_flags(stackshot_config_t *stackshot_config, uint32_t flags) +{ + stackshot_config_t *s_config; + + if (stackshot_config == NULL) { + return EINVAL; + } + + s_config = (stackshot_config_t *) stackshot_config; + s_config->sc_flags = flags; + + return 0; +} + +/* + * 
stackshot_capture_with_config: take a stackshot with the provided config + * + * Inputs: stackshot_config - a pointer to the stackshot_config_t we want to use + * + * Outputs: EINVAL if the passed stackshot_config pointer is NULL, a caller is trying + * to reuse a config without deallocating its buffer or if there is a + * problem with the arguments + * EFAULT if there was a problem with accessing the arguments from the kernel + * EPERM if the caller is not privileged + * ENOTSUP if the caller is passing a stackshot config version that is not + * supported by the kernel (indicates libsyscall:kernel mismatch), + * or if the caller is requesting unsupported flags + * ENOMEM if the kernel is unable to allocate memory + * ENOSPC if the caller doesn't have enough space in their address space for + * the kernel to remap the buffer + * ENOENT if the caller is requesting an existing buffer that doesn't exist + * or the target PID isn't found + * 0 on success + */ +int +stackshot_capture_with_config(stackshot_config_t *stackshot_config) +{ + int ret; + stackshot_config_t *s_config; + + if (stackshot_config == NULL) { + return EINVAL; + } + + s_config = (stackshot_config_t *) stackshot_config; + if (s_config->sc_buffer != 0) { + return EINVAL; + } + + s_config->sc_out_buffer_addr = &s_config->sc_buffer; + s_config->sc_out_size_addr = &s_config->sc_size; + ret = __stack_snapshot_with_config(STACKSHOT_CONFIG_TYPE, s_config, sizeof(stackshot_config_t)); + + if (ret != 0) { + ret = errno; + s_config->sc_buffer = 0; + s_config->sc_size = 0; + } + + return ret; +} + +/* + * stackshot_config_get_stackshot_buffer: get a pointer to the buffer containing the stackshot + * + * Inputs: stackshot_config - a pointer to a stackshot_config_t + * + * Outputs: NULL if the passed stackshot_config is NULL or if its buffer is NULL + * a pointer to the buffer containing the stackshot on success + */ +void * +stackshot_config_get_stackshot_buffer(stackshot_config_t *stackshot_config) +{ + 
stackshot_config_t *s_config; + + if (stackshot_config == NULL) { + return NULL; + } + s_config = (stackshot_config_t *) stackshot_config; + + return ((void *)s_config->sc_buffer); +} + +/* + * stackshot_config_get_stackshot_size: get the size of the stackshot buffer + * + * Inputs: stackshot_config - a pointer to a stackshot_config_t + * + * Outputs: (uint32_t)-1 if the passed stackshot_config is NULL or there is no buffer + * the length of the stackshot buffer on success + */ +uint32_t +stackshot_config_get_stackshot_size(stackshot_config_t * stackshot_config) +{ + if (stackshot_config == NULL || (void *)stackshot_config->sc_buffer == NULL) { + return -1; + } + + return stackshot_config->sc_size; +} + +/* + * stackshot_config_set_size_hint: set a suggested size for the stackshot buffer + * + * Inputs: stackshot_config - a pointer to a stackshot_config_t + * suggested_size - hint for size allocation of stackshot + * + * Outputs: -1 if the passed stackshot_config is NULL or a stackshot buffer is already set + * 0 on success
+ */ +int +stackshot_config_set_size_hint(stackshot_config_t *stackshot_config, uint32_t suggested_size) +{ + if (stackshot_config == NULL || (void *)stackshot_config->sc_buffer != NULL) { + return -1; + } + + stackshot_config->sc_size = suggested_size; + + return 0; +} + +/* + * stackshot_config_dealloc_buffer: dealloc the stackshot buffer and reset the size so that a + * stackshot_config_t can be used again + * + * Inputs: stackshot_config - a pointer to a stackshot_config_t + * + * Outputs: EINVAL if the passed stackshot_config is NULL + * 0 otherwise (including when there is no buffer to deallocate) + */ +int +stackshot_config_dealloc_buffer(stackshot_config_t *stackshot_config) +{ + stackshot_config_t *s_config; + + if (stackshot_config == NULL) { + return EINVAL; + } + s_config = (stackshot_config_t *) stackshot_config; + + if (s_config->sc_size && s_config->sc_buffer) { + mach_vm_deallocate(mach_task_self(), (mach_vm_offset_t)s_config->sc_buffer, (mach_vm_size_t)s_config->sc_size); + } + + s_config->sc_buffer = 0; + s_config->sc_size = 0; + + return 0; +} + +/* + * stackshot_config_dealloc: dealloc the stackshot buffer and the stackshot config + * + * Inputs: stackshot_config - a pointer to a stackshot_config_t + * + * Outputs: EINVAL if the passed stackshot_config is NULL + * 0 otherwise + */ +int +stackshot_config_dealloc(stackshot_config_t *stackshot_config) +{ + stackshot_config_t *s_config; + + if (stackshot_config == NULL) { + return EINVAL; + } + s_config = (stackshot_config_t *) stackshot_config; + + if (s_config->sc_size && s_config->sc_buffer) { + mach_vm_deallocate(mach_task_self(), (mach_vm_offset_t)s_config->sc_buffer, (mach_vm_size_t)s_config->sc_size); + } + + s_config->sc_buffer = 0; + s_config->sc_size = 0; + + free(s_config); + return 0; +} diff --git a/libsyscall/wrappers/work_interval.c b/libsyscall/wrappers/work_interval.c new file mode 100644 index 000000000..29dd2ad61 --- /dev/null +++ b/libsyscall/wrappers/work_interval.c @@ -0,0 +1,113 @@ +/* + * Copyright
(c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include + +struct work_interval { + uint64_t thread_id; + uint64_t work_interval_id; +}; + +extern uint64_t __thread_selfid(void); + +/* Create a new work interval handle (currently for the current thread only). 
Flags is unused */ +int +work_interval_create(work_interval_t *interval_handle, uint32_t flags __unused) +{ + int ret; + uint64_t work_interval_id; + work_interval_t handle; + + ret = __work_interval_ctl(WORK_INTERVAL_OPERATION_CREATE, 0, &work_interval_id, sizeof(work_interval_id)); + if (ret == -1) { + return ret; + } + + handle = malloc(sizeof(*handle)); + if (handle == NULL) { + errno = ENOMEM; + return -1; + } + + handle->thread_id = __thread_selfid(); + handle->work_interval_id = work_interval_id; + + *interval_handle = handle; + return 0; +} + +int +work_interval_notify(work_interval_t interval_handle, uint64_t start, uint64_t finish, uint64_t deadline, uint64_t next_start, uint32_t flags) +{ + int ret; + uint64_t work_interval_id; + struct work_interval_notification notification = { + .start = start, + .finish = finish, + .deadline = deadline, + .next_start = next_start, + .flags = flags, + .unused1 = 0 + }; + + if (interval_handle == NULL) { + errno = EINVAL; + return -1; + } + + work_interval_id = interval_handle->work_interval_id; + + ret = __work_interval_ctl(WORK_INTERVAL_OPERATION_NOTIFY, work_interval_id, &notification, sizeof(notification)); + return ret; +} + +int +work_interval_notify_simple(work_interval_t interval_handle, uint64_t start, uint64_t deadline, uint64_t next_start) +{ + return work_interval_notify(interval_handle, start, mach_absolute_time(), deadline, next_start, 0); +} + +int +work_interval_destroy(work_interval_t interval_handle) +{ + int ret, saved_errno; + uint64_t work_interval_id; + + if (interval_handle == NULL) { + errno = EINVAL; + return -1; + } + + work_interval_id = interval_handle->work_interval_id; + + ret = __work_interval_ctl(WORK_INTERVAL_OPERATION_DESTROY, work_interval_id, NULL, 0); + saved_errno = errno; + free(interval_handle); + errno = saved_errno; + + return ret; +} diff --git a/libsyscall/xcodescripts/create-syscalls.pl b/libsyscall/xcodescripts/create-syscalls.pl index eace5b955..9c587b536 100755 ---
a/libsyscall/xcodescripts/create-syscalls.pl +++ b/libsyscall/xcodescripts/create-syscalls.pl @@ -61,9 +61,9 @@ # size in bytes of known types (only used for i386) my %TypeBytes = ( 'au_asid_t' => 4, - 'associd_t' => 4, + 'sae_associd_t' => 4, 'caddr_t' => 4, - 'connid_t' => 4, + 'sae_connid_t' => 4, 'gid_t' => 4, 'id_t' => 4, 'idtype_t' => 4, diff --git a/libsyscall/xcodescripts/filter_mig.awk b/libsyscall/xcodescripts/filter_mig.awk new file mode 100755 index 000000000..90fd39824 --- /dev/null +++ b/libsyscall/xcodescripts/filter_mig.awk @@ -0,0 +1,37 @@ +#!/usr/bin/awk -f + +# Usage: foo