From e6b06a0cae3e9d846b70e2d56cfee7757998ec0d Mon Sep 17 00:00:00 2001 From: Darwin Date: Mon, 15 Sep 2008 21:01:57 +0000 Subject: [PATCH] xnu-1228.7.58 Imported from https://opensource.apple.com/tarballs/xnu/xnu-1228.7.58.tar.gz --- bsd/dev/dtrace/dtrace.c | 21 ++- bsd/hfs/hfs_btreeio.c | 25 +++- bsd/hfs/hfs_catalog.c | 4 +- bsd/hfs/hfs_cnode.c | 10 +- bsd/hfs/hfs_cnode.h | 2 +- bsd/hfs/hfs_endian.c | 21 ++- bsd/hfs/hfs_endian.h | 5 +- bsd/hfs/hfs_link.c | 25 ++-- bsd/hfs/hfs_lookup.c | 20 ++- bsd/hfs/hfs_readwrite.c | 12 +- bsd/hfs/hfs_vfsops.c | 70 +++++++--- bsd/hfs/hfs_vfsutils.c | 15 +- bsd/hfs/hfs_vnops.c | 134 ++++++++++-------- bsd/hfs/hfscommon/BTree/BTree.c | 30 ++-- bsd/hfs/hfscommon/BTree/BTreeAllocate.c | 8 +- bsd/hfs/hfscommon/BTree/BTreeMiscOps.c | 8 +- bsd/hfs/hfscommon/BTree/BTreeNodeOps.c | 15 +- bsd/hfs/hfscommon/BTree/BTreeScanner.c | 9 +- bsd/hfs/hfscommon/BTree/BTreeTreeOps.c | 16 +-- bsd/hfs/hfscommon/headers/BTreesInternal.h | 3 +- bsd/hfs/hfscommon/headers/BTreesPrivate.h | 10 +- bsd/kern/kern_sysctl.c | 2 + bsd/kern/tty_ptmx.c | 24 +++- bsd/kern/uipc_mbuf.c | 5 +- bsd/netinet/in_gif.c | 7 +- bsd/netinet/ip_divert.c | 24 ---- bsd/netinet6/esp_input.c | 4 +- bsd/netinet6/in6_gif.c | 9 +- bsd/sys/errno.h | 1 + bsd/vfs/vfs_cache.c | 48 ++++--- bsd/vfs/vfs_cluster.c | 153 +++++---------------- bsd/vfs/vfs_journal.c | 2 +- bsd/vfs/vfs_subr.c | 17 ++- bsd/vfs/vfs_vnops.c | 4 +- config/MasterVersion | 2 +- osfmk/i386/AT386/model_dep.c | 39 +++++- osfmk/i386/i386_init.c | 5 +- osfmk/i386/locore.s | 36 ++--- osfmk/i386/mp.c | 22 ++- osfmk/i386/mp.h | 3 +- osfmk/i386/mp_events.h | 1 + osfmk/i386/pmap.c | 52 ++++--- osfmk/i386/trap.c | 2 +- osfmk/i386/trap.h | 2 +- osfmk/vm/bsd_vm.c | 19 ++- osfmk/vm/vm_object.c | 6 +- osfmk/vm/vm_resident.c | 25 ++-- 47 files changed, 553 insertions(+), 424 deletions(-) diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c index aa200c11f..c28ae6f0b 100644 --- a/bsd/dev/dtrace/dtrace.c +++ 
b/bsd/dev/dtrace/dtrace.c @@ -1071,6 +1071,21 @@ dtrace_priv_proc(dtrace_state_t *state) return (0); } +#if defined(__APPLE__) +/* dtrace_priv_proc() omitting the P_LNOATTACH check. For PID and EXECNAME accesses. */ +static int +dtrace_priv_proc_relaxed(dtrace_state_t *state) +{ + + if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) + return (1); + + cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; + + return (0); +} +#endif /* __APPLE__ */ + static int dtrace_priv_kernel(dtrace_state_t *state) { @@ -2709,7 +2724,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, #else case DIF_VAR_PID: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc_relaxed(state)) return (0); /* @@ -2738,7 +2753,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)curthread->t_procp->p_ppid); #else case DIF_VAR_PPID: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc_relaxed(state)) return (0); /* @@ -2800,7 +2815,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, mstate->dtms_scratch_base + mstate->dtms_scratch_size) return 0; - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc_relaxed(state)) return (0); mstate->dtms_scratch_ptr += scratch_size; diff --git a/bsd/hfs/hfs_btreeio.c b/bsd/hfs/hfs_btreeio.c index 0c81c879f..0b9a39160 100644 --- a/bsd/hfs/hfs_btreeio.c +++ b/bsd/hfs/hfs_btreeio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -76,6 +76,16 @@ OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions opt { OSStatus retval = E_NONE; struct buf *bp = NULL; + u_int8_t allow_empty_node; + + /* If the btree block is being read using hint, it is + * fine for the swap code to find zeroed out nodes. 
+ */ + if (options & kGetBlockHint) { + allow_empty_node = true; + } else { + allow_empty_node = false; + } if (options & kGetEmptyBlock) { daddr64_t blkno; @@ -115,21 +125,21 @@ OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions opt * size once the B-tree control block is set up with the node size * from the header record. */ - retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly); + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node); } else if (block->blockReadFromDisk) { /* * The node was just read from disk, so always swap/check it. * This is necessary on big endian since the test below won't trigger. */ - retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost); + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node); } else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) { /* * The node was left in the cache in non-native order, so swap it. * This only happens on little endian, after the node is written * back to disk. */ - retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost); + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node); } /* @@ -191,8 +201,11 @@ btree_swap_node(struct buf *bp, __unused void *arg) block.blockReadFromDisk = (buf_fromcache(bp) == 0); block.blockSize = buf_count(bp); - // swap the data now that this node is ready to go to disk - retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig); + /* Swap the data now that this node is ready to go to disk. + * We allow swapping of zeroed out nodes here because we might + * be writing node whose last record just got deleted. 
+ */ + retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true); if (retval) panic("btree_swap_node: about to write corrupt node!\n"); } diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c index 59d20b4ef..04db5d290 100644 --- a/bsd/hfs/hfs_catalog.c +++ b/bsd/hfs/hfs_catalog.c @@ -286,11 +286,11 @@ cat_releasedesc(struct cat_desc *descp) /* * These Catalog functions allow access to the HFS Catalog (database). - * The catalog b-tree lock must be aquired before calling any of these routines. + * The catalog b-tree lock must be acquired before calling any of these routines. */ /* - * cat_lookup - lookup a catalog node using a cnode decriptor + * cat_lookup - lookup a catalog node using a cnode descriptor * * Note: The caller is responsible for releasing the output * catalog descriptor (when supplied outdescp is non-null). diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c index 17a0cba63..7ff95e593 100644 --- a/bsd/hfs/hfs_cnode.c +++ b/bsd/hfs/hfs_cnode.c @@ -127,8 +127,10 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) */ if (v_type == VDIR) { hfs_reldirhints(cp, 0); - if (cp->c_flag & C_HARDLINK) - hfs_relorigins(cp); + } + + if (cp->c_flag & C_HARDLINK) { + hfs_relorigins(cp); } if (cp->c_datafork) @@ -472,6 +474,10 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) if (vnode_isdir(vp)) { hfs_reldirhints(cp, 0); } + + if (cp->c_flag & C_HARDLINK) { + hfs_relorigins(cp); + } } /* Release the file fork and related data */ if (fp) { diff --git a/bsd/hfs/hfs_cnode.h b/bsd/hfs/hfs_cnode.h index 22c5c0784..7a5b13601 100644 --- a/bsd/hfs/hfs_cnode.h +++ b/bsd/hfs/hfs_cnode.h @@ -91,7 +91,7 @@ struct linkorigin { typedef struct linkorigin linkorigin_t; #define MAX_CACHED_ORIGINS 10 - +#define MAX_CACHED_FILE_ORIGINS 8 /* * The cnode is used to represent each active (or recently active) diff --git a/bsd/hfs/hfs_endian.c b/bsd/hfs/hfs_endian.c index 41251934f..e5775bfbc 100644 --- a/bsd/hfs/hfs_endian.c +++ b/bsd/hfs/hfs_endian.c @@ -1,5 +1,5 @@ /* - 
* Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -83,7 +83,8 @@ int hfs_swap_BTNode ( BlockDescriptor *src, vnode_t vp, - enum HFSBTSwapDirection direction + enum HFSBTSwapDirection direction, + u_int8_t allow_empty_node ) { BTNodeDescriptor *srcDesc = src->buffer; @@ -177,9 +178,13 @@ hfs_swap_BTNode ( * Sanity check: must be even, and within the node itself. * * We may be called to swap an unused node, which contains all zeroes. - * This is why we allow the record offset to be zero. + * Unused nodes are expected only when allow_empty_node is true. + * If it is false and record offset is zero, return error. */ - if ((srcOffs[i] & 1) || (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) || (srcOffs[i] >= src->blockSize)) { + if ((srcOffs[i] & 1) || ( + (allow_empty_node == false) && (srcOffs[i] == 0)) || + (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) || + (srcOffs[i] >= src->blockSize)) { printf("hfs_swap_BTNode: record #%d invalid offset (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); error = fsBTInvalidHeaderErr; goto fail; @@ -306,9 +311,15 @@ hfs_swap_BTNode ( * Sanity check: must be even, and within the node itself. * * We may be called to swap an unused node, which contains all zeroes. + * This can happen when the last record from a node gets deleted. * This is why we allow the record offset to be zero. + * Unused nodes are expected only when allow_empty_node is true + * (the caller should set it to true for kSwapBTNodeBigToHost). 
*/ - if ((srcOffs[i] & 1) || (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) || (srcOffs[i] >= src->blockSize)) { + if ((srcOffs[i] & 1) || + ((allow_empty_node == false) && (srcOffs[i] == 0)) || + (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) || + (srcOffs[i] >= src->blockSize)) { panic("hfs_UNswap_BTNode: record #%d invalid offset (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); error = fsBTInvalidHeaderErr; goto fail; diff --git a/bsd/hfs/hfs_endian.h b/bsd/hfs/hfs_endian.h index 519c40104..c1c46f7aa 100644 --- a/bsd/hfs/hfs_endian.h +++ b/bsd/hfs/hfs_endian.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000, 2002-2003, 2005-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -93,7 +93,8 @@ enum HFSBTSwapDirection { kSwapBTNodeHeaderRecordOnly = 3 }; -int hfs_swap_BTNode (BlockDescriptor *src, vnode_t vp, enum HFSBTSwapDirection direction); +int hfs_swap_BTNode (BlockDescriptor *src, vnode_t vp, enum HFSBTSwapDirection direction, + u_int8_t allow_empty_node); #ifdef __cplusplus } diff --git a/bsd/hfs/hfs_link.c b/bsd/hfs/hfs_link.c index a2e08a098..f6c5e8409 100644 --- a/bsd/hfs/hfs_link.c +++ b/bsd/hfs/hfs_link.c @@ -677,12 +677,10 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c goto out; } - /* Purge any cached origin entries for a directory hard link. */ - if (cndesc.cd_flags & CD_ISDIR) { - hfs_relorigin(cp, dcp->c_fileid); - if (dcp->c_fileid != dcp->c_cnid) { - hfs_relorigin(cp, dcp->c_cnid); - } + /* Purge any cached origin entries for a directory or file hard link. */ + hfs_relorigin(cp, dcp->c_fileid); + if (dcp->c_fileid != dcp->c_cnid) { + hfs_relorigin(cp, dcp->c_cnid); } /* Delete the link record. 
*/ @@ -996,7 +994,7 @@ hfs_lookuplink(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid, c } /* - * Cache the orgin of a directory hard link + * Cache the origin of a directory or file hard link * * cnode must be lock on entry */ @@ -1007,6 +1005,7 @@ hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid) linkorigin_t *origin = NULL; void * thread = current_thread(); int count = 0; + int maxorigins = (S_ISDIR(cp->c_mode)) ? MAX_CACHED_ORIGINS : MAX_CACHED_FILE_ORIGINS; /* * Look for an existing origin first. If not found, create/steal one. @@ -1020,7 +1019,7 @@ hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid) } if (origin == NULL) { /* Recycle the last (i.e., the oldest) if we have too many. */ - if (count > MAX_CACHED_ORIGINS) { + if (count > maxorigins) { origin = TAILQ_LAST(&cp->c_originlist, hfs_originhead); TAILQ_REMOVE(&cp->c_originlist, origin, lo_link); } else { @@ -1034,7 +1033,7 @@ hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid) } /* - * Release any cached origins for a directory hard link + * Release any cached origins for a directory or file hard link * * cnode must be lock on entry */ @@ -1051,7 +1050,7 @@ hfs_relorigins(struct cnode *cp) } /* - * Release a specific origin for a directory hard link + * Release a specific origin for a directory or file hard link * * cnode must be lock on entry */ @@ -1073,7 +1072,7 @@ hfs_relorigin(struct cnode *cp, cnid_t parentcnid) } /* - * Test if a directory hard link has a cached origin + * Test if a directory or file hard link has a cached origin * * cnode must be lock on entry */ @@ -1095,7 +1094,7 @@ hfs_haslinkorigin(cnode_t *cp) } /* - * Obtain the current parent cnid of a directory hard link + * Obtain the current parent cnid of a directory or file hard link * * cnode must be lock on entry */ @@ -1117,7 +1116,7 @@ hfs_currentparent(cnode_t *cp) } /* - * Obtain the current cnid of a directory hard link + * Obtain the current cnid of a directory or file hard link * * cnode must be lock on entry */ 
diff --git a/bsd/hfs/hfs_lookup.c b/bsd/hfs/hfs_lookup.c index b59038b3b..6009fb787 100644 --- a/bsd/hfs/hfs_lookup.c +++ b/bsd/hfs/hfs_lookup.c @@ -360,9 +360,15 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int } goto exit; } - - /* Save the origin info of a directory link for future ".." requests. */ - if (S_ISDIR(attr.ca_mode) && (attr.ca_recflags & kHFSHasLinkChainMask)) { + + /* + * Save the origin info for file and directory hardlinks. Directory hardlinks + * need the origin for '..' lookups, and file hardlinks need it to ensure that + * competing lookups do not cause us to vend different hardlinks than the ones requested. + * We want to restrict saving the cache entries to LOOKUP namei operations, since + * we're really doing this to protect getattr. + */ + if ((cnp->cn_nameiop == LOOKUP) && (VTOC(tvp)->c_flag & C_HARDLINK)) { hfs_savelinkorigin(VTOC(tvp), VTOC(dvp)->c_fileid); } *cnode_locked = 1; @@ -479,6 +485,14 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap) replace_desc(cp, &desc); hfs_systemfile_unlock(VTOHFS(dvp), lockflags); } + + /* Save the lookup result in the origin list for future lookups, but + * only if it was through a LOOKUP nameiop + */ + if (cnp->cn_nameiop == LOOKUP) { + hfs_savelinkorigin(cp, dcp->c_fileid); + } + hfs_unlock(cp); } #if NAMEDRSRCFORK diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index a667aa364..958ca6e3a 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -80,6 +80,8 @@ enum { /* from bsd/vfs/vfs_cluster.c */ extern int is_file_clean(vnode_t vp, off_t filesize); +/* from bsd/hfs/hfs_vfsops.c */ +extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *); static int hfs_clonefile(struct vnode *, int, int, int); @@ -1328,7 +1330,11 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { bufptr = (char *)ap->a_data; cnid = strtoul(bufptr, NULL, 10); - if ((error = hfs_vget(hfsmp, cnid, &file_vp, 1))) { + /* We need to call hfs_vfs_vget to leverage the code that will fix the + * origin list for us if needed, as opposed to calling hfs_vget, since + * we will need it for the subsequent build_path call. + */ + if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) { return (error); } error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context); @@ -3029,7 +3035,7 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap) block.blockSize = buf_count(bp); /* Endian un-swap B-Tree node */ - retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig); + retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false); if (retval) panic("hfs_vnop_bwrite: about to write corrupt node!\n"); } diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c index bf7635eba..7b67b6686 100644 --- a/bsd/hfs/hfs_vfsops.c +++ b/bsd/hfs/hfs_vfsops.c @@ -118,6 +118,9 @@ lck_grp_t * hfs_mutex_group; lck_grp_t * hfs_rwlock_group; extern struct vnodeopv_desc hfs_vnodeop_opv_desc; +/* not static so we can re-use in hfs_readwrite.c for build_path */ +int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); + static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args); static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context); @@ -136,7 +139,6 @@ static int hfs_sync(struct 
mount *mp, int waitfor, vfs_context_t context); static int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen, vfs_context_t context); static int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context); -static int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context); static int hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vfs_context_t context); @@ -372,13 +374,18 @@ hfs_changefs_callback(struct vnode *vp, void *cargs) struct cat_desc cndesc; struct cat_attr cnattr; struct hfs_changefs_cargs *args; + int lockflags; + int error; args = (struct hfs_changefs_cargs *)cargs; cp = VTOC(vp); vcb = HFSTOVCB(args->hfsmp); - if (cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL)) { + lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL); + hfs_systemfile_unlock(args->hfsmp, lockflags); + if (error) { /* * If we couldn't find this guy skip to the next one */ @@ -526,8 +533,9 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) * * hfs_changefs_callback will be called for each vnode * hung off of this mount point - * the vnode will be - * properly referenced and unreferenced around the callback + * + * The vnode will be properly referenced and unreferenced + * around the callback */ cargs.hfsmp = hfsmp; cargs.namefix = namefix; @@ -561,6 +569,7 @@ hfs_reload_callback(struct vnode *vp, void *cargs) { struct cnode *cp; struct hfs_reload_cargs *args; + int lockflags; args = (struct hfs_reload_cargs *)cargs; /* @@ -585,8 +594,12 @@ hfs_reload_callback(struct vnode *vp, void *cargs) datafork = cp->c_datafork ? 
&cp->c_datafork->ff_data : NULL; /* lookup by fileID since name could have changed */ - if ((args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork))) + lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork); + hfs_systemfile_unlock(args->hfsmp, lockflags); + if (args->error) { return (VNODE_RETURNED_DONE); + } /* update cnode's catalog descriptor */ (void) replace_desc(cp, &desc); @@ -2276,33 +2289,48 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, return (ENOTSUP); } - -static int +/* hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support the + * build_path ioctl. We use it to leverage the code below that updates the origin + * cache if necessary. + */ +int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context) { int error; + int lockflags; + struct hfsmount *hfsmp; - error = hfs_vget(VFSTOHFS(mp), (cnid_t)ino, vpp, 1); + hfsmp = VFSTOHFS(mp); + + error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1); if (error) return (error); /* * ADLs may need to have their origin state updated - * since build_path needs a valid parent. + * since build_path needs a valid parent. The same is true + * for hardlinked files as well. There isn't a race window here in re-acquiring + * the cnode lock since we aren't pulling any data out of the cnode; instead, we're + * going back to the catalog. 
*/ - if (vnode_isdir(*vpp) && - (VTOC(*vpp)->c_flag & C_HARDLINK) && + if ((VTOC(*vpp)->c_flag & C_HARDLINK) && (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) { cnode_t *cp = VTOC(*vpp); struct cat_desc cdesc; - if (!hfs_haslinkorigin(cp) && - (cat_findname(VFSTOHFS(mp), (cnid_t)ino, &cdesc) == 0)) { - if (cdesc.cd_parentcnid != - VFSTOHFS(mp)->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { - hfs_savelinkorigin(cp, cdesc.cd_parentcnid); + if (!hfs_haslinkorigin(cp)) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_findname(hfsmp, (cnid_t)ino, &cdesc); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error == 0) { + if ((cdesc.cd_parentcnid != + hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) && + (cdesc.cd_parentcnid != + hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) { + hfs_savelinkorigin(cp, cdesc.cd_parentcnid); + } + cat_releasedesc(&cdesc); } - cat_releasedesc(&cdesc); } hfs_unlock(cp); } @@ -2413,6 +2441,7 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) cnid_t nextlinkid; cnid_t prevlinkid; struct cat_desc linkdesc; + int lockflags; cnattr.ca_linkref = linkref; @@ -2422,7 +2451,10 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) */ if ((hfs_lookuplink(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) && (nextlinkid != 0)) { - if (cat_findname(hfsmp, nextlinkid, &linkdesc) == 0) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_findname(hfsmp, nextlinkid, &linkdesc); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error == 0) { cat_releasedesc(&cndesc); bcopy(&linkdesc, &cndesc, sizeof(linkdesc)); } @@ -2452,7 +2484,7 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr, &cnfork, &vp); - if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK) && vnode_isdir(vp)) { + if ((error == 0) && (VTOC(vp)->c_flag & C_HARDLINK)) { 
hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid); } FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI); diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c index 0433aec32..736ab6199 100644 --- a/bsd/hfs/hfs_vfsutils.c +++ b/bsd/hfs/hfs_vfsutils.c @@ -243,13 +243,6 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, /* mark the volume dirty (clear clean unmount bit) */ vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; - /* - * all done with system files so we can unlock now... - */ - hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); - hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); - hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); - if (error == noErr) { error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, NULL, NULL, NULL); @@ -262,6 +255,14 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, MarkVCBDirty( vcb ); // mark VCB dirty so it will be written } } + + /* + * all done with system files so we can unlock now... + */ + hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); + goto CmdDone; //-- Release any resources allocated so far before exiting with an error: diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c index 45eda0938..eef6b5e96 100644 --- a/bsd/hfs/hfs_vnops.c +++ b/bsd/hfs/hfs_vnops.c @@ -384,7 +384,7 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) struct vnode *vp = ap->a_vp; struct vnode_attr *vap = ap->a_vap; - struct vnode *rvp = NULL; + struct vnode *rvp = NULLVP; struct hfsmount *hfsmp; struct cnode *cp; uint64_t data_size; @@ -516,11 +516,11 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) } if (cp->c_blocks - VTOF(vp)->ff_blocks) { + /* We deal with resource fork vnode iocount at the end of the function */ error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE); if (error) { goto out; } - rcp = VTOC(rvp); if (rcp && rcp->c_rsrcfork) { total_size += rcp->c_rsrcfork->ff_size; @@ -592,8 +592,15 @@ hfs_vnop_getattr(struct vnop_getattr_args 
*ap) * which are hardlink-ignorant, will ask for va_linkid. */ vap->va_fileid = (u_int64_t)cp->c_fileid; - /* Hardlinked directories have multiple cnids and parents (one per link). */ - if ((v_type == VDIR) && (cp->c_flag & C_HARDLINK)) { + /* + * We need to use the origin cache for both hardlinked files + * and directories. Hardlinked directories have multiple cnids + * and parents (one per link). Hardlinked files also have their + * own parents and link IDs separate from the indirect inode number. + * If we don't use the cache, we could end up vending the wrong ID + * because the cnode will only reflect the link that was looked up most recently. + */ + if (cp->c_flag & C_HARDLINK) { vap->va_linkid = (u_int64_t)hfs_currentcnid(cp); vap->va_parentid = (u_int64_t)hfs_currentparent(cp); } else { @@ -617,70 +624,79 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) VNODE_ATTR_va_encoding | VNODE_ATTR_va_rdev | VNODE_ATTR_va_data_size; - /* if this is the root, let VFS to find out the mount name, which may be different from the real name */ + /* If this is the root, let VFS to find out the mount name, which may be different from the real name. 
+ * Otherwise, we need to just take care for hardlinked files, which need to be looked up, if necessary + */ if (VATTR_IS_ACTIVE(vap, va_name) && (cp->c_cnid != kHFSRootFolderID)) { - /* Return the name for ATTR_CMN_NAME */ - if (cp->c_desc.cd_namelen == 0) { - if ((cp->c_flag & C_HARDLINK) && ((cp->c_flag & C_DELETED) == 0 || (cp->c_linkcount > 1))) { - cnid_t nextlinkid; - cnid_t prevlinkid; - struct vnode *file_vp; - - if ((error = hfs_lookuplink(hfsmp, cp->c_fileid, &prevlinkid, &nextlinkid))) { - goto out; - } - - // - // don't bother trying to get a linkid that's the same - // as the current cnid - // - if (nextlinkid == VTOC(vp)->c_cnid) { - if (prevlinkid == VTOC(vp)->c_cnid) { - hfs_unlock(cp); - goto out2; - } else { - nextlinkid = prevlinkid; - } - } - - hfs_unlock(cp); - - if (nextlinkid == 0 || (error = hfs_vget(hfsmp, nextlinkid, &file_vp, 1))) { - if (prevlinkid == 0 || (error = hfs_vget(hfsmp, prevlinkid, &file_vp, 1))) { - goto out2; - } + struct cat_desc linkdesc; + int lockflags; + int uselinkdesc = 0; + cnid_t nextlinkid = 0; + cnid_t prevlinkid = 0; + + /* Get the name for ATTR_CMN_NAME. We need to take special care for hardlinks + * here because the info. for the link ID requested by getattrlist may be + * different than what's currently in the cnode. This is because the cnode + * will be filled in with the information for the most recent link ID that went + * through namei/lookup(). If there are competing lookups for hardlinks that point + * to the same inode, one (or more) getattrlists could be vended incorrect name information. + * Also, we need to beware of open-unlinked files which could have a namelen of 0. Note + * that if another hardlink sibling of this file is being unlinked, that could also thrash + * the name fields but it should *not* be treated like an open-unlinked file here. 
+ */ + if ((cp->c_flag & C_HARDLINK) && + ((cp->c_desc.cd_namelen == 0) || (vap->va_linkid != cp->c_cnid))) { + /* If we have no name and our linkID is the raw inode number, then we may + * have an open-unlinked file. Go to the next link in this case. + */ + if ((cp->c_desc.cd_namelen == 0) && (vap->va_linkid == cp->c_fileid)) { + if ((error = hfs_lookuplink(hfsmp, vap->va_linkid, &prevlinkid, &nextlinkid))) { + goto out; + } } - - cp = VTOC(file_vp); - if (hfs_lock(cp, HFS_SHARED_LOCK) == 0) { - if (cp->c_desc.cd_namelen) { - strlcpy(vap->va_name, (const char *)cp->c_desc.cd_nameptr, MAXPATHLEN); - } - hfs_unlock(cp); - vnode_put(file_vp); - goto out2; + else { + nextlinkid = vap->va_linkid; } - - if (vnode_name(file_vp)) { - strlcpy(vap->va_name, vnode_name(file_vp), MAXPATHLEN); - } else { - error = ENOENT; + /* Now probe the catalog for the linkID. Note that we don't know if we have + * the exclusive lock here for the cnode, so we can't just update the descriptor. + * Instead, we should just store the descriptor's value locally and then use it to pass + * out the name value as needed below. + */ + if (nextlinkid) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_findname(hfsmp, nextlinkid, &linkdesc); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error == 0) { + uselinkdesc = 1; + } } - vnode_put(file_vp); - goto out2; - } else { - error = ENOENT; - goto out; - } - } else { - strlcpy(vap->va_name, (const char *)cp->c_desc.cd_nameptr, MAXPATHLEN); - VATTR_SET_SUPPORTED(vap, va_name); + } + + /* By this point, we either patched the name above, and the c_desc points + * to correct data, or it already did, in which case we just proceed by copying + * the name into the VAP. Note that we will never set va_name to supported if + * nextlinkid is never initialized. This could happen in the degenerate case above + * involving the raw inode number, where it has no nextlinkid. 
In this case, we will + * simply not export the name as supported. + */ + if (uselinkdesc) { + strlcpy(vap->va_name, (const char *)linkdesc.cd_nameptr, MAXPATHLEN); + VATTR_SET_SUPPORTED(vap, va_name); + cat_releasedesc(&linkdesc); + } + else if (cp->c_desc.cd_namelen) { + strlcpy(vap->va_name, (const char *)cp->c_desc.cd_nameptr, MAXPATHLEN); + VATTR_SET_SUPPORTED(vap, va_name); } } out: hfs_unlock(cp); -out2: + /* + * We need to drop the iocount on the rsrc fork vnode only *after* we've + * released the cnode lock, since vnode_put can trigger an inactive call, which + * will go back into the HFS and try to acquire a cnode lock. + */ if (rvp) { vnode_put(rvp); } diff --git a/bsd/hfs/hfscommon/BTree/BTree.c b/bsd/hfs/hfscommon/BTree/BTree.c index 2ee3159e3..86307fbcb 100644 --- a/bsd/hfs/hfscommon/BTree/BTree.c +++ b/bsd/hfs/hfscommon/BTree/BTree.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -334,7 +334,7 @@ OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc) ++btreePtr->numReleaseNodes; M_ExitOnError (err); - err = GetNode (btreePtr, kHeaderNodeNum, &nodeRec ); + err = GetNode (btreePtr, kHeaderNodeNum, 0, &nodeRec ); M_ExitOnError (err); } @@ -504,7 +504,7 @@ OSStatus BTSearchRecord (FCB *filePtr, { nodeNum = searchIterator->hint.nodeNum; - err = GetNode (btreePtr, nodeNum, &node); + err = GetNode (btreePtr, nodeNum, kGetNodeHint, &node); if( err == noErr ) { if ( ((BTNodeDescriptor*) node.buffer)->kind == kBTLeafNode && @@ -711,7 +711,7 @@ OSStatus BTIterateRecord (FCB *filePtr, goto ErrorExit; } - err = GetNode (btreePtr, nodeNum, &node); + err = GetNode (btreePtr, nodeNum, 0, &node); M_ExitOnError (err); if ( ((NodeDescPtr) node.buffer)->kind != kBTLeafNode || @@ -763,7 +763,7 @@ OSStatus BTIterateRecord (FCB *filePtr, M_ExitOnError(err); // Look up the left node - err = GetNode (btreePtr, nodeNum, &left); + err = GetNode (btreePtr, nodeNum, 0, &left); M_ExitOnError (err); // Look up the current node again @@ -811,7 +811,7 @@ OSStatus BTIterateRecord (FCB *filePtr, nodeNum = ((NodeDescPtr) node.buffer)->fLink; if ( nodeNum > 0) { - err = GetNode (btreePtr, nodeNum, &right); + err = GetNode (btreePtr, nodeNum, 0, &right); M_ExitOnError (err); } else { err = fsBTEndOfIterationErr; @@ -1019,7 +1019,7 @@ BTIterateRecords(FCB *filePtr, BTreeIterationOperation operation, BTreeIterator goto ErrorExit; } - err = GetNode(btreePtr, nodeNum, &node); + err = GetNode(btreePtr, nodeNum, 0, &node); M_ExitOnError(err); if ( ((NodeDescPtr)node.buffer)->kind != kBTLeafNode || @@ -1074,7 +1074,7 @@ BTIterateRecords(FCB *filePtr, BTreeIterationOperation operation, BTreeIterator M_ExitOnError(err); // Look up the left node - err = GetNode (btreePtr, nodeNum, &left); + err = GetNode (btreePtr, nodeNum, 0, &left); M_ExitOnError (err); // Look up the current node again @@ -1122,7 +1122,7 @@ 
BTIterateRecords(FCB *filePtr, BTreeIterationOperation operation, BTreeIterator nodeNum = ((NodeDescPtr)node.buffer)->fLink; if ( nodeNum > 0) { - err = GetNode(btreePtr, nodeNum, &right); + err = GetNode(btreePtr, nodeNum, 0, &right); M_ExitOnError(err); } else { err = fsBTEndOfIterationErr; @@ -1172,7 +1172,7 @@ BTIterateRecords(FCB *filePtr, BTreeIterationOperation operation, BTreeIterator nodeNum = ((NodeDescPtr)node.buffer)->fLink; if ( nodeNum > 0) { - err = GetNode(btreePtr, nodeNum, &right); + err = GetNode(btreePtr, nodeNum, 0, &right); M_ExitOnError(err); } else { err = fsBTEndOfIterationErr; @@ -1459,7 +1459,7 @@ OSStatus BTReplaceRecord (FCB *filePtr, { insertNodeNum = iterator->hint.nodeNum; - err = GetNode (btreePtr, insertNodeNum, &nodeRec); + err = GetNode (btreePtr, insertNodeNum, kGetNodeHint, &nodeRec); if( err == noErr ) { // XXXdbg @@ -1602,7 +1602,7 @@ BTUpdateRecord(FCB *filePtr, BTreeIterator *iterator, { insertNodeNum = iterator->hint.nodeNum; - err = GetNode (btreePtr, insertNodeNum, &nodeRec); + err = GetNode (btreePtr, insertNodeNum, kGetNodeHint, &nodeRec); if (err == noErr) { if (((NodeDescPtr)nodeRec.buffer)->kind == kBTLeafNode && @@ -1870,7 +1870,7 @@ BTReloadData(FCB *filePtr) REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); - err = GetNode(btreePtr, kHeaderNodeNum, &node); + err = GetNode(btreePtr, kHeaderNodeNum, 0, &node); if (err != noErr) return (err); @@ -2042,7 +2042,7 @@ BTGetUserData(FCB *filePtr, void * dataPtr, int dataSize) REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); - err = GetNode(btreePtr, kHeaderNodeNum, &node); + err = GetNode(btreePtr, kHeaderNodeNum, 0, &node); if (err) return (err); @@ -2080,7 +2080,7 @@ BTSetUserData(FCB *filePtr, void * dataPtr, int dataSize) REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); - err = GetNode(btreePtr, kHeaderNodeNum, &node); + err = GetNode(btreePtr, kHeaderNodeNum, 0, &node); if (err) return (err); diff --git a/bsd/hfs/hfscommon/BTree/BTreeAllocate.c 
b/bsd/hfs/hfscommon/BTree/BTreeAllocate.c index ff917113e..41adf8863 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeAllocate.c +++ b/bsd/hfs/hfscommon/BTree/BTreeAllocate.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003, 2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2003, 2005-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -451,7 +451,7 @@ OSStatus ExtendBTree (BTreeControlBlockPtr btreePtr, err = UpdateNode (btreePtr, &mapNode, 0, kLockTransaction); M_ExitOnError (err); - err = GetNode (btreePtr, nextNodeNum, &mapNode); + err = GetNode (btreePtr, nextNodeNum, 0, &mapNode); M_ExitOnError (err); // XXXdbg @@ -558,7 +558,7 @@ OSStatus GetMapNode (BTreeControlBlockPtr btreePtr, err = ReleaseNode (btreePtr, nodePtr); M_ExitOnError (err); - err = GetNode (btreePtr, nextNodeNum, nodePtr); + err = GetNode (btreePtr, nextNodeNum, 0, nodePtr); M_ExitOnError (err); if ( ((NodeDescPtr)nodePtr->buffer)->kind != kBTMapNode) @@ -570,7 +570,7 @@ OSStatus GetMapNode (BTreeControlBlockPtr btreePtr, ++btreePtr->numMapNodesRead; mapIndex = 0; } else { - err = GetNode (btreePtr, kHeaderNodeNum, nodePtr); + err = GetNode (btreePtr, kHeaderNodeNum, 0, nodePtr); M_ExitOnError (err); if ( ((NodeDescPtr)nodePtr->buffer)->kind != kBTHeaderNode) diff --git a/bsd/hfs/hfscommon/BTree/BTreeMiscOps.c b/bsd/hfs/hfscommon/BTree/BTreeMiscOps.c index 0e47310ee..0b1d26591 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeMiscOps.c +++ b/bsd/hfs/hfscommon/BTree/BTreeMiscOps.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003, 2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2003, 2005-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -248,7 +248,7 @@ OSStatus UpdateHeader(BTreeControlBlockPtr btreePtr, Boolean forceWrite) return noErr; - err = GetNode (btreePtr, kHeaderNodeNum, &node ); + err = GetNode (btreePtr, kHeaderNodeNum, 0, &node ); if (err != noErr) { return err; } @@ -356,7 +356,7 @@ OSStatus FindIteratorPosition (BTreeControlBlockPtr btreePtr, goto SearchTheTree; } - err = GetNode (btreePtr, nodeNum, middle); + err = GetNode (btreePtr, nodeNum, kGetNodeHint, middle); if( err == fsBTInvalidNodeErr ) // returned if nodeNum is out of range goto SearchTheTree; @@ -392,7 +392,7 @@ OSStatus FindIteratorPosition (BTreeControlBlockPtr btreePtr, M_ExitOnError(err); // Look up the left node - err = GetNode (btreePtr, nodeNum, left); + err = GetNode (btreePtr, nodeNum, 0, left); M_ExitOnError (err); // Look up the current node again diff --git a/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c b/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c index ab2962683..2db71479d 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c +++ b/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2002, 2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000, 2002, 2005-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -191,13 +191,15 @@ Output: nodePtr - pointer to beginning of node (nil if error) OSStatus GetNode (BTreeControlBlockPtr btreePtr, u_int32_t nodeNum, + u_int32_t flags, NodeRec *nodePtr ) { OSStatus err; GetBlockProcPtr getNodeProc; + u_int32_t options; - //�� is nodeNum within proper range? + // is nodeNum within proper range? 
if( nodeNum >= btreePtr->totalNodes ) { Panic("\pGetNode:nodeNum >= totalNodes"); @@ -206,17 +208,22 @@ OSStatus GetNode (BTreeControlBlockPtr btreePtr, } nodePtr->blockSize = btreePtr->nodeSize; // indicate the size of a node + + options = kGetBlock; + if ( flags & kGetNodeHint ) + { + options |= kGetBlockHint; + } getNodeProc = btreePtr->getBlockProc; err = getNodeProc (btreePtr->fileRefNum, nodeNum, - kGetBlock, + options, nodePtr ); if (err != noErr) { Panic ("\pGetNode: getNodeProc returned error."); - // nodePtr->buffer = nil; goto ErrorExit; } ++btreePtr->numGetNodes; diff --git a/bsd/hfs/hfscommon/BTree/BTreeScanner.c b/bsd/hfs/hfscommon/BTree/BTreeScanner.c index 35aeafc18..7eb4013d4 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeScanner.c +++ b/bsd/hfs/hfscommon/BTree/BTreeScanner.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1996-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -199,8 +199,11 @@ static int FindNextLeafNode( BTScanState *scanState, Boolean avoidIO ) fref = scanState->btcb->fileRefNum; - /* This node was read from disk, so it must be swapped/checked. */ - err = hfs_swap_BTNode(&block, fref, kSwapBTNodeBigToHost); + /* This node was read from disk, so it must be swapped/checked. + * Since we are reading multiple nodes, we might have read an + * unused node. Therefore we allow swapping of unused nodes. + */ + err = hfs_swap_BTNode(&block, fref, kSwapBTNodeBigToHost, true); if ( err != noErr ) { printf("FindNextLeafNode: Error from hfs_swap_BTNode (node %u)\n", scanState->nodeNum); continue; diff --git a/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c b/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c index 2aad0a7b1..97e308497 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c +++ b/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. 
All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -240,7 +240,7 @@ OSStatus SearchTree (BTreeControlBlockPtr btreePtr, goto ErrorExit; } - err = GetNode (btreePtr, curNodeNum, &nodeRec); + err = GetNode (btreePtr, curNodeNum, 0, &nodeRec); if (err != noErr) { goto ErrorExit; @@ -472,7 +472,7 @@ OSStatus InsertLevel (BTreeControlBlockPtr btreePtr, PanicIf ( parentNodeNum == 0, "\p InsertLevel: parent node is zero!?"); - err = GetNode (btreePtr, parentNodeNum, &parentNode); // released as target node in next level up + err = GetNode (btreePtr, parentNodeNum, 0, &parentNode); // released as target node in next level up M_ExitOnError (err); #if defined(applec) && !defined(__SC__) if (DEBUG_BUILD && level > 1) @@ -609,7 +609,7 @@ static OSErr InsertNode (BTreeControlBlockPtr btreePtr, if ( leftNode->buffer == nil ) { - err = GetNode (btreePtr, leftNodeNum, leftNode); // will be released by caller or a split below + err = GetNode (btreePtr, leftNodeNum, 0, leftNode); // will be released by caller or a split below M_ExitOnError (err); // XXXdbg ModifyBlockStart(btreePtr->fileRefNum, leftNode); @@ -730,7 +730,7 @@ OSStatus DeleteTree (BTreeControlBlockPtr btreePtr, siblingNodeNum = targetNodePtr->bLink; // Left Sibling Node if ( siblingNodeNum != 0 ) { - err = GetNode (btreePtr, siblingNodeNum, &siblingNode); + err = GetNode (btreePtr, siblingNodeNum, 0, &siblingNode); M_ExitOnError (err); // XXXdbg @@ -748,7 +748,7 @@ OSStatus DeleteTree (BTreeControlBlockPtr btreePtr, siblingNodeNum = targetNodePtr->fLink; // Right Sibling Node if ( siblingNodeNum != 0 ) { - err = GetNode (btreePtr, siblingNodeNum, &siblingNode); + err = GetNode (btreePtr, siblingNodeNum, 0, &siblingNode); M_ExitOnError (err); // XXXdbg @@ -803,7 +803,7 @@ OSStatus DeleteTree (BTreeControlBlockPtr btreePtr, //// Get Parent Node and index index = treePathTable [level].index; - err = GetNode (btreePtr, treePathTable[level].node, &parentNode); + err = GetNode (btreePtr, 
treePathTable[level].node, 0, &parentNode); M_ExitOnError (err); if ( updateRequired ) @@ -889,7 +889,7 @@ static OSStatus CollapseTree (BTreeControlBlockPtr btreePtr, M_ExitOnError (err); //// Get New Root Node - err = GetNode (btreePtr, btreePtr->rootNode, blockPtr); + err = GetNode (btreePtr, btreePtr->rootNode, 0, blockPtr); M_ExitOnError (err); // XXXdbg diff --git a/bsd/hfs/hfscommon/headers/BTreesInternal.h b/bsd/hfs/hfscommon/headers/BTreesInternal.h index a5f151953..4e2a1df12 100644 --- a/bsd/hfs/hfscommon/headers/BTreesInternal.h +++ b/bsd/hfs/hfscommon/headers/BTreesInternal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -144,6 +144,7 @@ typedef FSBufferDescriptor *FSBufferDescriptorPtr; */ enum { kGetBlock = 0x00000000, + kGetBlockHint = 0x00000001, // if set, the block is being looked up using hint kForceReadBlock = 0x00000002, //�� how does this relate to Read/Verify? Do we need this? kGetEmptyBlock = 0x00000008 }; diff --git a/bsd/hfs/hfscommon/headers/BTreesPrivate.h b/bsd/hfs/hfscommon/headers/BTreesPrivate.h index c5d5ef4ad..6b7a1eb03 100644 --- a/bsd/hfs/hfscommon/headers/BTreesPrivate.h +++ b/bsd/hfs/hfscommon/headers/BTreesPrivate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -369,19 +369,23 @@ extern OSStatus TreeIsDirty(BTreeControlBlockPtr btreePtr); OSStatus GetNode (BTreeControlBlockPtr btreePtr, u_int32_t nodeNum, + u_int32_t flags, NodeRec *returnNodePtr ); +/* Flags for GetNode() */ +#define kGetNodeHint 0x1 /* If set, the node is being looked up using a hint */ + OSStatus GetLeftSiblingNode (BTreeControlBlockPtr btreePtr, NodeDescPtr node, NodeRec *left ); -#define GetLeftSiblingNode(btree,node,left) GetNode ((btree), ((NodeDescPtr)(node))->bLink, (left)) +#define GetLeftSiblingNode(btree,node,left) GetNode ((btree), ((NodeDescPtr)(node))->bLink, 0, (left)) OSStatus GetRightSiblingNode (BTreeControlBlockPtr btreePtr, NodeDescPtr node, NodeRec *right ); -#define GetRightSiblingNode(btree,node,right) GetNode ((btree), ((NodeDescPtr)(node))->fLink, (right)) +#define GetRightSiblingNode(btree,node,right) GetNode ((btree), ((NodeDescPtr)(node))->fLink, 0, (right)) OSStatus GetNewNode (BTreeControlBlockPtr btreePtr, diff --git a/bsd/kern/kern_sysctl.c b/bsd/kern/kern_sysctl.c index 27f0e0906..82f89c2de 100644 --- a/bsd/kern/kern_sysctl.c +++ b/bsd/kern/kern_sysctl.c @@ -731,6 +731,8 @@ debug_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, /* all sysctl names at this level are name and field */ if (namelen != 2) return (ENOTDIR); /* overloaded */ + if (name[0] < 0 || name[0] >= CTL_DEBUG_MAXID) + return (ENOTSUP); cdp = debugvars[name[0]]; if (cdp->debugname == 0) return (ENOTSUP); diff --git a/bsd/kern/tty_ptmx.c b/bsd/kern/tty_ptmx.c index 4bd839801..0f05583d5 100644 --- a/bsd/kern/tty_ptmx.c +++ b/bsd/kern/tty_ptmx.c @@ -367,6 +367,16 @@ ptmx_get_ioctl(int minor, int open_flag) _state.pis_total += PTMX_GROW_VECTOR; if (old_pis_ioctl_list) FREE(old_pis_ioctl_list, M_TTYS); + } + + if (_state.pis_ioctl_list[minor] != NULL) { + ttyfree(new_ptmx_ioctl->pt_tty); + DEVFS_UNLOCK(); + FREE(new_ptmx_ioctl, M_TTYS); + + /* Special error value so we know to redrive the 
open, we've been raced */ + return (struct ptmx_ioctl*)-1; + } /* Vector is large enough; grab a new ptmx_ioctl */ @@ -419,8 +429,6 @@ ptmx_free_ioctl(int minor, int open_flag) if (!(_state.pis_ioctl_list[minor]->pt_flags & (PF_OPEN_M|PF_OPEN_S))) { /* Mark as free so it can be reallocated later */ old_ptmx_ioctl = _state.pis_ioctl_list[ minor]; - _state.pis_ioctl_list[ minor] = NULL; - _state.pis_free++; } DEVFS_UNLOCK(); @@ -436,6 +444,12 @@ ptmx_free_ioctl(int minor, int open_flag) devfs_remove(old_ptmx_ioctl->pt_devhandle); ttyfree(old_ptmx_ioctl->pt_tty); FREE(old_ptmx_ioctl, M_TTYS); + + /* Don't remove the entry until the devfs slot is free */ + DEVFS_LOCK(); + _state.pis_ioctl_list[ minor] = NULL; + _state.pis_free++; + DEVFS_UNLOCK(); } return (0); /* Success */ @@ -767,9 +781,11 @@ ptmx_open(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p) int error = 0; boolean_t funnel_state; - - if ((pti = ptmx_get_ioctl(minor(dev), PF_OPEN_M)) == NULL) { + pti = ptmx_get_ioctl(minor(dev), PF_OPEN_M); + if (pti == NULL) { return (ENXIO); + } else if (pti == (struct ptmx_ioctl*)-1) { + return (EREDRIVEOPEN); } tp = pti->pt_tty; diff --git a/bsd/kern/uipc_mbuf.c b/bsd/kern/uipc_mbuf.c index 38fe262e6..aba70cc66 100644 --- a/bsd/kern/uipc_mbuf.c +++ b/bsd/kern/uipc_mbuf.c @@ -3011,8 +3011,9 @@ m_copy_pkthdr(struct mbuf *to, struct mbuf *from) #endif /* MAC_NET */ to->m_pkthdr = from->m_pkthdr; /* especially tags */ m_tag_init(from); /* purge tags from src */ - to->m_flags = from->m_flags & M_COPYFLAGS; - to->m_data = (to)->m_pktdat; + to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); + if ((to->m_flags & M_EXT) == 0) + to->m_data = to->m_pktdat; } /* diff --git a/bsd/netinet/in_gif.c b/bsd/netinet/in_gif.c index 2076a730d..0c74fc181 100644 --- a/bsd/netinet/in_gif.c +++ b/bsd/netinet/in_gif.c @@ -93,6 +93,8 @@ #include +extern u_long route_generation; + int ip_gif_ttl = GIF_TTL; SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, 
CTLFLAG_RW, &ip_gif_ttl, 0, ""); @@ -189,7 +191,10 @@ in_gif_output( bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip)); if (dst->sin_family != sin_dst->sin_family || - dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr) { + dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr || + (sc->gif_ro.ro_rt != NULL && + (sc->gif_ro.ro_rt->generation_id != route_generation || + sc->gif_ro.ro_rt->rt_ifp == ifp))) { /* cache route doesn't match */ dst->sin_family = sin_dst->sin_family; dst->sin_len = sizeof(struct sockaddr_in); diff --git a/bsd/netinet/ip_divert.c b/bsd/netinet/ip_divert.c index 12d193d69..73ab3ea91 100644 --- a/bsd/netinet/ip_divert.c +++ b/bsd/netinet/ip_divert.c @@ -397,30 +397,6 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr, m->m_pkthdr.rcvif = ifa->ifa_ifp; ifafree(ifa); } - - if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) & - m->m_pkthdr.csum_flags) == 0) { - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } - m->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR | - CSUM_IP_CHECKED | CSUM_IP_VALID; - m->m_pkthdr.csum_data = 0xffff; - } - else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - int hlen; - -#ifdef _IP_VHL - hlen = IP_VHL_HL(ip->ip_vhl) << 2; -#else - hlen = ip->ip_hl << 2; -#endif - in_delayed_cksum(m); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - ip->ip_sum = in_cksum(m, hlen); - } - #if CONFIG_MACF_NET mac_mbuf_label_associate_socket(so, m); #endif diff --git a/bsd/netinet6/esp_input.c b/bsd/netinet6/esp_input.c index 1286b439f..ef9f0af80 100644 --- a/bsd/netinet6/esp_input.c +++ b/bsd/netinet6/esp_input.c @@ -354,7 +354,7 @@ esp4_input(m, off) /* strip off the trailing pad area. */ m_adj(m, -taillen); - + ip = mtod(m, struct ip *); #ifdef IPLEN_FLIPPED ip->ip_len = ip->ip_len - taillen; #else @@ -795,7 +795,7 @@ esp6_input(mp, offp) /* strip off the trailing pad area. 
*/ m_adj(m, -taillen); - + ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - taillen); /* was it transmitted over the IPsec tunnel SA? */ diff --git a/bsd/netinet6/in6_gif.c b/bsd/netinet6/in6_gif.c index 090d0ad31..029dd8810 100644 --- a/bsd/netinet6/in6_gif.c +++ b/bsd/netinet6/in6_gif.c @@ -67,6 +67,8 @@ #include +extern u_long route_generation; + static __inline__ void* _cast_non_const(const void * ptr) { union { @@ -172,7 +174,10 @@ in6_gif_output( ip6->ip6_flow |= htonl((u_int32_t)otos << 20); if (dst->sin6_family != sin6_dst->sin6_family || - !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr)) { + !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr) || + (sc->gif_ro6.ro_rt != NULL && + (sc->gif_ro6.ro_rt->generation_id != route_generation || + sc->gif_ro6.ro_rt->rt_ifp == ifp))) { /* cache route doesn't match */ bzero(dst, sizeof(*dst)); dst->sin6_family = sin6_dst->sin6_family; @@ -195,7 +200,7 @@ in6_gif_output( } /* if it constitutes infinite encapsulation, punt. */ - if (sc->gif_ro.ro_rt->rt_ifp == ifp) { + if (sc->gif_ro6.ro_rt->rt_ifp == ifp) { m_freem(m); return ENETUNREACH; /*XXX*/ } diff --git a/bsd/sys/errno.h b/bsd/sys/errno.h index 67aff41a1..ec42fa74d 100644 --- a/bsd/sys/errno.h +++ b/bsd/sys/errno.h @@ -258,5 +258,6 @@ __END_DECLS #define ERESTART (-1) /* restart syscall */ #define EJUSTRETURN (-2) /* don't modify regs, just return */ #define ERECYCLE (-5) /* restart lookup under heavy vnode pressure/recycling */ +#define EREDRIVEOPEN (-6) #endif #endif /* _SYS_ERRNO_H_ */ diff --git a/bsd/vfs/vfs_cache.c b/bsd/vfs/vfs_cache.c index c4e93ab8e..95a07d69a 100644 --- a/bsd/vfs/vfs_cache.c +++ b/bsd/vfs/vfs_cache.c @@ -303,33 +303,31 @@ build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs } /* Ask the file system for its parent id and for its name (optional). 
*/ ret = vnode_getattr(vp, &va, ctx); + if (fixhardlink) { - if (vp->v_name || VATTR_IS_SUPPORTED(&va, va_name)) { - if (ret == 0) { - str = va.va_name; - } else if (vp->v_name) { - str = vp->v_name; - ret = 0; - } else { - ret = ENOENT; - goto bad_news; - } + if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) { + str = va.va_name; + } else if (vp->v_name) { + str = vp->v_name; + ret = 0; + } else { + ret = ENOENT; + goto bad_news; + } + len = strlen(str); - len = strlen(str); - - /* Check that there's enough space. */ - if ((end - buff) < (len + 1)) { - ret = ENOSPC; - } else { - /* Copy the name backwards. */ - str += len; - - for (; len > 0; len--) { - *--end = *--str; - } - /* Add a path separator. */ - *--end = '/'; + /* Check that there's enough space. */ + if ((end - buff) < (len + 1)) { + ret = ENOSPC; + } else { + /* Copy the name backwards. */ + str += len; + + for (; len > 0; len--) { + *--end = *--str; } + /* Add a path separator. */ + *--end = '/'; } bad_news: FREE_ZONE(va.va_name, MAXPATHLEN, M_NAMEI); @@ -1644,7 +1642,7 @@ cache_purge(vnode_t vp) struct namecache *ncp; kauth_cred_t tcred = NULL; - if ((LIST_FIRST(&vp->v_nclinks) == NULL) && (LIST_FIRST(&vp->v_ncchildren) == NULL)) + if ((LIST_FIRST(&vp->v_nclinks) == NULL) && (LIST_FIRST(&vp->v_ncchildren) == NULL) && (vp->v_cred == NOCRED)) return; NAME_CACHE_LOCK(); diff --git a/bsd/vfs/vfs_cluster.c b/bsd/vfs/vfs_cluster.c index 891852d26..3a34e1787 100644 --- a/bsd/vfs/vfs_cluster.c +++ b/bsd/vfs/vfs_cluster.c @@ -2423,7 +2423,6 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old long long zero_cnt1; off_t zero_off1; struct cl_extent cl; - int intersection; struct cl_writebehind *wbp; int bflag; u_int max_cluster_pgcount; @@ -2604,7 +2603,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old * to release the rest of the pages in the upl without modifying * there state and mark the failed page in error */ - ubc_upl_abort_range(upl, 
0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES); + ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY); if (upl_size > PAGE_SIZE) ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY); @@ -2640,7 +2639,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old * need to release the rest of the pages in the upl without * modifying there state and mark the failed page in error */ - ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES); + ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY); if (upl_size > PAGE_SIZE) ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY); @@ -2745,6 +2744,33 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old */ cluster_zero(upl, io_size, upl_size - io_size, NULL); } + /* + * release the upl now if we hold one since... + * 1) pages in it may be present in the sparse cluster map + * and may span 2 separate buckets there... if they do and + * we happen to have to flush a bucket to make room and it intersects + * this upl, a deadlock may result on page BUSY + * 2) we're delaying the I/O... from this point forward we're just updating + * the cluster state... no need to hold the pages, so commit them + * 3) IO_SYNC is set... + * because we had to ask for a UPL that provides currenty non-present pages, the + * UPL has been automatically set to clear the dirty flags (both software and hardware) + * upon committing it... this is not the behavior we want since it's possible for + * pages currently present as part of a mapped file to be dirtied while the I/O is in flight. + * we'll pick these pages back up later with the correct behavior specified. + * 4) we don't want to hold pages busy in a UPL and then block on the cluster lock... 
if a flush + * of this vnode is in progress, we will deadlock if the pages being flushed intersect the pages + * we hold since the flushing context is holding the cluster lock. + */ + ubc_upl_commit_range(upl, 0, upl_size, + UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); +check_cluster: + /* + * calculate the last logical block number + * that this delayed I/O encompassed + */ + cl.e_addr = (daddr64_t)((upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64); + if (flags & IO_SYNC) /* * if the IO_SYNC flag is set than we need to @@ -2752,35 +2778,20 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old * the I/O */ goto issue_io; -check_cluster: + /* * take the lock to protect our accesses * of the writebehind and sparse cluster state */ wbp = cluster_get_wbp(vp, CLW_ALLOCATE | CLW_RETURNLOCKED); - /* - * calculate the last logical block number - * that this delayed I/O encompassed - */ - cl.e_addr = (daddr64_t)((upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64); - if (wbp->cl_scmap) { if ( !(flags & IO_NOCACHE)) { /* * we've fallen into the sparse * cluster method of delaying dirty pages - * first, we need to release the upl if we hold one - * since pages in it may be present in the sparse cluster map - * and may span 2 separate buckets there... if they do and - * we happen to have to flush a bucket to make room and it intersects - * this upl, a deadlock may result on page BUSY */ - if (upl_size) - ubc_upl_commit_range(upl, 0, upl_size, - UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); - sparse_cluster_add(wbp, vp, &cl, newEOF, callback, callback_arg); lck_mtx_unlock(&wbp->cl_lockw); @@ -2793,21 +2804,10 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old * to uncached writes on the file, so go ahead * and push whatever's in the sparse map * and switch back to normal clustering - * - * see the comment above concerning a possible deadlock... 
*/ - if (upl_size) { - ubc_upl_commit_range(upl, 0, upl_size, - UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); - /* - * setting upl_size to 0 keeps us from committing a - * second time in the start_new_cluster path - */ - upl_size = 0; - } - sparse_cluster_push(wbp, vp, newEOF, PUSH_ALL, callback, callback_arg); - wbp->cl_number = 0; + + sparse_cluster_push(wbp, vp, newEOF, PUSH_ALL, callback, callback_arg); /* * no clusters of either type present at this point * so just go directly to start_new_cluster since @@ -2817,8 +2817,6 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old */ goto start_new_cluster; } - upl_offset = 0; - if (wbp->cl_number == 0) /* * no clusters currently present @@ -2862,21 +2860,6 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old */ wbp->cl_clusters[cl_index].e_addr = wbp->cl_clusters[cl_index].b_addr + max_cluster_pgcount; - if (upl_size) { - daddr64_t start_pg_in_upl; - - start_pg_in_upl = (daddr64_t)(upl_f_offset / PAGE_SIZE_64); - - if (start_pg_in_upl < wbp->cl_clusters[cl_index].e_addr) { - intersection = (int)((wbp->cl_clusters[cl_index].e_addr - start_pg_in_upl) * PAGE_SIZE); - - ubc_upl_commit_range(upl, upl_offset, intersection, - UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); - upl_f_offset += intersection; - upl_offset += intersection; - upl_size -= intersection; - } - } cl.b_addr = wbp->cl_clusters[cl_index].e_addr; } /* @@ -2930,21 +2913,6 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old */ wbp->cl_clusters[cl_index].b_addr = wbp->cl_clusters[cl_index].e_addr - max_cluster_pgcount; - if (upl_size) { - intersection = (int)((cl.e_addr - wbp->cl_clusters[cl_index].b_addr) * PAGE_SIZE); - - if ((u_int)intersection > upl_size) - /* - * because the current write may consist of a number of pages found in the cache - * which are not part of the UPL, we may have an intersection that 
exceeds - * the size of the UPL that is also part of this write - */ - intersection = upl_size; - - ubc_upl_commit_range(upl, upl_offset + (upl_size - intersection), intersection, - UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); - upl_size -= intersection; - } cl.e_addr = wbp->cl_clusters[cl_index].b_addr; } /* @@ -2999,16 +2967,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old * no more room in the normal cluster mechanism * so let's switch to the more expansive but expensive * sparse mechanism.... - * first, we need to release the upl if we hold one - * since pages in it may be present in the sparse cluster map (after the cluster_switch) - * and may span 2 separate buckets there... if they do and - * we happen to have to flush a bucket to make room and it intersects - * this upl, a deadlock may result on page BUSY */ - if (upl_size) - ubc_upl_commit_range(upl, upl_offset, upl_size, - UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); - sparse_cluster_switch(wbp, vp, newEOF, callback, callback_arg); sparse_cluster_add(wbp, vp, &cl, newEOF, callback, callback_arg); @@ -3042,33 +3001,19 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old wbp->cl_number++; delay_io: - if (upl_size) - ubc_upl_commit_range(upl, upl_offset, upl_size, - UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); - lck_mtx_unlock(&wbp->cl_lockw); continue; issue_io: /* - * we don't hold the vnode lock at this point + * we don't hold the lock at this point * - * because we had to ask for a UPL that provides currenty non-present pages, the - * UPL has been automatically set to clear the dirty flags (both software and hardware) - * upon committing it... this is not the behavior we want since it's possible for - * pages currently present as part of a mapped file to be dirtied while the I/O is in flight. 
- * in order to maintain some semblance of coherency with mapped writes - * we need to drop the current upl and pick it back up with COPYOUT_FROM set + * we've already dropped the current upl, so pick it back up with COPYOUT_FROM set * so that we correctly deal with a change in state of the hardware modify bit... * we do this via cluster_push_now... by passing along the IO_SYNC flag, we force * cluster_push_now to wait until all the I/Os have completed... cluster_push_now is also * responsible for generating the correct sized I/O(s) */ - ubc_upl_commit_range(upl, 0, upl_size, - UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); - - cl.e_addr = (upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64; - retval = cluster_push_now(vp, &cl, newEOF, flags, callback, callback_arg); } } @@ -4646,19 +4591,6 @@ cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_fla goto dont_try; } } - /* - * drop the lock while we're firing off the I/Os... - * this is safe since I'm working off of a private sorted copy - * of the clusters, and I'm going to re-evaluate the public - * state after I retake the lock - * - * we need to drop it to avoid a lock inversion when trying to - * grab pages into the UPL... another thread in 'write' may - * have these pages in its UPL and be blocked trying to - * gain the write-behind lock for this vnode - */ - lck_mtx_unlock(&wbp->cl_lockw); - for (cl_index = 0; cl_index < cl_len; cl_index++) { int flags; struct cl_extent cl; @@ -4690,8 +4622,6 @@ cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_fla if ( !(push_flag & PUSH_ALL) ) break; } - lck_mtx_lock(&wbp->cl_lockw); - dont_try: if (cl_len > cl_pushed) { /* @@ -4979,23 +4909,8 @@ sparse_cluster_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_ wbp->cl_scdirty -= (int)(cl.e_addr - cl.b_addr); - /* - * drop the lock while we're firing off the I/Os... 
- * this is safe since I've already updated the state - * this lock is protecting and I'm going to re-evaluate - * the public state after I retake the lock - * - * we need to drop it to avoid a lock inversion when trying to - * grab pages into the UPL... another thread in 'write' may - * have these pages in its UPL and be blocked trying to - * gain the write-behind lock for this vnode - */ - lck_mtx_unlock(&wbp->cl_lockw); - cluster_push_now(vp, &cl, EOF, push_flag & IO_PASSIVE, callback, callback_arg); - lck_mtx_lock(&wbp->cl_lockw); - if ( !(push_flag & PUSH_ALL) ) break; } diff --git a/bsd/vfs/vfs_journal.c b/bsd/vfs/vfs_journal.c index 8299bfd33..bff33c625 100644 --- a/bsd/vfs/vfs_journal.c +++ b/bsd/vfs/vfs_journal.c @@ -2082,7 +2082,7 @@ check_free_space(journal *jnl, int desired_size) lcl_counter = 0; while (jnl->old_start[i] & 0x8000000000000000LL) { - if (lcl_counter++ > 100) { + if (lcl_counter++ > 1000) { panic("jnl: check_free_space: tr starting @ 0x%llx not flushing (jnl %p).\n", jnl->old_start[i], jnl); } diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index ca6d9e6b1..482fb8c46 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -3265,7 +3265,6 @@ new_vnode(vnode_t *vpp) struct timeval current_tv; struct unsafe_fsnode *l_unsafefs = 0; proc_t curproc = current_proc(); - pid_t current_pid = proc_pid(curproc); retry: microuptime(&current_tv); @@ -3315,11 +3314,11 @@ new_vnode(vnode_t *vpp) if ( !(vp->v_listflag & VLIST_RAGE) || !(vp->v_flag & VRAGE)) panic("new_vnode: vp on RAGE list not marked both VLIST_RAGE and VRAGE"); - // skip vnodes which have a dependency on this process - // (i.e. they're vnodes in a disk image and this process - // is diskimages-helper) + // if we're a dependency-capable process, skip vnodes that can + // cause recycling deadlocks. (i.e. this process is diskimages + // helper and the vnode is in a disk image).
// - if (vp->v_mount && vp->v_mount->mnt_dependent_pid != current_pid && vp->v_mount->mnt_dependent_process != curproc) { + if ((curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 || vp->v_mount == NULL || vp->v_mount->mnt_dependent_process == NULL) { break; } @@ -3339,11 +3338,11 @@ new_vnode(vnode_t *vpp) */ walk_count = 0; TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) { - // skip vnodes which have a dependency on this process - // (i.e. they're vnodes in a disk image and this process - // is diskimages-helper) + // if we're a dependency-capable process, skip vnodes that can + // cause recycling deadlocks. (i.e. this process is diskimages + // helper and the vnode is in a disk image) // - if (vp->v_mount && vp->v_mount->mnt_dependent_pid != current_pid && vp->v_mount->mnt_dependent_process != curproc) { + if ((curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 || vp->v_mount == NULL || vp->v_mount->mnt_dependent_process == NULL) { break; } diff --git a/bsd/vfs/vfs_vnops.c b/bsd/vfs/vfs_vnops.c index c7e566bab..4dafffdf3 100644 --- a/bsd/vfs/vfs_vnops.c +++ b/bsd/vfs/vfs_vnops.c @@ -427,8 +427,10 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) * Check for a race against unlink. We had a vnode * but according to vnode_authorize or VNOP_OPEN it * no longer exists. + * + * EREDRIVEOPEN: means that we were hit by the tty allocation race. */ - if ((error == ENOENT) && (*fmodep & O_CREAT)) { + if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN)) { goto again; } } diff --git a/config/MasterVersion b/config/MasterVersion index 2c14c9c7c..64db93b87 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -9.4.0 +9.5.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. 
diff --git a/osfmk/i386/AT386/model_dep.c b/osfmk/i386/AT386/model_dep.c index 6743dc70b..bd0182888 100644 --- a/osfmk/i386/AT386/model_dep.c +++ b/osfmk/i386/AT386/model_dep.c @@ -714,7 +714,7 @@ Debugger( __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); /* Print backtrace - callee is internally synchronized */ - panic_i386_backtrace(stackptr, 16); + panic_i386_backtrace(stackptr, 16, NULL, FALSE, NULL); /* everything should be printed now so copy to NVRAM */ @@ -725,6 +725,7 @@ Debugger( */ if (commit_paniclog_to_nvram) { unsigned int bufpos; + uintptr_t cr0; debug_putc(0); @@ -749,8 +750,17 @@ Debugger( * since we can subsequently halt the system. */ kprintf("Attempting to commit panic log to NVRAM\n"); +/* The following sequence is a workaround for: + * SnowLeopard10A67: AppleEFINVRAM should not invoke + * any routines that use floating point (MMX in this case) when saving panic + * logs to nvram/flash. + */ + cr0 = get_cr0(); + clear_ts(); + pi_size = PESavePanicInfo((unsigned char *)debug_buf, pi_size ); + set_cr0(cr0); /* Uncompress in-place, to permit examination of * the panic log by debuggers. 
@@ -939,10 +949,11 @@ panic_print_symbol_name(vm_address_t search) #define DUMPFRAMES 32 #define PBT_TIMEOUT_CYCLES (5 * 1000 * 1000 * 1000ULL) void -panic_i386_backtrace(void *_frame, int nframes) +panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdump, x86_saved_state_t *regs) { cframe_t *frame = (cframe_t *)_frame; vm_offset_t raddrs[DUMPFRAMES]; + vm_offset_t PC = 0; int frame_index; volatile uint32_t *ppbtcnt = &pbtcnt; uint64_t bt_tsc_timeout; @@ -959,8 +970,25 @@ panic_i386_backtrace(void *_frame, int nframes) PE_parse_boot_arg("keepsyms", &keepsyms); - kdb_printf("Backtrace, " - "Format - Frame : Return Address (4 potential args on stack) \n"); + if (msg != NULL) { + kdb_printf(msg); + } + + if ((regdump == TRUE) && (regs != NULL)) { + x86_saved_state32_t *ss32p = saved_state32(regs); + + kdb_printf( + "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" + "CR2: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" + "EFL: 0x%08x, EIP: 0x%08x, CS: 0x%08x, DS: 0x%08x\n", + ss32p->eax,ss32p->ebx,ss32p->ecx,ss32p->edx, + ss32p->cr2,ss32p->ebp,ss32p->esi,ss32p->edi, + ss32p->efl,ss32p->eip,ss32p->cs, ss32p->ds); + PC = ss32p->eip; + } + + kdb_printf("Backtrace (CPU %d), " + "Frame : Return Address (4 potential args on stack)\n", cpu_number()); for (frame_index = 0; frame_index < nframes; frame_index++) { vm_offset_t curframep = (vm_offset_t) frame; @@ -1020,6 +1048,9 @@ panic_i386_backtrace(void *_frame, int nframes) if (frame_index) kmod_dump((vm_offset_t *)&raddrs[0], frame_index); + if (PC != 0) + kmod_dump(&PC, 1); + panic_display_system_configuration(); /* Release print backtrace lock, to permit other callers in the * event of panics on multiple processors. 
diff --git a/osfmk/i386/i386_init.c b/osfmk/i386/i386_init.c index 2c0df6147..67f0f2803 100644 --- a/osfmk/i386/i386_init.c +++ b/osfmk/i386/i386_init.c @@ -120,6 +120,7 @@ i386_init(vm_offset_t boot_args_start) uint64_t maxmemtouse; unsigned int cpus; boolean_t legacy_mode; + boolean_t fidn; postcode(I386_INIT_ENTRY); @@ -187,8 +188,10 @@ i386_init(vm_offset_t boot_args_start) if (!PE_parse_boot_arg("himemory_mode", &vm_himemory_mode)) vm_himemory_mode = 0; - if (!PE_parse_boot_arg("immediate_NMI", &force_immediate_debugger_NMI)) + if (!PE_parse_boot_argn("immediate_NMI", &fidn, sizeof (fidn))) force_immediate_debugger_NMI = FALSE; + else + force_immediate_debugger_NMI = fidn; /* * At this point we check whether we are a 64-bit processor diff --git a/osfmk/i386/locore.s b/osfmk/i386/locore.s index ac1bb007a..9fa0eb36f 100644 --- a/osfmk/i386/locore.s +++ b/osfmk/i386/locore.s @@ -141,13 +141,19 @@ call EXT(fn) ;\ movl %edi, %esp -#define CCALL3(fn, arg1, arg2, arg3) \ +/* + * CCALL5 is used for callee functions with 3 arguments but + * where arg2 (a3:a2) and arg3 (a5:a4) are 64-bit values. + */ +#define CCALL5(fn, a1, a2, a3, a4, a5) \ movl %esp, %edi ;\ - subl $12, %esp ;\ + subl $20, %esp ;\ andl $0xFFFFFFF0, %esp ;\ - movl arg3, 8(%esp) ;\ - movl arg2, 4(%esp) ;\ - movl arg1, 0(%esp) ;\ + movl a5, 16(%esp) ;\ + movl a4, 12(%esp) ;\ + movl a3, 8(%esp) ;\ + movl a2, 4(%esp) ;\ + movl a1, 0(%esp) ;\ call EXT(fn) ;\ movl %edi, %esp @@ -297,13 +303,13 @@ Entry(timer_grab) * Update time on user trap entry. * Uses %eax,%ebx,%ecx,%edx,%esi,%edi. */ -#define TIME_TRAP_UENTRY TIMER_EVENT(USER,SYSTEM) +#define TIME_TRAP_UENTRY TIMER_EVENT(USER,SYSTEM) /* * update time on user trap exit. * Uses %eax,%ebx,%ecx,%edx,%esi,%edi. */ -#define TIME_TRAP_UEXIT TIMER_EVENT(SYSTEM,USER) +#define TIME_TRAP_UEXIT TIMER_EVENT(SYSTEM,USER) /* * update time on interrupt entry. 
@@ -926,7 +932,7 @@ Entry(lo_diag_scall) popl %esp // Get back the original stack jmp EXT(return_to_user) // Normal return, do not check asts... 2: - CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1) + CCALL5(i386_exception, $EXC_SYSCALL, $0x6000, $0, $1, $0) // pass what would be the diag syscall // error return - cause an exception /* no return */ @@ -950,6 +956,8 @@ Entry(lo_diag_scall) */ Entry(lo_syscall) + TIME_TRAP_UENTRY + /* * We can be here either for a mach, unix machdep or diag syscall, * as indicated by the syscall class: @@ -972,13 +980,11 @@ Entry(lo_syscall) sti /* Syscall class unknown */ - CCALL3(i386_exception, $(EXC_SYSCALL), %eax, $1) + CCALL5(i386_exception, $(EXC_SYSCALL), %eax, $0, $1, $0) /* no return */ Entry(lo64_unix_scall) - TIME_TRAP_UENTRY - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ movl ACT_TASK(%ecx),%ebx /* point to current task */ addl $1,TASK_SYSCALLS_UNIX(%ebx) /* increment call count */ @@ -1007,8 +1013,6 @@ Entry(lo64_unix_scall) Entry(lo64_mach_scall) - TIME_TRAP_UENTRY - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ movl ACT_TASK(%ecx),%ebx /* point to current task */ addl $1,TASK_SYSCALLS_MACH(%ebx) /* increment call count */ @@ -1037,8 +1041,6 @@ Entry(lo64_mach_scall) Entry(lo64_mdep_scall) - TIME_TRAP_UENTRY - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ movl ACT_TASK(%ecx),%ebx /* point to current task */ @@ -1066,8 +1068,6 @@ Entry(lo64_mdep_scall) Entry(lo64_diag_scall) - TIME_TRAP_UENTRY - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ movl ACT_TASK(%ecx),%ebx /* point to current task */ @@ -1094,7 +1094,7 @@ Entry(lo64_diag_scall) popl %esp // Get back the original stack jmp EXT(return_to_user) // Normal return, do not check asts... 
2: - CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1) + CCALL5(i386_exception, $EXC_SYSCALL, $0x6000, $0, $1, $0) // pass what would be the diag syscall // error return - cause an exception /* no return */ diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c index fb7afc4d0..23ce61860 100644 --- a/osfmk/i386/mp.c +++ b/osfmk/i386/mp.c @@ -126,10 +126,10 @@ static void mp_broadcast_action(void); static int NMIInterruptHandler(x86_saved_state_t *regs); static boolean_t cpu_signal_pending(int cpu, mp_event_t event); -static void cpu_NMI_interrupt(int cpu); boolean_t smp_initialized = FALSE; -boolean_t force_immediate_debugger_NMI = FALSE; +volatile boolean_t force_immediate_debugger_NMI = FALSE; +volatile boolean_t pmap_tlb_flush_timeout = FALSE; decl_simple_lock_data(,mp_kdp_lock); @@ -931,10 +931,22 @@ cpu_signal_handler(x86_saved_state_t *regs) static int __attribute__((noinline)) NMIInterruptHandler(x86_saved_state_t *regs) { - boolean_t state = ml_set_interrupts_enabled(FALSE); + void *stackptr; + sync_iss_to_iks_unconditionally(regs); + __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); + + if (pmap_tlb_flush_timeout == TRUE && current_cpu_datap()->cpu_tlb_invalid) { + panic_i386_backtrace(stackptr, 10, "Panic: Unresponsive processor\n", TRUE, regs); + panic_io_port_read(); + mca_check_save(); + if (pmsafe_debug) + pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); + for(;;) { + cpu_pause(); + } + } mp_kdp_wait(FALSE); - (void) ml_set_interrupts_enabled(state); return 1; } @@ -1003,7 +1015,7 @@ cpu_interrupt(int cpu) /* * Send a true NMI via the local APIC to the specified CPU.
*/ -static void +void cpu_NMI_interrupt(int cpu) { boolean_t state; diff --git a/osfmk/i386/mp.h b/osfmk/i386/mp.h index 062d2a488..99ba34fe2 100644 --- a/osfmk/i386/mp.h +++ b/osfmk/i386/mp.h @@ -192,7 +192,8 @@ extern int kdb_debug; extern int kdb_active[]; extern volatile boolean_t mp_kdp_trap; -extern boolean_t force_immediate_debugger_NMI; +extern volatile boolean_t force_immediate_debugger_NMI; +extern volatile boolean_t pmap_tlb_flush_timeout; extern void mp_kdp_enter(void); extern void mp_kdp_exit(void); diff --git a/osfmk/i386/mp_events.h b/osfmk/i386/mp_events.h index 43257779a..0da1d98c0 100644 --- a/osfmk/i386/mp_events.h +++ b/osfmk/i386/mp_events.h @@ -72,6 +72,7 @@ extern void i386_signal_cpus(mp_event_t event, mp_sync_t mode); extern int i386_active_cpus(void); extern void i386_activate_cpu(void); extern void i386_deactivate_cpu(void); +extern void cpu_NMI_interrupt(int /* cpu */); __END_DECLS diff --git a/osfmk/i386/pmap.c b/osfmk/i386/pmap.c index 72ecf5f76..b83947193 100644 --- a/osfmk/i386/pmap.c +++ b/osfmk/i386/pmap.c @@ -4493,6 +4493,20 @@ vm_offset_t pmap_high_map(pt_entry_t pte, enum high_cpu_types e) return vaddr; } +static inline void +pmap_cpuset_NMIPI(cpu_set cpu_mask) { + unsigned int cpu, cpu_bit; + uint64_t deadline; + + for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { + if (cpu_mask & cpu_bit) + cpu_NMI_interrupt(cpu); + } + deadline = mach_absolute_time() + (LockTimeOut >> 2); + while (mach_absolute_time() < deadline) + cpu_pause(); +} + /* * Called with pmap locked, we: @@ -4551,28 +4565,33 @@ pmap_flush_tlbs(pmap_t pmap) (int) pmap, cpus_to_signal, flush_self, 0, 0); if (cpus_to_signal) { + cpu_set cpus_to_respond = cpus_to_signal; + deadline = mach_absolute_time() + LockTimeOut; /* * Wait for those other cpus to acknowledge */ - for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { - while ((cpus_to_signal & cpu_bit) != 0) { - if (!cpu_datap(cpu)->cpu_running || - 
cpu_datap(cpu)->cpu_tlb_invalid == FALSE || - !CPU_CR3_IS_ACTIVE(cpu)) { - cpus_to_signal &= ~cpu_bit; - break; - } - if (mach_absolute_time() > deadline) { - force_immediate_debugger_NMI = TRUE; - panic("pmap_flush_tlbs() timeout: " - "cpu %d failing to respond to interrupts, pmap=%p cpus_to_signal=%lx", - cpu, pmap, cpus_to_signal); + while (cpus_to_respond != 0) { + if (mach_absolute_time() > deadline) { + pmap_tlb_flush_timeout = TRUE; + pmap_cpuset_NMIPI(cpus_to_respond); + panic("pmap_flush_tlbs() timeout: " + "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx", + pmap, cpus_to_respond); + } + + for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { + if ((cpus_to_respond & cpu_bit) != 0) { + if (!cpu_datap(cpu)->cpu_running || + cpu_datap(cpu)->cpu_tlb_invalid == FALSE || + !CPU_CR3_IS_ACTIVE(cpu)) { + cpus_to_respond &= ~cpu_bit; + } + cpu_pause(); } - cpu_pause(); + if (cpus_to_respond == 0) + break; } - if (cpus_to_signal == 0) - break; } } @@ -4585,7 +4604,6 @@ pmap_flush_tlbs(pmap_t pmap) if (flush_self) flush_tlb(); - PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END, (int) pmap, cpus_to_signal, flush_self, 0, 0); } diff --git a/osfmk/i386/trap.c b/osfmk/i386/trap.c index bec974a5d..d65419a23 100644 --- a/osfmk/i386/trap.c +++ b/osfmk/i386/trap.c @@ -735,7 +735,7 @@ panic_double_fault( /* * Print backtrace leading to first fault: */ - panic_i386_backtrace((void *) my_ktss->ebp, 10); + panic_i386_backtrace((void *) my_ktss->ebp, 10, NULL, FALSE, NULL); #endif panic("Double fault at 0x%08x, thread:%p, code:0x%x, " diff --git a/osfmk/i386/trap.h b/osfmk/i386/trap.h index 2488dd2a8..9ae4a8b5f 100644 --- a/osfmk/i386/trap.h +++ b/osfmk/i386/trap.h @@ -145,7 +145,7 @@ extern perfCallback perfTrapHook; extern perfCallback perfASTHook; extern perfCallback perfIntHook; -extern void panic_i386_backtrace(void *, int); +extern void panic_i386_backtrace(void *, int, const char *, boolean_t, x86_saved_state_t *); #if MACH_KDP 
extern boolean_t kdp_i386_trap( unsigned int, diff --git a/osfmk/vm/bsd_vm.c b/osfmk/vm/bsd_vm.c index b02804825..e9af2b6ee 100644 --- a/osfmk/vm/bsd_vm.c +++ b/osfmk/vm/bsd_vm.c @@ -1026,7 +1026,7 @@ int fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t *vnodeaddr, uint32_t *vid) { - vm_map_t map = task->map; + vm_map_t map; vm_map_offset_t address = (vm_map_offset_t )arg; vm_map_entry_t tmp_entry; vm_map_entry_t entry; @@ -1034,16 +1034,23 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal * vm_region_extended_info_data_t extended; vm_region_top_info_data_t top; - - if (map == VM_MAP_NULL) - return(0); - + task_lock(task); + map = task->map; + if (map == VM_MAP_NULL) + { + task_unlock(task); + return(0); + } + vm_map_reference(map); + task_unlock(task); + vm_map_lock_read(map); start = address; if (!vm_map_lookup_entry(map, start, &tmp_entry)) { if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { vm_map_unlock_read(map); + vm_map_deallocate(map); return(0); } } else { @@ -1108,11 +1115,13 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal * if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) { vm_map_unlock_read(map); + vm_map_deallocate(map); return(1); } } vm_map_unlock_read(map); + vm_map_deallocate(map); return(1); } diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c index a573d49ea..680c07f12 100644 --- a/osfmk/vm/vm_object.c +++ b/osfmk/vm/vm_object.c @@ -1914,7 +1914,7 @@ vm_object_pmap_protect( for (phys_addr = phys_start; phys_addr < phys_end; phys_addr += PAGE_SIZE_64) { - pmap_page_protect(phys_addr >> 12, prot); + pmap_page_protect(phys_addr >> PAGE_SHIFT, prot); } } return; @@ -4766,7 +4766,7 @@ vm_object_populate_with_private( /* shadows on contiguous memory are not allowed */ /* we therefore can use the offset field */ - object->shadow_offset = (vm_object_offset_t)(phys_page << 12); + object->shadow_offset = 
(vm_object_offset_t)phys_page << PAGE_SHIFT; object->size = size; } vm_object_unlock(object); @@ -6195,7 +6195,7 @@ vm_object_page_op( if(object->phys_contiguous) { if (phys_entry) { *phys_entry = (ppnum_t) - (object->shadow_offset >> 12); + (object->shadow_offset >> PAGE_SHIFT); } vm_object_unlock(object); return KERN_SUCCESS; diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c index 313549ecf..b92e35ae0 100644 --- a/osfmk/vm/vm_resident.c +++ b/osfmk/vm/vm_resident.c @@ -2164,18 +2164,23 @@ vm_page_free_list( nxt = (vm_page_t)(mem->pageq.next); if (!mem->fictitious) { - mem->free = TRUE; + if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) { + mem->pageq.next = NULL; + vm_page_release(mem); + } else { + mem->free = TRUE; - color = mem->phys_page & vm_color_mask; - if (queue_empty(&free_list[color])) { - inuse[color] = inuse_list_head; - inuse_list_head = color; + color = mem->phys_page & vm_color_mask; + if (queue_empty(&free_list[color])) { + inuse[color] = inuse_list_head; + inuse_list_head = color; + } + queue_enter_first(&free_list[color], + mem, + vm_page_t, + pageq); + pg_count++; } - queue_enter_first(&free_list[color], - mem, - vm_page_t, - pageq); - pg_count++; } else { assert(mem->phys_page == vm_page_fictitious_addr || mem->phys_page == vm_page_guard_addr);