xnu-1228.15.4
Darwin authored and das committed Jun 4, 2017
1 parent 4a63d54 commit 1c19685
Showing 28 changed files with 842 additions and 72 deletions.
28 changes: 28 additions & 0 deletions bsd/hfs/hfs.h
@@ -46,6 +46,7 @@
#include <sys/quota.h>
#include <sys/dirent.h>
#include <sys/event.h>
#include <kern/thread_call.h>

#include <kern/locks.h>

@@ -272,8 +273,34 @@ typedef struct hfsmount {
/* Resize variables: */
u_int32_t hfs_resize_filesmoved;
u_int32_t hfs_resize_totalfiles;

/*
* About the sync counters:
* hfs_sync_scheduled keeps track of whether a timer was scheduled but we
* haven't started processing the callback (i.e. we
* haven't begun the flush). It remains non-zero even
* after the callback has been invoked, until we start
* the flush.
* hfs_sync_incomplete keeps track of the number of callbacks that have
* not completed yet (including callbacks not yet
* invoked). We cannot safely unmount until this
* drops to zero.
*
* In both cases, we use counters, not flags, so that we can avoid
* taking locks.
*/
int32_t hfs_sync_scheduled;
int32_t hfs_sync_incomplete;
u_int64_t hfs_last_sync_request_time;
u_int64_t hfs_last_sync_time;
uint32_t hfs_active_threads;
thread_call_t hfs_syncer; // removable devices get synced by this thread call

} hfsmount_t;

#define HFS_META_DELAY (100)
#define HFS_MILLISEC_SCALE (1000*1000)

typedef hfsmount_t ExtendedVCB;

/* Aliases for legacy (Mac OS 9) field names */
@@ -689,6 +716,7 @@ extern int hfs_virtualmetafile(struct cnode *);

extern int hfs_start_transaction(struct hfsmount *hfsmp);
extern int hfs_end_transaction(struct hfsmount *hfsmp);
extern void hfs_sync_ejectable(struct hfsmount *hfsmp);


/*****************************************************************************
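The lock-free handshake described by the new sync counters in hfs.h can be modeled in isolation. The sketch below is an illustrative userspace analogue using C11 atomics, not code from this commit; timer_enter_delayed() is a hypothetical stand-in for thread_call_enter_delayed(), assumed to return true when a call was already queued.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int sync_scheduled;   /* timer armed, flush not yet started  */
static atomic_int sync_incomplete;  /* callbacks owed but not yet finished */

/* hypothetical stand-in: arms the timer, returns true if already pending */
static bool timer_enter_delayed(void) { return false; }

static void schedule_sync(void)     /* analogue of hfs_sync_ejectable() */
{
    if (atomic_load(&sync_scheduled) == 0) {
        /* optimistically assume we are the thread arming the timer */
        atomic_fetch_add(&sync_scheduled, 1);
        if (timer_enter_delayed())
            atomic_fetch_sub(&sync_scheduled, 1);  /* lost the race: undo  */
        else
            atomic_fetch_add(&sync_incomplete, 1); /* we now owe a callback */
    }
}

static void syncer_callback(void)   /* analogue of hfs_syncer() */
{
    /* ... perform the flush ... */
    atomic_fetch_sub(&sync_scheduled, 1);  /* flush underway or done      */
    atomic_fetch_sub(&sync_incomplete, 1); /* unmount may proceed at zero */
}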
16 changes: 12 additions & 4 deletions bsd/hfs/hfs_readwrite.c
@@ -1836,12 +1836,20 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
}

case HFS_GET_MOUNT_TIME:
- return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
- break;
+ if (is64bit) {
+ *(user_time_t *)(ap->a_data) = (user_time_t) hfsmp->hfs_mount_time;
+ } else {
+ *(time_t *)(ap->a_data) = (time_t) hfsmp->hfs_mount_time;
+ }
+ return 0;

case HFS_GET_LAST_MTIME:
- return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
- break;
+ if (is64bit) {
+ *(user_time_t *)(ap->a_data) = (user_time_t) hfsmp->hfs_last_mounted_mtime;
+ } else {
+ *(time_t *)(ap->a_data) = (time_t) hfsmp->hfs_last_mounted_mtime;
+ }
+ return 0;

case HFS_SET_BOOT_INFO:
if (!vnode_isvroot(vp))
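The two ioctl cases above previously handed ap->a_data, which the ioctl layer has already copied into a kernel buffer, to copyout() as if it were a user-space address; the replacement stores the result directly into that buffer, sized for the caller's ABI (user_time_t for 64-bit processes, time_t otherwise). A minimal userspace sketch of a caller follows; it assumes HFS_GET_MOUNT_TIME is exposed through <hfs/hfs_fsctl.h> (header location not confirmed by this diff) and that "/Volumes/SDCard" is a mounted HFS+ volume — both are illustrative assumptions.

#include <stdio.h>
#include <time.h>
#include <sys/fsctl.h>      /* fsctl(2) */
#include <hfs/hfs_fsctl.h>  /* assumed home of HFS_GET_MOUNT_TIME */

int main(void)
{
    time_t mount_time = 0;

    /* fsctl(2) routes to hfs_vnop_ioctl() for the volume owning the path;
     * the kernel copies the argument back out for us on success. */
    if (fsctl("/Volumes/SDCard", HFS_GET_MOUNT_TIME, &mount_time, 0) == -1) {
        perror("fsctl(HFS_GET_MOUNT_TIME)");
        return 1;
    }
    printf("volume mounted at: %s", ctime(&mount_time));
    return 0;
}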
152 changes: 152 additions & 0 deletions bsd/hfs/hfs_vfsops.c
@@ -827,6 +827,99 @@ hfs_reload(struct mount *mountp)
return (0);
}

int hfs_last_io_wait_time = 125000;
SYSCTL_INT (_kern, OID_AUTO, hfs_last_io_wait_time, CTLFLAG_RW, &hfs_last_io_wait_time, 0, "number of usecs to wait after an i/o before syncing ejectable media");

static void
hfs_syncer(void *arg0, void *unused)
{
#pragma unused(unused)

struct hfsmount *hfsmp = arg0;
uint32_t secs, usecs, delay = HFS_META_DELAY;
uint64_t now;
struct timeval nowtv, last_io;

clock_get_calendar_microtime(&secs, &usecs);
now = ((uint64_t)secs * 1000000LL) + usecs;
//
// If we have put off the last sync for more than
// 5 seconds, force it so that we don't let too
// much i/o queue up (since flushing the journal
// causes the i/o queue to drain)
//
if ((now - hfsmp->hfs_last_sync_time) >= 5000000LL) {
goto doit;
}

//
// Find out when the last i/o was done to this device (read or write).
//
throttle_info_get_last_io_time(hfsmp->hfs_mp, &last_io);
microuptime(&nowtv);
timevalsub(&nowtv, &last_io);

//
// If the last i/o was too recent, defer this sync until later.
// The limit chosen (125 milliseconds) was based on
// experiments copying data to an SD card and seems to
// prevent us from issuing too many syncs.
//
if (nowtv.tv_sec >= 0 && nowtv.tv_usec > 0 && nowtv.tv_usec < hfs_last_io_wait_time) {
delay /= 2;
goto resched;
}

//
// If there's pending i/o, also skip the sync.
//
if (hfsmp->hfs_devvp && hfsmp->hfs_devvp->v_numoutput > 0) {
goto resched;
}


//
// Only flush the journal if we have not synced recently,
// the last sync request was more than 100 milliseconds
// ago, and no one is in the middle of a transaction right
// now. Otherwise we defer the sync and reschedule it
// for later.
//
if ( ((now - hfsmp->hfs_last_sync_time) >= 100000LL)
&& ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
&& (hfsmp->hfs_active_threads == 0)
&& (hfsmp->hfs_global_lock_nesting == 0)) {

doit:
OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
if (hfsmp->jnl) {
journal_flush(hfsmp->jnl);
}
OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);

clock_get_calendar_microtime(&secs, &usecs);
hfsmp->hfs_last_sync_time = ((int64_t)secs * 1000000) + usecs;

} else if (hfsmp->hfs_active_threads == 0) {
uint64_t deadline;

resched:
clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);
return;
}

//
// NOTE: we decrement these *after* we're done with the journal_flush() since
// it can take a significant amount of time and we don't want more
// callbacks scheduled until we're done with this one.
//
OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
}
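/*
 * Illustrative sketch (not part of the commit): the deadline arithmetic
 * in the resched path above works out as follows, assuming
 * clock_interval_to_deadline() takes its scale factor in nanoseconds per
 * interval unit (consistent with the HFS_MILLISEC_SCALE definition):
 *
 *   HFS_META_DELAY     = 100         -> interval, read as milliseconds
 *   HFS_MILLISEC_SCALE = 1000*1000   -> nanoseconds per millisecond
 *
 * so the syncer normally re-arms 100 ms out, and when the last i/o was
 * too recent, delay /= 2 retries in 50 ms instead:
 *
 *   uint64_t deadline;
 *   clock_interval_to_deadline(HFS_META_DELAY / 2, HFS_MILLISEC_SCALE,
 *                              &deadline);              // 50 ms from now
 *   thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);
 */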

extern int IOBSDIsMediaEjectable( const char *cdev_name );

/*
* Common code for mount and mountroot
@@ -855,12 +948,18 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
u_int32_t iswritable;
daddr64_t mdb_offset;
int isvirtual = 0;
int isroot = 0;

ronly = vfs_isrdonly(mp);
dev = vnode_specrdev(devvp);
cred = p ? vfs_context_ucred(context) : NOCRED;
mntwrapper = 0;

if (args == NULL) {
/* only hfs_mountroot passes us NULL as the 'args' argument */
isroot = 1;
}

bp = NULL;
hfsmp = NULL;
mdbp = NULL;
@@ -1379,6 +1478,18 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
}
}

/* ejectability checks will time out when the device is root_device, so skip them */
if (isroot == 0) {
if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
if (hfsmp->hfs_syncer == NULL) {
printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
}
}
}

/*
* Start looking for free space to drop below this level and generate a
* warning immediately if needed:
@@ -1451,6 +1562,38 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
(void) hfs_recording_suspend(hfsmp);

/*
* Cancel any pending timers for this volume. Then wait for any timers
* which have fired, but whose callbacks have not yet completed.
*/
if (hfsmp->hfs_syncer)
{
struct timespec ts = {0, 100000000}; /* 0.1 seconds */

/*
* Cancel any timers that have been scheduled, but have not
* fired yet. NOTE: The kernel considers a timer complete as
* soon as it starts your callback, so the kernel does not
* keep track of the number of callbacks in progress.
*/
if (thread_call_cancel(hfsmp->hfs_syncer))
OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
thread_call_free(hfsmp->hfs_syncer);
hfsmp->hfs_syncer = NULL;

/*
* This waits for all of the callbacks that were entered before
* we did thread_call_cancel above, but have not completed yet.
*/
while(hfsmp->hfs_sync_incomplete > 0)
{
msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
}

if (hfsmp->hfs_sync_incomplete < 0)
printf("hfs_unmount: pm_sync_incomplete underflow (%d)!\n", hfsmp->hfs_sync_incomplete);
}

/*
* Flush out the b-trees, volume bitmap and Volume Header
*/
@@ -1931,6 +2074,15 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
journal_flush(hfsmp->jnl);
}

{
uint32_t secs, usecs;
uint64_t now;

clock_get_calendar_microtime(&secs, &usecs);
now = ((uint64_t)secs * 1000000LL) + usecs;
hfsmp->hfs_last_sync_time = now;
}

lck_rw_unlock_shared(&hfsmp->hfs_insync);
return (allerror);
}
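The unmount path above follows a cancel-then-drain pattern that pairs with the wakeup() at the end of hfs_syncer(): cancel any timer that has not fired (un-counting the callback it would have run), then sleep until every in-flight callback has finished. A condensed sketch of the pattern, with callout and sync_incomplete as stand-ins for the hfsmount fields:

struct timespec ts = { 0, 100000000 };        /* re-check every 0.1 s at most */

if (thread_call_cancel(callout))              /* TRUE: dequeued before firing */
    OSDecrementAtomic((volatile SInt32 *)&sync_incomplete); /* never runs */
thread_call_free(callout);

while (sync_incomplete > 0)                   /* callbacks already in flight */
    msleep((caddr_t)&sync_incomplete, NULL, PWAIT, "drain", &ts);
/* the final callback's wakeup() normally cuts the msleep() short */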
44 changes: 44 additions & 0 deletions bsd/hfs/hfs_vfsutils.c
@@ -2347,6 +2347,46 @@ hfs_virtualmetafile(struct cnode *cp)
}



//
// Fire off a timed callback to sync the disk if the
// volume is on ejectable media.
//
__private_extern__
void
hfs_sync_ejectable(struct hfsmount *hfsmp)
{
if (hfsmp->hfs_syncer) {
uint32_t secs, usecs;
uint64_t now;

clock_get_calendar_microtime(&secs, &usecs);
now = ((uint64_t)secs * 1000000) + usecs;

if (hfsmp->hfs_sync_scheduled == 0) {
uint64_t deadline;

hfsmp->hfs_last_sync_request_time = now;

clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline);

/*
* Increment hfs_sync_scheduled on the assumption that we're the
* first thread to schedule the timer. If some other thread beat
* us, then we'll decrement it. If we *were* the first to
* schedule the timer, then we need to keep track that the
* callback is waiting to complete.
*/
OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline))
OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
else
OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
}
}
}
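/*
 * Illustrative timeline of one schedule/flush cycle (a sketch, not part
 * of the commit; assumes thread_call_enter_delayed() returns TRUE only
 * when the call was already queued):
 *
 *   t0  thread A: hfs_sync_scheduled 0 -> 1; enter_delayed() == FALSE,
 *                 so hfs_sync_incomplete 0 -> 1 (A armed the timer)
 *   t1  thread B: sees hfs_sync_scheduled != 0 and does nothing
 *   t2  timer fires; hfs_syncer() starts (or defers) the flush
 *   t3  after the flush: hfs_sync_scheduled 1 -> 0,
 *                        hfs_sync_incomplete 1 -> 0,
 *                        wakeup() any hfs_unmount() waiting for zero
 */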


__private_extern__
int
hfs_start_transaction(struct hfsmount *hfsmp)
@@ -2374,6 +2414,7 @@ hfs_start_transaction(struct hfsmount *hfsmp)

if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
lck_rw_lock_shared(&hfsmp->hfs_global_lock);
OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
unlock_on_err = 1;
}

@@ -2399,6 +2440,7 @@ hfs_start_transaction(struct hfsmount *hfsmp)
out:
if (ret != 0 && unlock_on_err) {
lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
}

return ret;
@@ -2424,7 +2466,9 @@ hfs_end_transaction(struct hfsmount *hfsmp)
}

if (need_unlock) {
OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
hfs_sync_ejectable(hfsmp);
}

return ret;
18 changes: 18 additions & 0 deletions bsd/hfs/hfs_vnops.c
@@ -367,6 +367,11 @@ hfs_vnop_close(ap)
}

hfs_unlock(cp);

if (ap->a_fflag & FWASWRITTEN) {
hfs_sync_ejectable(hfsmp);
}

return (0);
}

@@ -2619,13 +2624,26 @@ hfs_vnop_rename(ap)
skip_rm:
/*
* All done with tvp and fvp
*
* We also jump to this point if no destination was observed during lookup and namei.
* However, because only iocounts are held at the VFS layer, there is nothing preventing a
* competing thread from racing us and creating a file or dir at the destination of this rename
* operation. If this occurs, it may cause us to get a spurious EEXIST out of the cat_rename
* call below. To preserve rename's atomicity, we need to signal VFS to re-drive the
* namei/lookup and restart the rename operation. EEXIST is an allowable errno to be bubbled
* out of the rename syscall, but not for this reason, since it is a synonym errno for ENOTEMPTY.
* To signal VFS, we return ERECYCLE (which is also used for lookup restarts). This errno
* will be swallowed by VFS, which will restart the operation.
*/

lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
error = cat_rename(hfsmp, &from_desc, &tdcp->c_desc, &to_desc, &out_desc);
hfs_systemfile_unlock(hfsmp, lockflags);

if (error) {
if (error == EEXIST) {
error = ERECYCLE;
}
goto out;
}

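Conceptually, the re-drive that the rename comment above requests from VFS is a retry loop around lookup and the filesystem's rename operation. The toy below sketches that assumed behavior — it is not the actual VFS implementation; lookup_both() and vnop_rename() are hypothetical stand-ins, and the ERECYCLE value is taken from XNU's bsd/sys/errno.h.

#define ERECYCLE (-5)   /* kernel-internal "restart the lookup" code */

/* hypothetical stand-ins for namei() and the filesystem's rename op */
static int lookup_both(void) { return 0; }
static int vnop_rename(void)
{
    static int raced = 1;                   /* simulate one racing create */
    return raced-- > 0 ? ERECYCLE : 0;
}

/* assumed VFS-layer behavior: retry the whole lookup + rename whenever
 * the filesystem asks for a restart via ERECYCLE */
int rename_redrive(void)
{
    int error;
    do {
        if ((error = lookup_both()) != 0)   /* fresh namei() each pass */
            return error;
        error = vnop_rename();              /* may see the racing create */
    } while (error == ERECYCLE);            /* never escapes to userspace */
    return error;
}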