Skip to content

Commit

Permalink
enable no-copy stack switching implementation (#29578)
Browse files Browse the repository at this point in the history
Falls back to stack copying if there are too many stack mappings.

Also adds some code to free unused pooled stacks.
  • Loading branch information
JeffBezanson authored Oct 11, 2018
1 parent b519147 commit 48bc266
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 20 deletions.
68 changes: 56 additions & 12 deletions src/gc-stacks.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,25 @@
# include <sys/resource.h>
#endif

const size_t jl_guard_size = (4096 * 16);
#ifdef _P64
# ifdef _OS_WINDOWS_
# define MAX_STACK_MAPPINGS 500
# else
# define MAX_STACK_MAPPINGS 30000
# endif
#else
# ifdef _OS_WINDOWS_
# define MAX_STACK_MAPPINGS 250
# else
# define MAX_STACK_MAPPINGS 500
# endif
#endif

// number of stacks to always keep available per pool
#define MIN_STACK_MAPPINGS_PER_POOL 5

const size_t jl_guard_size = (4096 * 8);
static volatile uint32_t num_stack_mappings = 0;

#ifdef _OS_WINDOWS_
#define MAP_FAILED NULL
Expand All @@ -19,13 +37,15 @@ static void *malloc_stack(size_t bufsz)
VirtualFree(stk, 0, MEM_RELEASE);
return MAP_FAILED;
}
jl_atomic_fetch_add(&num_stack_mappings, 1);
return stk;
}


static void free_stack(void *stkbuf, size_t bufsz)
{
VirtualFree(stkbuf, 0, MEM_RELEASE);
jl_atomic_fetch_add(&num_stack_mappings, -1);
}

#else
Expand All @@ -42,12 +62,14 @@ static void *malloc_stack(size_t bufsz)
return MAP_FAILED;
}
#endif
jl_atomic_fetch_add(&num_stack_mappings, 1);
return stk;
}

static void free_stack(void *stkbuf, size_t bufsz)
{
munmap(stkbuf, bufsz);
jl_atomic_fetch_add(&num_stack_mappings, -1);
}
#endif

Expand Down Expand Up @@ -132,10 +154,12 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner)
ssize = LLT_ALIGN(ssize, jl_page_size);
}
if (stk == NULL) {
if (num_stack_mappings >= MAX_STACK_MAPPINGS)
return NULL;
// TODO: allocate blocks of stacks? but need to mprotect individually anyways
stk = malloc_stack(ssize);
if (stk == MAP_FAILED)
jl_throw(jl_memory_exception);
return NULL;
}
*bufsz = ssize;
if (owner) {
Expand All @@ -147,18 +171,38 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner)

void sweep_stack_pools(void)
{
// TODO: deallocate stacks if we have too many sitting around unused
// for (stk in halfof(free_stacks))
// free_stack(stk, pool_sz);
// // then sweep the task stacks
// for (t in live_tasks)
// if (!gc-marked(t))
// stkbuf = t->stkbuf
// bufsz = t->bufsz
// if (stkbuf)
// push(free_stacks[sz], stkbuf)
// Stack sweeping algorithm:
// // deallocate stacks if we have too many sitting around unused
// for (stk in halfof(free_stacks))
// free_stack(stk, pool_sz);
// // then sweep the task stacks
// for (t in live_tasks)
// if (!gc-marked(t))
// stkbuf = t->stkbuf
// bufsz = t->bufsz
// if (stkbuf)
// push(free_stacks[sz], stkbuf)
for (int i = 0; i < jl_n_threads; i++) {
jl_ptls_t ptls2 = jl_all_tls_states[i];

// free half of stacks that remain unused since last sweep
for (int p = 0; p < JL_N_STACK_POOLS; p++) {
arraylist_t *al = &ptls2->heap.free_stacks[p];
size_t n_to_free;
if (al->len > MIN_STACK_MAPPINGS_PER_POOL) {
n_to_free = al->len / 2;
if (n_to_free > (al->len - MIN_STACK_MAPPINGS_PER_POOL))
n_to_free = al->len - MIN_STACK_MAPPINGS_PER_POOL;
}
else {
n_to_free = 0;
}
for (int n = 0; n < n_to_free; n++) {
void *stk = arraylist_pop(al);
free_stack(stk, pool_sizes[p]);
}
}

arraylist_t *live_tasks = &ptls2->heap.live_tasks;
size_t n = 0;
size_t ndel = 0;
Expand Down
12 changes: 9 additions & 3 deletions src/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,18 @@

// task options ---------------------------------------------------------------

// select whether to enable the COPY_STACKS stack switching optimization
// select whether to allow the COPY_STACKS stack switching implementation
#define COPY_STACKS
// select whether to use COPY_STACKS for new Tasks by default
//#define ALWAYS_COPY_STACKS

// If you disbable COPY_STACKS the task-system is not as memory efficient so
// When not using COPY_STACKS the task-system is less memory efficient so
// you probably want to choose a smaller default stack size (factor of 8-10)
#define JL_STACK_SIZE (8*1024*1024)
#ifdef _P64
#define JL_STACK_SIZE (4*1024*1024)
#else
#define JL_STACK_SIZE (2*1024*1024)
#endif

// threading options ----------------------------------------------------------

Expand Down
37 changes: 32 additions & 5 deletions src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -247,11 +247,23 @@ static void ctx_switch(jl_ptls_t ptls, jl_task_t **pt)
}
#endif

int started = (t->stkbuf != NULL);
int started = t->started;
int killed = (lastt->state == done_sym || lastt->state == failed_sym);
if (!started && !t->copy_stack) {
// may need to allocate the stack
t->stkbuf = jl_alloc_fiber(&t->ctx, &t->bufsz, t);
if (t->stkbuf == NULL) {
t->stkbuf = jl_alloc_fiber(&t->ctx, &t->bufsz, t);
if (t->stkbuf == NULL) {
#ifdef COPY_STACKS
// fall back to stack copying if mmap fails
t->copy_stack = 1;
t->bufsz = 0;
memcpy(&t->ctx, &ptls->base_ctx, sizeof(t->ctx));
#else
jl_throw(jl_memory_exception);
#endif
}
}
}

if (killed) {
Expand Down Expand Up @@ -302,8 +314,10 @@ static void ctx_switch(jl_ptls_t ptls, jl_task_t **pt)
if (t->copy_stack) {
if (lastt_ctx)
restore_stack2(ptls, lastt);
else if (lastt->copy_stack)
restore_stack(ptls, NULL); // (doesn't return)
else
restore_stack(ptls, NULL); // (doesn't return)
restore_stack(ptls, (char*)1); // (doesn't return)
}
else
#endif
Expand Down Expand Up @@ -414,17 +428,22 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, size_t ssize)
jl_task_t *t = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type);
t->copy_stack = 0;
if (ssize == 0) {
#ifdef COPY_STACKS
// stack size unspecified; use default
#if defined(COPY_STACKS) && defined(ALWAYS_COPY_STACKS)
t->copy_stack = 1;
t->bufsz = 0;
#else
t->bufsz = JL_STACK_SIZE; // unspecified -- use the default size
t->bufsz = JL_STACK_SIZE;
#endif
}
else {
// user requested dedicated stack of a certain size
if (ssize < MINSTKSZ)
ssize = MINSTKSZ;
t->bufsz = ssize;
t->stkbuf = jl_alloc_fiber(&t->ctx, &t->bufsz, t);
if (t->stkbuf == NULL)
jl_throw(jl_memory_exception);
}
t->tls = jl_nothing;
t->state = runnable_sym;
Expand Down Expand Up @@ -546,6 +565,8 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
jl_error("getcontext failed");
#endif
void *stk = jl_malloc_stack(ssize, owner);
if (stk == NULL)
return NULL;
t->uc_stack.ss_sp = stk;
t->uc_stack.ss_size = *ssize;
#ifdef _OS_WINDOWS_
Expand Down Expand Up @@ -603,6 +624,8 @@ static void start_basefiber(void)
static char *jl_alloc_fiber(unw_context_t *t, size_t *ssize, jl_task_t *owner)
{
char *stkbuf = (char*)jl_malloc_stack(ssize, owner);
if (stkbuf == NULL)
return NULL;
char *stk = stkbuf;
stk += *ssize;
PUSH_RET(&jl_basecursor, stk);
Expand Down Expand Up @@ -659,6 +682,8 @@ static void jl_init_basefiber(size_t ssize)
static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
{
char *stkbuf = (char*)jl_malloc_stack(ssize, owner);
if (stkbuf == NULL)
return NULL;
((char**)t)[0] = stkbuf; // stash the stack pointer somewhere for start_fiber
((size_t*)t)[1] = *ssize; // stash the stack size somewhere for start_fiber
return stkbuf;
Expand Down Expand Up @@ -744,6 +769,8 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
struct sigaction sa, osa;
sigset_t set, oset;
void *stk = jl_malloc_stack(ssize, owner);
if (stk == NULL)
return NULL;
// setup
jl_ucontext_t base_ctx;
memcpy(&base_ctx, &ptls->base_ctx, sizeof(ptls->base_ctx));
Expand Down

0 comments on commit 48bc266

Please sign in to comment.