-
Notifications
You must be signed in to change notification settings - Fork 17.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
runtime: make sweep time proportional to in-use spans
Currently sweeping walks the list of all spans, which means the work in sweeping is proportional to the maximum number of spans ever used. If the heap was once large but is now small, this causes an amortization failure: on a small heap, GCs happen frequently, but a full sweep still has to happen in each GC cycle, which means we spent a lot of time in sweeping. Fix this by creating a separate list consisting of just the in-use spans to be swept, so sweeping is proportional to the number of in-use spans (which is proportional to the live heap). Specifically, we create two lists: a list of unswept in-use spans and a list of swept in-use spans. At the start of the sweep cycle, the swept list becomes the unswept list and the new swept list is empty. Allocating a new in-use span adds it to the swept list. Sweeping moves spans from the unswept list to the swept list. This fixes the amortization problem because a shrinking heap moves spans off the unswept list without adding them to the swept list, reducing the time required by the next sweep cycle. Updates #9265. This fix eliminates almost all of the time spent in sweepone; however, markrootSpans has essentially the same bug, so now the test program from this issue spends all of its time in markrootSpans. No significant effect on other benchmarks. Change-Id: Ib382e82790aad907da1c127e62b3ab45d7a4ac1e Reviewed-on: https://go-review.googlesource.com/30535 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Rick Hudson <rlh@golang.org>
- Loading branch information
Showing
4 changed files
with
173 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
// Copyright 2016 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package runtime | ||
|
||
import ( | ||
"runtime/internal/atomic" | ||
"runtime/internal/sys" | ||
"unsafe" | ||
) | ||
|
||
// A gcSweepBuf is a set of *mspans. | ||
// | ||
// gcSweepBuf is safe for concurrent push operations *or* concurrent | ||
// pop operations, but not both simultaneously. | ||
type gcSweepBuf struct { | ||
// A gcSweepBuf is a two-level data structure consisting of a | ||
// growable spine that points to fixed-sized blocks. The spine | ||
// can be accessed without locks, but adding a block or | ||
// growing it requires taking the spine lock. | ||
// | ||
// Because each mspan covers at least 8K of heap and takes at | ||
// most 8 bytes in the gcSweepBuf, the growth of the spine is | ||
// quite limited. | ||
// | ||
// The spine and all blocks are allocated off-heap, which | ||
// allows this to be used in the memory manager and avoids the | ||
// need for write barriers on all of these. We never release | ||
// this memory because there could be concurrent lock-free | ||
// access and we're likely to reuse it anyway. (In principle, | ||
// we could do this during STW.) | ||
|
||
spineLock mutex | ||
spine unsafe.Pointer // *[N]*gcSweepBlock, accessed atomically | ||
spineLen uintptr // Spine array length, accessed atomically | ||
spineCap uintptr // Spine array cap, accessed under lock | ||
|
||
// index is the first unused slot in the logical concatenation | ||
// of all blocks. It is accessed atomically. | ||
index uint32 | ||
} | ||
|
||
const ( | ||
gcSweepBlockEntries = 512 // 4KB on 64-bit | ||
gcSweepBufInitSpineCap = 256 // Enough for 1GB heap on 64-bit | ||
) | ||
|
||
type gcSweepBlock struct { | ||
spans [gcSweepBlockEntries]*mspan | ||
} | ||
|
||
// push adds span s to buffer b. push is safe to call concurrently | ||
// with other push operations, but NOT to call concurrently with pop. | ||
func (b *gcSweepBuf) push(s *mspan) { | ||
// Obtain our slot. | ||
cursor := uintptr(atomic.Xadd(&b.index, +1) - 1) | ||
top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries | ||
|
||
// Do we need to add a block? | ||
spineLen := atomic.Loaduintptr(&b.spineLen) | ||
var block *gcSweepBlock | ||
retry: | ||
if top < spineLen { | ||
spine := atomic.Loadp(unsafe.Pointer(&b.spine)) | ||
blockp := add(spine, sys.PtrSize*top) | ||
block = (*gcSweepBlock)(atomic.Loadp(blockp)) | ||
} else { | ||
// Add a new block to the spine, potentially growing | ||
// the spine. | ||
lock(&b.spineLock) | ||
// spineLen cannot change until we release the lock, | ||
// but may have changed while we were waiting. | ||
spineLen = atomic.Loaduintptr(&b.spineLen) | ||
if top < spineLen { | ||
unlock(&b.spineLock) | ||
goto retry | ||
} | ||
|
||
if spineLen == b.spineCap { | ||
// Grow the spine. | ||
newCap := b.spineCap * 2 | ||
if newCap == 0 { | ||
newCap = gcSweepBufInitSpineCap | ||
} | ||
newSpine := persistentalloc(newCap*sys.PtrSize, sys.CacheLineSize, &memstats.gc_sys) | ||
if b.spineCap != 0 { | ||
// Blocks are allocated off-heap, so | ||
// no write barriers. | ||
memmove(newSpine, b.spine, b.spineCap*sys.PtrSize) | ||
} | ||
// Spine is allocated off-heap, so no write barrier. | ||
atomic.StorepNoWB(unsafe.Pointer(&b.spine), newSpine) | ||
b.spineCap = newCap | ||
// We can't immediately free the old spine | ||
// since a concurrent push with a lower index | ||
// could still be reading from it. We let it | ||
// leak because even a 1TB heap would waste | ||
// less than 2MB of memory on old spines. If | ||
// this is a problem, we could free old spines | ||
// during STW. | ||
} | ||
|
||
// Allocate a new block and add it to the spine. | ||
block = (*gcSweepBlock)(persistentalloc(unsafe.Sizeof(gcSweepBlock{}), sys.CacheLineSize, &memstats.gc_sys)) | ||
blockp := add(b.spine, sys.PtrSize*top) | ||
// Blocks are allocated off-heap, so no write barrier. | ||
atomic.StorepNoWB(blockp, unsafe.Pointer(block)) | ||
atomic.Storeuintptr(&b.spineLen, spineLen+1) | ||
unlock(&b.spineLock) | ||
} | ||
|
||
// We have a block. Insert the span. | ||
block.spans[bottom] = s | ||
} | ||
|
||
// pop removes and returns a span from buffer b, or nil if b is empty. | ||
// pop is safe to call concurrently with other pop operations, but NOT | ||
// to call concurrently with push. | ||
func (b *gcSweepBuf) pop() *mspan { | ||
cursor := atomic.Xadd(&b.index, -1) | ||
if int32(cursor) < 0 { | ||
atomic.Xadd(&b.index, +1) | ||
return nil | ||
} | ||
|
||
// There are no concurrent spine or block modifications during | ||
// pop, so we can omit the atomics. | ||
top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries | ||
blockp := (**gcSweepBlock)(add(b.spine, sys.PtrSize*uintptr(top))) | ||
block := *blockp | ||
return block.spans[bottom] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters