diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 6be1ffda99410d..e162ab8b41c52f 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1684,7 +1684,12 @@ func gcSweep(mode gcMode) {
 	lock(&mheap_.lock)
 	mheap_.sweepgen += 2
 	mheap_.sweepdone = 0
-	sweep.spanidx = 0
+	if mheap_.sweepSpans[mheap_.sweepgen/2%2].index != 0 {
+		// We should have drained this list during the last
+		// sweep phase. We certainly need to start this phase
+		// with an empty swept list.
+		throw("non-empty swept list")
+	}
 	unlock(&mheap_.lock)
 
 	if !_ConcurrentSweep || mode == gcForceBlockMode {
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 947c38e4000025..8119ade5a55cb5 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -20,10 +20,12 @@ type sweepdata struct {
 	parked  bool
 	started bool
 
-	spanidx uint32 // background sweeper position
-
 	nbgsweep    uint32
 	npausesweep uint32
+
+	// pacertracegen is the sweepgen at which the last pacer trace
+	// "sweep finished" message was printed.
+	pacertracegen uint32
 }
 
 //go:nowritebarrier
@@ -91,18 +93,23 @@ func sweepone() uintptr {
 	_g_.m.locks++
 	sg := mheap_.sweepgen
 	for {
-		idx := atomic.Xadd(&sweep.spanidx, 1) - 1
-		if idx >= uint32(len(work.spans)) {
+		s := mheap_.sweepSpans[1-sg/2%2].pop()
+		if s == nil {
 			mheap_.sweepdone = 1
 			_g_.m.locks--
-			if debug.gcpacertrace > 0 && idx == uint32(len(work.spans)) {
+			if debug.gcpacertrace > 0 && atomic.Cas(&sweep.pacertracegen, sg-2, sg) {
 				print("pacer: sweep done at heap size ", memstats.heap_live>>20, "MB; allocated ", mheap_.spanBytesAlloc>>20, "MB of spans; swept ", mheap_.pagesSwept, " pages at ", mheap_.sweepPagesPerByte, " pages/byte\n")
 			}
 			return ^uintptr(0)
 		}
-		s := work.spans[idx]
 		if s.state != mSpanInUse {
-			s.sweepgen = sg
+			// This can happen if direct sweeping already
+			// swept this span, but in that case the sweep
+			// generation should always be up-to-date.
+			if s.sweepgen != sg {
+				print("runtime: bad span s.state=", s.state, " s.sweepgen=", s.sweepgen, " sweepgen=", sg, "\n")
+				throw("non in-use span in unswept list")
+			}
 			continue
 		}
 		if s.sweepgen != sg-2 || !atomic.Cas(&s.sweepgen, sg-2, sg-1) {
@@ -110,6 +117,9 @@ func sweepone() uintptr {
 		}
 		npages := s.npages
 		if !s.sweep(false) {
+			// Span is still in-use, so this returned no
+			// pages to the heap and the span needs to
+			// move to the swept in-use list.
 			npages = 0
 		}
 		_g_.m.locks--
@@ -348,6 +358,11 @@ func (s *mspan) sweep(preserve bool) bool {
 		c.local_largefree += size
 		res = true
 	}
+	if !res {
+		// The span has been swept and is still in-use, so put
+		// it on the swept in-use list.
+		mheap_.sweepSpans[sweepgen/2%2].push(s)
+	}
 	if trace.enabled {
 		traceGCSweepDone()
 	}
diff --git a/src/runtime/mgcsweepbuf.go b/src/runtime/mgcsweepbuf.go
new file mode 100644
index 00000000000000..4a7b535e57de68
--- /dev/null
+++ b/src/runtime/mgcsweepbuf.go
@@ -0,0 +1,133 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"runtime/internal/atomic"
+	"runtime/internal/sys"
+	"unsafe"
+)
+
+// A gcSweepBuf is a set of *mspans.
+//
+// gcSweepBuf is safe for concurrent push operations *or* concurrent
+// pop operations, but not both simultaneously.
+type gcSweepBuf struct {
+	// A gcSweepBuf is a two-level data structure consisting of a
+	// growable spine that points to fixed-sized blocks. The spine
+	// can be accessed without locks, but adding a block or
+	// growing it requires taking the spine lock.
+	//
+	// Because each mspan covers at least 8K of heap and takes at
+	// most 8 bytes in the gcSweepBuf, the growth of the spine is
+	// quite limited.
+	//
+	// The spine and all blocks are allocated off-heap, which
+	// allows this to be used in the memory manager and avoids the
+	// need for write barriers on all of these. We never release
+	// this memory because there could be concurrent lock-free
+	// access and we're likely to reuse it anyway. (In principle,
+	// we could do this during STW.)
+
+	spineLock mutex
+	spine     unsafe.Pointer // *[N]*gcSweepBlock, accessed atomically
+	spineLen  uintptr        // Spine array length, accessed atomically
+	spineCap  uintptr        // Spine array cap, accessed under lock
+
+	// index is the first unused slot in the logical concatenation
+	// of all blocks. It is accessed atomically.
+	index uint32
+}
+
+const (
+	gcSweepBlockEntries    = 512 // 4KB on 64-bit
+	gcSweepBufInitSpineCap = 256 // Enough for 1GB heap on 64-bit
+)
+
+type gcSweepBlock struct {
+	spans [gcSweepBlockEntries]*mspan
+}
+
+// push adds span s to buffer b. push is safe to call concurrently
+// with other push operations, but NOT to call concurrently with pop.
+func (b *gcSweepBuf) push(s *mspan) {
+	// Obtain our slot.
+	cursor := uintptr(atomic.Xadd(&b.index, +1) - 1)
+	top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries
+
+	// Do we need to add a block?
+	spineLen := atomic.Loaduintptr(&b.spineLen)
+	var block *gcSweepBlock
+retry:
+	if top < spineLen {
+		spine := atomic.Loadp(unsafe.Pointer(&b.spine))
+		blockp := add(spine, sys.PtrSize*top)
+		block = (*gcSweepBlock)(atomic.Loadp(blockp))
+	} else {
+		// Add a new block to the spine, potentially growing
+		// the spine.
+		lock(&b.spineLock)
+		// spineLen cannot change until we release the lock,
+		// but may have changed while we were waiting.
+		spineLen = atomic.Loaduintptr(&b.spineLen)
+		if top < spineLen {
+			unlock(&b.spineLock)
+			goto retry
+		}
+
+		if spineLen == b.spineCap {
+			// Grow the spine.
+			newCap := b.spineCap * 2
+			if newCap == 0 {
+				newCap = gcSweepBufInitSpineCap
+			}
+			newSpine := persistentalloc(newCap*sys.PtrSize, sys.CacheLineSize, &memstats.gc_sys)
+			if b.spineCap != 0 {
+				// Blocks are allocated off-heap, so
+				// no write barriers.
+				memmove(newSpine, b.spine, b.spineCap*sys.PtrSize)
+			}
+			// Spine is allocated off-heap, so no write barrier.
+			atomic.StorepNoWB(unsafe.Pointer(&b.spine), newSpine)
+			b.spineCap = newCap
+			// We can't immediately free the old spine
+			// since a concurrent push with a lower index
+			// could still be reading from it. We let it
+			// leak because even a 1TB heap would waste
+			// less than 2MB of memory on old spines. If
+			// this is a problem, we could free old spines
+			// during STW.
+		}
+
+		// Allocate a new block and add it to the spine.
+		block = (*gcSweepBlock)(persistentalloc(unsafe.Sizeof(gcSweepBlock{}), sys.CacheLineSize, &memstats.gc_sys))
+		blockp := add(b.spine, sys.PtrSize*top)
+		// Blocks are allocated off-heap, so no write barrier.
+		atomic.StorepNoWB(blockp, unsafe.Pointer(block))
+		atomic.Storeuintptr(&b.spineLen, spineLen+1)
+		unlock(&b.spineLock)
+	}
+
+	// We have a block. Insert the span.
+	block.spans[bottom] = s
+}
+
+// pop removes and returns a span from buffer b, or nil if b is empty.
+// pop is safe to call concurrently with other pop operations, but NOT
+// to call concurrently with push.
+func (b *gcSweepBuf) pop() *mspan {
+	cursor := atomic.Xadd(&b.index, -1)
+	if int32(cursor) < 0 {
+		atomic.Xadd(&b.index, +1)
+		return nil
+	}
+
+	// There are no concurrent spine or block modifications during
+	// pop, so we can omit the atomics.
+	top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries
+	blockp := (**gcSweepBlock)(add(b.spine, sys.PtrSize*uintptr(top)))
+	block := *blockp
+	return block.spans[bottom]
+}
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 62cf8fe26706d6..a34a5eb1e47e68 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -60,6 +60,17 @@ type mheap struct {
 	// mapped. cap(spans) indicates the total reserved memory.
 	spans []*mspan
 
+	// sweepSpans contains two mspan stacks: one of swept in-use
+	// spans, and one of unswept in-use spans. These two trade
+	// roles on each GC cycle. Since the sweepgen increases by 2
+	// on each cycle, this means the swept spans are in
+	// sweepSpans[sweepgen/2%2] and the unswept spans are in
+	// sweepSpans[1-sweepgen/2%2]. Sweeping pops spans from the
+	// unswept stack and pushes spans that are still in-use on the
+	// swept stack. Likewise, allocating an in-use span pushes it
+	// on the swept stack.
+	sweepSpans [2]gcSweepBuf
+
 	_ uint32 // align uint64 fields on 32-bit for atomics
 
 	// Proportional sweep
@@ -546,6 +557,7 @@ func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
 		// Record span info, because gc needs to be
 		// able to map interior pointer to containing span.
 		atomic.Store(&s.sweepgen, h.sweepgen)
+		h.sweepSpans[h.sweepgen/2%2].push(s) // Add to swept in-use list.
 		s.state = _MSpanInUse
 		s.allocCount = 0
 		s.sizeclass = uint8(sizeclass)
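
Note (illustrative sketch, not part of the change): the mheap.sweepSpans comment above
relies on sweepgen advancing by 2 per GC cycle, so sweepgen/2%2 alternates between 0 and 1
and the two gcSweepBufs swap the swept/unswept roles without moving any spans. A minimal
standalone Go program showing just that index arithmetic (sweptIdx and unsweptIdx are
made-up names, not runtime identifiers):

	package main

	import "fmt"

	// sweptIdx and unsweptIdx mirror sweepSpans[sweepgen/2%2] and
	// sweepSpans[1-sweepgen/2%2] from the mheap comment.
	func sweptIdx(sweepgen uint32) uint32   { return sweepgen / 2 % 2 }
	func unsweptIdx(sweepgen uint32) uint32 { return 1 - sweepgen/2%2 }

	func main() {
		sweepgen := uint32(0)
		for cycle := 0; cycle < 4; cycle++ {
			fmt.Printf("cycle %d: sweepgen=%d swept=sweepSpans[%d] unswept=sweepSpans[%d]\n",
				cycle, sweepgen, sweptIdx(sweepgen), unsweptIdx(sweepgen))
			// gcSweep does mheap_.sweepgen += 2, which flips the roles.
			sweepgen += 2
		}
	}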
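
Note (illustrative sketch, not part of the change): the constants in mgcsweepbuf.go carry
two sizing claims, "4KB on 64-bit" per block and an initial spine that is "Enough for 1GB
heap on 64-bit". A small check of that arithmetic under the stated assumptions (8-byte
pointers, each mspan covering at least 8KB of heap); the constant names below shadow the
runtime ones for illustration only:

	package main

	import "fmt"

	const (
		ptrSize             = 8       // 64-bit pointer size
		minSpanSize         = 8 << 10 // each mspan covers at least 8KB of heap
		gcSweepBlockEntries = 512     // entries per gcSweepBlock
		initSpineCap        = 256     // gcSweepBufInitSpineCap
	)

	func main() {
		// 512 entries * 8 bytes = 4096 bytes, i.e. the "4KB on 64-bit" comment.
		fmt.Println("block size:", gcSweepBlockEntries*ptrSize, "bytes")

		// 256 blocks * 512 spans * 8KB/span = 1GB of heap covered before the
		// spine has to grow, i.e. "Enough for 1GB heap on 64-bit".
		fmt.Println("heap covered by initial spine:",
			initSpineCap*gcSweepBlockEntries*minSpanSize>>30, "GB")

		// push and pop map the flat cursor to (block, slot) by division/modulo.
		for _, cursor := range []uint32{0, 511, 512, 1300} {
			fmt.Printf("cursor %4d -> block %d, slot %3d\n",
				cursor, cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries)
		}
	}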