// findRunnableGCWorker returns the background mark worker for _p_ if it
// should be run. This must only be called when gcBlackenEnabled != 0.
func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
	if gcBlackenEnabled == 0 {
		throw("gcControllerState.findRunnable: blackening not enabled")
	}
	if _p_.gcBgMarkWorker == 0 {
		// The mark worker associated with this P is blocked
		// performing a mark transition. We can't run it
		// because it may be on some other run or wait queue.
		return nil
	}
	if !gcMarkWorkAvailable(_p_) {
		// No work to be done right now. This can happen at
		// the end of the mark phase when there are still
		// assists tapering off. Don't bother running a worker
		// now because it'll just return immediately.
		return nil
	}

	// Atomically decrement *ptr. Returns true if the value was
	// positive and the decrement succeeded (result >= 0); otherwise
	// returns false. On a lost race the decrement is undone.
	decIfPositive := func(ptr *int64) bool {
		if *ptr > 0 {
			if atomic.Xaddint64(ptr, -1) >= 0 {
				return true
			}
			// We lost a race
			atomic.Xaddint64(ptr, +1)
		}
		return false
	}

	// Try to claim a dedicated-worker token first. On success the
	// background mark worker runs in dedicated mode.
	// dedicatedMarkWorkersNeeded is derived from the P count in
	// startCycle (see that function for the exact computation).
	if decIfPositive(&c.dedicatedMarkWorkersNeeded) {
		// This P is now dedicated to marking until the end of
		// the concurrent mark phase.
		_p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode
	} else {
		// Otherwise try to claim a fractional-worker token; on
		// success the worker runs in fractional mode.
		// fractionalMarkWorkersNeeded is also computed in
		// startCycle (nonzero when the dedicated-worker count
		// doesn't divide evenly — see startCycle for details).
		if !decIfPositive(&c.fractionalMarkWorkersNeeded) {
			// No more workers are need right now.
			return nil
		}

		// Check whether running the fractional worker would push
		// mark-worker CPU utilization past the goal; if so, don't
		// start it and return the token.
		//
		// This P has picked the token for the fractional worker.
		// Is the GC currently under or at the utilization goal?
		// If so, do more work.
		//
		// We used to check whether doing one time slice of work
		// would remain under the utilization goal, but that has the
		// effect of delaying work until the mutator has run for
		// enough time slices to pay for the work. During those time
		// slices, write barriers are enabled, so the mutator is running slower.
		// Now instead we do the work whenever we're under or at the
		// utilization work and pay for it by letting the mutator run later.
		// This doesn't change the overall utilization averages, but it
		// front loads the GC work so that the GC finishes earlier and
		// write barriers can be turned off sooner, effectively giving
		// the mutator a faster machine.
		//
		// The old, slower behavior can be restored by setting
		// gcForcePreemptNS = forcePreemptNS.
		const gcForcePreemptNS = 0

		// TODO(austin): We could fast path this and basically
		// eliminate contention on c.fractionalMarkWorkersNeeded by
		// precomputing the minimum time at which it's worth
		// next scheduling the fractional worker. Then Ps
		// don't have to fight in the window where we've
		// passed that deadline and no one has started the
		// worker yet.
		//
		// TODO(austin): Shorter preemption interval for mark
		// worker to improve fairness and give this
		// finer-grained control over schedule?
		now := nanotime() - gcController.markStartTime
		then := now + gcForcePreemptNS
		timeUsed := c.fractionalMarkTime + gcForcePreemptNS
		if then > 0 && float64(timeUsed)/float64(then) > c.fractionalUtilizationGoal {
			// Nope, we'd overshoot the utilization goal
			atomic.Xaddint64(&c.fractionalMarkWorkersNeeded, +1)
			return nil
		}
		_p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode
	}

	// Run the background mark worker: make its G runnable so the
	// scheduler can pick it up.
	gp := _p_.gcBgMarkWorker.ptr()
	casgstatus(gp, _Gwaiting, _Grunnable)
	if trace.enabled {
		traceGoUnpark(gp, 0)
	}
	return gp
}

// gcResetMarkState resets global state prior to marking (concurrent// or STW) and resets the stack scan state of all Gs.//// This is safe to do without the world stopped because any Gs created// during or after this will start out in the reset state.func gcResetMarkState() {
// This may be called during a concurrent phase, so make sure// allgs doesn't change.
lock(&allglock)
for _, gp := range allgs {
gp.gcscandone = false// set to true in gcphasework
gp.gcscanvalid = false// stack has not been scanned
gp.gcAssistBytes = 0
}
unlock(&allglock)
work.bytesMarked = 0
work.initialHeapLive = atomic.Load64(&memstats.heap_live)
work.markrootDone = false
}

// finishsweep_m ensures that all spans are swept.//// The world must be stopped. This ensures there are no sweeps in// progress.////go:nowritebarrierfunc finishsweep_m() {
// sweepone会取出一个未sweep的span然后执行sweep// 详细将在下面sweep阶段时分析// Sweeping must be complete before marking commences, so// sweep any unswept spans. If this is a concurrent GC, there// shouldn't be any spans left to sweep, so this should finish// instantly. If GC was forced before the concurrent sweep// finished, there may be spans to sweep.for sweepone() != ^uintptr(0) {
sweep.npausesweep++
}
// 所有span都sweep完成后, 启动一个新的markbit时代// 这个函数是实现span的gcmarkBits和allocBits的分配和复用的关键, 流程如下// - span分配gcmarkBits和allocBits// - span完成sweep// - 原allocBits不再被使用// - gcmarkBits变为allocBits// - 分配新的gcmarkBits// - 开启新的markbit时代// - span完成sweep, 同上// - 开启新的markbit时代// - 2个时代之前的bitmap将不再被使用, 可以复用这些bitmap
nextMarkBitArenaEpoch()
}

// gcBgMarkPrepare sets up state for background marking.// Mutator assists must not yet be enabled.func gcBgMarkPrepare() {
// Background marking will stop when the work queues are empty// and there are no more workers (note that, since this is// concurrent, this may be a transient state, but mark// termination will clean it up). Between background workers// and assists, we don't really know how many workers there// will be, so we pretend to have an arbitrarily large number// of workers, almost all of which are "waiting". While a// worker is working it decrements nwait. If nproc == nwait,// there are no workers.
work.nproc = ^uint32(0)
work.nwait = ^uint32(0)
}

// scanobject scans the object starting at b, adding pointers to gcw.
// b must point to the beginning of a heap object or an oblet.
// scanobject consults the GC bitmap for the pointer mask and the
// spans for the size of the object.
//
//go:nowritebarrier
func scanobject(b uintptr, gcw *gcWork) {
	// Note that arena_used may change concurrently during
	// scanobject and hence scanobject may encounter a pointer to
	// a newly allocated heap object that is *not* in
	// [start,used). It will not mark this object; however, we
	// know that it was just installed by a mutator, which means
	// that mutator will execute a write barrier and take care of
	// marking it. This is even more pronounced on relaxed memory
	// architectures since we access arena_used without barriers
	// or synchronization, but the same logic applies.
	arena_start := mheap_.arena_start
	arena_used := mheap_.arena_used

	// Find the bits for b and the size of the object at b.
	//
	// b is either the beginning of an object, in which case this
	// is the size of the object to scan, or it points to an
	// oblet, in which case we compute the size to scan below.

	// The heap bitmap for this object.
	hbits := heapBitsForAddr(b)
	// The span containing this object.
	s := spanOfUnchecked(b)
	// The object's size.
	n := s.elemsize
	if n == 0 {
		throw("scanobject n == 0")
	}

	// Objects larger than maxObletBytes (128KB) are split and
	// scanned at most 128KB at a time.
	if n > maxObletBytes {
		// Large object. Break into oblets for better
		// parallelism and lower latency.
		if b == s.base() {
			// It's possible this is a noscan object (not
			// from greyobject, but from other code
			// paths), in which case we must *not* enqueue
			// oblets since their bitmaps will be
			// uninitialized.
			if s.spanclass.noscan() {
				// Bypass the whole scan.
				gcw.bytesMarked += uint64(n)
				return
			}

			// Enqueue the other oblets to scan later.
			// Some oblets may be in b's scalar tail, but
			// these will be marked as "no more pointers",
			// so we'll drop out immediately when we go to
			// scan those.
			for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes {
				if !gcw.putFast(oblet) {
					gcw.put(oblet)
				}
			}
		}

		// Compute the size of the oblet. Since this object
		// must be a large object, s.base() is the beginning
		// of the object.
		n = s.base() + s.elemsize - b
		if n > maxObletBytes {
			n = maxObletBytes
		}
	}

	// Scan the object word by word, looking for pointers.
	var i uintptr
	for i = 0; i < n; i += sys.PtrSize {
		// Find bits for this word.
		if i != 0 {
			// Avoid needless hbits.next() on last iteration.
			hbits = hbits.next()
		}
		// Load bits once. See CL 22712 and issue 16973 for discussion.
		bits := hbits.bits()

		// Check the scan bit to decide whether to keep scanning.
		// Note the scan bit of the second word is the checkmark bit:
		// During checkmarking, 1-word objects store the checkmark
		// in the type bit for the one word. The only one-word objects
		// are pointers, or else they'd be merged with other non-pointer
		// data into larger allocations.
		if i != 1*sys.PtrSize && bits&bitScan == 0 {
			break // no more pointers in this object
		}
		// Check the pointer bit; skip non-pointer words.
		if bits&bitPointer == 0 {
			continue // not a pointer
		}

		// Load the pointer value.
		// Work here is duplicated in scanblock and above.
		// If you make changes here, make changes there too.
		obj := *(*uintptr)(unsafe.Pointer(b + i))

		// If the pointer is into the arena, call greyobject to mark
		// the target and enqueue it on the mark queue.
		// At this point we have extracted the next potential pointer.
		// Check if it points into heap and not back at the current object.
		if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n {
			// Mark the object.
			if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i); obj != 0 {
				greyobject(obj, b, i, hbits, span, gcw, objIndex)
			}
		}
	}

	// Account for the bytes and scan work performed.
	gcw.bytesMarked += uint64(n)
	gcw.scanWork += int64(i)
}

// gcMarkDone transitions the GC from mark 1 to mark 2 and from mark 2
// to mark termination.
//
// This should be called when all mark work has been drained. In mark
// 1, this includes all root marking jobs, global work buffers, and
// active work buffers in assists and background workers; however,
// work may still be cached in per-P work buffers. In mark 2, per-P
// caches are disabled.
//
// The calling context must be preemptible.
//
// Note that it is explicitly okay to have write barriers in this
// function because completion of concurrent mark is best-effort
// anyway. Any work created by write barriers here will be cleaned up
// by mark termination.
func gcMarkDone() {
top:
	semacquire(&work.markDoneSema)

	// Re-check transition condition under transition lock.
	if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) {
		semrelease(&work.markDoneSema)
		return
	}

	// Temporarily disallow starting new background mark workers so
	// that any remaining workers in the current mark phase will
	// drain out.
	//
	// TODO(austin): Should dedicated workers keep an eye on this
	// and exit gcDrain promptly?
	atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff)
	atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, -0xffffffff)

	// Check whether per-P work caching has already been disabled.
	if !gcBlackenPromptly {
		// Caching was still enabled: disable it and restart the
		// background mark workers.
		// Transition from mark 1 to mark 2.
		//
		// The global work list is empty, but there can still be work
		// sitting in the per-P work caches.
		// Flush and disable work caches.

		// Disallow caching workbufs and indicate that we're in mark 2.
		gcBlackenPromptly = true

		// Prevent completion of mark 2 until we've flushed
		// cached workbufs.
		atomic.Xadd(&work.nwait, -1)

		// GC is set up for mark 2. Let Gs blocked on the
		// transition lock go while we flush caches.
		semrelease(&work.markDoneSema)

		// Push everything in the per-P work caches onto the
		// global work list.
		systemstack(func() {
			// Flush all currently cached workbufs and
			// ensure all Ps see gcBlackenPromptly. This
			// also blocks until any remaining mark 1
			// workers have exited their loop so we can
			// start new mark 2 workers.
			forEachP(func(_p_ *p) {
				_p_.gcw.dispose()
			})
		})

		// Debugging aid.
		// Check that roots are marked. We should be able to
		// do this before the forEachP, but based on issue
		// #16083 there may be a (harmless) race where we can
		// enter mark 2 while some workers are still scanning
		// stacks. The forEachP ensures these scans are done.
		//
		// TODO(austin): Figure out the race and fix this
		// properly.
		gcMarkRootCheck()

		// Re-allow starting new background mark workers.
		// Now we can start up mark 2 workers.
		atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff)
		atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 0xffffffff)

		// If we can already tell there is no more work, jump back
		// to the top so this behaves like a second call.
		incnwait := atomic.Xadd(&work.nwait, +1)
		if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
			// This loop will make progress because
			// gcBlackenPromptly is now true, so it won't
			// take this same "if" branch.
			goto top
		}
	} else {
		// Record when mark termination began and when STW began.
		// Transition to mark termination.
		now := nanotime()
		work.tMarkTerm = now
		work.pauseStart = now

		// Forbid preemption of this G while stopping the world.
		getg().m.preemptoff = "gcing"
		// Stop all running Gs and prevent them from running.
		systemstack(stopTheWorldWithSema)

		// !!!!!!!!!!!!!!!!
		// The world is now stopped (STW)...
		// !!!!!!!!!!!!!!!!

		// The gcphase is _GCmark, it will transition to _GCmarktermination
		// below. The important thing is that the wb remains active until
		// all marking is complete. This includes writes made by the GC.

		// Record that one root marking pass has completed; this
		// affects the handling in gcMarkRootPrepare.
		work.markrootDone = true

		// Disable assists and background workers. We must do
		// this before waking blocked assists.
		atomic.Store(&gcBlackenEnabled, 0)

		// Wake all blocked assists. These will run when we
		// start the world again.
		gcWakeAllAssists()

		// Likewise, release the transition lock. Blocked
		// workers and assists will run when we start the
		// world again.
		semrelease(&work.markDoneSema)

		// Compute the heap size that triggers the next GC.
		// endCycle depends on all gcWork cache stats being
		// flushed. This is ensured by mark 2.
		nextTriggerRatio := gcController.endCycle()

		// Perform mark termination. This will restart the world.
		gcMarkTermination(nextTriggerRatio)
	}
}

// writebarrierptr_prewrite1 invokes a write barrier for *dst = src
// prior to the write happening.
//
// Write barrier calls must not happen during critical GC and scheduler
// related operations. In particular there are times when the GC assumes
// that the world is stopped but scheduler related code is still being
// executed, dealing with syscalls, dealing with putting gs on runnable
// queues and so forth. This code cannot execute write barriers because
// the GC might drop them on the floor. Stopping the world involves removing
// the p associated with an m. We use the fact that m.p == nil to indicate
// that we are in one these critical section and throw if the write is of
// a pointer to a heap object.
//
//go:nosplit
func writebarrierptr_prewrite1(dst *uintptr, src uintptr) {
	// Pin this M so we can safely inspect and set its fields.
	mp := acquirem()
	// Skip if we're already inside a write barrier (avoid recursion)
	// or this M is dying.
	if mp.inwb || mp.dying > 0 {
		releasem(mp)
		return
	}
	systemstack(func() {
		// m.p == nil means we're in a critical section where the GC
		// may assume the world is stopped; a heap write barrier here
		// could be dropped, so crash loudly instead.
		if mp.p == 0 && memstats.enablegc && !mp.inwb && inheap(src) {
			throw("writebarrierptr_prewrite1 called with mp.p == nil")
		}
		mp.inwb = true
		gcmarkwb_m(dst, src)
	})
	mp.inwb = false
	releasem(mp)
}