{-# LANGUAGE RecordWildCards, GADTs #-}
#if __GLASGOW_HASKELL__ < 701
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
#endif
module CmmLayoutStack (
       cmmLayoutStack, setInfoTableStackMap
  ) where

import StgCmmUtils      ( callerSaveVolatileRegs ) -- XXX
import StgCmmForeign    ( saveThreadState, loadThreadState ) -- XXX

import Cmm
import BlockId
import CLabel
import CmmUtils
import MkGraph
import Module
import ForeignCall
import CmmLive
import CmmProcPoint
import SMRep
import Hoopl
import Constants
import UniqSupply
import Maybes
import UniqFM
import Util

import FastString
import Outputable
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.Set as Set
import Control.Monad.Fix
import Data.Array as Array
import Data.Bits
import Data.List (nub)
import Control.Monad (liftM)

#include "HsVersions.h"

-- | One word of the abstract stack: either holding something we must
-- preserve, or free for spilling.
data StackSlot = Occupied | Empty
     -- Occupied: a return address or part of an update frame

instance Outputable StackSlot where
  ppr Occupied = ptext (sLit "XXX")
  ppr Empty    = ptext (sLit "---")

-- All stack locations are expressed as positive byte offsets from the
-- "base", which is defined to be the address above the return address
-- on the stack on entry to this CmmProc.
--
-- Lower addresses have higher StackLocs.
--
type StackLoc = ByteOff

{-
A StackMap describes the stack at any given point. At a continuation
it has a particular layout, like this:
         |             | <- base
         |-------------|
         |     ret0    | <- base + 8
         |-------------|
         .  upd frame  . <- base + sm_ret_off
         |-------------|
         |             |
         .    vars     .
         . (live/dead) .
         |             | <- base + sm_sp - sm_args
         |-------------|
         |    ret1     |
         .  ret vals   . <- base + sm_sp (<--- Sp points here)
         |-------------|
Why do we include the final return address (ret0) in our stack map? I
have absolutely no idea, but it seems to be done that way consistently
in the rest of the code generator, so I played along here. --SDM
Note that we will be constructing an info table for the continuation
(ret1), which needs to describe the stack down to, but not including,
the update frame (or ret0, if there is no update frame).
-}

-- | A StackMap describes the stack at a particular program point: where
-- Sp is relative to the base, how large the argument area is, how much
-- stack is reserved for an update frame, and which LocalRegs currently
-- live in stack slots.
data StackMap = StackMap
 { sm_sp   :: StackLoc
       -- ^ the offset of Sp relative to the base on entry
       -- to this block.
 , sm_args :: ByteOff
       -- ^ the number of bytes of arguments in the area for this block
       -- Defn: the offset of young(L) relative to the base is given by
       -- (sm_sp - sm_args) of the StackMap for block L.
 , sm_ret_off :: ByteOff
       -- ^ Number of words of stack that we do not describe with an info
       -- table, because it contains an update frame.
 , sm_regs :: UniqFM (LocalReg,StackLoc)
       -- ^ regs on the stack
 }

instance Outputable StackMap where
  ppr StackMap{..} =
     text "Sp = " <> int sm_sp $$
     text "sm_args = " <> int sm_args $$
     text "sm_ret_off = " <> int sm_ret_off $$
     text "sm_regs = " <> ppr (eltsUFM sm_regs)

-- | Lay out the stack for a CmmGraph: remove dead assignments, compute
-- a StackMap for every block, and lower safe foreign calls.  The final
-- stack maps and the Sp high-water mark are computed with a lazily
-- knot-tied 'mfix' (the pass reads its own final results), so the
-- lambda pattern must stay lazy (~).
cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph
               -> UniqSM (CmmGraph, BlockEnv StackMap)
cmmLayoutStack procpoints entry_args
               graph0@(CmmGraph { g_entry = entry })
  = do
    -- pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()
    (graph, liveness) <- removeDeadAssignments graph0
    -- pprTrace "liveness" (ppr liveness) $ return ()
    let blocks = postorderDfs graph

    (final_stackmaps, _final_high_sp, new_blocks) <-
          mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
            layout procpoints liveness entry entry_args
                   rec_stackmaps rec_high_sp blocks

    new_blocks' <- mapM lowerSafeForeignCall new_blocks

    -- pprTrace ("Sp HWM") (ppr _final_high_sp) $ return ()
    return (ofBlockList entry new_blocks', final_stackmaps)

layout :: BlockSet                      -- proc points
       -> BlockEnv CmmLive              -- liveness
       -> BlockId                       -- entry
       -> ByteOff                       -- stack args on entry
       -> BlockEnv StackMap             -- [final] stack maps
       -> ByteOff                       -- [final] Sp high water mark
       -> [CmmBlock]                    -- [in] blocks
       -> UniqSM
          ( BlockEnv StackMap           -- [out] stack maps
          , ByteOff                     -- [out] Sp high water mark
          , [CmmBlock]                  -- [out] new blocks
          )

layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
  = go blocks init_stackmap entry_args []
  where
    (updfr, cont_info) = collectContInfo blocks

    init_stackmap = mapSingleton entry StackMap{ sm_sp      = entry_args
                                               , sm_args    = entry_args
                                               , sm_ret_off = updfr
                                               , sm_regs    = emptyUFM
                                               }

    go [] acc_stackmaps acc_hwm acc_blocks
      = return (acc_stackmaps, acc_hwm, acc_blocks)

    go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
      = do
       let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0

       let stack0@StackMap { sm_sp = sp0 }
               = mapFindWithDefault
                     (pprPanic "no stack map for" (ppr entry_lbl))
                     entry_lbl acc_stackmaps

       -- pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()

       -- (a) Update the stack map to include the effects of
       --     assignments in this block
       let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0

       -- (b) Insert assignments to reload all the live variables if this
       --     block is a proc point
       let middle1 = if entry_lbl `setMember` procpoints
                        then foldr blockCons middle0 (insertReloads stack0)
                        else middle0

       -- (c) Look at the last node and if we are making a call or
       --     jumping to a proc point, we must save the live
       --     variables, adjust Sp, and construct the StackMaps for
       --     each of the successor blocks.  See handleLastNode for
       --     details.
       (middle2, sp_off, last1, fixup_blocks, out)
           <- handleLastNode procpoints liveness cont_info
                             acc_stackmaps stack1 middle0 last0

       -- pprTrace "layout(out)" (ppr out) $ return ()

       -- (d) Manifest Sp: run over the nodes in the block and replace
       --     CmmStackSlot with CmmLoad from Sp with a concrete offset.
       --
       -- our block:
       --    middle1          -- the original middle nodes
       --    middle2          -- live variable saves from handleLastNode
       --    Sp = Sp + sp_off -- Sp adjustment goes here
       --    last1            -- the last node
       --
       let middle_pre = blockToList $ foldl blockSnoc middle1 middle2

           sp_high = final_hwm - entry_args
              -- The stack check value is adjusted by the Sp offset on
              -- entry to the proc, which is entry_args.  We are
              -- assuming that we only do a stack check at the
              -- beginning of a proc, and we don't modify Sp before the
              -- check.

           final_blocks = manifestSp final_stackmaps stack0 sp0 sp_high
                              entry0 middle_pre sp_off last1 fixup_blocks

           acc_stackmaps' = mapUnion acc_stackmaps out

           hwm' = maximum (acc_hwm : (sp0 - sp_off) : map sm_sp (mapElems out))

       go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks)


-- -----------------------------------------------------------------------------

-- This doesn't seem right somehow.  We need to find out whether this
-- proc will push some update frame material at some point, so that we
-- can avoid using that area of the stack for spilling.  The
-- updfr_space field of the CmmProc *should* tell us, but it doesn't
-- (I think maybe it gets filled in later when we do proc-point
-- splitting).
--
-- So we'll just take the max of all the cml_ret_offs.  This could be
-- unnecessarily pessimistic, but probably not in the code we
-- generate.

collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
collectContInfo blocks
  = (maximum ret_offs, mapFromList (catMaybes mb_argss))
 where
  (mb_argss, ret_offs) = mapAndUnzip get_cont blocks

  get_cont b =
     case lastNode b of
        CmmCall { cml_cont = Just l, .. }
           -> (Just (l, cml_ret_args), cml_ret_off)
        CmmForeignCall { .. }
           -> (Just (succ, 0), updfr) -- ??
        _other -> (Nothing, 0)


-- -----------------------------------------------------------------------------
-- Updating the StackMap from middle nodes

-- Look for loads from stack slots, and update the StackMap.
-- This is
-- purely for optimisation reasons, so that we can avoid saving a
-- variable back to a different stack slot if it is already on the
-- stack.
--
-- This happens a lot: for example when function arguments are passed
-- on the stack and need to be immediately saved across a call, we
-- want to just leave them where they are on the stack.
--

-- | Fold one middle node into a StackMap: record a reg that was loaded
-- from a stack slot (so it can be re-used in place), and invalidate any
-- stale slot mapping when the reg is assigned some other value.
procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
procMiddle stackmaps node sm
  = case node of
     CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
       -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
       where loc = getStackLoc area off stackmaps
     CmmAssign (CmmLocal r) _other
       -> sm { sm_regs = delFromUFM (sm_regs sm) r }
     _other
       -> sm

-- | Turn an (Area, offset) pair into an offset from the base, using the
-- StackMap of the owning block for a Young area.
getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
getStackLoc Old       n _         = n
getStackLoc (Young l) n stackmaps =
  case mapLookup l stackmaps of
    Nothing -> pprPanic "getStackLoc" (ppr l)
    Just sm -> sm_sp sm - sm_args sm + n


-- -----------------------------------------------------------------------------
-- Handling stack allocation for a last node

-- We take a single last node and turn it into:
--
--    C1 (some statements)
--    Sp = Sp + N
--    C2 (some more statements)
--    call f()          -- the actual last node
--
-- plus possibly some more blocks (we may have to add some fixup code
-- between the last node and the continuation).
--
-- C1: is the code for saving the variables across this last node onto
-- the stack, if the continuation is a call or jumps to a proc point.
--
-- C2: if the last node is a safe foreign call, we have to inject some
-- extra code that goes *after* the Sp adjustment.

handleLastNode
   :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
   -> BlockEnv StackMap -> StackMap
   -> Block CmmNode O O
   -> CmmNode O C
   -> UniqSM
      ( [CmmNode O O]      -- nodes to go *before* the Sp adjustment
      , ByteOff            -- amount to adjust Sp
      , CmmNode O C        -- new last node
      , [CmmBlock]         -- new blocks
      , BlockEnv StackMap  -- stackmaps for the continuations
      )

handleLastNode procpoints liveness cont_info stackmaps
               stack0@StackMap { sm_sp = sp0 } middle last
 = case last of
    --  At each return / tail call,
    --  adjust Sp to point to the last argument pushed, which
    --  is cml_args, after popping any other junk from the stack.
    CmmCall{ cml_cont = Nothing, .. } -> do
      let sp_off = sp0 - cml_args
      return ([], sp_off, last, [], mapEmpty)

    --  At each CmmCall with a continuation:
    CmmCall{ cml_cont = Just cont_lbl, .. } ->
       return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off

    CmmForeignCall{ succ = cont_lbl, .. } -> do
       return $ lastCall cont_lbl wORD_SIZE wORD_SIZE (sm_ret_off stack0)
            -- one word each for args and results: the return address

    CmmBranch{..}     -> handleProcPoints
    CmmCondBranch{..} -> handleProcPoints
    CmmSwitch{..}     -> handleProcPoints

  where
     -- Calls and ForeignCalls are handled the same way:
     lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
              -> ( [CmmNode O O]
                 , ByteOff
                 , CmmNode O C
                 , [CmmBlock]
                 , BlockEnv StackMap )
     lastCall lbl cml_args cml_ret_args cml_ret_off
      = ( assignments
        , spOffsetForCall sp0 cont_stack cml_args
        , last
        , [] -- no new blocks
        , mapSingleton lbl cont_stack )
      where
         (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off

     prepareStack lbl cml_ret_args cml_ret_off
       | Just cont_stack <- mapLookup lbl stackmaps
             -- If we have already seen this continuation before, then
             -- we just have to make the stack look the same:
       = (fixupStack stack0 cont_stack, cont_stack)
             -- Otherwise, we have to allocate the stack frame
       | otherwise
       = (save_assignments, new_cont_stack)
       where
        (new_cont_stack, save_assignments)
           = setupStackFrame lbl liveness cml_ret_off cml_ret_args stack0

     -- For other last nodes (branches), if any of the targets is a
     -- proc point, we have to set up the stack to match what the proc
     -- point is expecting.
     --
     handleProcPoints :: UniqSM ( [CmmNode O O]
                                , ByteOff
                                , CmmNode O C
                                , [CmmBlock]
                                , BlockEnv StackMap )

     handleProcPoints
         -- Note [diamond proc point]
       | Just l <- futureContinuation middle
       , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
       = do
         let cont_args = mapFindWithDefault 0 l cont_info
             (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0)
             out = mapFromList [ (l', cont_stack) | l' <- successors last ]
         return ( assigs
                , spOffsetForCall sp0 cont_stack wORD_SIZE
                , last
                , []
                , out)

       | otherwise = do
         pps <- mapM handleProcPoint (successors last)
         let lbl_map :: LabelMap Label
             lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
             fix_lbl l = mapLookup l lbl_map `orElse` l
         return ( []
                , 0
                , mapSuccessors fix_lbl last
                , concat [ blk | (_,_,_,blk) <- pps ]
                , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )

     -- For each proc point that is a successor of this block
     --   (a) if the proc point already has a stackmap, we need to
     --       shuffle the current stack to make it look the same.
     --       We have to insert a new block to make this happen.
     --   (b) otherwise, call "allocate live stack0" to make the
     --       stack map for the proc point
     handleProcPoint :: BlockId
                     -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
     handleProcPoint l
        | not (l `setMember` procpoints) = return (l, l, stack0, [])
        | otherwise = do
           tmp_lbl <- liftM mkBlockId $ getUniqueM
           let
             (stack2, assigs) =
                  case mapLookup l stackmaps of
                    Just pp_sm -> (pp_sm, fixupStack stack0 pp_sm)
                    Nothing    ->
                      --pprTrace "first visit to proc point"
                      --             (ppr l <+> ppr stack1) $
                      (stack1, assigs)
                      where
                       cont_args = mapFindWithDefault 0 l cont_info
                       (stack1, assigs) =
                          setupStackFrame l liveness (sm_ret_off stack0)
                                          cont_args stack0

             sp_off = sp0 - sm_sp stack2

             block = blockJoin (CmmEntry tmp_lbl)
                               (maybeAddSpAdj sp_off (blockFromList assigs))
                               (CmmBranch l)
           --
           return (l, tmp_lbl, stack2, [block])


-- Sp is currently pointing to current_sp,
-- we want it to point to
--    (sm_sp cont_stack - sm_args cont_stack + args)
-- so the difference is
--    sp0 - (sm_sp cont_stack - sm_args cont_stack + args)
spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff
spOffsetForCall current_sp cont_stack args
  = current_sp - (sm_sp cont_stack - sm_args cont_stack + args)


-- | create a sequence of assignments to establish the new StackMap,
-- given the old StackMap.
fixupStack :: StackMap -> StackMap -> [CmmNode O O]
fixupStack old_stack new_stack = concatMap move new_locs
 where
     old_map :: Map LocalReg ByteOff
     old_map  = Map.fromList (stackSlotRegs old_stack)
     new_locs = stackSlotRegs new_stack

     -- skip the store when the reg is already in the right slot
     move (r,n)
       | Just m <- Map.lookup r old_map, n == m = []
       | otherwise = [CmmStore (CmmStackSlot Old n)
                               (CmmReg (CmmLocal r))]


setupStackFrame
             :: BlockId                 -- label of continuation
             -> BlockEnv CmmLive        -- liveness
             -> ByteOff                 -- updfr
             -> ByteOff                 -- bytes of return values on stack
             -> StackMap                -- current StackMap
             -> (StackMap, [CmmNode O O])

setupStackFrame lbl liveness updfr_off ret_args stack0
  = (cont_stack, assignments)
  where
      -- get the set of LocalRegs live in the continuation
      live = mapFindWithDefault Set.empty lbl liveness

      -- the stack from the base to updfr_off is off-limits.
      -- our new stack frame contains:
      --   * saved live variables
      --   * the return address [young(C) + 8]
      --   * the args for the call,
      --     which are replaced by the return values at the return
      --     point.

      -- everything up to updfr_off is off-limits
      -- stack1 contains updfr_off, plus everything we need to save
      (stack1, assignments) = allocate updfr_off live stack0

      -- And the Sp at the continuation is:
      --   sm_sp stack1 + ret_args
      cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
                         , sm_args = ret_args
                         , sm_ret_off = updfr_off
                         }


-- -----------------------------------------------------------------------------
-- Note [diamond proc point]
--
-- This special case looks for the pattern we get from a typical
-- tagged case expression:
--
--    Sp[young(L1)] = L1
--    if (R1 & 7) != 0 goto L1 else goto L2
--  L2:
--    call [R1] returns to L1
--  L1: live: {y}
--    x = R1
--
-- If we let the generic case handle this, we get
--
--    Sp[-16] = L1
--    if (R1 & 7) != 0 goto L1a else goto L2
--  L2:
--    Sp[-8] = y
--    Sp = Sp - 16
--    call [R1] returns to L1
--  L1a:
--    Sp[-8] = y
--    Sp = Sp - 16
--    goto L1
--  L1:
--    x = R1
--
-- The code for saving the live vars is duplicated in each branch, and
-- furthermore there is an extra jump in the fast path (assuming L1 is
-- a proc point, which it probably is if there is a heap check).
--
-- So to fix this we want to set up the stack frame before the
-- conditional jump.  How do we know when to do this, and when it is
-- safe?  The basic idea is, when we see the assignment
--
--   Sp[young(L)] = L
--
-- we know that
--   * we are definitely heading for L
--   * there can be no more reads from another stack area, because young(L)
--     overlaps with it.
--
-- We don't necessarily know that everything live at L is live now
-- (some might be assigned between here and the jump to L).  So we
-- simplify and only do the optimisation when we see
--
--   (1) a block containing an assignment of a return address L
--   (2) ending in a branch where one (and only) continuation goes to L,
--       and no other continuations go to proc points.
--
-- then we allocate the stack frame for L at the end of the block,
-- before the branch.
--
-- We could generalise (2), but that would make it a bit more
-- complicated to handle, and this currently catches the common case.

futureContinuation :: Block CmmNode O O -> Maybe BlockId
futureContinuation middle = foldBlockNodesB f middle Nothing
   where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
         f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
               = Just l
         f _ r = r


-- -----------------------------------------------------------------------------
-- Saving live registers

-- | Given a set of live registers and a StackMap, save all the registers
-- on the stack and return the new StackMap and the assignments to do
-- the saving.
--
allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O])
allocate ret_off live stackmap@StackMap{ sm_sp = sp0
                                       , sm_regs = regs0 }
 =
  -- pprTrace "allocate" (ppr live $$ ppr stackmap) $

   -- we only have to save regs that are not already in a slot
   let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
       regs1   = filterUFM (\(r,_) -> elemRegSet r live) regs0
   in

   -- make a map of the stack
   let stack = reverse $ Array.elems $
               accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $
                 ret_words ++ live_words
            where ret_words =
                   [ (x, Occupied)
                   | x <- [ 1 .. toWords ret_off] ]
                  live_words =
                   [ (toWords x, Occupied)
                   | (r,off) <- eltsUFM regs1,
                     let w = localRegBytes r,
                     x <- [ off, off - wORD_SIZE .. off - w + 1] ]
   in

   -- Pass over the stack: find slots to save all the new live variables,
   -- choosing the oldest slots first (hence a foldr).
   let
       save slot ([], stack, n, assigs, regs) -- no more regs to save
          = ([], slot:stack, n `plusW` 1, assigs, regs)

       save slot (to_save, stack, n, assigs, regs)
          = case slot of
               Occupied -> (to_save, Occupied:stack, n `plusW` 1, assigs, regs)
               Empty
                 | Just (stack', r, to_save') <-
                       select_save to_save (slot:stack)
                 -> let assig = CmmStore (CmmStackSlot Old n')
                                         (CmmReg (CmmLocal r))
                        n' = n `plusW` 1
                    in
                        (to_save', stack', n', assig : assigs, (r,(r,n')):regs)

                 | otherwise
                 -> (to_save, slot:stack, n `plusW` 1, assigs, regs)

       -- we should do better here: right now we'll fit the smallest first,
       -- but it would make more sense to fit the biggest first.
       select_save :: [LocalReg] -> [StackSlot]
                   -> Maybe ([StackSlot], LocalReg, [LocalReg])
       select_save regs stack = go regs []
         where go []     _no_fit = Nothing
               go (r:rs) no_fit
                 | Just rest <- dropEmpty words stack
                 = Just (replicate words Occupied ++ rest, r, rs ++ no_fit)
                 | otherwise
                 = go rs (r:no_fit)
                 where words = localRegWords r

       -- fill in empty slots as much as possible
       (still_to_save, save_stack, n, save_assigs, save_regs)
          = foldr save (to_save, [], 0, [], []) stack

       -- push any remaining live vars on the stack
       (push_sp, push_assigs, push_regs)
          = foldr push (n, [], []) still_to_save
          where
              push r (n, assigs, regs)
                = (n', assig : assigs, (r,(r,n')) : regs)
                where
                  n' = n + localRegBytes r
                  assig = CmmStore (CmmStackSlot Old n')
                                   (CmmReg (CmmLocal r))

       trim_sp
          | not (null push_regs) = push_sp
          | otherwise
          = n `plusW` (- length (takeWhile isEmpty save_stack))

       final_regs = regs1 `addListToUFM` push_regs
                          `addListToUFM` save_regs

   in
   -- XXX should be an assert
   if (n /= max sp0 ret_off)
        then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off)
        else

   if (trim_sp .&. (wORD_SIZE - 1)) /= 0
        then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs
                                               <+> ppr push_sp)
        else

   ( stackmap { sm_regs = final_regs, sm_sp = trim_sp }
   , push_assigs ++ save_assigs )


-- -----------------------------------------------------------------------------
-- Manifesting Sp

-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp.  The
-- block looks like this:
--
--    middle_pre       -- the middle nodes
--    Sp = Sp + sp_off -- Sp adjustment goes here
--    last             -- the last node
--
-- And we have some extra blocks too (that don't contain Sp adjustments)
--
-- The adjustment for middle_pre will be different from that for
-- middle_post, because the Sp adjustment intervenes.
--
manifestSp
   :: BlockEnv StackMap  -- StackMaps for other blocks
   -> StackMap           -- StackMap for this block
   -> ByteOff            -- Sp on entry to the block
   -> ByteOff            -- SpHigh
   -> CmmNode C O        -- first node
   -> [CmmNode O O]      -- middle
   -> ByteOff            -- sp_off
   -> CmmNode O C        -- last node
   -> [CmmBlock]         -- new blocks
   -> [CmmBlock]         -- final blocks with Sp manifest

manifestSp stackmaps stack0 sp0 sp_high
           first middle_pre sp_off last fixup_blocks
  = final_block : fixup_blocks'
  where
    area_off = getAreaOff stackmaps

    adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
    adj_pre_sp  = mapExpDeep (areaToSp sp0            sp_high area_off)
    adj_post_sp = mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off)

    final_middle = maybeAddSpAdj sp_off $
                   blockFromList $
                   map adj_pre_sp $
                   elimStackStores stack0 stackmaps area_off $
                   middle_pre

    final_last    = optStackCheck (adj_post_sp last)

    final_block   = blockJoin first final_middle final_last

    fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks


getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc)
getAreaOff _ Old = 0
getAreaOff stackmaps (Young l) =
  case mapLookup l stackmaps of
    Just sm -> sm_sp sm - sm_args sm
    Nothing -> pprPanic "getAreaOff" (ppr l)


-- | Append an explicit Sp adjustment to the block, unless the offset
-- is zero.
maybeAddSpAdj :: ByteOff -> Block CmmNode O O -> Block CmmNode O O
maybeAddSpAdj 0      block = block
maybeAddSpAdj sp_off block
   = block `blockSnoc` CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off)

{-
Sp(L) is the Sp offset on entry to block L relative to the base of the
OLD area.
SpArgs(L) is the size of the young area for L, i.e. the number of
arguments.
- in block L, each reference to [old + N] turns into
[Sp + Sp(L) - N]
- in block L, each reference to [young(L') + N] turns into
[Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
- be careful with the last node of each block: Sp has already been adjusted
to be Sp + Sp(L) - Sp(L')
-}

areaToSp :: ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
areaToSp sp_old _sp_hwm area_off (CmmStackSlot area n) =
  cmmOffset (CmmReg spReg) (sp_old - area_off area - n)
areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm)
areaToSp _ _ _ (CmmMachOp (MO_U_Lt _)  -- Note [null stack check]
                  [CmmMachOp (MO_Sub _)
                          [ CmmReg (CmmGlobal Sp)
                          , CmmLit (CmmInt 0 _)],
                   CmmReg (CmmGlobal SpLim)]) = CmmLit (CmmInt 0 wordWidth)
areaToSp _ _ _ other = other

-- -----------------------------------------------------------------------------
-- Note [null stack check]
--
-- If the high-water Sp is zero, then we end up with
--
--   if (Sp - 0 < SpLim) then .. else ..
--
-- and possibly some dead code for the failure case.  Optimising this
-- away depends on knowing that SpLim <= Sp, so it is really the job
-- of the stack layout algorithm, hence we do it now.  This is also
-- convenient because control-flow optimisation later will drop the
-- dead code.

optStackCheck :: CmmNode O C -> CmmNode O C
optStackCheck n = -- Note [null stack check]
 case n of
   CmmCondBranch (CmmLit (CmmInt 0 _)) _true false -> CmmBranch false
   other -> other


-- -----------------------------------------------------------------------------

-- | Eliminate stores of the form
--
--    Sp[area+n] = r
--
-- when we know that r is already in the same slot as Sp[area+n].  We
-- could do this in a later optimisation pass, but that would involve
-- a separate analysis and we already have the information to hand
-- here.  It helps clean up some extra stack stores in common cases.
--
-- Note that we may have to modify the StackMap as we walk through the
-- code using procMiddle, since an assignment to a variable in the
-- StackMap will invalidate its mapping there.
--
elimStackStores :: StackMap
                -> BlockEnv StackMap
                -> (Area -> ByteOff)
                -> [CmmNode O O]
                -> [CmmNode O O]
elimStackStores stackmap stackmaps area_off nodes
  = go stackmap nodes
  where
    go _stackmap [] = []
    go stackmap (n:ns)
     = case n of
         CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
            | Just (_,off) <- lookupUFM (sm_regs stackmap) r
            , area_off area + m == off
            -> -- pprTrace "eliminated a node!" (ppr r) $
               go stackmap ns
         _otherwise
            -> n : go (procMiddle stackmaps n stackmap) ns


-- -----------------------------------------------------------------------------
-- Update info tables to include stack liveness


setInfoTableStackMap :: BlockEnv StackMap -> CmmDecl -> CmmDecl
setInfoTableStackMap stackmaps
      (CmmProc top_info@TopInfo{..} l g@CmmGraph{g_entry = eid})
  = CmmProc top_info{ info_tbl = fix_info info_tbl } l g
  where
    fix_info info_tbl@CmmInfoTable{ cit_rep = StackRep _ } =
       info_tbl { cit_rep = StackRep (get_liveness eid) }
    fix_info other = other

    get_liveness :: BlockId -> Liveness
    get_liveness lbl
      = case mapLookup lbl stackmaps of
          Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl)
          Just sm -> stackMapToLiveness sm

setInfoTableStackMap _ d = d


stackMapToLiveness :: StackMap -> Liveness
stackMapToLiveness StackMap{..} =
   reverse $ Array.elems $
        accumArray (\_ x -> x) True (toWords sm_ret_off + 1,
                                     toWords (sm_sp - sm_args)) live_words
   where
     -- only GC-pointer slots are marked live (False = live pointer here)
     live_words = [ (toWords off, False)
                  | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ]


-- -----------------------------------------------------------------------------
-- Lowering safe foreign calls

{-
Note [lower safe foreign calls]
We start with
Sp[young(L1)] = L1
,-----------------------
| r1 = foo(x,y,z) returns to L1
'-----------------------
L1:
R1 = r1 -- copyIn, inserted by mkSafeCall
...
the stack layout algorithm will arrange to save and reload everything
live across the call. Our job now is to expand the call so we get
Sp[young(L1)] = L1
,-----------------------
| SAVE_THREAD_STATE()
| token = suspendThread(BaseReg, interruptible)
| r = foo(x,y,z)
| BaseReg = resumeThread(token)
| LOAD_THREAD_STATE()
| R1 = r -- copyOut
| jump L1
'-----------------------
L1:
r = R1 -- copyIn, inserted by mkSafeCall
...
Note the copyOut, which saves the results in the places that L1 is
expecting them (see Note [safe foreign call convention]).
-}

lowerSafeForeignCall :: CmmBlock -> UniqSM CmmBlock
lowerSafeForeignCall block
  | (entry, middle, CmmForeignCall { .. }) <- blockSplit block
  = do
    -- Both 'id' and 'new_base' are KindNonPtr because they're
    -- RTS-only objects and are not subject to garbage collection
    id <- newTemp bWord
    new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
    let (caller_save, caller_load) = callerSaveVolatileRegs
    load_tso <- newTemp gcWord
    load_stack <- newTemp gcWord
    let suspend = saveThreadState <*>
                  caller_save <*>
                  mkMiddle (callSuspendThread id intrbl)
        midCall = mkUnsafeCall tgt res args
        resume  = mkMiddle (callResumeThread new_base id) <*>
                  -- Assign the result to BaseReg: we
                  -- might now have a different Capability!
                  mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
                  caller_load <*>
                  loadThreadState load_tso load_stack

        -- Note: The successor must be a procpoint, and we have already split,
        --       so we use a jump, not a branch.
        succLbl = CmmLit (CmmLabel (infoTblLbl succ))

        (ret_args, regs, copyout) =
             copyOutOflow NativeReturn Jump (Young succ)
                            (map (CmmReg . CmmLocal) res)
                            updfr (0, [])

        jump = CmmCall { cml_target    = succLbl
                       , cml_cont      = Just succ
                       , cml_args_regs = regs
                       , cml_args      = widthInBytes wordWidth
                       , cml_ret_args  = ret_args
                       , cml_ret_off   = updfr }

    graph' <- lgraphOfAGraph $ suspend <*>
                               midCall <*>
                               resume  <*>
                               copyout <*>
                               mkLast jump

    case toBlockList graph' of
      [one] -> let (_, middle', last) = blockSplit one
               in return (blockJoin entry (middle `blockAppend` middle') last)
      _ -> panic "lowerSafeForeignCall0"

  -- Block doesn't end in a safe foreign call:
  | otherwise = return block


foreignLbl :: FastString -> CmmExpr
foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name))

newTemp :: CmmType -> UniqSM LocalReg
newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)

callSuspendThread :: LocalReg -> Bool -> CmmNode O O
callSuspendThread id intrbl =
  CmmUnsafeForeignCall
       (ForeignTarget (foreignLbl (fsLit "suspendThread"))
             (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
       [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))]

callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
callResumeThread new_base id =
  CmmUnsafeForeignCall
       (ForeignTarget (foreignLbl (fsLit "resumeThread"))
            (ForeignConvention CCallConv [AddrHint] [AddrHint]))
       [new_base] [CmmReg (CmmLocal id)]

-- -----------------------------------------------------------------------------

-- | Convert a word count to a byte offset and add it to a byte offset.
plusW :: ByteOff -> WordOff -> ByteOff
plusW b w = b + w * wORD_SIZE

-- | Drop exactly n leading Empty slots, or fail.
dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
dropEmpty 0 ss           = Just ss
dropEmpty n (Empty : ss) = dropEmpty (n-1) ss
dropEmpty _ _            = Nothing

isEmpty :: StackSlot -> Bool
isEmpty Empty = True
isEmpty _     = False

-- | Size of a LocalReg on the stack, rounded up to whole words.
localRegBytes :: LocalReg -> ByteOff
localRegBytes r = roundUpToWords (widthInBytes (typeWidth (localRegType r)))

localRegWords :: LocalReg -> WordOff
localRegWords = toWords . localRegBytes

toWords :: ByteOff -> WordOff
toWords x = x `quot` wORD_SIZE


-- | Generate a reload assignment for every reg recorded in the StackMap.
insertReloads :: StackMap -> [CmmNode O O]
insertReloads stackmap =
   [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp)
                                     (localRegType r))
   | (r,sp) <- stackSlotRegs stackmap
   ]


stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
stackSlotRegs sm = eltsUFM (sm_regs sm)