//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//

#include "SIRegisterInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Cache the pressure-set IDs for the SGPR_32 and VGPR_32 sets by scanning the
// generated pressure-set names once at construction time.
SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {
  unsigned NumRegPressureSets = getNumRegPressureSets();

  // Initialize to an out-of-range sentinel; the asserts below verify both
  // sets were actually found.
  SGPR32SetID = NumRegPressureSets;
  VGPR32SetID = NumRegPressureSets;
  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    if (strncmp("SGPR_32", getRegPressureSetName(i), 7) == 0)
      SGPR32SetID = i;
    else if (strncmp("VGPR_32", getRegPressureSetName(i), 7) == 0)
      VGPR32SetID = i;
  }
  assert(SGPR32SetID < NumRegPressureSets &&
         VGPR32SetID < NumRegPressureSets);
}

/// Mark \p Reg and every register aliasing it (including \p Reg itself) as
/// reserved in \p Reserved.
void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
                                           unsigned Reg) const {
  MCRegAliasIterator R(Reg, this, true);
  for (; R.isValid(); ++R)
    Reserved.set(*R);
}

/// Pick the SGPR quad that holds the private segment (scratch) buffer
/// resource descriptor, placed just below the subtarget's special-register
/// reservations.
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
    const MachineFunction &MF) const {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  if (ST.hasSGPRInitBug()) {
    // Leave space for flat_scr, xnack_mask, vcc, and alignment
    unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 8 - 4;
    unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
    return getMatchingSuperReg(BaseReg, AMDGPU::sub0,
                               &AMDGPU::SReg_128RegClass);
  }

  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    // 96/97 need to be reserved for flat_scr, 98/99 for xnack_mask, and
    // 100/101 for vcc. This is the next sgpr128 down.
    return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95;
  }

  return AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99;
}

/// Pick the SGPR that holds the scratch wave byte offset, placed just below
/// the scratch resource descriptor reservation.
unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
    const MachineFunction &MF) const {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  if (ST.hasSGPRInitBug()) {
    unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1;
    return AMDGPU::SGPR_32RegClass.getRegister(Idx);
  }

  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    // Next register before reservations for flat_scr, xnack_mask, vcc,
    // and scratch resource.
    return AMDGPU::SGPR91;
  }

  return AMDGPU::SGPR95;
}

BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);

  // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
  // this seems likely to result in bugs, so I'm marking them as reserved.
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // Reserve the last 2 registers so we will always have at least 2 more that
  // will physically contain VCC.
  reserveRegisterTuples(Reserved, AMDGPU::SGPR102_SGPR103);

  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();

  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    // SI/CI have 104 SGPRs. VI has 102. We need to shift down the reservation
    // for VCC/XNACK_MASK/FLAT_SCR.
    //
    // TODO The SGPRs that alias to XNACK_MASK could be used as general purpose
    // SGPRs when the XNACK feature is not used. This is currently not done
    // because the code that counts SGPRs cannot account for such holes.
    reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97);
    reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99);
    reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101);
  }

  // Tonga and Iceland can only allocate a fixed number of SGPRs due
  // to a hw bug.
  if (ST.hasSGPRInitBug()) {
    unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
    // Reserve some SGPRs for FLAT_SCRATCH, XNACK_MASK, and VCC (6 SGPRs).
    unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6;
    for (unsigned i = Limit; i < NumSGPRs; ++i) {
      unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
      reserveRegisterTuples(Reserved, Reg);
    }
  }

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
    // Reserve 1 SGPR for scratch wave offset in case we need to spill.
    reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
  }

  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
    // need to spill.
    // TODO: May need to reserve a VGPR if doing LDS spilling.
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
    assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
  }

  return Reserved;
}

unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
                                                unsigned Idx) const {
  const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
  // FIXME: We should adjust the max number of waves based on LDS size.
  unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
                                          STI.getMaxWavesPerCU());
  unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());

  unsigned VSLimit = SGPRLimit + VGPRLimit;

  // Find the register class that feeds pressure set Idx and scale the
  // per-wave register budget by how many 32-bit subregisters each register
  // of that class covers.
  for (regclass_iterator I = regclass_begin(), E = regclass_end();
       I != E; ++I) {
    const TargetRegisterClass *RC = *I;

    unsigned NumSubRegs = std::max((int)RC->getSize() / 4, 1);
    unsigned Limit;

    if (isPseudoRegClass(RC)) {
      // FIXME: This is a hack. We should never be considering the pressure of
      // these since no virtual register should ever have this class.
      Limit = VSLimit;
    } else if (isSGPRClass(RC)) {
      Limit = SGPRLimit / NumSubRegs;
    } else {
      Limit = VGPRLimit / NumSubRegs;
    }

    const int *Sets = getRegClassPressureSets(RC);
    assert(Sets);
    for (unsigned i = 0; Sets[i] != -1; ++i) {
      if (Sets[i] == (int)Idx)
        return Limit;
    }
  }
  // Fallback when no register class maps to this pressure set.
  return 256;
}

bool SIRegisterInfo::requiresRegisterScavenging(
    const MachineFunction &Fn) const {
  // Scavenging is only needed when spills (stack objects) exist.
  return Fn.getFrameInfo()->hasStackObjects();
}

/// Return how many 32-bit subregisters the given spill pseudo-instruction
/// saves or restores.
static unsigned getNumSubRegsForSpillOp(unsigned Op) {
  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
    return 1;
  default:
    llvm_unreachable("Invalid spill opcode");
  }
}

/// Expand a VGPR spill pseudo into one scratch buffer load/store per 32-bit
/// subregister of \p Value, emitted before \p MI.
///
/// \p LoadStoreOp is the BUFFER_{LOAD,STORE}_DWORD_OFFSET opcode; whether it
/// loads decides the def/use direction of the register operands. If the
/// final \p Offset does not fit in the 12-bit immediate field, a scavenged
/// SGPR (or SGPR0 as a last resort, with a diagnostic) is used to materialize
/// the offset.
void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
                                           unsigned LoadStoreOp,
                                           unsigned Value,
                                           unsigned ScratchRsrcReg,
                                           unsigned ScratchOffset,
                                           int64_t Offset,
                                           RegScavenger *RS) const {

  MachineBasicBlock *MBB = MI->getParent();
  const MachineFunction *MF = MI->getParent()->getParent();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
  LLVMContext &Ctx = MF->getFunction()->getContext();
  DebugLoc DL = MI->getDebugLoc();
  bool IsLoad = TII->get(LoadStoreOp).mayLoad();

  bool RanOutOfSGPRs = false;
  bool Scavenged = false;
  unsigned SOffset = ScratchOffset;

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned Size = NumSubRegs * 4;

  if (!isUInt<12>(Offset + Size)) {
    // The largest offset touched by this spill doesn't fit in the buffer
    // instruction's 12-bit immediate; fold it into an SGPR instead.
    SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
    if (SOffset == AMDGPU::NoRegister) {
      RanOutOfSGPRs = true;
      SOffset = AMDGPU::SGPR0;
    } else {
      Scavenged = true;
    }
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
        .addReg(ScratchOffset)
        .addImm(Offset);
    Offset = 0;
  }

  if (RanOutOfSGPRs)
    Ctx.emitError("Ran out of SGPRs for spilling VGPRS");

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
    unsigned SubReg = NumSubRegs > 1 ?
        getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
        Value;

    // Kill the scavenged offset register on the last use.
    unsigned SOffsetRegState = 0;
    if (i + 1 == e && Scavenged)
      SOffsetRegState |= RegState::Kill;

    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
        .addReg(SubReg, getDefRegState(IsLoad))
        .addReg(ScratchRsrcReg)
        .addReg(SOffset, SOffsetRegState)
        .addImm(Offset)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // tfe
        .addReg(Value, RegState::Implicit | getDefRegState(IsLoad))
        .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
  }
}

// Rewrite the frame-index operand of MI: SGPR spill pseudos become
// V_WRITELANE/V_READLANE lane spills into a VGPR, VGPR spill pseudos become
// scratch buffer loads/stores, and any other user gets the frame offset as
// an immediate (or a scavenged VGPR when the immediate is illegal).
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

      // Write each 32-bit subregister into its assigned lane of the spill
      // VGPR.
      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
                Spill.VGPR)
            .addReg(SubReg)
            .addImm(Spill.Lane);

        // FIXME: Since this spills to another register instead of an actual
        // frame index, we should delete the frame index when all references to
        // it are fixed.
      }
      MI->eraseFromParent();
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

      // Read each lane of the spill VGPR back into its 32-bit subregister.
      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                           &AMDGPU::SGPR_32RegClass, i);
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        BuildMI(*MBB, MI, DL,
                TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
            .addReg(Spill.VGPR)
            .addImm(Spill.Lane)
            .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
      }

      // TODO: only do this when it is needed
      switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
      case AMDGPUSubtarget::SOUTHERN_ISLANDS:
        // "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states
        // ("S_NOP 3") on SI
        TII->insertWaitStates(MI, 4);
        break;
      case AMDGPUSubtarget::SEA_ISLANDS:
        break;
      default: // VOLCANIC_ISLANDS and later
        // "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states
        // ("S_NOP 4") on VI and later. This also applies to VALUs which write
        // VCC, but we're unlikely to see VMEM use VCC.
        TII->insertWaitStates(MI, 5);
      }

      MI->eraseFromParent();
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE:
      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index), RS);
      MI->eraseFromParent();
      break;
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index), RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      // Any other use of a frame index: fold the offset as an immediate,
      // falling back to a scavenged VGPR when the operand can't take one.
      int64_t Offset = FrameInfo->getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass,
                                               MI, SPAdj);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
            .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}

/// Extract the hardware register index from the register's encoding.
unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
  return getEncodingValue(Reg) & 0xff;
}

// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
  assert(!TargetRegisterInfo::isVirtualRegister(Reg));

  // Checked in order; the first (smallest/most specific) class containing
  // Reg wins.
  static const TargetRegisterClass *const BaseClasses[] = {
    &AMDGPU::VGPR_32RegClass,
    &AMDGPU::SReg_32RegClass,
    &AMDGPU::VReg_64RegClass,
    &AMDGPU::SReg_64RegClass,
    &AMDGPU::VReg_96RegClass,
    &AMDGPU::VReg_128RegClass,
    &AMDGPU::SReg_128RegClass,
    &AMDGPU::VReg_256RegClass,
    &AMDGPU::SReg_256RegClass,
    &AMDGPU::VReg_512RegClass,
    &AMDGPU::SReg_512RegClass
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  return nullptr;
}

// TODO: It might be helpful to have some target specific flags in
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
  switch (RC->getSize()) {
  case 4:
    return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
  case 8:
    return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
  case 12:
    return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
  case 16:
    return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
  case 32:
    return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
  case 64:
    return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

/// Map a register class to the VGPR class of the same size.
const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
                                         const TargetRegisterClass *SRC) const {
  switch (SRC->getSize()) {
  case 4:
    return &AMDGPU::VGPR_32RegClass;
  case 8:
    return &AMDGPU::VReg_64RegClass;
  case 12:
    return &AMDGPU::VReg_96RegClass;
  case 16:
    return &AMDGPU::VReg_128RegClass;
  case 32:
    return &AMDGPU::VReg_256RegClass;
  case 64:
    return &AMDGPU::VReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

/// Map a register class to the SGPR class of the same size.
/// NOTE(review): unlike the VGPR mapping above there is no 96-bit (12-byte)
/// case here — presumably no SReg_96 class exists; confirm before relying on
/// this for 3-dword values.
const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
                                         const TargetRegisterClass *VRC) const {
  switch (VRC->getSize()) {
  case 4:
    return &AMDGPU::SGPR_32RegClass;
  case 8:
    return &AMDGPU::SReg_64RegClass;
  case 16:
    return &AMDGPU::SReg_128RegClass;
  case 32:
    return &AMDGPU::SReg_256RegClass;
  case 64:
    return &AMDGPU::SReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

/// Return the register class covering the registers reachable from \p RC
/// through sub-register index \p SubIdx (or \p RC itself for NoSubRegister).
const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
                         const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
    return RC;

  // We can assume that each lane corresponds to one 32-bit register.
  unsigned Count = countPopulation(getSubRegIndexLaneMask(SubIdx));
  if (isSGPRClass(RC)) {
    switch (Count) {
    case 1:
      return &AMDGPU::SGPR_32RegClass;
    case 2:
      return &AMDGPU::SReg_64RegClass;
    case 4:
      return &AMDGPU::SReg_128RegClass;
    case 8:
      return &AMDGPU::SReg_256RegClass;
    case 16: /* fall-through */
    default:
      llvm_unreachable("Invalid sub-register class size");
    }
  } else {
    switch (Count) {
    case 1:
      return &AMDGPU::VGPR_32RegClass;
    case 2:
      return &AMDGPU::VReg_64RegClass;
    case 3:
      return &AMDGPU::VReg_96RegClass;
    case 4:
      return &AMDGPU::VReg_128RegClass;
    case 8:
      return &AMDGPU::VReg_256RegClass;
    case 16: /* fall-through */
    default:
      llvm_unreachable("Invalid sub-register class size");
    }
  }
}

bool SIRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                          unsigned DefSubReg,
                                          const TargetRegisterClass *SrcRC,
                                          unsigned SrcSubReg) const {
  // We want to prefer the smallest register class possible, so we don't want
  // to stop and rewrite on anything that looks like a subregister
  // extract. Operations mostly don't care about the super register class, so
  // we only want to stop on the most basic of copies between the same
  // register class.
  //
  // e.g. if we have something like
  // vreg0 = ...
  // vreg1 = ...
  // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2
  // vreg3 = COPY vreg2, sub0
  //
  // We want to look through the COPY to find:
  // => vreg3 = COPY vreg0

  // Plain copy.
  return getCommonSubClass(DefRC, SrcRC) != nullptr;
}

/// Return the physical register holding 32-bit channel \p Channel of \p Reg,
/// looked up in \p SubRC. Special registers (VCC, FLAT_SCR, EXEC) have
/// hard-coded LO/HI halves.
unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
                                          const TargetRegisterClass *SubRC,
                                          unsigned Channel) const {

  switch (Reg) {
    case AMDGPU::VCC:
      switch (Channel) {
        case 0: return AMDGPU::VCC_LO;
        case 1: return AMDGPU::VCC_HI;
        default: llvm_unreachable("Invalid SubIdx for VCC");
      }

    case AMDGPU::FLAT_SCR:
      switch (Channel) {
        case 0: return AMDGPU::FLAT_SCR_LO;
        case 1: return AMDGPU::FLAT_SCR_HI;
        default: llvm_unreachable("Invalid SubIdx for FLAT_SCR");
      }
      break;

    case AMDGPU::EXEC:
      switch (Channel) {
        case 0: return AMDGPU::EXEC_LO;
        case 1: return AMDGPU::EXEC_HI;
        default: llvm_unreachable("Invalid SubIdx for EXEC");
      }
      break;
  }

  const TargetRegisterClass *RC = getPhysRegClass(Reg);
  // 32-bit registers don't have sub-registers, so we can just return the
  // Reg. We need to have this check here, because the calculation below
  // using getHWRegIndex() will fail with special 32-bit registers like
  // VCC_LO, VCC_HI, EXEC_LO, EXEC_HI and M0.
  if (RC->getSize() == 4) {
    assert(Channel == 0);
    return Reg;
  }

  unsigned Index = getHWRegIndex(Reg);
  return SubRC->getRegister(Index + Channel);
}

bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
  return OpType == AMDGPU::OPERAND_REG_IMM32;
}

bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
  // Anything that can take a literal can also take an inline constant.
  if (opCanUseLiteralConstant(OpType))
    return true;

  return OpType == AMDGPU::OPERAND_REG_INLINE_C;
}

// FIXME: Most of these are flexible with HSA and we don't need to reserve them
// as input registers if unused. Whether the dispatch ptr is necessary should be
// easy to detect from used intrinsics.
// Scratch setup is harder to know.
/// Return the physical register that was set up to hold the given preloaded
/// \p Value (user/system SGPRs, or VGPR0-2 for the work-item IDs). Asserts
/// that the corresponding input was actually requested for this function.
unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
                                           enum PreloadedValue Value) const {

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  (void)ST; // Only used by the HSA assert below in asserts builds.
  switch (Value) {
  case SIRegisterInfo::WORKGROUP_ID_X:
    assert(MFI->hasWorkGroupIDX());
    return MFI->WorkGroupIDXSystemSGPR;
  case SIRegisterInfo::WORKGROUP_ID_Y:
    assert(MFI->hasWorkGroupIDY());
    return MFI->WorkGroupIDYSystemSGPR;
  case SIRegisterInfo::WORKGROUP_ID_Z:
    assert(MFI->hasWorkGroupIDZ());
    return MFI->WorkGroupIDZSystemSGPR;
  case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
    return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
  case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
    assert(ST.isAmdHsaOS() && "Non-HSA ABI currently uses relocations");
    assert(MFI->hasPrivateSegmentBuffer());
    return MFI->PrivateSegmentBufferUserSGPR;
  case SIRegisterInfo::KERNARG_SEGMENT_PTR:
    assert(MFI->hasKernargSegmentPtr());
    return MFI->KernargSegmentPtrUserSGPR;
  case SIRegisterInfo::DISPATCH_ID:
    llvm_unreachable("unimplemented");
  case SIRegisterInfo::FLAT_SCRATCH_INIT:
    assert(MFI->hasFlatScratchInit());
    return MFI->FlatScratchInitUserSGPR;
  case SIRegisterInfo::DISPATCH_PTR:
    assert(MFI->hasDispatchPtr());
    return MFI->DispatchPtrUserSGPR;
  case SIRegisterInfo::QUEUE_PTR:
    llvm_unreachable("not implemented");
  // Work-item IDs are preloaded into the first three VGPRs.
  case SIRegisterInfo::WORKITEM_ID_X:
    assert(MFI->hasWorkItemIDX());
    return AMDGPU::VGPR0;
  case SIRegisterInfo::WORKITEM_ID_Y:
    assert(MFI->hasWorkItemIDY());
    return AMDGPU::VGPR1;
  case SIRegisterInfo::WORKITEM_ID_Z:
    assert(MFI->hasWorkItemIDZ());
    return AMDGPU::VGPR2;
  }
  llvm_unreachable("unexpected preloaded value type");
}

/// \brief Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
AMDGPU::NoRegister.unsignedSIRegisterInfo::findUnusedRegister(constMachineRegisterInfo&MRI,constTargetRegisterClass*RC)const{for(unsignedReg:*RC)if(!MRI.isPhysRegUsed(Reg))returnReg;returnAMDGPU::NoRegister;}unsignedSIRegisterInfo::getNumVGPRsAllowed(unsignedWaveCount)const{switch(WaveCount){case10:return24;case9:return28;case8:return32;case7:return36;case6:return40;case5:return48;case4:return64;case3:return84;case2:return128;default:return256;}}unsignedSIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generationgen,unsignedWaveCount)const{if(gen>=AMDGPUSubtarget::VOLCANIC_ISLANDS){switch(WaveCount){case10:return80;case9:return80;case8:return96;default:return102;}}else{switch(WaveCount){case10:return48;case9:return56;case8:return64;case7:return72;case6:return80;case5:return96;default:return103;}}}