import ast
import sys
import codegen
import logging
import pickle
import bb.pysh as pysh
import os.path
import bb.utils, bb.data
import hashlib
from itertools import chain
from bb.pysh import pyshyacc, pyshlex, sherrors
from bb.cache import MultiProcessCache

logger = logging.getLogger('BitBake.CodeParser')


def bbhash(s):
    """Return the hex md5 digest of *s*, used as a codeparser cache key."""
    return hashlib.md5(s.encode("utf-8")).hexdigest()


def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation"""

    i = 0
    # Bound-check the scan: the original unguarded index raised IndexError
    # for empty or all-whitespace input.
    while i < len(codestr) and codestr[i] in ["\n", "\t", " "]:
        i = i + 1

    # No leading whitespace at all: return the code unchanged.
    if i == 0:
        return codestr

    # Leading whitespace on the first code line means the block is indented;
    # wrap it in "if 1:" so compile() accepts the uniform indentation.
    if codestr[i - 1] == "\t" or codestr[i - 1] == " ":
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr


# Basically pickle, in python 2.7.3 at least, does badly with data duplication
# upon pickling and unpickling. Combine this with duplicate objects and things
# are a mess.
#
# When the sets are originally created, python calls intern() on the set keys
# which significantly improves memory usage. Sadly the pickle/unpickle process
# doesn't call intern() on the keys and results in the same strings being duplicated
# in memory. This also means pickle will save the same string multiple times in
# the cache file.
#
# By having shell and python cacheline objects with setstate/getstate, we force
# the object creation through our own routine where we can call intern (via internSet).
#
# We also use hashable frozensets and ensure we use references to these so that
# duplicates can be removed, both in memory and in the resulting pickled data.
#
# By playing these games, the size of the cache file shrinks dramatically
# meaning faster load times and the reloaded cache files also consume much less
# memory.
# Smaller cache files, faster load times and lower memory usage is good.
#
# A custom getstate/setstate using tuples is actually worth 15% cachesize by
# avoiding duplication of the attribute names!


class SetCache(object):
    """Canonicalise frozensets of interned strings so equal sets share one object."""

    def __init__(self):
        # Maps hash(frozenset) -> the canonical frozenset instance.
        self.setcache = {}

    def internSet(self, items):
        """Return a canonical frozenset of sys.intern()'d copies of *items*.

        NOTE(review): lookup keys on hash() of the frozenset, so two distinct
        sets whose hashes collide would alias to one object — presumably an
        accepted trade-off here; confirm if strict correctness is required.
        """
        canonical = frozenset(sys.intern(item) for item in items)
        return self.setcache.setdefault(hash(canonical), canonical)

codecache = SetCache()

class pythonCacheLine(object):
    # Cached parse result for a python fragment: the variables it references,
    # the functions it executes, and any contains()-style queries it makes.
    def __init__(self, refs, execs, contains):
        self.refs = codecache.internSet(refs)
        self.execs = codecache.internSet(execs)
        self.contains = {}
        for key in contains:
            self.contains[key] = codecache.internSet(contains[key])

    def __getstate__(self):
        # Pickle a bare tuple: avoids storing attribute names in the cache file.
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        # Route unpickling through __init__ so internSet() re-interns the data.
        (refs, execs, contains) = state
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Deterministic hash over refs, execs and the sorted contains mapping.
        parts = [hash(self.refs), hash(self.execs)]
        for key in sorted(self.contains.keys()):
            parts.append(key)
            parts.append(hash(self.contains[key]))
        return hash(tuple(parts))

    def __repr__(self):
        return " ".join([str(self.refs), str(self.execs), str(self.contains)])

class shellCacheLine(object):
    # Cached parse result for a shell fragment: the external commands it runs.
    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        # Pickle just the frozenset; no attribute names in the cache file.
        return self.execs

    def __setstate__(self, state):
        # Route unpickling through __init__ so internSet() re-interns the data.
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return str(self.execs)
class CodeParserCache(MultiProcessCache):
    # Persistent, multi-process cache of parsed python/shell fragments,
    # keyed by bbhash() of the code text.
    cache_file_name = "bb_codeparser.dat"
    CACHE_VERSION = 8

    def __init__(self):
        MultiProcessCache.__init__(self)
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a deduplicated pythonCacheLine for the given parse results."""
        cacheline = pythonCacheLine(refs, execs, contains)
        h = hash(cacheline)
        if h in self.pythoncachelines:
            return self.pythoncachelines[h]
        self.pythoncachelines[h] = cacheline
        return cacheline

    def newShellCacheLine(self, execs):
        """Return a deduplicated shellCacheLine for the given exec set."""
        cacheline = shellCacheLine(execs)
        h = hash(cacheline)
        if h in self.shellcachelines:
            return self.shellcachelines[h]
        self.shellcachelines[h] = cacheline
        return cacheline

    def init_cache(self, d):
        # Check if we already have the caches
        if self.pythoncache:
            return

        MultiProcessCache.init_cache(self, d)

        # cachedata gets re-assigned in the parent
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]

    def create_cachedata(self):
        # Two dicts: [0] python fragment cache, [1] shell fragment cache.
        data = [{}, {}]
        return data

codeparsercache = CodeParserCache()

def parser_cache_init(d):
    codeparsercache.init_cache(d)

def parser_cache_save():
    codeparsercache.save_extras()

def parser_cache_savemerge():
    codeparsercache.save_merge()

Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """Logger that buffers records until flush(), then forwards to *target*."""

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        self.buffer.append(record)

    def flush(self):
        for record in self.buffer:
            self.target.handle(record)
        self.buffer = []

class PythonParser():
    # Method-name suffixes whose string argument names a referenced variable.
    getvars = (".getVar", ".appendVar", ".prependVar")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    containsfuncs = ("bb.utils.contains", "base_contains", "bb.utils.contains_any")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    def visit_Call(self, node):
        """Inspect a Call node and record variable references and executions."""
        name = self.called_node_name(node.func)
        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs):
            if isinstance(node.args[0], ast.Str):
                varname = node.args[0].s
                if name in self.containsfuncs and isinstance(node.args[1], ast.Str):
                    # contains(var, checkvalues): track the checked values per variable.
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname].add(node.args[1].s)
                elif name.endswith(self.getvarflags):
                    # getVarFlag(var, flag): record as "var[flag]".
                    if isinstance(node.args[1], ast.Str):
                        self.references.add('%s[%s]' % (varname, node.args[1].s))
                    else:
                        self.warn(node.func, node.args[1])
                else:
                    self.references.add(varname)
            else:
                self.warn(node.func, node.args[0])
        elif name and name.endswith(".expand"):
            if isinstance(node.args[0], ast.Str):
                # Recursively expand the literal and absorb its references/execs.
                value = node.args[0].s
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname] |= parser.contains[varname]
        elif name in self.execfuncs:
            # exec_func/exec_task with a literal name: an indirect execution.
            if isinstance(node.args[0], ast.Str):
                self.var_execs.add(node.args[0].s)
            else:
                self.warn(node.func, node.args[0])
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            # Any other direct call: record the called name as an exec.
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form"""
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                # Unsupported node kind (e.g. a subscript) - no name to return.
                break

    def __init__(self, name, log):
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def parse_python(self, node, lineno=0, filename="<string>"):
        """Parse *node* (python source text), populating references, execs
        and contains, consulting and updating the codeparser cache.
        """
        if not node or not node.strip():
            return

        h = bbhash(str(node))

        if h in codeparsercache.pythoncache:
            self.references = set(codeparsercache.pythoncache[h].refs)
            self.execs = set(codeparsercache.pythoncache[h].execs)
            self.contains = {}
            for i in codeparsercache.pythoncache[h].contains:
                self.contains[i] = set(codeparsercache.pythoncache[h].contains[i])
            return

        if h in codeparsercache.pythoncacheextras:
            self.references = set(codeparsercache.pythoncacheextras[h].refs)
            self.execs = set(codeparsercache.pythoncacheextras[h].execs)
            self.contains = {}
            for i in codeparsercache.pythoncacheextras[h].contains:
                self.contains[i] = set(codeparsercache.pythoncacheextras[h].contains[i])
            return

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec", ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if n.__class__.__name__ == "Call":
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
class ShellParser():
    def __init__(self, name, log):
        # Names of shell functions defined in the fragment (excluded from execs).
        self.funcdefs = set()
        # Every command word seen, including calls to locally-defined functions.
        self.allexecs = set()
        # External commands only: allexecs minus funcdefs.
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = bbhash(str(value))

        if h in codeparsercache.shellcache:
            self.execs = set(codeparsercache.shellcache[h].execs)
            return self.execs

        if h in codeparsercache.shellcacheextras:
            self.execs = set(codeparsercache.shellcacheextras[h].execs)
            return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        # Run the pysh parser over the fragment and walk the resulting tree.
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        self.process_tokens(tokens)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.
        """

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                # elif arms nest as ("elif", if_clause) tuples - recurse.
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        # Dispatch table: token type -> (nested tokens, words) extractor.
        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        def process_token_list(tokens):
            for token in tokens:
                if isinstance(token, list):
                    # Nested token lists are flattened recursively.
                    process_token_list(token)
                    continue
                name, value = token
                try:
                    more_tokens, words = token_handlers[name](value)
                except KeyError:
                    raise NotImplementedError("Unsupported token type " + name)

                if more_tokens:
                    self.process_tokens(more_tokens)

                if words:
                    self.process_words(words)

        process_token_list(tokens)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    # Command substitution: parse the embedded command too.
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    if word[0] in ("cmd_name", "cmd_word"):
                        # The substitution itself is not a literal command name.
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # Leading VAR=value assignment: the real command follows.
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    # Command name comes from a variable - can't track it.
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    # eval: re-parse its arguments as shell code.
                    command = " ".join(word for _, word in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break