/***********************************************************Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,The Netherlands. All Rights ReservedPermission to use, copy, modify, and distribute this software and itsdocumentation for any purpose and without fee is hereby granted,provided that the above copyright notice appear in all copies and thatboth that copyright notice and this permission notice appear insupporting documentation, and that the names of Stichting MathematischCentrum or CWI or Corporation for National Research Initiatives orCNRI not be used in advertising or publicity pertaining todistribution of the software without specific, written priorpermission.While CWI is the initial source for this software, a modified versionis made available by the Corporation for National Research Initiatives(CNRI) at the Internet address ftp://ftp.python.org.STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITHREGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OFMERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCHCENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIALDAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA ORPROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHERTORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE ORPERFORMANCE OF THIS SOFTWARE.******************************************************************//* Parser generator *//* XXX This file is not yet fully PROTOized *//* For a description, see the comments at end of this file */#include "pgenheaders.h"#include "assert.h"#include "token.h"#include "node.h"#include "grammar.h"#include "metagrammar.h"#include "pgen.h"externintPy_DebugFlag;/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */typedefstruct_nfaarc{intar_label;intar_arrow;}nfaarc;typedefstruct_nfastate{intst_narcs;nfaarc*st_arc;}nfastate;typedefstruct_nfa{intnf_type;char*nf_name;intnf_nstates;nfastate*nf_state;intnf_start,nf_finish;}nfa;/* Forward */staticvoidcompile_rhsPy_PROTO((labellist*ll,nfa*nf,node*n,int*pa,int*pb));staticvoidcompile_altPy_PROTO((labellist*ll,nfa*nf,node*n,int*pa,int*pb));staticvoidcompile_itemPy_PROTO((labellist*ll,nfa*nf,node*n,int*pa,int*pb));staticvoidcompile_atomPy_PROTO((labellist*ll,nfa*nf,node*n,int*pa,int*pb));staticintaddnfastate(nf)nfa*nf;{nfastate*st;PyMem_RESIZE(nf->nf_state,nfastate,nf->nf_nstates+1);if(nf->nf_state==NULL)Py_FatalError("out of mem");st=&nf->nf_state[nf->nf_nstates++];st->st_narcs=0;st->st_arc=NULL;returnst-nf->nf_state;}staticvoidaddnfaarc(nf,from,to,lbl)nfa*nf;intfrom,to,lbl;{nfastate*st;nfaarc*ar;st=&nf->nf_state[from];PyMem_RESIZE(st->st_arc,nfaarc,st->st_narcs+1);if(st->st_arc==NULL)Py_FatalError("out of mem");ar=&st->st_arc[st->st_narcs++];ar->ar_label=lbl;ar->ar_arrow=to;}staticnfa*newnfa(name)char*name;{nfa*nf;staticinttype=NT_OFFSET;/* All types will be disjunct */nf=PyMem_NEW(nfa,1);if(nf==NULL)Py_FatalError("no mem for new nfa");nf->nf_type=type++;nf->nf_name=name;/* XXX strdup(name) ??? */nf->nf_nstates=0;nf->nf_state=NULL;nf->nf_start=nf->nf_finish=-1;returnnf;}typedefstruct_nfagrammar{intgr_nnfas;nfa**gr_nfa;labellistgr_ll;}nfagrammar;/* Forward */staticvoidcompile_rulePy_PROTO((nfagrammar*gr,node*n));staticnfagrammar*newnfagrammar(){nfagrammar*gr;gr=PyMem_NEW(nfagrammar,1);if(gr==NULL)Py_FatalError("no mem for new nfa grammar");gr->gr_nnfas=0;gr->gr_nfa=NULL;gr->gr_ll.ll_nlabels=0;gr->gr_ll.ll_label=NULL;addlabel(&gr->gr_ll,ENDMARKER,"EMPTY");returngr;}staticnfa*addnfa(gr,name)nfagrammar*gr;char*name;{nfa*nf;nf=newnfa(name);PyMem_RESIZE(gr->gr_nfa,nfa*,gr->gr_nnfas+1);if(gr->gr_nfa==NULL)Py_FatalError("out of mem");gr->gr_nfa[gr->gr_nnfas++]=nf;addlabel(&gr->gr_ll,NAME,nf->nf_name);returnnf;}#ifdef Py_DEBUGstaticcharREQNFMT[]="metacompile: less than %d children\n";#define REQN(i, count) \ if (i < count) { \ fprintf(stderr, REQNFMT, count); \ Py_FatalError("REQN"); \ } else#else#define REQN(i, count) /* empty */#endifstaticnfagrammar*metacompile(n)node*n;{nfagrammar*gr;inti;printf("Compiling (meta-) parse tree into NFA grammar\n");gr=newnfagrammar();REQ(n,MSTART);i=n->n_nchildren-1;/* Last child is ENDMARKER */n=n->n_child;for(;--i>=0;n++){if(n->n_type!=NEWLINE)compile_rule(gr,n);}returngr;}staticvoidcompile_rule(gr,n)nfagrammar*gr;node*n;{nfa*nf;REQ(n,RULE);REQN(n->n_nchildren,4);n=n->n_child;REQ(n,NAME);nf=addnfa(gr,n->n_str);n++;REQ(n,COLON);n++;REQ(n,RHS);compile_rhs(&gr->gr_ll,nf,n,&nf->nf_start,&nf->nf_finish);n++;REQ(n,NEWLINE);}staticvoidcompile_rhs(ll,nf,n,pa,pb)labellist*ll;nfa*nf;node*n;int*pa,*pb;{inti;inta,b;REQ(n,RHS);i=n->n_nchildren;REQN(i,1);n=n->n_child;REQ(n,ALT);compile_alt(ll,nf,n,pa,pb);if(--i<=0)return;n++;a=*pa;b=*pb;*pa=addnfastate(nf);*pb=addnfastate(nf);addnfaarc(nf,*pa,a,EMPTY);addnfaarc(nf,b,*pb,EMPTY);for(;--i>=0;n++){REQ(n,VBAR);REQN(i,1);--i;n++;REQ(n,ALT);compile_alt(ll,nf,n,&a,&b);addnfaarc(nf,*pa,a,EMPTY);addnfaarc(nf,b,*pb,EMPTY);}}staticvoidcompile_alt(ll,nf,n,pa,pb)labellist*ll;nfa*nf;node*n;int*pa,*pb;{inti;inta,b;REQ(n,ALT);i=n->n_nchildren;REQN(i,1);n=n->n_child;REQ(n,ITEM);compile_item(ll,nf,n,pa,pb);--i;n++;for(;--i>=0;n++){if(n->n_type==COMMA){/* XXX Temporary */REQN(i,1);--i;n++;}REQ(n,ITEM);compile_item(ll,nf,n,&a,&b);addnfaarc(nf,*pb,a,EMPTY);*pb=b;}}staticvoidcompile_item(ll,nf,n,pa,pb)labellist*ll;nfa*nf;node*n;int*pa,*pb;{inti;inta,b;REQ(n,ITEM);i=n->n_nchildren;REQN(i,1);n=n->n_child;if(n->n_type==LSQB){REQN(i,3);n++;REQ(n,RHS);*pa=addnfastate(nf);*pb=addnfastate(nf);addnfaarc(nf,*pa,*pb,EMPTY);compile_rhs(ll,nf,n,&a,&b);addnfaarc(nf,*pa,a,EMPTY);addnfaarc(nf,b,*pb,EMPTY);REQN(i,1);n++;REQ(n,RSQB);}else{compile_atom(ll,nf,n,pa,pb);if(--i<=0)return;n++;addnfaarc(nf,*pb,*pa,EMPTY);if(n->n_type==STAR)*pb=*pa;elseREQ(n,PLUS);}}staticvoidcompile_atom(ll,nf,n,pa,pb)labellist*ll;nfa*nf;node*n;int*pa,*pb;{inti;REQ(n,ATOM);i=n->n_nchildren;REQN(i,1);n=n->n_child;if(n->n_type==LPAR){REQN(i,3);n++;REQ(n,RHS);compile_rhs(ll,nf,n,pa,pb);n++;REQ(n,RPAR);}elseif(n->n_type==NAME||n->n_type==STRING){*pa=addnfastate(nf);*pb=addnfastate(nf);addnfaarc(nf,*pa,*pb,addlabel(ll,n->n_type,n->n_str));}elseREQ(n,NAME);}staticvoiddumpstate(ll,nf,istate)labellist*ll;nfa*nf;intistate;{nfastate*st;inti;nfaarc*ar;printf("%c%2d%c",istate==nf->nf_start?'*':' ',istate,istate==nf->nf_finish?'.':' ');st=&nf->nf_state[istate];ar=st->st_arc;for(i=0;i<st->st_narcs;i++){if(i>0)printf("\n ");printf("-> %2d %s",ar->ar_arrow,PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));ar++;}printf("\n");}staticvoiddumpnfa(ll,nf)labellist*ll;nfa*nf;{inti;printf("NFA '%s' has %d states; start %d, finish %d\n",nf->nf_name,nf->nf_nstates,nf->nf_start,nf->nf_finish);for(i=0;i<nf->nf_nstates;i++)dumpstate(ll,nf,i);}/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */staticvoidaddclosure(ss,nf,istate)bitsetss;nfa*nf;intistate;{if(addbit(ss,istate)){nfastate*st=&nf->nf_state[istate];nfaarc*ar=st->st_arc;inti;for(i=st->st_narcs;--i>=0;){if(ar->ar_label==EMPTY)addclosure(ss,nf,ar->ar_arrow);ar++;}}}typedefstruct_ss_arc{bitsetsa_bitset;intsa_arrow;intsa_label;}ss_arc;typedefstruct_ss_state{bitsetss_ss;intss_narcs;ss_arc*ss_arc;intss_deleted;intss_finish;intss_rename;}ss_state;typedefstruct_ss_dfa{intsd_nstates;ss_state*sd_state;}ss_dfa;/* Forward */staticvoidprintssdfaPy_PROTO((intxx_nstates,ss_state*xx_state,intnbits,labellist*ll,char*msg));staticvoidsimplifyPy_PROTO((intxx_nstates,ss_state*xx_state));staticvoidconvertPy_PROTO((dfa*d,intxx_nstates,ss_state*xx_state));staticvoidmakedfa(gr,nf,d)nfagrammar*gr;nfa*nf;dfa*d;{intnbits=nf->nf_nstates;bitsetss;intxx_nstates;ss_state*xx_state,*yy;ss_arc*zz;intistate,jstate,iarc,jarc,ibit;nfastate*st;nfaarc*ar;ss=newbitset(nbits);addclosure(ss,nf,nf->nf_start);xx_state=PyMem_NEW(ss_state,1);if(xx_state==NULL)Py_FatalError("no mem for xx_state in makedfa");xx_nstates=1;yy=&xx_state[0];yy->ss_ss=ss;yy->ss_narcs=0;yy->ss_arc=NULL;yy->ss_deleted=0;yy->ss_finish=testbit(ss,nf->nf_finish);if(yy->ss_finish)printf("Error: nonterminal '%s' may produce empty.\n",nf->nf_name);/* This algorithm is from a book written before the invention of structured programming... *//* For each unmarked state... */for(istate=0;istate<xx_nstates;++istate){yy=&xx_state[istate];ss=yy->ss_ss;/* For all its states... */for(ibit=0;ibit<nf->nf_nstates;++ibit){if(!testbit(ss,ibit))continue;st=&nf->nf_state[ibit];/* For all non-empty arcs from this state... */for(iarc=0;iarc<st->st_narcs;iarc++){ar=&st->st_arc[iarc];if(ar->ar_label==EMPTY)continue;/* Look up in list of arcs from this state */for(jarc=0;jarc<yy->ss_narcs;++jarc){zz=&yy->ss_arc[jarc];if(ar->ar_label==zz->sa_label)gotofound;}/* Add new arc for this state */PyMem_RESIZE(yy->ss_arc,ss_arc,yy->ss_narcs+1);if(yy->ss_arc==NULL)Py_FatalError("out of mem");zz=&yy->ss_arc[yy->ss_narcs++];zz->sa_label=ar->ar_label;zz->sa_bitset=newbitset(nbits);zz->sa_arrow=-1;found:;/* Add destination */addclosure(zz->sa_bitset,nf,ar->ar_arrow);}}/* Now look up all the arrow states */for(jarc=0;jarc<xx_state[istate].ss_narcs;jarc++){zz=&xx_state[istate].ss_arc[jarc];for(jstate=0;jstate<xx_nstates;jstate++){if(samebitset(zz->sa_bitset,xx_state[jstate].ss_ss,nbits)){zz->sa_arrow=jstate;gotodone;}}PyMem_RESIZE(xx_state,ss_state,xx_nstates+1);if(xx_state==NULL)Py_FatalError("out of mem");zz->sa_arrow=xx_nstates;yy=&xx_state[xx_nstates++];yy->ss_ss=zz->sa_bitset;yy->ss_narcs=0;yy->ss_arc=NULL;yy->ss_deleted=0;yy->ss_finish=testbit(yy->ss_ss,nf->nf_finish);done:;}}if(Py_DebugFlag)printssdfa(xx_nstates,xx_state,nbits,&gr->gr_ll,"before minimizing");simplify(xx_nstates,xx_state);if(Py_DebugFlag)printssdfa(xx_nstates,xx_state,nbits,&gr->gr_ll,"after minimizing");convert(d,xx_nstates,xx_state);/* XXX cleanup */}staticvoidprintssdfa(xx_nstates,xx_state,nbits,ll,msg)intxx_nstates;ss_state*xx_state;intnbits;labellist*ll;char*msg;{inti,ibit,iarc;ss_state*yy;ss_arc*zz;printf("Subset DFA %s\n",msg);for(i=0;i<xx_nstates;i++){yy=&xx_state[i];if(yy->ss_deleted)continue;printf(" Subset %d",i);if(yy->ss_finish)printf(" (finish)");printf(" { ");for(ibit=0;ibit<nbits;ibit++){if(testbit(yy->ss_ss,ibit))printf("%d ",ibit);}printf("}\n");for(iarc=0;iarc<yy->ss_narcs;iarc++){zz=&yy->ss_arc[iarc];printf(" Arc to state %d, label %s\n",zz->sa_arrow,PyGrammar_LabelRepr(&ll->ll_label[zz->sa_label]));}}}/* PART THREE -- SIMPLIFY DFA *//* Simplify the DFA by repeatedly eliminating states that are equivalent to another oner. This is NOT Algorithm 3.3 from [Aho&Ullman 77]. It does not always finds the minimal DFA, but it does usually make a much smaller one... (For an example of sub-optimal behaviour, try S: x a b+ | y a b+.)*/staticintsamestate(s1,s2)ss_state*s1,*s2;{inti;if(s1->ss_narcs!=s2->ss_narcs||s1->ss_finish!=s2->ss_finish)return0;for(i=0;i<s1->ss_narcs;i++){if(s1->ss_arc[i].sa_arrow!=s2->ss_arc[i].sa_arrow||s1->ss_arc[i].sa_label!=s2->ss_arc[i].sa_label)return0;}return1;}staticvoidrenamestates(xx_nstates,xx_state,from,to)intxx_nstates;ss_state*xx_state;intfrom,to;{inti,j;if(Py_DebugFlag)printf("Rename state %d to %d.\n",from,to);for(i=0;i<xx_nstates;i++){if(xx_state[i].ss_deleted)continue;for(j=0;j<xx_state[i].ss_narcs;j++){if(xx_state[i].ss_arc[j].sa_arrow==from)xx_state[i].ss_arc[j].sa_arrow=to;}}}staticvoidsimplify(xx_nstates,xx_state)intxx_nstates;ss_state*xx_state;{intchanges;inti,j;do{changes=0;for(i=1;i<xx_nstates;i++){if(xx_state[i].ss_deleted)continue;for(j=0;j<i;j++){if(xx_state[j].ss_deleted)continue;if(samestate(&xx_state[i],&xx_state[j])){xx_state[i].ss_deleted++;renamestates(xx_nstates,xx_state,i,j);changes++;break;}}}}while(changes);}/* PART FOUR -- GENERATE PARSING TABLES *//* Convert the DFA into a grammar that can be used by our parser */staticvoidconvert(d,xx_nstates,xx_state)dfa*d;intxx_nstates;ss_state*xx_state;{inti,j;ss_state*yy;ss_arc*zz;for(i=0;i<xx_nstates;i++){yy=&xx_state[i];if(yy->ss_deleted)continue;yy->ss_rename=addstate(d);}for(i=0;i<xx_nstates;i++){yy=&xx_state[i];if(yy->ss_deleted)continue;for(j=0;j<yy->ss_narcs;j++){zz=&yy->ss_arc[j];addarc(d,yy->ss_rename,xx_state[zz->sa_arrow].ss_rename,zz->sa_label);}if(yy->ss_finish)addarc(d,yy->ss_rename,yy->ss_rename,0);}d->d_initial=0;}/* PART FIVE -- GLUE IT ALL TOGETHER */staticgrammar*maketables(gr)nfagrammar*gr;{inti;nfa*nf;dfa*d;grammar*g;if(gr->gr_nnfas==0)returnNULL;g=newgrammar(gr->gr_nfa[0]->nf_type);/* XXX first rule must be start rule */g->g_ll=gr->gr_ll;for(i=0;i<gr->gr_nnfas;i++){nf=gr->gr_nfa[i];if(Py_DebugFlag){printf("Dump of NFA for '%s' ...\n",nf->nf_name);dumpnfa(&gr->gr_ll,nf);}printf("Making DFA for '%s' ...\n",nf->nf_name);d=adddfa(g,nf->nf_type,nf->nf_name);makedfa(gr,gr->gr_nfa[i],d);}returng;}grammar*pgen(n)node*n;{nfagrammar*gr;grammar*g;gr=metacompile(n);g=maketables(gr);translatelabels(g);addfirstsets(g);returng;}/*Description-----------Input is a grammar in extended BNF (using * for repetition, + forat-least-once repetition, [] for optional parts, | for alternatives and() for grouping). This has already been parsed and turned into a parsetree.Each rule is considered as a regular expression in its own right.It is turned into a Non-deterministic Finite Automaton (NFA), whichis then turned into a Deterministic Finite Automaton (DFA), which is thenoptimized to reduce the number of states. See [Aho&Ullman 77] chapter 3,or similar compiler books (this technique is more often used for lexicalanalyzers).The DFA's are used by the parser as parsing tables in a special waythat's probably unique. Before they are usable, the FIRST sets of allnon-terminals are computed.Reference---------[Aho&Ullman 77] Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 (first edition)*/