{-------------------------------------------------------------------------------------
-
- A Compiler from XQuery to Haskell
- Programmer: Leonidas Fegaras
- Email: fegaras@cse.uta.edu
- Web: http://lambda.uta.edu/- Creation: 02/15/08, last update: 06/21/08
-
- Copyright (c) 2008 by Leonidas Fegaras, the University of Texas at Arlington. All rights reserved.
- This material is provided as is, with absolutely no warranty expressed or implied.
- Any use is at your own risk. Permission is hereby granted to use or copy this program
- for any purpose, provided the above notices are retained on all copies.
-
--------------------------------------------------------------------------------------}{-# OPTIONS_GHC -fth -fbang-patterns #-}moduleXML.HXQ.CompilerwhereimportData.ListimportControl.MonadimportChar(isDigit,toLower)importList(sortBy)importLanguage.Haskell.THimportDatabase.HDBCimportXMLParse(parseDocument)importHXML(AttList)importXML.HXQ.ParserimportXML.HXQ.XTreeimportXML.HXQ.OptimizerimportXML.HXQ.DBimportXML.HXQ.DBConnect{--------------- XPath Steps ---------------------------------------------------------}current_step::Tag->XTree->XSeqcurrent_stepmx=casexofXElemk____|(k==m||m=="*")->[x]_->[]-- XPath step /tag or /*child_step::Tag->XTree->XSeqchild_stepmx=casexofXElem____bs->foldr(\bs->casebofXElemk____|(k==m||m=="*")->b:s_->s)[]bs_->[]-- XPath step //tag or //*descendant_step::Tag->XTree->XSeqdescendant_stepm(x@(XElemt___cs))|m==t||m=="*"=x:(concatMap(descendant_stepm)cs)descendant_stepm(XElemt___cs)=concatMap(descendant_stepm)csdescendant_stepm_=[]-- It's like //* but has tagged children, which are derived statically-- After examing 100 children it gives up: this avoids space leaksdescendant_any_with_tagged_children::[Tag]->XTree->XSeqdescendant_any_with_tagged_childrentags(x@(XElemt___cs))|all(\tag->foldr(\bs->casebof(XElemk____)->s||k==tag_->s)Falsecs100)tags=x:(concatMap(descendant_any_with_tagged_childrentags)cs)wherecs100=take100csdescendant_any_with_tagged_childrentags(XElemt___cs)=concatMap(descendant_any_with_tagged_childrentags)csdescendant_any_with_tagged_childrentags_=[]-- XPath step /@attr or /@*attribute_step::Tag->XTree->XSeqattribute_stepmx=casexof(XElem_al___)->foldr(\(k,v)s->ifk==m||m=="*"then(XTextv):selses)[]al_->[]-- XPath step //@attr or //@*attribute_descendant_step::Tag->XTree->XSeqattribute_descendant_stepm(x@(XElem_al__cs))=foldr(\(k,v)s->ifk==m||m=="*"then(XTextv):selses)(concatMap(attribute_descendant_stepm)cs)alattribute_descendant_stepm_=[]-- NOT USED: XPath step /..parent_step::Tag->XTree->XSeqparent_step_(XElem___p_)=[p]parent_step_e=error("Cannot derive the parent of "++showe){------------ Functions --------------------------------------------------------------}-- find the value of a variable in an association listfindVvarenv=casefilter(\(n,_)->n==var)envof(_,b):_->b_->error("Undefined variable: "++var)-- is the variable defined in the association list?memVvarenv=casefilter(\(n,_)->n==var)envof(_,b):_->True_->False-- like foldr but with an indexfoldir::(a->Int->b->b)->b->[a]->Int->bfoldircn[]i=nfoldircn(x:xs)i=cxi(foldircnxs(i+1))trueXT=XBoolTruefalseXT=XBoolFalsereadNum::String->MaybeXTreereadNumcs=casespanisDigitcsof(n,[])->Just(XInt(readn))(n,'.':rest)->casespanisDigitrestof(k,[])->Just(XFloat(read(n++('.':k))))_->Nothing_->Nothingtext::XSeq->XSeqtextxs=foldr(\xr->casexofXElem____zs->(filter(\a->caseaofXText_->True;XInt_->True;XFloat_->True;XBool_->True;_->False)zs)++rXText_->x:rXInt_->x:rXFloat_->x:rXBool_->x:r_->r)[]xstoString::XSeq->[String]toStringxs=map(\x->casexofXTextt->tXIntn->shownXFloatn->shownXBooln->shown)(textxs)-- concatenate text with no padding (for element content)appendText::[XSeq]->XSeqappendText[]=[]appendText[x]=xappendText(x:xs)=x++[XNoPad]++appendTextxstoNum::XSeq->XSeqtoNumxs=foldr(\xr->casexofXIntn->x:rXFloatn->x:rXTexts->casereadNumsofJustt->t:r_->r_->r)[](textxs)toFloat::XTree->FloattoFloat(XTexts)=casereadNumsofJust(XIntn)->fromIntegralnJust(XFloatn)->n_->error("Cannot convert to a float: "++s)toFloat(XIntn)=fromIntegralntoFloat(XFloatn)=ntoFloatx=error("Cannot convert to a float: "++(showx))mean::(Fractionalt)=>[t]->tmean=uncurry(/).foldl'(\(!s,!n)x->(s+x,n+1))(0,0.0)contains::String->String->Boolcontainstextword=letlen=lengthwordcxs|((takelenxs)==word)=Truec(_:xs)=cxsc_=Falseinctextdistinct::Eqa=>[a]->[a]distinct=foldl(\ra->ifelemarthenrelser++[a])[]arithmetic::(Float->Float->Float)->XTree->XTree->XTreearithmeticop(XIntn)(XIntm)=XInt(round(op(fromIntegraln)(fromIntegralm)))arithmeticop(XFloatn)(XFloatm)=XFloat(opnm)arithmeticop(XFloatn)(XIntm)=XFloat(opn(fromIntegralm))arithmeticop(XIntn)(XFloatm)=XFloat(op(fromIntegraln)m)compareXTrees::XTree->XTree->OrderingcompareXTrees(XElem_____)_=EQcompareXTrees_(XElem_____)=EQcompareXTrees(XIntn)(XIntm)=comparenmcompareXTrees(XFloatn)(XIntm)=comparen(fromIntegralm)compareXTrees(XIntn)(XFloatm)=compare(fromIntegraln)mcompareXTrees(XFloatn)(XFloatm)=comparenmcompareXTrees(XTextn)(XTextm)=comparenmcompareXTreesxy=compare(toFloatx)(toFloaty)strictCompareOne[XIntn][XIntm]=comparenmstrictCompareOne[XFloatn][XFloatm]=comparenmstrictCompareOne[XFloatn][XIntm]=comparen(fromIntegralm)strictCompareOne[XIntn][XFloatm]=compare(fromIntegraln)mstrictCompareOne[XTextn][XTextm]=comparenmstrictCompareOnexy=error("Illegal operands in strict comparison: "++(showx)++" "++(showy))strictCompare::XSeq->XSeq->OrderingstrictCompare[XElem____x][XElem____y]=strictCompareOnexystrictComparex[XElem____y]=strictCompareOnexystrictCompare[XElem____x]y=strictCompareOnexystrictComparexy=strictCompareOnexycompareXSeqs::Bool->XSeq->XSeq->OrderingcompareXSeqsordxsys=letcomps=[compareXTreesxy|x<-xs,y<-ys]inifordthenifall(\x->x==LT)compsthenLTelseifall(\x->x==GT)compsthenGTelseEQelseifall(\x->x==LT)compsthenGTelseifall(\x->x==GT)compsthenLTelseEQconditionTest::XSeq->BoolconditionTest[]=FalseconditionTest[XText""]=FalseconditionTest[XInt0]=FalseconditionTest[XBoolFalse]=FalseconditionTest_=True-- XPath stepspaths::[(Tag,QExp)]paths=[("current_step",[|current_step|]),("child_step",[|child_step|]),("descendant_step",[|descendant_step|]),("attribute_step",[|attribute_step|]),("attribute_descendant_step",[|attribute_descendant_step|]),("parent_step",[|parent_step|])]typeFunction=[QExp]->QExp-- System functions: they can also be defined as Haskell functions of type (XSeq,...,XSeq) -> XSeq-- but here we make sure they are unfolded and fused with the rest of the queryfunctions::[(Tag,Int,Function)]functions=[("=",2,\[xs,ys]->[|[trueXT|x<-text$xs,y<-text$ys,compareXTreesxy==EQ]|]),("!=",2,\[xs,ys]->[|ifnull[trueXT|x<-text$xs,y<-text$ys,compareXTreesxy==EQ]then[trueXT]else[falseXT]|]),(">",2,\[xs,ys]->[|[trueXT|x<-text$xs,y<-text$ys,compareXTreesxy==GT]|]),("<",2,\[xs,ys]->[|[trueXT|x<-text$xs,y<-text$ys,compareXTreesxy==LT]|]),(">=",2,\[xs,ys]->[|[trueXT|x<-text$xs,y<-text$ys,compareXTreesxy`elem`[GT,EQ]]|]),("<=",2,\[xs,ys]->[|[trueXT|x<-text$xs,y<-text$ys,compareXTreesxy`elem`[LT,EQ]]|]),("eq",2,\[xs,ys]->[|ifstrictCompare$xs$ys==EQthen[trueXT]else[falseXT]|]),("neq",2,\[xs,ys]->[|ifstrictCompare$xs$ys/=EQthen[trueXT]else[falseXT]|]),("lt",2,\[xs,ys]->[|ifstrictCompare$xs$ys==LTthen[trueXT]else[falseXT]|]),("gt",2,\[xs,ys]->[|ifstrictCompare$xs$ys==GTthen[trueXT]else[falseXT]|]),("le",2,\[xs,ys]->[|ifstrictCompare$xs$ys`elem`[LT,EQ]then[trueXT]else[falseXT]|]),("ge",2,\[xs,ys]->[|ifstrictCompare$xs$ys`elem`[GT,EQ]then[trueXT]else[falseXT]|]),("<<",2,\[xs,ys]->[|[trueXT|XElem__ox__<-$xs,XElem__oy__<-$ys,ox<oy]|]),(">>",2,\[xs,ys]->[|[trueXT|XElem__ox__<-$xs,XElem__oy__<-$ys,ox>oy]|]),("is",2,\[xs,ys]->[|[trueXT|XElem__ox__<-$xs,XElem__oy__<-$ys,ox==oy]|]),("+",2,\[xs,ys]->[|[arithmetic(+)xy|x<-toNum$xs,y<-toNum$ys]|]),("-",2,\[xs,ys]->[|[arithmetic(-)xy|x<-toNum$xs,y<-toNum$ys]|]),("*",2,\[xs,ys]->[|[arithmetic(*)xy|x<-toNum$xs,y<-toNum$ys]|]),("div",2,\[xs,ys]->[|[arithmetic(/)xy|x<-toNum$xs,y<-toNum$ys]|]),("idiv",2,\[xs,ys]->[|[XInt(divxy)|(XIntx)<-toNum$xs,(XInty)<-toNum$ys]|]),("mod",2,\[xs,ys]->[|[XInt(modxy)|(XIntx)<-toNum$xs,(XInty)<-toNum$ys]|]),("uplus",1,\[xs]->[|[x|x<-toNum$xs]|]),("uminus",1,\[xs]->[|[casexofXIntn->XInt(-n);XFloatn->XFloat(-n)|x<-toNum$xs]|]),("and",2,\[xs,ys]->[|if(conditionTest$xs)&&(conditionTest$ys)then[trueXT]else[falseXT]|]),("or",2,\[xs,ys]->[|if(conditionTest$xs)||(conditionTest$ys)then[trueXT]else[falseXT]|]),("not",1,\[xs]->[|if(conditionTest$xs)then[falseXT]else[trueXT]|]),("some",1,\[xs]->[|if(conditionTest$xs)then[trueXT]else[falseXT]|]),("count",1,\[xs]->[|[XInt(length$xs)]|]),("sum",1,\[xs]->[|[XFloat(sum[toFloatx|x<-toNum$xs])]|]),("avg",1,\[xs]->[|[XFloat(mean[toFloatx|x<-toNum$xs])]|]),("min",1,\[xs]->[|[XFloat(minimum[toFloatx|x<-toNum$xs])]|]),("max",1,\[xs]->[|[XFloat(maximum[toFloatx|x<-toNum$xs])]|]),("to",2,\[xs,ys]->[|[XInti|XIntn<-toNum$xs,XIntm<-toNum$ys,i<-[n..m]]|]),("text",1,\[xs]->[|text$xs|]),("string",1,\[xs]->[|text$xs|]),("data",1,\[xs]->[|text$xs|]),("node",1,\[xs]->[|[w|w@(XElem_____)<-$xs]|]),("exists",1,\[xs]->[|[XBool(not(null$xs))]|]),("empty",0,\[]->[|[]|]),("true",0,\[]->[|[trueXT]|]),("false",0,\[]->[|[]|]),("if",3,\[cs,ts,es]->[|ifconditionTest$csthen$tselse$es|]),("element",2,\[tags,xs]->[|[x|tag<-toString$tags,x@(XElemt____)<-$xs,(t==tag||tag=="*")]|]),("attribute",2,\[tags,xs]->[|[z|tag<-toString$tags,x<-$xs,z<-attribute_steptagx]|]),("name",1,\[xs]->[|[XTexttag|XElemtag____<-$xs]|]),("contains",2,\[xs,text]->[|[trueXT|x<-toString$xs,t<-toString$text,containsxt]|]),("substring",3,\[xs,n1,n2]->[|[XText(takem2(drop(m1-1)x))|x<-toString$xs,XIntm1<-toNum$n1,XIntm2<-toNum$n2]|]),("concatenate",2,\[xs,ys]->[|$xs++$ys|]),("distinct-values",1,\[xs]->[|distinct$xs|]),("union",2,\[xs,ys]->[|distinct($xs++$ys)|]),("intersect",2,\[xs,ys]->[|filter(\x->elemx$ys)$xs|]),("except",2,\[xs,ys]->[|filter(\x->not(elemx$ys))$xs|]),("reverse",1,\[xs]->[|reverse$xs|])]-- functions to be used by the interpreter-- when evaluated, it gives [(String,Int,[XSeq]->XSeq)]iFunctions::QExpiFunctions=foldr(\(fname,len,f)r->letvars=map(\i->mkName("v_"++(showi)))[1..len]entry=tupE[litE(StringLfname),litE(IntegerL(toIntegerlen)),lamE[listP(mapvarPvars)](f(mapvarEvars))]in[|$entry:$r|])[|[]|]functions-- XPath steps to be used by the interpreter-- when evaluated, it gives [(String,Tag->XTree->XSeq)]pFunctions=foldr(\(pname,p)r->letpn=litE(StringLpname)in[|($pn,$p):$r|])[|[]|]paths-- make a function callcallF::Tag->FunctioncallFfnameargs=casefilter(\(n,_,_)->n==fname||("fn:"++n)==fname)functionsof(_,len,f):_->if(lengthargs)==lenthenfargselseerror("wrong number of arguments in function call: "++fname)_->-- otherwise, it must be a Haskell function of type (XSeq,...,XSeq) -> XSeqletitp=caseargsof[]->[t|()|][_]->[t|XSeq|]_->foldr(\_r->appTr[t|XSeq|])(appT(tupleT(lengthargs))[t|XSeq|])(tailargs)fn=sigE(varE(mkNamefname))(appT(appTarrowTitp)[t|XSeq|])inappEfn(tupEargs){------------ Compiler ---------------------------------------------------------------}undef1=[|error"Undefined XQuery context (.)"|]undef2=[|error"Undefined position()"|]undef3=[|error"Undefined last()"|]-- does the expression contain a last()?containsLast::Ast->BoolcontainsLast(Ast"call"[Avar"last"])=TruecontainsLast(Astf_)|elemf["let","for","predicate"]=FalsecontainsLast(Ast"step"_)=FalsecontainsLast(Ast_args)=or(mapcontainsLastargs)containsLast_=False-- calculate the maximum position value used in a predicate, if there is onemaxPosition::Ast->Ast->IntmaxPositionpositione=caseeofAst"call"[Avarf,p,Aintn]|f`elem`["=","<","<=","eq","lt","le"]&&p==position->nAst"call"[Avarf,Aintn,p]|f`elem`["=",">",">=","eq","gt","ge"]&&p==position->nAst"let"[Avarx,source,body]->ifposition==Avarxthen0elseminp(maxPositionpositionsource)(maxPositionpositionbody)Ast"for"[Avarx,Avari,source,body]->ifposition==Avarx||position==Avarithen0elseminp(maxPositionpositionsource)(maxPositionpositionbody)Ast"predicate"[pred,body]->minp(maxPositionpositionpred)(maxPositionpositionbody)Ast"call"[Avar"and",x,y]->minp(maxPositionpositionx)(maxPositionpositiony)Ast"call"[Avar"or",x,y]->max(maxPositionpositionx)(maxPositionpositiony)_->0whereminpxy=ifx==0thenyelseify==0thenxelseminxypathPosition=Ast"call"[Avar"position"]parent_error=error"constructed elements have no parent"-- extract the QNameqName::XSeq->TagqName[XTexts]=sqNamee=error("Invalid QName: "++(showe))-- Each XPath predicate must calculate position() and last() from its input XSeq-- if last() is used, then the evaluation is blocking (need to store the whole input XSeq)compilePredicates::[Ast]->QExp->Bool->QExpcompilePredicates[]xs_=xscompilePredicates((Aintn):preds)xs_-- shortcut that improves laziness=compilePredicatespreds[|[$xs!!$(litE(IntegerL(toInteger(n-1))))]|]TruecompilePredicates(pred:preds)xsTrue-- top-k like|maxPositionpathPositionpred>0=compilePredicates(pred:preds)[|take$(litE(IntegerL(toInteger(maxPositionpathPositionpred))))$xs|]FalsecompilePredicates(pred:preds)xs_|containsLastpred-- blocking: use only when last() is used in the predicate=compilePredicatespreds[|letbl=$xslen=lengthblinfoldir(\xir->ifcase$(compilepred[|x|][|[XInti]|][|[XIntlen]|]"")of[XIntk]->k==i-- indexingb->conditionTestbthenx:relser)[]bl1|]TruecompilePredicates(pred:preds)xs_=compilePredicatespreds[|foldir(\xir->ifcase$(compilepred[|x|][|[XInti]|]undef3"")of[XIntk]->k==i-- indexingb->conditionTestbthenx:relser)[]$xs1|]True-- Compile the AST e into Haskell code-- context: context node (XPath .)-- position: the element position in the parent sequence (XPath position())-- last: the length of the parent sequence (XPath last())-- effective_axis: the XPath axis in /axis::tag(exp)-- (eg, the effective axis of //(A | B) is "descendant_step"compile::Ast->QExp->QExp->QExp->String->QExpcompileecontextpositionlasteffective_axis=caseeofAvar"."->[|[$context::XTree]|]Avarv->letx=varE(mkNamev)in[|$x::XSeq|]Aintn->letx=litE(IntegerL(toIntegern))in[|[XInt$x]|]Afloatn->letx=litE(RationalL(toRationaln))in[|[XFloat$x]|]Astrings->letx=litE(StringLs)in[|[XText$x]|]Ast"context"[v,Astringdp,body]->[|foldr(\xr->$(compilebody[|x|]positionlastdp)++r)[]$(compilevcontextpositionlasteffective_axis)|]Ast"call"[Avar"position"]->positionAst"call"[Avar"last"]->lastAst"child_step"[tag,Avar"."]|effective_axis/=""->compile(Asteffective_axis[tag,Avar"."])contextpositionlast""Ast"step"((Ast"descendant_any"(body:tags)):predicates)->letbc=compilebodycontextpositionlasteffective_axists=listE(map(\(Avartag)->litE(stringLtag))tags)in[|foldr(\xr->$(compilePredicatespredicates[|descendant_any_with_tagged_children$tsx|]True)++r)[]$bc|]Ast"step"((Astpath_step[Astringtag,body]):predicates)|memVpath_steppaths->letbc=compilebodycontextpositionlasteffective_axistc=litE(stringLtag)in[|foldr(\xr->$(compilePredicatespredicates[|$(findVpath_steppaths)$tcx|]True)++r)[]$bc|]Ast"descendant_any"(body:tags)->letbc=compilebodycontextpositionlasteffective_axists=listE(map(\(Avartag)->litE(stringLtag))tags)in[|foldr(\xr->(descendant_any_with_tagged_children$tsx)++r)[]$bc|]Astpath_step[Astringtag,body]|memVpath_steppaths->letbc=compilebodycontextpositionlasteffective_axistc=litE(stringLtag)in[|foldr(\xr->($(findVpath_steppaths)$tcx)++r)[]$bc|]Ast"step"(exp:predicates)->compilePredicatespredicates(compileexpcontextpositionlasteffective_axis)TrueAst"predicate"[condition,body]->compilePredicates[condition](compilebodycontextpositionlasteffective_axis)TrueAst"append"args->[|appendText$(listE(map(\x->compilexcontextpositionlasteffective_axis)args))|]Ast"call"((Avarf):args)->callFf(map(\x->compilexcontextpositionlasteffective_axis)args)Ast"construction"[Astringtag,Ast"attributes"[],body]->letct=litE(StringLtag)bc=compilebodycontextpositionlasteffective_axisin[|[XElem$ct[]0parent_error$bc]|]Ast"construction"[tag,Ast"attributes"al,body]->letalc=foldr(\(Ast"pair"[a,v])r->letac=compileacontextpositionlasteffective_axisvc=compilevcontextpositionlasteffective_axisin[|(qName$ac,showXS$vc):$r|])[|[]|]alct=compiletagcontextpositionlasteffective_axisbc=compilebodycontextpositionlasteffective_axisin[|[XElem(qName$ct)$alc0parent_error$bc]|]Ast"let"[Avarvar,source,body]->dos<-compilesourcecontextpositionlasteffective_axisb<-compilebodycontextpositionlasteffective_axisreturn(AppE(LamE[VarP(mkNamevar)]b)s)Ast"for"[Avarvar,Avar"$",source,body]-- a for-loop without an index->letb=compilebody[|head$(varE(mkNamevar))|]undef2undef3""f=lamE[varP(mkNamevar)][|\r->$b++r|]s=compilesourcecontextpositionlasteffective_axisin[|foldr(\x->$f[x])[]$s|]Ast"for"[Avarvar,Avarivar,source,body]-- a for-loop with an index->letb=compilebody[|head$(varE(mkNamevar))|][|$(varE(mkNameivar))|]undef3""f=lamE[varP(mkNamevar)](lamE[varP(mkNameivar)][|\r->$b++r|])p=maxPosition(Avarivar)bodyns=ifp>0-- there is a top-k like restrictionthenAst"step"[source,Ast"call"[Avar"<=",pathPosition,Aintp]]elsesources=compilenscontextpositionlasteffective_axisin[|foldir(\xi->$f[x][XInti])[]$s1|]Ast"sortTuple"(exp:orderBys)-- prepare each FLWOR tuple for sorting->letres=foldl(\ra->letac=compileacontextpositionlasteffective_axisin[|$r++[text$ac]|])[|[$(compileexpcontextpositionlasteffective_axis)]|]orderBysin[|[$res]|]Ast"sort"(exp:ordList)-- blocking->letce=compileexpcontextpositionlasteffective_axisordering=foldr(\(Avarord)r->letasc=iford=="ascending"then[|True|]else[|False|]in[|\(x:xs)(y:ys)->casecompareXSeqs$ascxyofEQ->$rxsyso->o|])[|\xsys->EQ|]ordListin[|concatMaphead(sortBy(\(_:xs)(_:ys)->$orderingxsys)($ce::[[XSeq]]))|]_->error("Illegal XQuery: "++(showe))-- The monadic compilePredicates that propagates IO statecompilePredicatesM::[Ast]->QExp->Bool->QExpcompilePredicatesM[]xs_=[|return$xs|]compilePredicatesM((Aintn):preds)xs_-- shortcut that improves laziness=compilePredicatesMpreds[|[$xs!!$(litE(IntegerL(toInteger(n-1))))]|]TruecompilePredicatesM(pred:preds)xsTrue-- top-k like|maxPositionpathPositionpred>0=compilePredicatesM(pred:preds)[|take$(litE(IntegerL(toInteger(maxPositionpathPositionpred))))$xs|]FalsecompilePredicatesM(pred:preds)xs_|containsLastpred-- blocking: use only when last() is used in the predicate=[|doletbl=$xslast=lengthblvs<-foldir(\xir->dovs<-$(compileMpred[|x|][|[XInti]|][|[XIntlast]|]"")s<-rreturn(ifcasevsof[XIntk]->k==i-- indexingb->conditionTestbthenx:selses))(return[])$xs1$(compilePredicatesMpreds[|vs|]True)|]compilePredicatesM(pred:preds)xs_=[|dovs<-foldir(\xir->dovs<-$(compileMpred[|x|][|[XInti]|]undef3"")s<-rreturn(ifcasevsof[XIntk]->k==i-- indexingb->conditionTestbthenx:selses))(return[])$xs1$(compilePredicatesMpreds[|vs|]True)|]-- The monadic XQuery compiler; it is like compile but has plumbing to propagate IO statecompileM::Ast->QExp->QExp->QExp->String->QExpcompileMecontextpositionlasteffective_axis=caseeofAvar"."->[|return[$context::XTree]|]Avarv->letx=varE(mkNamev)in[|return($x::XSeq)|]Aintn->letx=litE(IntegerL(toIntegern))in[|return[XInt$x]|]Afloatn->letx=litE(RationalL(toRationaln))in[|return[XFloat$x]|]Astrings->letx=litE(StringLs)in[|return[XText$x]|]-- for non-IO XQuery, use the regular compileAst"nonIO"[u]->[|return$(compileucontextpositionlasteffective_axis)|]Ast"context"[v,Astringdp,body]->[|dovs<-$(compileMvcontextpositionlasteffective_axis)foldr(\xr->(liftM2(++))$(compileMbody[|x|]positionlastdp)r)(return[])vs|]Ast"call"[Avar"position"]->[|return$position|]Ast"call"[Avar"last"]->[|return$last|]Ast"child_step"[tag,Avar"."]|effective_axis/=""->compileM(Asteffective_axis[tag,Avar"."])contextpositionlast""Ast"step"((Ast"descendant_any"(body:tags)):predicates)->letbc=compileMbodycontextpositionlasteffective_axists=listE(map(\(Avartag)->litE(stringLtag))tags)in[|dovs<-$bcfoldr(\xr->(liftM2(++))$(compilePredicatesMpredicates[|descendant_any_with_tagged_children$tsx|]True)r)(return[])vs|]Ast"step"((Astpath_step[Astringtag,body]):predicates)|memVpath_steppaths->letbc=compileMbodycontextpositionlasteffective_axistc=litE(stringLtag)in[|dovs<-$bcfoldr(\xr->(liftM2(++))$(compilePredicatesMpredicates[|$(findVpath_steppaths)$tcx|]True)r)(return[])vs|]Ast"descendant_any"(body:tags)->letbc=compileMbodycontextpositionlasteffective_axists=listE(map(\(Avartag)->litE(stringLtag))tags)in[|dovs<-$bcreturn(foldr(\xr->(descendant_any_with_tagged_children$tsx)++r)[]vs)|]Astpath_step[Astringtag,body]|memVpath_steppaths->letbc=compileMbodycontextpositionlasteffective_axistc=litE(stringLtag)in[|dovs<-$bcreturn(foldr(\xr->($(findVpath_steppaths)$tcx)++r)[]vs)|]Ast"step"(exp:predicates)->[|dovs<-$(compileMexpcontextpositionlasteffective_axis)$(compilePredicatesMpredicates[|vs|]True)|]Ast"predicate"[condition,body]->[|dovs<-$(compileMbodycontextpositionlasteffective_axis)$(compilePredicatesM[condition][|vs|]True)|]Ast"executeSQL"[Avarstmt,args]->[|doas<-$(compileMargscontextpositionlasteffective_axis)$(varE(mkName"executeSQL"))$(varE(mkNamestmt))as|]Ast"append"args->letbinds=zipWith(\ix->(mkName("x"++(showi)),x))[1..(lengthargs)]argsinfoldr(\(n,x)r->[|$(compileMxcontextpositionlasteffective_axis)>>=$(lamE[varPn]r)|])[|return(appendText$(listE(map(\(n,_)->varEn)binds)))|]bindsAst"call"((Avarf):args)->letbinds=zipWith(\ix->(mkName("x"++(showi)),x))[1..(lengthargs)]argsinfoldr(\(n,x)r->[|$(compileMxcontextpositionlasteffective_axis)>>=$(lamE[varPn]r)|])[|return$(callFf(map(\(n,_)->varEn)binds))|]bindsAst"construction"[Astringtag,Ast"attributes"[],body]->letct=litE(StringLtag)bc=compileMbodycontextpositionlasteffective_axisin[|dob<-$bcreturn[XElem$ct[]0parent_errorb]|]Ast"construction"[tag,Ast"attributes"al,body]->letalc=foldr(\(Ast"pair"[a,v])r->[|doac<-$(compileMacontextpositionlasteffective_axis)vc<-$(compileMvcontextpositionlasteffective_axis)s<-$rreturn((qNameac,showXSvc):s)|])[|return[]|]alct=compileMtagcontextpositionlasteffective_axisbc=compileMbodycontextpositionlasteffective_axisin[|doa<-$alcc<-$ctb<-$bcreturn[XElem(qNamec)a0parent_errorb]|]Ast"let"[Avarvar,source,body]->[|$(compileMsourcecontextpositionlasteffective_axis)>>=$(lamE[varP(mkNamevar)](compileMbodycontextpositionlasteffective_axis))|]Ast"for"[Avarvar,Avar"$",source,body]-- a for-loop without an index->letb=compileMbody[|head$(varE(mkNamevar))|]undef2undef3""f=lamE[varP(mkNamevar)][|(liftM2(++))$b|]s=compileMsourcecontextpositionlasteffective_axisin[|dovs<-$sfoldr(\x->$f[x])(return[])vs|]Ast"for"[Avarvar,Avarivar,source,body]-- a for-loop with an index->letb=compileMbody[|head$(varE(mkNamevar))|][|$(varE(mkNameivar))|]undef3""f=lamE[varP(mkNamevar)](lamE[varP(mkNameivar)][|(liftM2(++))$b|])p=maxPosition(Avarivar)bodyns=ifp>0-- there is a top-k like restrictionthenAst"step"[source,Ast"call"[Avar"<=",pathPosition,Aintp]]elsesources=compileMnscontextpositionlasteffective_axisin[|dovs<-$sfoldir(\xi->$f[x][XInti])(return[])vs1|]Ast"sortTuple"(exp:orderBys)-- prepare each FLWOR tuple for sorting->letvs=compileMexpcontextpositionlasteffective_axisres=foldl(\ra->[|doac<-$(compileMacontextpositionlasteffective_axis)s<-$rreturn(s++[textac])|])[|dov<-$vs;return[v]|]orderBysin[|return$res|]Ast"sort"(exp:ordList)-- blocking->letce=compileMexpcontextpositionlasteffective_axisordering=foldr(\(Avarord)r->letasc=iford=="ascending"then[|True|]else[|False|]in[|\(x:xs)(y:ys)->casecompareXSeqs$ascxyofEQ->$rxsyso->o|])[|\xsys->EQ|]ordListin[|doc<-$cereturn(concatMaphead(sortBy(\(_:xs)(_:ys)->$orderingxsys)(c::[[XSeq]])))|]_->error("Illegal XQuery: "++(showe))-- functions that need IO interaction (document reader, DB access, etc)ioSources::[String]ioSources=["executeSQL","doc","fn:doc","sql","fn:sql","publish","fn:publish"]-- collect all input documents and assign them a unique numberpullIOSources::Ast->Int->(Ast,Int,[(String,Ast)])pullIOSourcesquerycount=casequeryofAst"call"[Avarnm,file]|elemnm["doc","fn:doc"]->(Avar("_doc"++(showcount)),count+1,[("_doc"++(showcount),file)])Ast"call"[Avarnm,sql]|elemnm["sql","fn:sql"]->(Ast"executeSQL"[Avar("_sql"++(showcount)),Ast"call"[Avar"empty"]],count+1,[("_sql"++(showcount),Ast"prepareSQL"[sql])])Ast"call"[Avarnm,sql,args]|elemnm["sql","fn:sql"]->(Ast"executeSQL"[Avar("_sql"++(showcount)),args],count+1,[("_sql"++(showcount),Ast"prepareSQL"[sql])])Astnargs->let(s,c,ns)=foldr(\arc->let(e,c1,n1)=pullIOSourcesac(s,c2,n2)=rc1in(e:s,c2,unionn1n2))(\c->([],c,[]))argscountin(Astns,c,ns)_->(query,count,[])whereunionxs((n,s):ys)=(n,foldr(\(m,d)r->ifs==dthenAvarmelser)sxs):(unionxsys)unionxs[]=xs-- true if there is no need to lift to the IO monadnoIO::Ast->BoolnoIO(Astnm_)|elemnmioSources=FalsenoIO(Astnargs)=allnoIOargsnoIO_=TrueliftIOSources::Ast->(Ast,[(String,Ast)])liftIOSourcesquery=let(ast,_,ns)=pullIOSourcesquery0fx=casexofAstnm_|elemnm["attributes"]->xAst__|noIOx->Ast"nonIO"[x]_->casexofAst"call"((Avarnm):args)->Ast"call"((Avarnm):(mapfargs))Astnargs->Astn(mapfargs)_->xin(fast,ns)-- optimize and compile an AST (unlifted)compileAst::Ast->QExpcompileAstast=compile(optimizeast)undef1undef2undef3""-- optimize and compile an AST (IO lifted)compileAstM::Ast->QExpcompileAstMast=compileM(optimizeast)undef1undef2undef3""-- compile an XQuery AST that reads XML documentscompileQuery::[Ast]->QExpcompileQuery((Ast"function"((Avarf):b:args)):xs)=letlvars=caseargsof[Astringa]->[varP(mkNamea)]_->[tupP(map(\(Avara)->varP(mkNamea))args)]inletE[valD(varP(mkNamef))(normalB(lamElvars(compileAstb)))[]](compileQueryxs)compileQuery((Ast"variable"[Avarv,u]):xs)=letE[valD(varP(mkNamev))(normalB(compileAstu))[]](compileQueryxs)compileQuery[query]=let(ast,ns)=liftIOSources(optimizequery)code=compileMastundef1undef2undef3""infoldl(\r(n,e)->letd=lamE[varP(mkNamen)]rincaseeofAvarm->[|$d$(varE(mkNamem))|]Ast"prepareSQL"[Astringsql]->[|($(varE(mkName"prepareSQL"))$(varE(mkName"_db"))$(litE(StringLsql)))>>=$d|]_->[|dolet[XTextf]=$(compileAste)doc<-readFilef$d[materialize(parseDocumentdoc)]|])[|$code|]ns-- Debugging: display the AST and the Haskell code of an input XQuerycq::String->IO()cqquery=doputStrLn"Abstract Syntax Tree:"letast=parse(scanquery)putStrLn(showast)letopt=optimize(lastast)putStrLn"Optimized AST:"putStrLn(showopt)--putStrLn "Haskell Code:"--let code = compileQuery ast--runQ code >>= putStrLn.pprint-- | Run an XQuery expression that does not read XML documents.-- When evaluated, it returns XSeq.xe::String->QExpxequery=compileAst(last(parse(scanquery)))-- | Run an XQuery that reads XML documents.-- When evaluated, it returns IO XSeq.xq::String->QExpxqquery=compileQuery(parse(scanquery))-- | Run an XQuery that reads XML documents and queries databases.-- When evaluated, it returns (IConnection conn) => conn -> IO XSeq.xqdb::String->QExpxqdbquery=lamE[varP(mkName"_db")](compileQuery(parse(scanquery)))