{-------------------------------------------------------------------------------------
-
- A Compiler from XQuery to Haskell
- Programmer: Leonidas Fegaras
- Email: fegaras@cse.uta.edu
- Web: http://lambda.uta.edu/- Creation: 02/15/08, last update: 08/20/08
-
- Copyright (c) 2008 by Leonidas Fegaras, the University of Texas at Arlington. All rights reserved.
- This material is provided as is, with absolutely no warranty expressed or implied.
- Any use is at your own risk. Permission is hereby granted to use or copy this program
- for any purpose, provided the above notices are retained on all copies.
-
--------------------------------------------------------------------------------------}{-# OPTIONS_GHC -fth #-}moduleText.XML.HXQ.CompilerwhereimportControl.MonadimportChar(toLower)importList(sortBy)importLanguage.Haskell.THimportXMLParse(parseDocument)importText.XML.HXQ.ParserimportText.XML.HXQ.XTreeimportText.XML.HXQ.OptimizerimportText.XML.HXQ.Functionsundef1=[|error"Undefined XQuery context (.)"|]undef2=[|error"Undefined position()"|]undef3=[|error"Undefined last()"|]-- does the expression contain a last()?containsLast::Ast->BoolcontainsLast(Ast"call"[Avar"last"])=TruecontainsLast(Astf_)|elemf["let","for","predicate"]=FalsecontainsLast(Ast"step"_)=FalsecontainsLast(Ast_args)=or(mapcontainsLastargs)containsLast_=False-- calculate the maximum position value used in a predicate, if there is onemaxPosition::Ast->Ast->IntmaxPositionpositione=caseeofAst"call"[Avarf,p,Aintn]|f`elem`["=","<","<=","eq","lt","le"]&&p==position->nAst"call"[Avarf,Aintn,p]|f`elem`["=",">",">=","eq","gt","ge"]&&p==position->nAst"let"[Avarx,source,body]->ifposition==Avarxthen0elseminp(maxPositionpositionsource)(maxPositionpositionbody)Ast"for"[Avarx,Avari,source,body]->ifposition==Avarx||position==Avarithen0elseminp(maxPositionpositionsource)(maxPositionpositionbody)Ast"predicate"[pred,body]->minp(maxPositionpositionpred)(maxPositionpositionbody)Ast"call"[Avar"and",x,y]->minp(maxPositionpositionx)(maxPositionpositiony)Ast"call"[Avar"or",x,y]->max(maxPositionpositionx)(maxPositionpositiony)_->0whereminpxy=ifx==0thenyelseify==0thenxelseminxypathPosition=Ast"call"[Avar"position"]parent_error=error"constructed elements have no parent"-- extract the QNameqName::XSeq->TagqName[XTexts]=sqNamee=error("Invalid QName: "++(showe))-- Each XPath predicate must calculate position() and last() from its input XSeq-- if last() is used, then the evaluation is blocking (need to store the whole input XSeq)compilePredicates::[Ast]->QExp->Bool->QExpcompilePredicates[]xs_=xscompilePredicates((Aintn):preds)xs_-- shortcut that improves laziness=compilePredicatespreds[|[$xs!!$(litE(IntegerL(toInteger(n-1))))]|]TruecompilePredicates(pred:preds)xsTrue-- top-k like|maxPositionpathPositionpred>0=compilePredicates(pred:preds)[|take$(litE(IntegerL(toInteger(maxPositionpathPositionpred))))$xs|]FalsecompilePredicates(pred:preds)xs_|containsLastpred-- blocking: use only when last() is used in the predicate=compilePredicatespreds[|letbl=$xslen=lengthblinfoldir(\xir->ifcase$(compilepred[|x|][|[XInti]|][|[XIntlen]|]"")of[XIntk]->k==i-- indexingb->conditionTestbthenx:relser)[]bl1|]TruecompilePredicates(pred:preds)xs_=compilePredicatespreds[|foldir(\xir->ifcase$(compilepred[|x|][|[XInti]|]undef3"")of[XIntk]->k==i-- indexingb->conditionTestbthenx:relser)[]$xs1|]True-- Compile the AST e into Haskell code-- context: context node (XPath .)-- position: the element position in the parent sequence (XPath position())-- last: the length of the parent sequence (XPath last())-- effective_axis: the XPath axis in /axis::tag(exp)-- eg, the effective axis of //(A | B) is descendantcompile::Ast->QExp->QExp->QExp->String->QExpcompileecontextpositionlasteffective_axis=caseeofAvar"."->[|[$context::XTree]|]Avarv->letx=varE(mkNamev)in[|$x::XSeq|]Aintn->letx=litE(IntegerL(toIntegern))in[|[XInt$x]|]Afloatn->letx=litE(RationalL(toRationaln))in[|[XFloat$x]|]Astrings->letx=litE(StringLs)in[|[XText$x]|]Ast"context"[v,Astringdp,body]->[|foldr(\xr->$(compilebody[|x|]positionlastdp)++r)[]$(compilevcontextpositionlasteffective_axis)|]Ast"call"[Avar"position"]->positionAst"call"[Avar"last"]->lastAst"step"(Avar"child":tag:Avar".":preds)|effective_axis/=""->compile(Ast"step"(Avareffective_axis:tag:Avar".":preds))contextpositionlast""Ast"step"(Avar"descendant_any":Ast"tags"tags:e:preds)->letbc=compileecontextpositionlasteffective_axists=listE(map(\(Avartag)->litE(stringLtag))tags)in[|foldr(\xr->$(compilePredicatespreds[|descendant_any_with_tagged_children$tsx|]True)++r)[]$bc|]Ast"step"(Avarstep:Astringtag:e:preds)->letbc=compileecontextpositionlasteffective_axistc=litE(stringLtag)in[|foldr(\xr->$(compilePredicatespreds[|$(findVsteppaths)$tcx|]True)++r)[]$bc|]Ast"filter"(e:preds)->compilePredicatespreds(compileecontextpositionlasteffective_axis)TrueAst"predicate"[condition,body]->letxs=compilebodycontextpositionlasteffective_axisin[|foldr(\xr->ifconditionTest$(compileconditionundef1undef2undef3"")thenx:relser)[]$xs|]Ast"append"args->[|appendText$(listE(map(\x->compilexcontextpositionlasteffective_axis)args))|]Ast"call"((Avarf):args)->callFf(map(\x->compilexcontextpositionlasteffective_axis)args)Ast"construction"[Astringtag,Ast"attributes"[],body]->letct=litE(StringLtag)bc=compilebodycontextpositionlasteffective_axisin[|[XElem$ct[]0parent_error$bc]|]Ast"construction"[tag,Ast"attributes"al,body]->letalc=foldr(\(Ast"pair"[a,v])r->letac=compileacontextpositionlasteffective_axisvc=compilevcontextpositionlasteffective_axisin[|(qName$ac,showXS$vc):$r|])[|[]|]alct=compiletagcontextpositionlasteffective_axisbc=compilebodycontextpositionlasteffective_axisin[|[XElem(qName$ct)$alc0parent_error$bc]|]Ast"let"[Avarvar,source,body]->dos<-compilesourcecontextpositionlasteffective_axisb<-compilebodycontextpositionlasteffective_axisreturn(AppE(LamE[VarP(mkNamevar)]b)s)Ast"for"[Avarvar,Avar"$",source,body]-- a for-loop without an index->letb=compilebody[|head$(varE(mkNamevar))|]undef2undef3""f=lamE[varP(mkNamevar)][|\r->$b++r|]s=compilesourcecontextpositionlasteffective_axisin[|foldr(\x->$f[x])[]$s|]Ast"for"[Avarvar,Avarivar,source,body]-- a for-loop with an index->letb=compilebody[|head$(varE(mkNamevar))|][|$(varE(mkNameivar))|]undef3""f=lamE[varP(mkNamevar)](lamE[varP(mkNameivar)][|\r->$b++r|])p=maxPosition(Avarivar)bodyns=ifp>0-- there is a top-k like restrictionthenAst"step"[source,Ast"call"[Avar"<=",pathPosition,Aintp]]elsesources=compilenscontextpositionlasteffective_axisin[|foldir(\xi->$f[x][XInti])[]$s1|]Ast"sortTuple"(exp:orderBys)-- prepare each FLWOR tuple for sorting->letres=foldl(\ra->letac=compileacontextpositionlasteffective_axisin[|$r++[text$ac]|])[|[$(compileexpcontextpositionlasteffective_axis)]|]orderBysin[|[$res]|]Ast"sort"(exp:ordList)-- blocking->letce=compileexpcontextpositionlasteffective_axisordering=foldr(\(Avarord)r->letasc=iford=="ascending"then[|True|]else[|False|]in[|\(x:xs)(y:ys)->casecompareXSeqs$ascxyofEQ->$rxsyso->o|])[|\xsys->EQ|]ordListin[|concatMaphead(sortBy(\(_:xs)(_:ys)->$orderingxsys)($ce::[[XSeq]]))|]_->error("Illegal XQuery: "++(showe))-- The monadic compilePredicates that propagates IO statecompilePredicatesM::[Ast]->QExp->Bool->QExpcompilePredicatesM[]xs_=[|return$xs|]compilePredicatesM((Aintn):preds)xs_-- shortcut that improves laziness=compilePredicatesMpreds[|[$xs!!$(litE(IntegerL(toInteger(n-1))))]|]TruecompilePredicatesM(pred:preds)xsTrue-- top-k like|maxPositionpathPositionpred>0=compilePredicatesM(pred:preds)[|take$(litE(IntegerL(toInteger(maxPositionpathPositionpred))))$xs|]FalsecompilePredicatesM(pred:preds)xs_|containsLastpred-- blocking: use only when last() is used in the predicate=[|doletbl=$xslast=lengthblvs<-foldir(\xir->dovs<-$(compileMpred[|x|][|[XInti]|][|[XIntlast]|]"")s<-rreturn(ifcasevsof[XIntk]->k==i-- indexingb->conditionTestbthenx:selses))(return[])$xs1$(compilePredicatesMpreds[|vs|]True)|]compilePredicatesM(pred:preds)xs_=[|dovs<-foldir(\xir->dovs<-$(compileMpred[|x|][|[XInti]|]undef3"")s<-rreturn(ifcasevsof[XIntk]->k==i-- indexingb->conditionTestbthenx:selses))(return[])$xs1$(compilePredicatesMpreds[|vs|]True)|]-- The monadic XQuery compiler; it is like compile but has plumbing to propagate IO statecompileM::Ast->QExp->QExp->QExp->String->QExpcompileMecontextpositionlasteffective_axis=caseeofAvar"."->[|return[$context::XTree]|]Avarv->letx=varE(mkNamev)in[|return($x::XSeq)|]Aintn->letx=litE(IntegerL(toIntegern))in[|return[XInt$x]|]Afloatn->letx=litE(RationalL(toRationaln))in[|return[XFloat$x]|]Astrings->letx=litE(StringLs)in[|return[XText$x]|]-- for non-IO XQuery, use the regular compileAst"nonIO"[u]->[|return$(compileucontextpositionlasteffective_axis)|]Ast"context"[v,Astringdp,body]->[|dovs<-$(compileMvcontextpositionlasteffective_axis)foldr(\xr->(liftM2(++))$(compileMbody[|x|]positionlastdp)r)(return[])vs|]Ast"call"[Avar"position"]->[|return$position|]Ast"call"[Avar"last"]->[|return$last|]Ast"step"(Avar"child":tag:Avar".":preds)|effective_axis/=""->compileM(Ast"step"(Avareffective_axis:tag:Avar".":preds))contextpositionlast""Ast"step"(Avar"descendant_any":Ast"tags"tags:e:preds)->letbc=compileMecontextpositionlasteffective_axists=listE(map(\(Avartag)->litE(stringLtag))tags)in[|dovs<-$bcfoldr(\xr->(liftM2(++))$(compilePredicatesMpreds[|descendant_any_with_tagged_children$tsx|]True)r)(return[])vs|]Ast"step"(Avarstep:Astringtag:e:preds)->letbc=compileMecontextpositionlasteffective_axistc=litE(stringLtag)in[|dovs<-$bcfoldr(\xr->(liftM2(++))$(compilePredicatesMpreds[|$(findVsteppaths)$tcx|]True)r)(return[])vs|]Ast"filter"(e:preds)->[|dovs<-$(compileMecontextpositionlasteffective_axis)$(compilePredicatesMpreds[|vs|]True)|]Ast"predicate"[condition,body]->[|dovs<-$(compileMbodycontextpositionlasteffective_axis)foldr(\xr->dovs<-$(compileMconditionundef1undef2undef3"")s<-rreturn(ifconditionTestvsthenx:selses))(return[])vs|]Ast"executeSQL"[Avarstmt,args]->[|doas<-$(compileMargscontextpositionlasteffective_axis)$(varE(mkName"executeSQL"))$(varE(mkNamestmt))as|]Ast"append"args->letbinds=zipWith(\ix->(mkName("x"++(showi)),x))[1..(lengthargs)]argsinfoldr(\(n,x)r->[|$(compileMxcontextpositionlasteffective_axis)>>=$(lamE[varPn]r)|])[|return(appendText$(listE(map(\(n,_)->varEn)binds)))|]bindsAst"call"((Avarf):args)->letbinds=zipWith(\ix->(mkName("x"++(showi)),x))[1..(lengthargs)]argsinfoldr(\(n,x)r->[|$(compileMxcontextpositionlasteffective_axis)>>=$(lamE[varPn]r)|])[|return$(callFf(map(\(n,_)->varEn)binds))|]bindsAst"construction"[Astringtag,Ast"attributes"[],body]->letct=litE(StringLtag)bc=compileMbodycontextpositionlasteffective_axisin[|dob<-$bcreturn[XElem$ct[]0parent_errorb]|]Ast"construction"[tag,Ast"attributes"al,body]->letalc=foldr(\(Ast"pair"[a,v])r->[|doac<-$(compileMacontextpositionlasteffective_axis)vc<-$(compileMvcontextpositionlasteffective_axis)s<-$rreturn((qNameac,showXSvc):s)|])[|return[]|]alct=compileMtagcontextpositionlasteffective_axisbc=compileMbodycontextpositionlasteffective_axisin[|doa<-$alcc<-$ctb<-$bcreturn[XElem(qNamec)a0parent_errorb]|]Ast"let"[Avarvar,source,body]->[|$(compileMsourcecontextpositionlasteffective_axis)>>=$(lamE[varP(mkNamevar)](compileMbodycontextpositionlasteffective_axis))|]Ast"for"[Avarvar,Avar"$",source,body]-- a for-loop without an index->letb=compileMbody[|head$(varE(mkNamevar))|]undef2undef3""f=lamE[varP(mkNamevar)][|(liftM2(++))$b|]s=compileMsourcecontextpositionlasteffective_axisin[|dovs<-$sfoldr(\x->$f[x])(return[])vs|]Ast"for"[Avarvar,Avarivar,source,body]-- a for-loop with an index->letb=compileMbody[|head$(varE(mkNamevar))|][|$(varE(mkNameivar))|]undef3""f=lamE[varP(mkNamevar)](lamE[varP(mkNameivar)][|(liftM2(++))$b|])p=maxPosition(Avarivar)bodyns=ifp>0-- there is a top-k like restrictionthenAst"step"[source,Ast"call"[Avar"<=",pathPosition,Aintp]]elsesources=compileMnscontextpositionlasteffective_axisin[|dovs<-$sfoldir(\xi->$f[x][XInti])(return[])vs1|]Ast"sortTuple"(exp:orderBys)-- prepare each FLWOR tuple for sorting->letvs=compileMexpcontextpositionlasteffective_axisres=foldl(\ra->[|doac<-$(compileMacontextpositionlasteffective_axis)s<-$rreturn(s++[textac])|])[|dov<-$vs;return[v]|]orderBysin[|return$res|]Ast"sort"(exp:ordList)-- blocking->letce=compileMexpcontextpositionlasteffective_axisordering=foldr(\(Avarord)r->letasc=iford=="ascending"then[|True|]else[|False|]in[|\(x:xs)(y:ys)->casecompareXSeqs$ascxyofEQ->$rxsyso->o|])[|\xsys->EQ|]ordListin[|doc<-$cereturn(concatMaphead(sortBy(\(_:xs)(_:ys)->$orderingxsys)(c::[[XSeq]])))|]_->error("Illegal XQuery: "++(showe))-- functions that need IO interaction (document reader, DB access, etc)ioSources::[String]ioSources=["executeSQL","doc","fn:doc","sql","fn:sql","publish","fn:publish"]-- steps that need the parent XTree link in evaluation (with a potential space leak)backward_steps::[String]backward_steps=["following-sibling","following","parent","ancestor","preceding-sibling","preceding","ancestor-or-self"]-- Collect all input documents and assign them a unique number.-- The backward flag indicates whether there are backward steps-- (so that they would require XTrees with parent links)pullIOSources::Ast->Int->Bool->(Ast,Int,Bool,[(String,Bool,Ast)])pullIOSourcesquerycountbackward=casequeryofAst"call"[Avarnm,file]|elemnm["doc","fn:doc"]->(Avar("_doc"++(showcount)),count+1,backward,[("_doc"++(showcount),backward,file)])Ast"call"[Avarnm,sql]|elemnm["sql","fn:sql"]->(Ast"executeSQL"[Avar("_sql"++(showcount)),Ast"call"[Avar"empty"]],count+1,backward,[("_sql"++(showcount),backward,Ast"prepareSQL"[sql])])Ast"call"[Avarnm,sql,args]|elemnm["sql","fn:sql"]->(Ast"executeSQL"[Avar("_sql"++(showcount)),args],count+1,backward,[("_sql"++(showcount),backward,Ast"prepareSQL"[sql])])Ast"step"(args@(Avarstep:_))-- backward step|elemstepbackward_steps->let(s,c,ns)=foldr(\arc->let(e,c1,_,n1)=pullIOSourcesacTrue(s,c2,n2)=rc1in(e:s,c2,unionn1n2))(\c->([],c,[]))argscountin(Ast"step"s,c,True,ns)Astnargs->let(s,c,ns)=foldr(\arc->let(e,c1,_,n1)=pullIOSourcesacbackward(s,c2,n2)=rc1in(e:s,c2,unionn1n2))(\c->([],c,[]))argscountin(Astns,c,backward,ns)_->(query,count,backward,[])whereunionxs((n,b,s):ys)=(n,b,foldr(\(m,_,d)r->ifs==dthenAvarmelser)sxs):(unionxsys)unionxs[]=xs-- true if there is no need to lift to the IO monadnoIO::Ast->BoolnoIO(Astnm_)|elemnmioSources=FalsenoIO(Astnargs)=allnoIOargsnoIO_=TrueliftIOSources::Ast->(Ast,[(String,Bool,Ast)])liftIOSourcesquery=let(ast,_,_,ns)=pullIOSourcesquery0Falsefx=casexofAstnm_|elemnm["attributes","tags"]->xAst__|noIOx->Ast"nonIO"[x]_->casexofAst"call"((Avarnm):args)->Ast"call"((Avarnm):(mapfargs))Astnargs->Astn(mapfargs)_->xin(fast,ns)-- optimize and compile an AST compileAst::Ast->QExpcompileAstast=compile(optimizeast)undef1undef2undef3""-- Compile an XQuery AST that does not perform IO (unlifted).-- When evaluated, it returns XSeq.compileQuery::[Ast]->QExpcompileQuery((Ast"function"((Avarf):b:args)):xs)=letlvars=caseargsof[Astringa]->[varP(mkNamea)]_->[tupP(map(\(Avara)->varP(mkNamea))args)]inletE[valD(varP(mkNamef))(normalB(lamElvars(compileAstb)))[]](compileQueryxs)compileQuery((Ast"variable"[Avarv,u]):xs)=letE[valD(varP(mkNamev))(normalB(compileAstu))[]](compileQueryxs)compileQuery(query:xs)=letcode=compileAstqueryrest=compileQueryxsin[|$code++$rest|]compileQuery[]=[|[]|]-- Compile an XQuery AST that may read XML documents or use databases (IO lifted).-- When evaluated, it returns IO XSeq.compileQueryM::[Ast]->QExpcompileQueryM((Ast"function"((Avarf):b:args)):xs)=letlvars=caseargsof[Astringa]->[varP(mkNamea)]_->[tupP(map(\(Avara)->varP(mkNamea))args)]inletE[valD(varP(mkNamef))(normalB(lamElvars(compileAstb)))[]](compileQueryMxs)compileQueryM((Ast"variable"[Avarv,u]):xs)=letE[valD(varP(mkNamev))(normalB(compileAstu))[]](compileQueryMxs)compileQueryM(query:xs)=let(ast,ns)=liftIOSources(optimizequery)code=compileMastundef1undef2undef3""rest=compileQueryMxsinfoldl(\r(n,b,e)->letd=lamE[varP(mkNamen)]rincaseeofAvarm->[|$d$(varE(mkNamem))|]Ast"prepareSQL"[Astringsql]->[|($(varE(mkName"prepareSQL"))$(varE(mkName"_db"))$(litE(StringLsql)))>>=$d|]_->[|dolet[XTextf]=$(compileAste)doc<-readFilef$d[materializeb(parseDocumentdoc)]|])[|(liftM2(++))$code$rest|]nscompileQueryM[]=[|return[]|]-- Debugging: display the AST and the Haskell code of an input XQuerycq::String->IO()cqquery=doputStrLn"Abstract Syntax Tree:"letast=parse(scanquery)putStrLn(showast)letopt=optimize(lastast)putStrLn"Optimized AST:"putStrLn(showopt)-- | Run an XQuery expression that does not perform IO.-- When evaluated, it returns XSeq.xe::String->QExpxequery=compileQuery(parse(scanquery))-- | Run an XQuery that may read XML documents.-- When evaluated, it returns IO XSeq.xq::String->QExpxqquery=compileQueryM(parse(scanquery))-- | Run an XQuery that reads XML documents and queries databases.-- When evaluated, it returns (IConnection conn) => conn -> IO XSeq.xqdb::String->QExpxqdbquery=lamE[varP(mkName"_db")](compileQueryM(parse(scanquery)))