{-----------------------------------------------------------------------------
A LIBRARY OF MONADIC PARSER COMBINATORS
29th July 1996
Graham Hutton Erik Meijer
University of Nottingham University of Utrecht
This Haskell 1.3 script defines a library of parser combinators, and is taken
from sections 1-6 of our article "Monadic Parser Combinators". Some changes
to the library have been made in the move from Gofer to Haskell:
* Do notation is used in place of monad comprehension notation;
* The parser datatype is defined using "newtype", to avoid the overhead
of tagging and untagging parsers with the P constructor.
------------------------------------------------------------------------------
** Extended to allow a symbol table/state to be threaded through the monad.
** Extended to allow a parameterised token type, rather than just strings.
** Extended to allow error-reporting.
(Extensions: 1998-2000 Malcolm.Wallace@cs.york.ac.uk)
(More extensions: 2004 gk-haskell@ninebynine.org)
------------------------------------------------------------------------------}-- | This library of monadic parser combinators is based on the ones-- defined by Graham Hutton and Erik Meijer. It has been extended by-- Malcolm Wallace to use an abstract token type (no longer just a-- string) as input, and to incorporate state in the monad, useful-- for symbol tables, macros, and so on. Basic facilities for error-- reporting have also been added, and later extended by Graham Klyne-- to return the errors through an @Either@ type, rather than just-- calling @error@.moduleText.ParserCombinators.HuttonMeijerWallace(-- * The parser monadParser(..)-- * Primitive parser combinators,item,eof,papply,papply'-- * Derived combinators,(+++),{-sat,-}tok,nottok,many,many1,sepby,sepby1,chainl,chainl1,chainr,chainr1,ops,bracket,toEOF-- * Error handling,elserror-- * State handling,stupd,stquery,stget-- * Re-parsing,reparse)whereimportCharimportMonadinfixr5+++--- The parser monad ---------------------------------------------------------typeParseResultstea=Eithere[(a,s,[Eitheret])]newtypeParserstea=P(s->[Eitheret]->ParseResultstea)-- ^ The parser type is parametrised on the types of the state @s@,-- the input tokens @t@, error-type @e@, and the result value @a@.-- The state and remaining input are threaded through the monad.instanceFunctor(Parserste)where-- fmap :: (a -> b) -> (Parser s t e a -> Parser s t e b)fmapf(Pp)=P(\stinp->casepstinpofRightres->Right[(fv,s,out)|(v,s,out)<-res]Lefterr->Lefterr)instanceMonad(Parserste)where-- return :: a -> Parser s t e areturnv=P(\stinp->Right[(v,st,inp)])-- >>= :: Parser s t e a -> (a -> Parser s t e b) -> Parser s t e b(Pp)>>=f=P(\stinp->casepstinpofRightres->foldrjoinresults(Right[])[papply'(fv)sout|(v,s,out)<-res]Lefterr->Lefterr)-- fail :: String -> Parser s t e afailerr=P(\stinp->Right[])-- I know it's counterintuitive, but we want no-parse, not an error.instanceMonadPlus(Parserste)where-- mzero :: Parser s t e amzero=P(\stinp->Right[])-- mplus :: Parser s t e a -> Parser s t e a -> Parser s t e a(Pp)`mplus`(Pq)=P(\stinp->joinresults(pstinp)(qstinp))-- joinresults ensures that explicitly raised errors are dominant,-- provided no parse has yet been found. The commented out code is-- a slightly stricter specification of the real code.joinresults::ParseResultstea->ParseResultstea->ParseResultstea{-
joinresults (Left p) (Left q) = Left p
joinresults (Left p) (Right _) = Left p
joinresults (Right []) (Left q) = Left q
joinresults (Right p) (Left q) = Right p
joinresults (Right p) (Right q) = Right (p++q)
-}joinresults(Leftp)q=Leftpjoinresults(Right[])q=qjoinresults(Rightp)q=Right(p++caseqofLeft_->[]Rightr->r)--- Primitive parser combinators ----------------------------------------------- | Deliver the first remaining token.item::Parserstetitem=P(\stinp->caseinpof[]->Right[](Lefte:_)->Lefte(Rightx:xs)->Right[(x,st,xs)])-- | Fail if end of input is not reachedeof::Showp=>Parsers(p,t)String()eof=P(\stinp->caseinpof[]->Right[((),st,[])](Lefte:_)->Lefte(Right(p,_):_)->Left("End of input expected at "++showp++"\n but found text")){-
-- | Ensure the value delivered by the parser is evaluated to WHNF.
force :: Parser s t e a -> Parser s t e a
force (P p) = P (\st inp -> let Right xs = p st inp
h = head xs in
h `seq` Right (h: tail xs)
)
-- [[[GK]]] ^^^^^^
-- WHNF = Weak Head Normal Form, meaning that it has no top-level redex.
-- In this case, I think that means that the first element of the list
-- is fully evaluated.
--
-- NOTE: the original form of this function fails if there is no parse
-- result for p st inp (head xs fails if xs is null), so the modified
-- form can assume a Right value only.
--
-- Why is this needed?
-- It's not exported, and the only use of this I see is commented out.
---------------------------------------
-}-- | Deliver the first parse result only, eliminating any backtracking.first::Parserstea->Parsersteafirst(Pp)=P(\stinp->casepstinpofRight(x:xs)->Right[x]otherwise->otherwise)-- | Apply the parser to some real input, given an initial state value.-- If the parser fails, raise 'error' to halt the program.-- (This is the original exported behaviour - to allow the caller to-- deal with the error differently, see @papply'@.)papply::ParserstStringa->s->[EitherStringt]->[(a,s,[EitherStringt])]papply(Pp)stinp=eithererrorid(pstinp)-- | Apply the parser to some real input, given an initial state value.-- If the parser fails, return a diagnostic message to the caller.papply'::Parserstea->s->[Eitheret]->Eithere[(a,s,[Eitheret])]papply'(Pp)stinp=pstinp--- Derived combinators -------------------------------------------------------- | A choice between parsers. Keep only the first success.(+++)::Parserstea->Parserstea->Parsersteap+++q=first(p`mplus`q)-- | Deliver the first token if it satisfies a predicate.sat::(t->Bool)->Parsers(p,t)etsatp=do{(_,x)<-item;ifpxthenreturnxelsemzero}-- | Deliver the first token if it equals the argument.tok::Eqt=>t->Parsers(p,t)ettokt=do{(_,x)<-item;ifx==tthenreturntelsemzero}-- | Deliver the first token if it does not equal the argument.nottok::Eqt=>[t]->Parsers(p,t)etnottokts=do{(_,x)<-item;ifx`notElem`tsthenreturnxelsemzero}-- | Deliver zero or more values of @a@.many::Parserstea->Parserste[a]manyp=many1p+++return[]--many p = force (many1 p +++ return [])-- | Deliver one or more values of @a@.many1::Parserstea->Parserste[a]many1p=do{x<-p;xs<-manyp;return(x:xs)}-- | Deliver zero or more values of @a@ separated by @b@'s.sepby::Parserstea->Parsersteb->Parserste[a]p`sepby`sep=(p`sepby1`sep)+++return[]-- | Deliver one or more values of @a@ separated by @b@'s.sepby1::Parserstea->Parsersteb->Parserste[a]p`sepby1`sep=do{x<-p;xs<-many(do{sep;p});return(x:xs)}chainl::Parserstea->Parserste(a->a->a)->a->Parsersteachainlpopv=(p`chainl1`op)+++returnvchainl1::Parserstea->Parserste(a->a->a)->Parsersteap`chainl1`op=do{x<-p;restx}whererestx=do{f<-op;y<-p;rest(fxy)}+++returnxchainr::Parserstea->Parserste(a->a->a)->a->Parsersteachainrpopv=(p`chainr1`op)+++returnvchainr1::Parserstea->Parserste(a->a->a)->Parsersteap`chainr1`op=do{x<-p;restx}whererestx=do{f<-op;y<-p`chainr1`op;return(fxy)}+++returnxops::[(Parserstea,b)]->Parserstebopsxs=foldr1(+++)[do{p;returnop}|(p,op)<-xs]bracket::(Showp,Showt)=>Parsers(p,t)ea->Parsers(p,t)eb->Parsers(p,t)ec->Parsers(p,t)ebbracketopenpclose=do{open;x<-p;close-- `elserror` "improperly matched construct";;returnx}-- | Accept a complete parse of the input only, no partial parses.toEOF::Showp=>Parsers(p,t)Stringa->Parsers(p,t)StringatoEOFp=do{x<-p;eof;returnx}--- Error handling ------------------------------------------------------------- | Return an error using the supplied diagnostic string, and a token type-- which includes position information.parseerror::(Showp,Showt)=>String->Parsers(p,t)Stringaparseerrorerr=P(\stinp->caseinpof[]->Left"Parse error: unexpected EOF\n"(Lefte:_)->Left("Lexical error: "++e)(Right(p,t):_)->Left("Parse error: in "++showp++"\n "++err++"\n "++"Found "++showt))-- | If the parser fails, generate an error message.elserror::(Showp,Showt)=>Parsers(p,t)Stringa->String->Parsers(p,t)Stringap`elserror`s=p+++parseerrors--- State handling ------------------------------------------------------------- | Update the internal state.stupd::(s->s)->Parserste()stupdf=P(\stinp->{-let newst = f st in newst `seq`-}Right[((),fst,inp)])-- | Query the internal state.stquery::(s->a)->Parsersteastqueryf=P(\stinp->Right[(fst,st,inp)])-- | Deliver the entire internal state.stget::Parserstesstget=P(\stinp->Right[(st,st,inp)])--- Push some tokens back onto the input stream and reparse -------------------- | This is useful for recursively expanding macros. When the-- user-parser recognises a macro use, it can lookup the macro-- expansion from the parse state, lex it, and then stuff the-- lexed expansion back down into the parser.reparse::[Eitheret]->Parserste()reparsets=P(\stinp->Right[((),st,ts++inp)])------------------------------------------------------------------------------