------------------------------------------------------------------------------- |-- Module : Text.Parsec.Prim-- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007-- License : BSD-style (see the LICENSE file)-- -- Maintainer : derek.a.elkins@gmail.com-- Stability : provisional-- Portability : portable-- -- The primitive parser combinators.-- ----------------------------------------------------------------------------- {-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies, FlexibleContexts,
UndecidableInstances #-}moduleText.Parsec.Prim(unknownError,sysUnExpectError,unexpected,ParsecT,runParsecT,mkPT,Parsec,Consumed(..),Reply(..),State(..),parsecMap,parserReturn,parserBind,mergeErrorReply,parserFail,parserZero,parserPlus,(<?>),(<|>),label,labels,lookAhead,Stream(..),tokens,try,token,tokenPrim,tokenPrimEx,many,skipMany,manyAccum,runPT,runP,runParserT,runParser,parse,parseTest,getPosition,getInput,setPosition,setInput,getParserState,setParserState,updateParserState,getState,putState,modifyState,setState,updateState)whereimportqualifiedControl.ApplicativeasApplicative(Applicative(..),Alternative(..))importControl.Monad()importControl.Monad.TransimportControl.Monad.IdentityimportControl.Monad.Reader.ClassimportControl.Monad.State.ClassimportControl.Monad.Cont.ClassimportControl.Monad.Error.ClassimportText.Parsec.PosimportText.Parsec.ErrorunknownError::Statesu->ParseErrorunknownErrorstate=newErrorUnknown(statePosstate)sysUnExpectError::String->SourcePos->ReplysuasysUnExpectErrormsgpos=Error(newErrorMessage(SysUnExpectmsg)pos)-- | The parser @unexpected msg@ always fails with an unexpected error-- message @msg@ without consuming any input.---- The parsers 'fail', ('<?>') and @unexpected@ are the three parsers-- used to generate error messages. Of these, only ('<?>') is commonly-- used. For an example of the use of @unexpected@, see the definition-- of 'Text.Parsec.Combinator.notFollowedBy'.unexpected::(Streamsmt)=>String->ParsecTsumaunexpectedmsg=ParsecT$\s___eerr->eerr$newErrorMessage(UnExpectmsg)(statePoss)-- | ParserT monad transformer and Parser type-- | @ParsecT s u m a@ is a parser with stream type @s@, user state type @u@,-- underlying monad @m@ and return type @a@. Parsec is strict in the user state.-- If this is undesirable, simply used a data type like @data Box a = Box a@ and-- the state type @Box YourStateType@ to add a level of indirection.newtypeParsecTsuma=ParsecT{unParser::forallb.Statesu->(a->Statesu->ParseError->mb)-- consumed ok->(ParseError->mb)-- consumed err->(a->Statesu->ParseError->mb)-- empty ok->(ParseError->mb)-- empty err->mb}-- | Low-level unpacking of the ParsecT type. To run your parser, please look to-- runPT, runP, runParserT, runParser and other such functions.runParsecT::Monadm=>ParsecTsuma->Statesu->m(Consumed(m(Replysua)))runParsecTps=unParserpscokcerreokeerrwherecokas'err=return.Consumed.return$Okas'errcerrerr=return.Consumed.return$Errorerreokas'err=return.Empty.return$Okas'erreerrerr=return.Empty.return$Errorerr-- | Low-level creation of the ParsecT type. You really shouldn't have to do this.mkPT::Monadm=>(Statesu->m(Consumed(m(Replysua))))->ParsecTsumamkPTk=ParsecT$\scokcerreokeerr->docons<-kscaseconsofConsumedmrep->dorep<-mrepcaserepofOkxs'err->cokxs'errErrorerr->cerrerrEmptymrep->dorep<-mrepcaserepofOkxs'err->eokxs'errErrorerr->eerrerrtypeParsecsu=ParsecTsuIdentitydataConsumeda=Consumeda|Empty!adataReplysua=Oka!(Statesu)ParseError|ErrorParseErrordataStatesu=State{stateInput::s,statePos::!SourcePos,stateUser::!u}instanceFunctorConsumedwherefmapf(Consumedx)=Consumed(fx)fmapf(Emptyx)=Empty(fx)instanceFunctor(Replysu)wherefmapf(Okxse)=Ok(fx)sefmap_(Errore)=Errore-- XXXinstanceFunctor(ParsecTsum)wherefmapfp=parsecMapfpparsecMap::(a->b)->ParsecTsuma->ParsecTsumbparsecMapfp=ParsecT$\scokcerreokeerr->unParserps(cok.f)cerr(eok.f)eerrinstanceApplicative.Applicative(ParsecTsum)wherepure=return(<*>)=ap-- TODO: Can this be optimized?instanceApplicative.Alternative(ParsecTsum)whereempty=mzero(<|>)=mplusinstanceMonad(ParsecTsum)wherereturnx=parserReturnxp>>=f=parserBindpffailmsg=parserFailmsginstance(MonadIOm)=>MonadIO(ParsecTsum)whereliftIO=lift.liftIOinstance(MonadReaderrm)=>MonadReaderr(ParsecTsum)whereask=liftasklocalfp=mkPT$\s->localf(runParsecTps)-- I'm presuming the user might want a separate, non-backtracking-- state aside from the Parsec user state.instance(MonadStatesm)=>MonadStates(ParsecTs'um)whereget=liftgetput=lift.putinstance(MonadContm)=>MonadCont(ParsecTsum)wherecallCCf=mkPT$\s->callCC$\c->runParsecT(f(\a->mkPT$\s'->c(packs'a)))swherepacksa=Empty$return(Okas(unknownErrors))instance(MonadErrorem)=>MonadErrore(ParsecTsum)wherethrowError=lift.throwErrorp`catchError`h=mkPT$\s->runParsecTps`catchError`\e->runParsecT(he)sparserReturn::a->ParsecTsumaparserReturnx=ParsecT$\s__eok_->eokxs(unknownErrors)parserBind::ParsecTsuma->(a->ParsecTsumb)->ParsecTsumb{-# INLINE parserBind #-}parserBindmk=ParsecT$\scokcerreokeerr->let-- consumed-okay case for mmcokxserr=let-- if (k x) consumes, those go straigt uppcok=cokpcerr=cerr-- if (k x) doesn't consume input, but is okay,-- we still return in the consumed continuationpeokxserr'=cokxs(mergeErrorerrerr')-- if (k x) doesn't consume input, but errors,-- we return the error in the 'consumed-error'-- continuationpeerrerr'=cerr(mergeErrorerrerr')inunParser(kx)spcokpcerrpeokpeerr-- empty-ok case for mmeokxserr=let-- in these cases, (k x) can return as emptypcok=cokpeokxserr'=eokxs(mergeErrorerrerr')pcerr=cerrpeerrerr'=eerr(mergeErrorerrerr')inunParser(kx)spcokpcerrpeokpeerr-- consumed-error case for mmcerr=cerr-- empty-error case for mmeerr=eerrinunParsermsmcokmcerrmeokmeerrmergeErrorReply::ParseError->Replysua->ReplysuamergeErrorReplyerr1reply-- XXX where to put it?=casereplyofOkxstateerr2->Okxstate(mergeErrorerr1err2)Errorerr2->Error(mergeErrorerr1err2)parserFail::String->ParsecTsumaparserFailmsg=ParsecT$\s___eerr->eerr$newErrorMessage(Messagemsg)(statePoss)instanceMonadPlus(ParsecTsum)wheremzero=parserZeromplusp1p2=parserPlusp1p2-- | @parserZero@ always fails without consuming any input. @parserZero@ is defined-- equal to the 'mzero' member of the 'MonadPlus' class and to the 'Control.Applicative.empty' member -- of the 'Control.Applicative.Applicative' class.parserZero::ParsecTsumaparserZero=ParsecT$\s___eerr->eerr$unknownErrorsparserPlus::ParsecTsuma->ParsecTsuma->ParsecTsuma{-# INLINE parserPlus #-}parserPlusmn=ParsecT$\scokcerreokeerr->letmeerrerr=letneokys'err'=eokys'(mergeErrorerrerr')neerrerr'=eerr$mergeErrorerrerr'inunParsernscokcerrneokneerrinunParsermscokcerreokmeerrinstanceMonadTrans(ParsecTsu)whereliftamb=ParsecT$\s__eok_->doa<-ambeokas$unknownErrorsinfix0<?>infixr1<|>-- | The parser @p <?> msg@ behaves as parser @p@, but whenever the-- parser @p@ fails /without consuming any input/, it replaces expect-- error messages with the expect error message @msg@.---- This is normally used at the end of a set alternatives where we want-- to return an error message in terms of a higher level construct-- rather than returning all possible characters. For example, if the-- @expr@ parser from the 'try' example would fail, the error-- message is: '...: expecting expression'. Without the @(\<?>)@-- combinator, the message would be like '...: expecting \"let\" or-- letter', which is less friendly.(<?>)::(ParsecTsuma)->String->(ParsecTsuma)p<?>msg=labelpmsg-- | This combinator implements choice. The parser @p \<|> q@ first-- applies @p@. If it succeeds, the value of @p@ is returned. If @p@-- fails /without consuming any input/, parser @q@ is tried. This-- combinator is defined equal to the 'mplus' member of the 'MonadPlus'-- class and the ('Control.Applicative.<|>') member of 'Control.Applicative.Alternative'.---- The parser is called /predictive/ since @q@ is only tried when-- parser @p@ didn't consume any input (i.e.. the look ahead is 1).-- This non-backtracking behaviour allows for both an efficient-- implementation of the parser combinators and the generation of good-- error messages.(<|>)::(ParsecTsuma)->(ParsecTsuma)->(ParsecTsuma)p1<|>p2=mplusp1p2label::ParsecTsuma->String->ParsecTsumalabelpmsg=labelsp[msg]labels::ParsecTsuma->[String]->ParsecTsumalabelspmsgs=ParsecT$\scokcerreokeerr->leteok'xs'error=eokxs'$iferrorIsUnknownerrorthenerrorelsesetExpectErrorserrormsgseerr'err=eerr$setExpectErrorserrmsgsinunParserpscokcerreok'eerr'wheresetExpectErrorserr[]=setErrorMessage(Expect"")errsetExpectErrorserr[msg]=setErrorMessage(Expectmsg)errsetExpectErrorserr(msg:msgs)=foldr(\msg'err'->addErrorMessage(Expectmsg')err')(setErrorMessage(Expectmsg)err)msgs-- TODO: There should be a stronger statement that can be made about this-- | An instance of @Stream@ has stream type @s@, underlying monad @m@ and token type @t@ determined by the stream-- -- Some rough guidelines for a \"correct\" instance of Stream:---- * unfoldM uncons gives the [t] corresponding to the stream---- * A @Stream@ instance is responsible for maintaining the \"position within the stream\" in the stream state @s@. This is trivial unless you are using the monad in a non-trivial way.class(Monadm)=>Streamsmt|s->twhereuncons::s->m(Maybe(t,s))tokens::(Streamsmt,Eqt)=>([t]->String)-- Pretty print a list of tokens->(SourcePos->[t]->SourcePos)->[t]-- List of tokens to parse->ParsecTsum[t]{-# INLINE tokens #-}tokens__[]=ParsecT$\s__eok_->eok[]s$unknownErrorstokensshowTokensnextposstts@(tok:toks)=ParsecT$\(Stateinputposu)cokcerreokeerr->leterrEof=(setErrorMessage(Expect(showTokenstts))(newErrorMessage(SysUnExpect"")pos))errExpectx=(setErrorMessage(Expect(showTokenstts))(newErrorMessage(SysUnExpect(showTokens[x]))pos))walk[]rs=okrswalk(t:ts)rs=dosr<-unconsrscasesrofNothing->cerr$errEofJust(x,xs)|t==x->walktsxs|otherwise->cerr$errExpectxokrs=letpos'=nextpossposttss'=Staterspos'uincokttss'(newErrorUnknownpos')indosr<-unconsinputcasesrofNothing->eerr$errEofJust(x,xs)|tok==x->walktoksxs|otherwise->eerr$errExpectx-- | The parser @try p@ behaves like parser @p@, except that it-- pretends that it hasn't consumed any input when an error occurs.---- This combinator is used whenever arbitrary look ahead is needed.-- Since it pretends that it hasn't consumed any input when @p@ fails,-- the ('<|>') combinator will try its second alternative even when the-- first parser failed while consuming input.---- The @try@ combinator can for example be used to distinguish-- identifiers and reserved words. Both reserved words and identifiers-- are a sequence of letters. Whenever we expect a certain reserved-- word where we can also expect an identifier we have to use the @try@-- combinator. Suppose we write:---- > expr = letExpr <|> identifier <?> "expression"-- >-- > letExpr = do{ string "let"; ... }-- > identifier = many1 letter---- If the user writes \"lexical\", the parser fails with: @unexpected-- \'x\', expecting \'t\' in \"let\"@. Indeed, since the ('<|>') combinator-- only tries alternatives when the first alternative hasn't consumed-- input, the @identifier@ parser is never tried (because the prefix-- \"le\" of the @string \"let\"@ parser is already consumed). The-- right behaviour can be obtained by adding the @try@ combinator:---- > expr = letExpr <|> identifier <?> "expression"-- >-- > letExpr = do{ try (string "let"); ... }-- > identifier = many1 lettertry::ParsecTsuma->ParsecTsumatryp=ParsecT$\scok_eokeerr->unParserpscokeerreokeerr-- | @lookAhead p@ parses @p@ without consuming any input.---- If @p@ fails and consumes some input, so does @lookAhead@. Combine with 'try'-- if this is undesirable.lookAhead::(Streamsmt)=>ParsecTsuma->ParsecTsumalookAheadp=do{state<-getParserState;x<-p';setParserStatestate;returnx}wherep'=ParsecT$\scokcerreokeerr->unParserpseokcerreokeerr-- | The parser @token showTok posFromTok testTok@ accepts a token @t@-- with result @x@ when the function @testTok t@ returns @'Just' x@. The-- source position of the @t@ should be returned by @posFromTok t@ and-- the token can be shown using @showTok t@.---- This combinator is expressed in terms of 'tokenPrim'.-- It is used to accept user defined token streams. For example,-- suppose that we have a stream of basic tokens tupled with source-- positions. We can than define a parser that accepts single tokens as:---- > mytoken x-- > = token showTok posFromTok testTok-- > where-- > showTok (pos,t) = show t-- > posFromTok (pos,t) = pos-- > testTok (pos,t) = if x == t then Just t else Nothingtoken::(StreamsIdentityt)=>(t->String)-- ^ Token pretty-printing function.->(t->SourcePos)-- ^ Computes the position of a token.->(t->Maybea)-- ^ Matching function for the token to parse.->ParsecsuatokenshowTokentokpostest=tokenPrimshowTokennextpostestwherenextpos_tokts=caserunIdentity(unconsts)ofNothing->tokpostokJust(tok',_)->tokpostok'-- | The parser @tokenPrim showTok nextPos testTok@ accepts a token @t@-- with result @x@ when the function @testTok t@ returns @'Just' x@. The-- token can be shown using @showTok t@. The position of the /next/-- token should be returned when @nextPos@ is called with the current-- source position @pos@, the current token @t@ and the rest of the-- tokens @toks@, @nextPos pos t toks@.---- This is the most primitive combinator for accepting tokens. For-- example, the 'Text.Parsec.Char.char' parser could be implemented as:---- > char c-- > = tokenPrim showChar nextPos testChar-- > where-- > showChar x = "'" ++ x ++ "'"-- > testChar x = if x == c then Just x else Nothing-- > nextPos pos x xs = updatePosChar pos xtokenPrim::(Streamsmt)=>(t->String)-- ^ Token pretty-printing function.->(SourcePos->t->s->SourcePos)-- ^ Next position calculating function.->(t->Maybea)-- ^ Matching function for the token to parse.->ParsecTsuma{-# INLINE tokenPrim #-}tokenPrimshowTokennextpostest=tokenPrimExshowTokennextposNothingtesttokenPrimEx::(Streamsmt)=>(t->String)->(SourcePos->t->s->SourcePos)->Maybe(SourcePos->t->s->u->u)->(t->Maybea)->ParsecTsuma{-# INLINE tokenPrimEx #-}tokenPrimExshowTokennextposNothingtest=ParsecT$\(Stateinputposuser)cokcerreokeerr->dor<-unconsinputcaserofNothing->eerr$unexpectError""posJust(c,cs)->casetestcofJustx->letnewpos=nextposposccsnewstate=Statecsnewposuserinseqnewpos$seqnewstate$cokxnewstate(newErrorUnknownnewpos)Nothing->eerr$unexpectError(showTokenc)postokenPrimExshowTokennextpos(JustnextState)test=ParsecT$\(Stateinputposuser)cokcerreokeerr->dor<-unconsinputcaserofNothing->eerr$unexpectError""posJust(c,cs)->casetestcofJustx->letnewpos=nextposposccsnewUser=nextStateposccsusernewstate=StatecsnewposnewUserinseqnewpos$seqnewstate$cokxnewstate$newErrorUnknownnewposNothing->eerr$unexpectError(showTokenc)posunexpectErrormsgpos=newErrorMessage(SysUnExpectmsg)pos-- | @many p@ applies the parser @p@ /zero/ or more times. Returns a-- list of the returned values of @p@.---- > identifier = do{ c <- letter-- > ; cs <- many (alphaNum <|> char '_')-- > ; return (c:cs)-- > }many::ParsecTsuma->ParsecTsum[a]manyp=doxs<-manyAccum(:)preturn(reversexs)-- | @skipMany p@ applies the parser @p@ /zero/ or more times, skipping-- its result.---- > spaces = skipMany spaceskipMany::ParsecTsuma->ParsecTsum()skipManyp=domanyAccum(\__->[])preturn()manyAccum::(a->[a]->[a])->ParsecTsuma->ParsecTsum[a]manyAccumaccp=ParsecT$\scokcerreokeerr->letwalkxsxs'err=unParserps'(seqxs$walk$accxxs)-- consumed-okcerr-- consumed-errmanyErr-- empty-ok(\e->cok(accxxs)s'e)-- empty-errinunParserps(walk[])cerrmanyErr(\e->eok[]se)manyErr=error"Text.ParserCombinators.Parsec.Prim.many: combinator 'many' is applied to a parser that accepts an empty string."-- < Running a parser: monadic (runPT) and pure (runP)runPT::(Streamsmt)=>ParsecTsuma->u->SourceName->s->m(EitherParseErrora)runPTpunames=dores<-runParsecTp(States(initialPosname)u)r<-parserReplyrescaserofOkx__->return(Rightx)Errorerr->return(Lefterr)whereparserReplyres=caseresofConsumedr->rEmptyr->rrunP::(StreamsIdentityt)=>Parsecsua->u->SourceName->s->EitherParseErrorarunPpunames=runIdentity$runPTpunames-- | The most general way to run a parser. @runParserT p state filePath-- input@ runs parser @p@ on the input list of tokens @input@,-- obtained from source @filePath@ with the initial user state @st@.-- The @filePath@ is only used in error messages and may be the empty-- string. Returns a computation in the underlying monad @m@ that return either a 'ParseError' ('Left') or a-- value of type @a@ ('Right').runParserT::(Streamsmt)=>ParsecTsuma->u->SourceName->s->m(EitherParseErrora)runParserT=runPT-- | The most general way to run a parser over the Identity monad. @runParser p state filePath-- input@ runs parser @p@ on the input list of tokens @input@,-- obtained from source @filePath@ with the initial user state @st@.-- The @filePath@ is only used in error messages and may be the empty-- string. Returns either a 'ParseError' ('Left') or a-- value of type @a@ ('Right').---- > parseFromFile p fname-- > = do{ input <- readFile fname-- > ; return (runParser p () fname input)-- > }runParser::(StreamsIdentityt)=>Parsecsua->u->SourceName->s->EitherParseErrorarunParser=runP-- | @parse p filePath input@ runs a parser @p@ over Identity without user-- state. The @filePath@ is only used in error messages and may be the-- empty string. Returns either a 'ParseError' ('Left')-- or a value of type @a@ ('Right').---- > main = case (parse numbers "" "11, 2, 43") of-- > Left err -> print err-- > Right xs -> print (sum xs)-- >-- > numbers = commaSep integerparse::(StreamsIdentityt)=>Parsecs()a->SourceName->s->EitherParseErroraparsep=runPp()-- | The expression @parseTest p input@ applies a parser @p@ against-- input @input@ and prints the result to stdout. Used for testing-- parsers.parseTest::(StreamsIdentityt,Showa)=>Parsecs()a->s->IO()parseTestpinput=caseparsep""inputofLefterr->doputStr"parse error at "printerrRightx->printx-- < Parser state combinators-- | Returns the current source position. See also 'SourcePos'.getPosition::(Monadm)=>ParsecTsumSourcePosgetPosition=dostate<-getParserStatereturn(statePosstate)-- | Returns the current input getInput::(Monadm)=>ParsecTsumsgetInput=dostate<-getParserStatereturn(stateInputstate)-- | @setPosition pos@ sets the current source position to @pos@. setPosition::(Monadm)=>SourcePos->ParsecTsum()setPositionpos=doupdateParserState(\(Stateinput_user)->Stateinputposuser)return()-- | @setInput input@ continues parsing with @input@. The 'getInput' and-- @setInput@ functions can for example be used to deal with #include-- files. setInput::(Monadm)=>s->ParsecTsum()setInputinput=doupdateParserState(\(State_posuser)->Stateinputposuser)return()-- | Returns the full parser state as a 'State' record.getParserState::(Monadm)=>ParsecTsum(Statesu)getParserState=updateParserStateid-- | @setParserState st@ set the full parser state to @st@. setParserState::(Monadm)=>Statesu->ParsecTsum(Statesu)setParserStatest=updateParserState(constst)-- | @updateParserState f@ applies function @f@ to the parser state.updateParserState::(Statesu->Statesu)->ParsecTsum(Statesu)updateParserStatef=ParsecT$\s__eok_->lets'=fsineoks's'$unknownErrors'-- < User state combinators-- | Returns the current user state. getState::(Monadm)=>ParsecTsumugetState=stateUser`liftM`getParserState-- | @putState st@ set the user state to @st@. putState::(Monadm)=>u->ParsecTsum()putStateu=doupdateParserState$\s->s{stateUser=u}return()-- | @updateState f@ applies function @f@ to the user state. Suppose-- that we want to count identifiers in a source, we could use the user-- state as:---- > expr = do{ x <- identifier-- > ; updateState (+1)-- > ; return (Id x)-- > }modifyState::(Monadm)=>(u->u)->ParsecTsum()modifyStatef=doupdateParserState$\s->s{stateUser=f(stateUsers)}return()-- XXX Compat-- | An alias for putState for backwards compatibility.setState::(Monadm)=>u->ParsecTsum()setState=putState-- | An alias for modifyState for backwards compatibility.updateState::(Monadm)=>(u->u)->ParsecTsum()updateState=modifyState