{-# LANGUAGE BangPatterns, CPP, FlexibleInstances, OverloadedStrings,
    Rank2Types, RecordWildCards, TypeSynonymInstances, GADTs #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

-- |
-- Module      :  Data.Attoparsec.Text.Internal
-- Copyright   :  Bryan O'Sullivan 2011
-- License     :  BSD3
--
-- Maintainer  :  bos@serpentine.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- Simple, efficient parser combinators for 'T.Text' strings, loosely
-- based on the Parsec library.

module Data.Attoparsec.Text.Internal
    (
    -- * Parser types
      Parser
    , Result

    -- * Running parsers
    , parse
    , parseOnly

    -- * Combinators
    , (<?>)
    , try
    , module Data.Attoparsec.Combinator

    -- * Parsing individual characters
    , satisfy
    , satisfyWith
    , anyChar
    , skip
    , char
    , notChar
    , peekChar

    -- ** Character classes
    , inClass
    , notInClass

    -- * Efficient string handling
    , skipWhile
    , string
    , stringCI
    , asciiCI
    , take
    , scan
    , takeWhile
    , takeWhile1
    , takeTill

    -- ** Consume all remaining input
    , takeText
    , takeLazyText

    -- * State observation and manipulation functions
    , endOfInput
    , atEnd

    -- * Utilities
    , endOfLine
    ) where

import Control.Applicative ((<|>), (<$>))
import Control.Monad (when)
import Data.Attoparsec.Combinator
import Data.Attoparsec.Internal.Types hiding (Parser, Input, Added, Failure, Success)
import Data.String (IsString(..))
import Data.Text (Text)
import Prelude hiding (getChar, take, takeWhile)
import Data.Char (chr, ord)
import qualified Data.Attoparsec.Internal.Types as T
import qualified Data.Attoparsec.Text.FastSet as Set
import qualified Data.Text as T
import qualified Data.Text.Internal as T
import qualified Data.Text.Lazy as L

-- On GHC, 'inline' is the compiler-provided inlining hint; elsewhere it
-- degrades to the identity function.
#if defined(__GLASGOW_HASKELL__)
import GHC.Exts (inline)
#else
inline :: a -> a
inline x = x
#endif

-- | A parser over strict 'Text' input.
type Parser = T.Parser Text

-- | The result of running a 'Parser' over 'Text' input.
type Result = IResult Text

-- Local specialisations of the generic continuation-passing types to
-- 'Text' input.
type Input = T.Input Text
type Added = T.Added Text
type Failure r = T.Failure Text r
type Success a r = T.Success Text a r

-- | String literals denote parsers that match exactly that text (see
-- 'string').
instance (a ~ Text) => IsString (Parser a) where
    fromString = string . T.pack

-- | Report whether @t@ contains at least @n@ characters.
--
-- The first disjunct is a constant-time shortcut that looks only at the
-- length field of the 'T.Text' constructor, which counts code units
-- rather than characters.  A character occupies at most two code units
-- in UTF-16 (text < 2.0) and at most four in UTF-8 (text >= 2.0), so
-- @len `div` 4@ is a lower bound on the character count under either
-- representation.  Dividing by two, as was done previously, is only
-- sound for UTF-16 and can over-report on UTF-8 input.
--
-- When the shortcut is inconclusive we fall back to the exact (linear
-- time) 'T.length', so the overall answer is always exact.
lengthAtLeast :: T.Text -> Int -> Bool
lengthAtLeast t@(T.Text _ _ len) n = (len `div` 4) >= n || T.length t >= n
{-# INLINE lengthAtLeast #-}

-- | If at least @n@ characters of input are available, return the
-- current input, otherwise
-- fail.
--
-- When the input is too short, more is demanded (possibly repeatedly,
-- via the local @go@ loop) until either enough characters have
-- arrived or 'demandInput' fails.
ensure :: Int -> Parser Text
ensure !n = T.Parser $ \i0 a0 m0 kf ks ->
    if lengthAtLeast (unI i0) n
    -- Fast path: enough input is already buffered.
    then inline ks i0 a0 m0 (unI i0)
    else runParser (demandInput >> go n) i0 a0 m0 kf ks
  where
    go n' = T.Parser $ \i0 a0 m0 kf ks ->
        if lengthAtLeast (unI i0) n'
        then ks i0 a0 m0 (unI i0)
        else runParser (demandInput >> go n') i0 a0 m0 kf ks
{-# INLINE ensure #-}

-- | Ask for input.  If we receive any, pass it to a success
-- continuation, otherwise to a failure continuation.
--
-- On success the new chunk is appended both to the current input and
-- to the record of added input, and the status stays 'Incomplete'.
-- An empty chunk marks the input as 'Complete'.
prompt :: Input -> Added -> More
       -> (Input -> Added -> More -> Result r)
       -> (Input -> Added -> More -> Result r)
       -> Result r
prompt i0 a0 _m0 kf ks = Partial $ \s ->
    if T.null s
    then kf i0 a0 Complete
    else ks (I (unI i0 <> s)) (A (unA a0 <> s)) Incomplete

-- | Immediately demand more input via a 'Partial' continuation
-- result.
--
-- Fails with @\"not enough input\"@ if the input is already known to
-- be 'Complete', or if the caller supplies an empty chunk.
demandInput :: Parser ()
demandInput = T.Parser $ \i0 a0 m0 kf ks ->
    if m0 == Complete
    then kf i0 a0 m0 ["demandInput"] "not enough input"
    else let kf' i a m = kf i a m ["demandInput"] "not enough input"
             ks' i a m = ks i a m ()
         in prompt i0 a0 m0 kf' ks'

-- | This parser always succeeds.  It returns 'True' if any input is
-- available either immediately or on demand, and 'False' if the end
-- of all input has been reached.
wantInput :: Parser Bool
wantInput = T.Parser $ \i0 a0 m0 _kf ks ->
    case () of
      _ | not (T.null (unI i0)) -> ks i0 a0 m0 True
        | m0 == Complete        -> ks i0 a0 m0 False
        | otherwise             -> let kf' i a m = ks i a m False
                                       ks' i a m = ks i a m True
                                   in prompt i0 a0 m0 kf' ks'

-- | Return the current (unconsumed) input without modifying the
-- parser state.
get :: Parser Text
get = T.Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0)

-- | Replace the current input with @s@.  The record of added input
-- and the 'More' status are passed through unchanged.
put :: Text -> Parser ()
put s = T.Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 ()

-- | Attempt a parse, and if it fails, rewind the input so that no
-- input appears to have been consumed.
--
-- This combinator is provided for compatibility with Parsec.
-- Attoparsec parsers always backtrack on failure.
try :: Parser a -> Parser a
try p = p
{-# INLINE try #-}

-- The helpers below are thin aliases for the corresponding "Data.Text"
-- functions.  Callers in this module establish the required amount of
-- input with 'ensure' (or an explicit emptiness check) before using
-- them.

unsafeHead :: Text -> Char
unsafeHead = T.head

unsafeTail :: Text -> Text
unsafeTail = T.tail

unsafeTake :: Int -> Text -> Text
unsafeTake = T.take

unsafeDrop :: Int -> Text -> Text
unsafeDrop = T.drop

-- | The parser @satisfy p@ succeeds for any character for which the
-- predicate @p@ returns 'True'.
-- Returns the character that is
-- actually parsed.
--
-- >digit = satisfy isDigit
-- >    where isDigit c = c >= '0' && c <= '9'
satisfy :: (Char -> Bool) -> Parser Char
satisfy p = do
  s <- ensure 1
  let !w = unsafeHead s
  if p w
    then put (unsafeTail s) >> return w
    else fail "satisfy"
{-# INLINE satisfy #-}

-- | The parser @skip p@ succeeds for any character for which the
-- predicate @p@ returns 'True'.
--
-- >skipDigit = skip isDigit
-- >    where isDigit c = c >= '0' && c <= '9'
skip :: (Char -> Bool) -> Parser ()
skip p = do
  s <- ensure 1
  if p (unsafeHead s)
    then put (unsafeTail s)
    else fail "skip"

-- | The parser @satisfyWith f p@ transforms a character, and succeeds
-- if the predicate @p@ returns 'True' on the transformed value. The
-- parser returns the transformed character that was parsed.
satisfyWith :: (Char -> a) -> (a -> Bool) -> Parser a
satisfyWith f p = do
  s <- ensure 1
  let c = f $! unsafeHead s
  if p c
    then let !t = unsafeTail s
         in put t >> return c
    else fail "satisfyWith"
{-# INLINE satisfyWith #-}

-- | Consume @n@ characters of input, but succeed only if the
-- predicate returns 'True'.
--
-- The predicate is applied to the @n@-character prefix; on failure
-- the input is left as it was.
takeWith :: Int -> (Text -> Bool) -> Parser Text
takeWith n p = do
  s <- ensure n
  let h = unsafeTake n s
      t = unsafeDrop n s
  if p h
    then put t >> return h
    else fail "takeWith"

-- | Consume exactly @n@ characters of input.
take :: Int -> Parser Text
take n = takeWith n (const True)
{-# INLINE take #-}

-- | @string s@ parses a sequence of characters that identically match
-- @s@. Returns the parsed string (i.e. @s@).  This parser consumes no
-- input if it fails (even if a partial match).
--
-- /Note/: The behaviour of this parser is different to that of the
-- similarly-named parser in Parsec, as this one is all-or-nothing.
-- To illustrate the difference, the following parser will fail under
-- Parsec given an input of @\"for\"@:
--
-- >string "foo" <|> string "for"
--
-- The reason for its failure is that the first branch is a
-- partial match, and will consume the letters @\'f\'@ and @\'o\'@
-- before failing.
-- In Attoparsec, the above parser will /succeed/ on
-- that input, because the failed first branch will consume nothing.
string :: Text -> Parser Text
string s = takeWith (T.length s) (== s)
{-# INLINE string #-}

-- | Satisfy a literal string, ignoring case.
--
-- Note: this function is currently quite inefficient. Unicode case
-- folding can change the length of a string (\"&#223;\" becomes
-- "ss"), which makes a simple, efficient implementation tricky.  We
-- have (for now) chosen simplicity over efficiency.
stringCI :: Text -> Parser Text
stringCI s = go 0
  where
    -- Try successively longer prefixes of the input until one
    -- case-folds to the folded target, giving up once the prefix is
    -- longer than the folded target itself.
    go !n
      | n > T.length fs = fail "stringCI"
      | otherwise = do
          t <- ensure n
          let h = unsafeTake n t
          if T.toCaseFold h == fs
            then put (unsafeDrop n t) >> return h
            else go (n + 1)
    fs = T.toCaseFold s
{-# INLINE stringCI #-}
{-# DEPRECATED stringCI "this is very inefficient, use asciiCI instead" #-}

-- | Satisfy a literal string, ignoring case for characters in the ASCII range.
--
-- Returns the text as it appeared in the input, not the pattern.
asciiCI :: Text -> Parser Text
asciiCI input = do
  t <- ensure n
  let h = unsafeTake n t
  if asciiToLower h == s
    then put (unsafeDrop n t) >> return h
    else fail "asciiCI"
  where
    n = T.length input
    s = asciiToLower input

    -- convert letters in the ASCII range to lower-case
    asciiToLower = T.map f
      where
        offset = ord 'a' - ord 'A'
        f c | 'A' <= c && c <= 'Z' = chr (ord c + offset)
            | otherwise            = c
{-# INLINE asciiCI #-}

-- | Skip past input for as long as the predicate returns 'True'.
skipWhile :: (Char -> Bool) -> Parser ()
skipWhile p = go
  where
    go = do
      t <- T.dropWhile p <$> get
      put t
      -- The whole chunk matched: the run may continue in the next
      -- chunk, so ask for more before stopping.
      when (T.null t) $ do
        input <- wantInput
        when input go
{-# INLINE skipWhile #-}

-- | Consume input as long as the predicate returns 'False'
-- (i.e. until it returns 'True'), and return the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'True' on the first character of input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.
-- Careless use will thus result in an infinite loop.
takeTill :: (Char -> Bool) -> Parser Text
takeTill p = takeWhile (not . p)
{-# INLINE takeTill #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'False' on the first character of input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeWhile :: (Char -> Bool) -> Parser Text
takeWhile p = (T.concat . reverse) `fmap` go []
  where
    -- Matching pieces are accumulated most-recent-first and put back
    -- in order by the caller.
    go acc = do
      (h, t) <- T.span p <$> get
      put t
      if T.null t
        -- The whole chunk matched: the run may continue in the next
        -- chunk, if there is one.
        then do
          input <- wantInput
          if input
            then go (h : acc)
            else return (h : acc)
        else return (h : acc)

-- | Consume all remaining input, returning the chunks in the order
-- they were received.
takeRest :: Parser [Text]
takeRest = go []
  where
    go acc = do
      input <- wantInput
      if input
        then do
          s <- get
          put T.empty
          go (s : acc)
        else return (reverse acc)

-- | Consume all remaining input and return it as a single string.
takeText :: Parser Text
takeText = T.concat `fmap` takeRest

-- | Consume all remaining input and return it as a single string.
takeLazyText :: Parser L.Text
takeLazyText = L.fromChunks `fmap` takeRest

-- Outcome of running the 'scan' predicate over one chunk of input.
data Scan s
    = Continue s
      -- The chunk was exhausted; carries the state reached so far.
    | Finished {-# UNPACK #-} !Int T.Text
      -- The predicate stopped; carries the number of characters
      -- accepted from the chunk and the unconsumed remainder.

-- | A stateful scanner.  The predicate consumes and transforms a
-- state argument, and each transformed state is passed to successive
-- invocations of the predicate on each character of the input until one
-- returns 'Nothing' or the input ends.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'Nothing' on the first character of input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.
-- Careless use will thus result in an infinite loop.
scan :: s -> (s -> Char -> Maybe s) -> Parser Text
scan s0 p = do
  chunks <- go [] s0
  case chunks of
    -- Common case: everything came from a single chunk, so there is
    -- nothing to concatenate.
    [x] -> return x
    xs  -> return . T.concat . reverse $ xs
  where
    -- Run the predicate over one chunk, counting accepted characters.
    scanner s !n t =
      case T.uncons t of
        Just (c, t') -> case p s c of
                          Just s' -> scanner s' (n + 1) t'
                          Nothing -> Finished n t
        Nothing      -> Continue s

    -- Scan chunk after chunk, accumulating the accepted pieces
    -- most-recent-first.
    go acc s = do
      input <- get
      case scanner s 0 input of
        Continue s' -> do
          put T.empty
          more <- wantInput
          if more
            then go (input : acc) s'
            else return (input : acc)
        Finished n t -> put t >> return (T.take n input : acc)
{-# INLINE scan #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser requires the predicate to succeed on at least one
-- character of input: it will fail if the predicate never returns
-- 'True' or if there is no input left.
takeWhile1 :: (Char -> Bool) -> Parser Text
takeWhile1 p = do
  -- Make sure there is something to look at before testing the
  -- predicate.
  (`when` demandInput) =<< T.null <$> get
  (h, t) <- T.span p <$> get
  when (T.null h) $ fail "takeWhile1"
  put t
  -- If the whole chunk matched, the run may continue in later input.
  if T.null t
    then (h <>) `fmap` takeWhile p
    else return h

-- | Match any character in a set.
--
-- >vowel = inClass "aeiou"
--
-- Range notation is supported.
--
-- >halfAlphabet = inClass "a-nA-N"
--
-- To add a literal @\'-\'@ to a set, place it at the beginning or end
-- of the string.
inClass :: String -> Char -> Bool
inClass s = (`Set.member` mySet)
    where mySet = Set.charClass s
          {-# NOINLINE mySet #-}
{-# INLINE inClass #-}

-- | Match any character not in a set.
notInClass :: String -> Char -> Bool
notInClass s = not . inClass s
{-# INLINE notInClass #-}

-- | Match any character.
anyChar :: Parser Char
anyChar = satisfy $ const True
{-# INLINE anyChar #-}

-- | Match a specific character.
char :: Char -> Parser Char
char c = satisfy (== c) <?> show c
{-# INLINE char #-}

-- | Match any character except the given one.
notChar :: Char -> Parser Char
notChar c = satisfy (/= c) <?> "not " ++ show c
{-# INLINE notChar #-}

-- | Match any character.  Returns 'Nothing' if end of input has been
-- reached.  Does not consume any input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.
-- Careless use will thus result in an infinite loop.
peekChar :: Parser (Maybe Char)
peekChar = T.Parser $ \i0 a0 m0 _kf ks ->
    if T.null (unI i0)
    then if m0 == Complete
         then ks i0 a0 m0 Nothing
         -- Nothing buffered but more may arrive: ask for it.  Both
         -- outcomes of the prompt lead to success.
         else let ks' i a m = let !c = unsafeHead (unI i)
                              in ks i a m (Just c)
                  kf' i a m = ks i a m Nothing
              in prompt i0 a0 m0 kf' ks'
    else let !c = unsafeHead (unI i0)
         in ks i0 a0 m0 (Just c)
{-# INLINE peekChar #-}

-- | Match only if all input has been consumed.
endOfInput :: Parser ()
endOfInput = T.Parser $ \i0 a0 m0 kf ks ->
    if T.null (unI i0)
    then if m0 == Complete
         then ks i0 a0 m0 ()
         -- The continuations are deliberately swapped: if demanding
         -- more input fails we really are at the end (success), and
         -- if it succeeds there is input left (failure).
         else let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $
                                     \i2 a2 m2 -> ks i2 a2 m2 ()
                  ks' i1 a1 m1 _   = addS i0 a0 m0 i1 a1 m1 $
                                     \i2 a2 m2 -> kf i2 a2 m2 []
                                                  "endOfInput"
              in runParser demandInput i0 a0 m0 kf' ks'
    else kf i0 a0 m0 [] "endOfInput"

-- | Return an indication of whether the end of input has been
-- reached.
atEnd :: Parser Bool
atEnd = not <$> wantInput
{-# INLINE atEnd #-}

-- | Match either a single newline character @\'\\n\'@, or a carriage
-- return followed by a newline character @\"\\r\\n\"@.
endOfLine :: Parser ()
endOfLine = (char '\n' >> return ()) <|> (string "\r\n" >> return ())

-- | Name the parser, in case failure occurs.
--
-- The name is pushed onto the front of the list of context strings
-- handed to the failure continuation.
(<?>) :: Parser a
      -> String                 -- ^ the name to use if parsing fails
      -> Parser a
p <?> msg0 = T.Parser $ \i0 a0 m0 kf ks ->
    let kf' i a m strs msg = kf i a m (msg0 : strs) msg
    in runParser p i0 a0 m0 kf' ks
{-# INLINE (<?>) #-}
infix 0 <?>

-- | Terminal failure continuation.
failK :: Failure a
failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg
{-# INLINE failK #-}

-- | Terminal success continuation.
successK :: Success a a
successK i0 _a0 _m0 a = Done (unI i0) a
{-# INLINE successK #-}

-- | Run a parser.
--
-- The input is treated as 'Incomplete', so the result may be a
-- 'Partial' continuation asking for more.
parse :: Parser a -> Text -> Result a
parse m s = runParser m (I s) (A T.empty) Incomplete failK successK
{-# INLINE parse #-}

-- | Run a parser that cannot be resupplied via a 'Partial' result.
parseOnly :: Parser a -> Text -> Either String a
parseOnly m s = case runParser m (I s) (A T.empty) Complete failK successK of
                  Fail _ _ err -> Left err
                  Done _ a     -> Right a
                  _            -> error "parseOnly: impossible error!"
{-# INLINE parseOnly #-}