-- |
-- Module      :  Data.Attoparsec.Char8
-- Copyright   :  Bryan O'Sullivan 2007-2010
-- License     :  BSD3
--
-- Maintainer  :  bos@serpentine.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- Simple, efficient, character-oriented combinator parsing for
-- 'B.ByteString' strings, loosely based on the Parsec library.

module Data.Attoparsec.Char8
    (
    -- * Character encodings
    -- $encodings

    -- * Parser types
      Parser
    , A.Result(..)

    -- * Running parsers
    , A.parse
    , A.parseTest
    , A.parseWith
    , A.feed

    -- * Combinators
    , (I.<?>)
    , I.try
    , module Data.Attoparsec.Combinator

    -- * Parsing individual characters
    , satisfy
    , char
    , anyChar
    , char8
    , notChar

    -- ** Special character parsers
    , digit
    , letter_iso8859_15
    , letter_ascii
    , space

    -- ** Fast predicates
    , isDigit
    , isDigit_w8
    , isAlpha_iso8859_15
    , isAlpha_ascii

    -- *** Character classes
    , inClass
    , notInClass

    -- * Efficient string handling
    , I.string
    , stringCI
    , skipSpace
    , skipWhile
    , I.take
    , takeTill
    , takeWhile
    , takeWhile1

    -- * Text parsing
    , I.endOfLine
    , isEndOfLine
    , isHorizontalSpace

    -- * Numeric parsers
    , decimal
    , hexadecimal
    , signed
    --, double

    -- * State observation and manipulation functions
    , I.endOfInput
    , I.ensure
    ) where

import Control.Applicative ((*>), (<$>), (<|>))
import Data.Attoparsec.Combinator
import Data.Attoparsec.FastSet (charClass, memberChar)
import Data.Attoparsec.Internal (Parser, (<?>))
import Data.ByteString.Internal (c2w, w2c)
import Data.Word (Word8)
import Prelude hiding (takeWhile)
import qualified Data.Attoparsec as A
import qualified Data.Attoparsec.Internal as I
import qualified Data.ByteString as B8
import qualified Data.ByteString.Char8 as B

-- $encodings
--
-- This module is intended for parsing text that is
-- represented using an 8-bit character set, e.g. ASCII or
-- ISO-8859-15.  It /does not/ make any attempt to deal with character
-- encodings, multibyte characters, or wide characters.  In
-- particular, all attempts to use characters above code point U+00FF
-- will give wrong answers.
--
-- Code points below U+0100 are simply translated to and from their
-- numeric values, so e.g.
-- the code point U+00A4 becomes the byte
-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic
-- currency sign in ISO-8859-1).  Haskell 'Char' values above U+00FF
-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@.

-- ASCII-specific but fast, oh yes.  Bytes 65..90 ('A'..'Z') are shifted
-- up by 32 onto 'a'..'z'; every other byte is returned unchanged.
toLower :: Word8 -> Word8
toLower w | w >= 65 && w <= 90 = w + 32
          | otherwise          = w

-- | Satisfy a literal string, ignoring case.
--
-- /Note/: Case is folded with an ASCII-only mapping, so only the
-- letters @A@-@Z@ are treated as case-insensitive.
stringCI :: B.ByteString -> Parser B.ByteString
stringCI = I.stringTransform (B8.map toLower)
{-# INLINE stringCI #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser requires the predicate to succeed on at least one byte
-- of input: it will fail if the predicate never returns 'True' or if
-- there is no input left.
takeWhile1 :: (Char -> Bool) -> Parser B.ByteString
takeWhile1 p = I.takeWhile1 (p . w2c)
{-# INLINE takeWhile1 #-}

-- | The parser @satisfy p@ succeeds for any byte for which the
-- predicate @p@ returns 'True'. Returns the byte that is actually
-- parsed.
--
-- >digit = satisfy isDigit
-- >    where isDigit c = c >= '0' && c <= '9'
satisfy :: (Char -> Bool) -> Parser Char
satisfy = I.satisfyWith w2c
{-# INLINE satisfy #-}

-- | Match a letter, in the ISO-8859-15 encoding.
letter_iso8859_15 :: Parser Char
letter_iso8859_15 = satisfy isAlpha_iso8859_15 <?> "letter_iso8859_15"
{-# INLINE letter_iso8859_15 #-}

-- | Match a letter, in the ASCII encoding.
letter_ascii :: Parser Char
letter_ascii = satisfy isAlpha_ascii <?> "letter_ascii"
{-# INLINE letter_ascii #-}

-- | A fast alphabetic predicate for the ISO-8859-15 encoding
--
-- /Note/: For all character encodings other than ISO-8859-15, and
-- almost all Unicode code points above U+00A3, this predicate gives
-- /wrong answers/.
isAlpha_iso8859_15 :: Char -> Bool
isAlpha_iso8859_15 c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
                       (c >= '\166' && moby c)
  where -- Code points at or above 166 that are /not/ letters; everything
        -- else in that range is accepted.
        moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247"
        {-# NOINLINE moby #-}
{-# INLINE isAlpha_iso8859_15 #-}

-- | A fast alphabetic predicate for the ASCII encoding
--
-- /Note/: For all character encodings other than
-- ASCII, and almost all Unicode code points above U+007F, this
-- predicate gives /wrong answers/.
isAlpha_ascii :: Char -> Bool
isAlpha_ascii c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
{-# INLINE isAlpha_ascii #-}

-- | Parse a single digit.
digit :: Parser Char
digit = satisfy isDigit <?> "digit"
{-# INLINE digit #-}

-- | A fast digit predicate.
isDigit :: Char -> Bool
isDigit c = c >= '0' && c <= '9'
{-# INLINE isDigit #-}

-- | A fast digit predicate that works directly on a byte: it is
-- 'True' for the values 48 through 57, i.e. ASCII @\'0\'@ to @\'9\'@.
isDigit_w8 :: Word8 -> Bool
isDigit_w8 w = w >= 48 && w <= 57
{-# INLINE isDigit_w8 #-}

-- | Match any character.
anyChar :: Parser Char
anyChar = satisfy $ const True
{-# INLINE anyChar #-}

-- | Fast predicate for matching a space character.
--
-- /Note/: This predicate only gives correct answers for the ASCII
-- encoding.  For instance, it does not recognise U+00A0 (non-breaking
-- space) as a space character, even though it is a valid ISO-8859-15
-- byte.
isSpace :: Char -> Bool
isSpace c = c `B.elem` spaces
    where spaces = B.pack " \n\r\t\v\f"
          {-# NOINLINE spaces #-}
{-# INLINE isSpace #-}

-- | Parse a space character.
--
-- /Note/: This parser only gives correct answers for the ASCII
-- encoding.
-- For instance, it does not recognise U+00A0 (non-breaking
-- space) as a space character, even though it is a valid ISO-8859-15
-- byte.
space :: Parser Char
space = satisfy isSpace <?> "space"
{-# INLINE space #-}

-- | Match a specific character.
char :: Char -> Parser Char
char c = satisfy (== c) <?> [c]
{-# INLINE char #-}

-- | Match a specific character, but return its 'Word8' value.
char8 :: Char -> Parser Word8
char8 c = I.satisfy (== c2w c) <?> [c]
{-# INLINE char8 #-}

-- | Match any character except the given one.
notChar :: Char -> Parser Char
notChar c = satisfy (/= c) <?> ("not " ++ [c])
{-# INLINE notChar #-}

-- | Match any character in a set.
--
-- >vowel = inClass "aeiou"
--
-- Range notation is supported.
--
-- >halfAlphabet = inClass "a-nA-N"
--
-- To add a literal \'-\' to a set, place it at the beginning or end
-- of the string.
inClass :: String -> Char -> Bool
inClass s = (`memberChar` mySet)
    where mySet = charClass s
{-# INLINE inClass #-}

-- | Match any character not in a set.
notInClass :: String -> Char -> Bool
notInClass s = not . inClass s
{-# INLINE notInClass #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'False' on the first byte of input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeWhile :: (Char -> Bool) -> Parser B.ByteString
takeWhile p = I.takeWhile (p . w2c)
{-# INLINE takeWhile #-}

-- | Consume input as long as the predicate returns 'False'
-- (i.e. until it returns 'True'), and return the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'True' on the first byte of input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.
-- Careless use will thus result in an infinite loop.
takeTill :: (Char -> Bool) -> Parser B.ByteString
takeTill p = I.takeTill (p . w2c)
{-# INLINE takeTill #-}

-- | Skip past input for as long as the predicate returns 'True'.
skipWhile :: (Char -> Bool) -> Parser ()
skipWhile p = I.skipWhile (p . w2c)
{-# INLINE skipWhile #-}

-- | Skip over white space.
--
-- White space is whatever 'isSpace' accepts.
skipSpace :: Parser ()
skipSpace = skipWhile isSpace
{-# INLINE skipSpace #-}

-- | A predicate that matches either a carriage return @\'\\r\'@ or
-- newline @\'\\n\'@ character.
isEndOfLine :: Word8 -> Bool
isEndOfLine w = w == 13 || w == 10
{-# INLINE isEndOfLine #-}

-- | A predicate that matches either a space @\' \'@ or horizontal tab
-- @\'\\t\'@ character.
isHorizontalSpace :: Word8 -> Bool
isHorizontalSpace w = w == 32 || w == 9
{-# INLINE isHorizontalSpace #-}

{-
-- | Parse a Double. The position counter is not updated.
double :: Parser Double
double = numeric "Double" readDouble
-}

-- | Parse and decode an unsigned hexadecimal number.  The hex digits
-- @\'a\'@ through @\'f\'@ may be upper or lower case.
--
-- This parser does not accept a leading @\"0x\"@ string.
--
-- /Note/: Digits are accumulated without any overflow check.
hexadecimal :: Integral a => Parser a
{-# SPECIALISE hexadecimal :: Parser Int #-}
hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit
  where
    -- Accept the bytes for '0'..'9' (48-57) and, after ASCII
    -- lower-casing, 'a'..'f' (97-102).
    isHexDigit w = (w >= 48 && w <= 57) || (x >= 97 && x <= 102)
        where x = toLower w
    -- Fold one digit into the accumulator; for letters, 87 is the byte
    -- for 'a' (97) minus 10, so 'a' contributes the value 10.
    step a w | w >= 48 && w <= 57 = a * 16 + fromIntegral (w - 48)
             | otherwise          = a * 16 + fromIntegral (x - 87)
        where x = toLower w

-- | Parse and decode an unsigned decimal number.
--
-- /Note/: Digits are accumulated without any overflow check.
decimal :: Integral a => Parser a
{-# SPECIALISE decimal :: Parser Int #-}
decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDig
  where isDig w  = w >= 48 && w <= 57
        step a w = a * 10 + fromIntegral (w - 48)

-- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign
-- character.
signed :: Num a => Parser a -> Parser a
{-# SPECIALISE signed :: Parser Int -> Parser Int #-}
signed p = (negate <$> (char8 '-' *> p))
       <|> (char8 '+' *> p)
       <|> p