{-# LANGUAGE RankNTypes #-}{- |
Module : Language.Scheme.Parser
Copyright : Justin Ethier
Licence : MIT (see LICENSE in the distribution)
Maintainer : github.com/justinethier
Stability : experimental
Portability : portable
This module implements parsing of Scheme code.
-}moduleLanguage.Scheme.Parser(lispDef-- *Higher level parsing,mainParser,readOrThrow,readExpr,readExprList-- *Low level parsing,symbol,parseExpr,parseAtom,parseBool,parseChar,parseOctalNumber,parseBinaryNumber,parseHexNumber,parseDecimalNumber,parseNumber,parseRealNumber,parseRationalNumber,parseComplexNumber,parseEscapedChar,parseString,parseVector,parseHashTable,parseList,parseDottedList,parseQuoted,parseQuasiQuoted,parseUnquoted,parseUnquoteSpliced)whereimportLanguage.Scheme.TypesimportControl.Monad.ErrorimportData.ArrayimportqualifiedData.CharasCharimportData.CompleximportData.RatioimportqualifiedData.MapimportNumericimportText.ParserCombinators.Parsechiding(spaces)importText.Parsec.Language--import Text.Parsec.Prim (ParsecT)importqualifiedText.Parsec.TokenasP-- This was added by pull request #63 as part of a series of fixes-- to get husk to build on ghc 7.2.2---- For now this has been removed to allow husk to support the older-- GHC 6.x.x series.----import Data.Functor.Identity (Identity)-- |Language definition for SchemelispDef::LanguageDef()lispDef=emptyDef{P.commentStart="#|",P.commentEnd="|#",P.commentLine=";",P.nestedComments=True,P.identStart=letter<|>symbol,P.identLetter=letter<|>digit<|>symbol,P.reservedNames=[],P.caseSensitive=True}--lexer :: P.GenTokenParser String () Identitylexer=P.makeTokenParserlispDef--dot :: ParsecT String () Identity Stringdot=P.dotlexer--parens :: ParsecT String () Identity a -> ParsecT String () Identity aparens=P.parenslexerbrackets=P.bracketslexer--identifier :: ParsecT String () Identity Stringidentifier=P.identifierlexer-- TODO: typedef. starting point was: whiteSpace :: CharParser ()--whiteSpace :: ParsecT String () Identity ()whiteSpace=P.whiteSpacelexer--lexeme :: ParsecT String () Identity a -> ParsecT String () Identity alexeme=P.lexemelexersymbol::ParserCharsymbol=oneOf"!$%&|*+-/:<=>?@^_~."parseAtom::ParserLispValparseAtom=doatom<-identifierifatom=="."thenpzero-- Do not match this formelsereturn$AtomatomparseBool::ParserLispValparseBool=do_<-string"#"x<-oneOf"tf"return$casexof't'->BoolTrue'f'->BoolFalse_->BoolFalseparseChar::ParserLispValparseChar=do_<-try(string"#\\")c<-anyCharr<-many(letter)letpchr=c:rreturn$casepchrof"space"->Char' '"newline"->Char'\n'_->CharcparseOctalNumber::ParserLispValparseOctalNumber=do_<-try(string"#o")sign<-many(oneOf"-")num<-many1(oneOf"01234567")case(lengthsign)of0->return$Number$fst$Numeric.readOctnum!!01->return$Number$fromInteger$(*)(-1)$fst$Numeric.readOctnum!!0_->pzeroparseBinaryNumber::ParserLispValparseBinaryNumber=do_<-try(string"#b")sign<-many(oneOf"-")num<-many1(oneOf"01")case(lengthsign)of0->return$Number$fst$Numeric.readInt2(`elem`"01")Char.digitToIntnum!!01->return$Number$fromInteger$(*)(-1)$fst$Numeric.readInt2(`elem`"01")Char.digitToIntnum!!0_->pzeroparseHexNumber::ParserLispValparseHexNumber=do_<-try(string"#x")sign<-many(oneOf"-")num<-many1(digit<|>oneOf"abcdefABCDEF")case(lengthsign)of0->return$Number$fst$Numeric.readHexnum!!01->return$Number$fromInteger$(*)(-1)$fst$Numeric.readHexnum!!0_->pzero-- |Parser for Integer, base 10parseDecimalNumber::ParserLispValparseDecimalNumber=do_<-try(many(string"#d"))sign<-many(oneOf"-")num<-many1(digit)if(lengthsign)>1thenpzeroelsereturn$(Number.read)$sign++num-- |Parser for a base 10 Integer that will also-- check to see if the number is followed by-- an exponent (scientific notation). If so,-- the integer is converted to a float of the-- given magnitude.parseDecimalNumberMaybeExponent::ParserLispValparseDecimalNumberMaybeExponent=donum<-parseDecimalNumberresult<-parseNumberExponentnumreturnresult-- |Parse an integer in any baseparseNumber::ParserLispValparseNumber=parseDecimalNumberMaybeExponent<|>parseHexNumber<|>parseBinaryNumber<|>parseOctalNumber<?>"Unable to parse number"-- |Parse a floating point numberparseRealNumber::ParserLispValparseRealNumber=dosign<-many(oneOf"-+")num<-many1(digit)_<-char'.'frac<-many1(digit)letdec=num++"."++fracf<-case(lengthsign)of0->return$Float$fst$Numeric.readFloatdec!!0-- Bit of a hack, but need to support the + sign as well as the minus.1->ifsign=="-"thenreturn$Float$(*)(-1.0)$fst$Numeric.readFloatdec!!0elsereturn$Float$fst$Numeric.readFloatdec!!0_->pzeroresult<-parseNumberExponentfreturnresult-- | Parse the exponent section of a floating point number-- in scientific notation. Eg "e10" from "1.0e10"parseNumberExponent::LispVal->ParserLispValparseNumberExponentn=doexpnt<-many$oneOf"Ee"case(lengthexpnt)of0->returnn1->donum<-try(parseDecimalNumber)casenumofNumbernexp->buildResultnnexp_->pzero_->pzerowherebuildResult(Numbernum)nexp=return$Float$(fromIntegralnum)*(10**(fromIntegralnexp))buildResult(Floatnum)nexp=return$Float$num*(10**(fromIntegralnexp))buildResult__=pzeroparseRationalNumber::ParserLispValparseRationalNumber=dopnumerator<-parseDecimalNumbercasepnumeratorofNumbern->do_<-char'/'sign<-many(oneOf"-")num<-many1(digit)if(lengthsign)>1thenpzeroelsedoletpdenominator=read$sign++numifpdenominator==0thenreturn$Number0-- TODO: Prevents a div-by-zero error, but not really correct eitherelsereturn$Rational$n%pdenominator_->pzeroparseComplexNumber::ParserLispValparseComplexNumber=dolispreal<-(try(parseRealNumber)<|>try(parseRationalNumber)<|>parseDecimalNumber)letreal=caselisprealofNumbern->fromIntegernRationalr->fromRationalrFloatf->f_->0_<-char'+'lispimag<-(try(parseRealNumber)<|>try(parseRationalNumber)<|>parseDecimalNumber)letimag=caselispimagofNumbern->fromIntegernRationalr->fromRationalrFloatf->f_->0-- Case should never be reached_<-char'i'return$Complex$real:+imagparseEscapedChar::forallst.GenParserCharstCharparseEscapedChar=do_<-char'\\'c<-anyCharreturn$casecof'n'->'\n''t'->'\t''r'->'\r'_->cparseString::ParserLispValparseString=do_<-char'"'x<-many(parseEscapedChar<|>noneOf("\""))_<-char'"'return$StringxparseVector::ParserLispValparseVector=dovals<-sepByparseExprwhiteSpacereturn$Vector(listArray(0,(lengthvals-1))vals)-- |Parse a hash table. The table is either empty or is made up of-- an alist (associative list)parseHashTable::ParserLispValparseHashTable=do-- This function uses explicit recursion to loop over the parsed list:-- As long as it is an alist, the members are appended to an accumulator-- so they can be added to the hash table. However, if the input list is-- determined not to be an alist, Nothing is returned, letting the parser-- know that a valid hashtable was not read.letf::[(LispVal,LispVal)]->[LispVal]->Maybe[(LispVal,LispVal)]facc[]=Justaccfacc(List[a,b]:ls)=f(acc++[(a,b)])lsfacc(DottedList[a]b:ls)=f(acc++[(a,b)])lsf_(_:_)=Nothingvals<-sepByparseExprwhiteSpaceletmvals=f[]valscasemvalsofJustm->return$HashTable$Data.Map.fromListmNothing->pzeroparseList::ParserLispValparseList=liftMList$sepByparseExprwhiteSpace-- TODO: wanted to use endBy (or a variant) above, but it causes an error such that dotted lists are not parsedparseDottedList::ParserLispValparseDottedList=dophead<-endByparseExprwhiteSpaceptail<-dot>>parseExpr--char '.' >> whiteSpace >> parseExpr-- return $ DottedList phead ptailcaseptailofDottedListlsl->return$DottedList(phead++ls)l-- Issue #41-- Improper lists are tricky because if an improper list ends in a proper list, then it becomes proper as well.-- The following cases handle that, as well as preserving necessary functionality when appropriate, such as for-- unquoting.---- FUTURE: I am not sure if this is complete, in fact the "unquote" seems like it could either be incorrect or-- one special case among others. Anyway, for the 3.3 release this is good enough to pass all test-- cases. It will be revisited later if necessary.--List(Atom"unquote":_)->return$DottedListpheadptailListls->return$List$phead++ls{- Regarding above, see http://community.schemewiki.org/?scheme-faq-language#dottedapp Note, however, that most Schemes expand literal lists occurring in function applications,
e.g. (foo bar . (1 2 3)) is expanded into (foo bar 1 2 3) by the reader. It is not entirely
clear whether this is a consequence of the standard - the notation is not part of the R5RS
grammar but there is strong evidence to suggest a Scheme implementation cannot comply with
all of R5RS without performing this transformation. -}_->return$DottedListpheadptailparseQuoted::ParserLispValparseQuoted=do_<-lexeme$char'\''x<-parseExprreturn$List[Atom"quote",x]parseQuasiQuoted::ParserLispValparseQuasiQuoted=do_<-lexeme$char'`'x<-parseExprreturn$List[Atom"quasiquote",x]parseUnquoted::ParserLispValparseUnquoted=do_<-try(lexeme$char',')x<-parseExprreturn$List[Atom"unquote",x]parseUnquoteSpliced::ParserLispValparseUnquoteSpliced=do_<-try(lexeme$string",@")x<-parseExprreturn$List[Atom"unquote-splicing",x]-- FUTURE: should be able to use the grammar from R5RS-- to make parsing more efficient (mostly by minimizing-- or eliminating the number of try's below)-- |Parse an expressionparseExpr::ParserLispValparseExpr=try(lexemeparseComplexNumber)<|>try(lexemeparseRationalNumber)<|>try(lexemeparseRealNumber)<|>try(lexemeparseNumber)<|>lexemeparseChar<|>parseUnquoteSpliced<|>do_<-try(lexeme$string"#(")x<-parseVector_<-lexeme$char')'returnx-- <|> do _ <- try (lexeme $ string "#hash(")-- x <- parseHashTable-- _ <- lexeme $ char ')'-- return x<|>try(parseAtom)<|>lexemeparseString<|>lexemeparseBool<|>parseQuoted<|>parseQuasiQuoted<|>parseUnquoted<|>try(parensparseList)<|>parensparseDottedList<|>try(bracketsparseList)<|>bracketsparseDottedList<?>"Expression"mainParser::ParserLispValmainParser=do_<-whiteSpacex<-parseExpr-- FUTURE? (seemed to break test cases, but is supposed to be best practice?) eofreturnx-- |Use a parser to parse the given text, throwing an error-- if there is a problem parsing the text.readOrThrow::Parsera->String->ThrowsErrorareadOrThrowparserinput=caseparseparser"lisp"inputofLefterr->throwError$ParsererrRightval->returnval-- |Parse an expression from a string of textreadExpr::String->ThrowsErrorLispValreadExpr=readOrThrowmainParser-- |Parse many expressions from a string of textreadExprList::String->ThrowsError[LispVal]readExprList=readOrThrow(endBymainParserwhiteSpace)