-- | Reference lexer for core language parser. Slow but Simple.
--
--   The lexers here all use 'String' in place of a real name type.
--   After applying these functions to the program text, we need
--   to use `renameTok` to convert the strings in `TokNamed` tokens
--   into the name type specific to the language fragment to be parsed.
--
module DDC.Core.Lexer
        ( module DDC.Core.Lexer.Tokens
        , module DDC.Core.Lexer.Names

          -- * Lexer
        , lexModuleWithOffside
        , lexExp)
where
import DDC.Core.Lexer.Offside
import DDC.Core.Lexer.Comments
import DDC.Core.Lexer.Names
import DDC.Core.Lexer.Tokens
import DDC.Data.SourcePos
import DDC.Data.Token
import Data.Char
import Data.List


-- Module ---------------------------------------------------------------------
-- | Lex a module and apply the offside rule.
--
--   Automatically drop comments from the token stream along the way.
--
lexModuleWithOffside
        :: FilePath     -- ^ Path to source file, for error messages.
        -> Int          -- ^ Starting line number.
        -> String       -- ^ String containing program text.
        -> [Token (Tok String)]

lexModuleWithOffside sourceName lineStart str
 = {-# SCC lexWithOffside #-}
   applyOffside [] []
        $ addStarts
        $ dropComments
        $ lexString sourceName lineStart str


-- Exp ------------------------------------------------------------------------
-- | Lex a string into tokens.
--
--   Automatically drop comments from the token stream along the way.
--
lexExp  :: FilePath     -- ^ Path to source file, for error messages.
        -> Int          -- ^ Starting line number.
        -> String       -- ^ String containing program text.
        -> [Token (Tok String)]

lexExp sourceName lineStart str
 = {-# SCC lexExp #-}
   dropNewLines
        $ dropComments
        $ lexString sourceName lineStart str

-- Generic --------------------------------------------------------------------
-- | Lex a whole string into tokens, tracking line and column numbers
--   for the 'SourcePos' attached to each token.
lexString :: String -> Int -> String -> [Token (Tok String)]
lexString sourceName lineStart str
 = lexWord lineStart 1 str
 where
  lexWord :: Int -> Int -> String -> [Token (Tok String)]
  lexWord line column w
   = let  -- Wrap a token with the position of its first character.
          tok t = Token t (SourcePos sourceName line column)
          tokM  = tok . KM
          tokA  = tok . KA
          tokN  = tok . KN

          -- Drop characters up to (and keeping) the first occurrence
          -- of the given pattern. Used to skip over block comment bodies.
          lexUpto pat rest
           = case dropWhile (not . isPrefixOf pat) (tails rest) of
                (x : _)  -> x
                _        -> []

          -- Continue lexing on the same line, advancing the column by n.
          lexMore n rest
           = lexWord line (column + n) rest

     in case w of
          []              -> []

          -- Whitespace
          ' '  : w'       -> lexMore 1 w'
          '\t' : w'       -> lexMore 8 w'

          -- Literal values
          -- This needs to come before the rule for '-'
          c : cs
           | isDigit c
           , (body, rest)         <- span isLitBody cs
           -> tokN (KLit (c : body))       : lexMore (length (c : body)) rest

          '-' : c : cs
           | isDigit c
           , (body, rest)         <- span isLitBody cs
           -- Advance past the leading '-' as well, otherwise source
           -- positions after a negative literal are off by one column.
           -> tokN (KLit ('-' : c : body)) : lexMore (length ('-' : c : body)) rest

          -- Meta tokens
          '{' : '-' : w'
           -> tokM KCommentBlockStart : lexMore 2 (lexUpto "-}" w')

          '-' : '}' : w'
           -> tokM KCommentBlockEnd   : lexMore 2 w'

          '-' : '-' : w'
           -- Skip to the end of line; the KNewLine rule resets the column.
           -> let (_junk, w'') = span (/= '\n') w'
              in  tokM KCommentLineStart : lexMore 2 w''

          '\n' : w'       -> tokM KNewLine : lexWord (line + 1) 1 w'

          -- Wrapper operator symbols.
          '(' : c : cs
           | isOpStart c
           , (body, ')' : w')     <- span isOpBody cs
           -> tokA (KOpVar (c : body)) : lexMore (2 + length (c : body)) w'

          -- The unit data constructor
          '(' : ')' : w'  -> tokA KDaConUnit      : lexMore 2 w'

          -- Compound Parens
          '[' : ':' : w'  -> tokA KSquareColonBra : lexMore 2 w'
          ':' : ']' : w'  -> tokA KSquareColonKet : lexMore 2 w'
          '{' : ':' : w'  -> tokA KBraceColonBra  : lexMore 2 w'
          ':' : '}' : w'  -> tokA KBraceColonKet  : lexMore 2 w'

          -- Function Constructors
          '~' : '>' : w'  -> tokA KArrowTilde     : lexMore 2 w'
          '-' : '>' : w'  -> tokA KArrowDash      : lexMore 2 w'
          '<' : '-' : w'  -> tokA KArrowDashLeft  : lexMore 2 w'
          '=' : '>' : w'  -> tokA KArrowEquals    : lexMore 2 w'

          -- Compound symbols
          '/' : '\\' : w' -> tokA KBigLambda      : lexMore 2 w'

          -- Debruijn indices
          '^' : cs
           | (ds, rest)           <- span isDigit cs
           , not (null ds)
           -> tokA (KIndex (read ds)) : lexMore (1 + length ds) rest

          -- Parens
          '(' : w'        -> tokA KRoundBra       : lexMore 1 w'
          ')' : w'        -> tokA KRoundKet       : lexMore 1 w'
          '[' : w'        -> tokA KSquareBra      : lexMore 1 w'
          ']' : w'        -> tokA KSquareKet      : lexMore 1 w'
          '{' : w'        -> tokA KBraceBra       : lexMore 1 w'
          '}' : w'        -> tokA KBraceKet       : lexMore 1 w'

          -- Punctuation Symbols
          '.'  : w'       -> tokA KDot            : lexMore 1 w'
          ','  : w'       -> tokA KComma          : lexMore 1 w'
          ';'  : w'       -> tokA KSemiColon      : lexMore 1 w'
          '_'  : w'       -> tokA KUnderscore     : lexMore 1 w'
          '\\' : w'       -> tokA KBackSlash      : lexMore 1 w'

          -- Operator symbols.
          c : cs
           | isOpStart c
           , (body, rest)         <- span isOpBody cs
           -> tokA (KOp (c : body)) : lexMore (length (c : body)) rest

          -- Operator body symbols.
          '^' : w'        -> tokA KHat            : lexMore 1 w'

          -- Bottoms
          -- NOTE(review): these match any word starting with "Pure"/"Empty";
          -- longer identifiers are assumed to be rejected elsewhere.
          name
           | Just w'              <- stripPrefix "Pure"  name
           -- Advance by the length of the matched keyword, not a fixed 2.
           -> tokA KBotEffect  : lexMore (length "Pure")  w'

           | Just w'              <- stripPrefix "Empty" name
           -> tokA KBotClosure : lexMore (length "Empty") w'

          -- Named Constructors
          c : cs
           | isConStart c
           , (body,  rest)        <- span isConBody cs
           , (body', rest')
              <- case rest of
                   '\'' : rest'   -> (body ++ "'", rest')
                   '#'  : rest'   -> (body ++ "#", rest')
                   _              -> (body, rest)
           -> let readNamedCon s
                   | Just socon   <- readSoConBuiltin s
                   = tokA (KSoConBuiltin socon) : lexMore (length s) rest'

                   | Just kicon   <- readKiConBuiltin s
                   = tokA (KKiConBuiltin kicon) : lexMore (length s) rest'

                   | Just twcon   <- readTwConBuiltin s
                   = tokA (KTwConBuiltin twcon) : lexMore (length s) rest'

                   | Just tccon   <- readTcConBuiltin s
                   = tokA (KTcConBuiltin tccon) : lexMore (length s) rest'

                   | Just con     <- readCon s
                   = tokN (KCon con)            : lexMore (length s) rest'

                   | otherwise
                   = [tok (KJunk [c])]

              in  readNamedCon (c : body')

          -- Keywords, Named Variables and Witness constructors
          c : cs
           | isVarStart c
           , (body,  rest)        <- span isVarBody cs
           , (body', rest')
              <- case rest of
                   '#' : rest'    -> (body ++ "#", rest')
                   _              -> (body, rest)
           -> let readNamedVar s
                   | Just t       <- lookup s keywords
                   = tok t                      : lexMore (length s) rest'

                   | Just wc      <- readWbConBuiltin s
                   = tokA (KWbConBuiltin wc)    : lexMore (length s) rest'

                   | Just v       <- readVar s
                   = tokN (KVar v)              : lexMore (length s) rest'

                   | otherwise
                   = [tok (KJunk [c])]

              in  readNamedVar (c : body')

          -- Some unrecognised character.
          -- We still need to keep lexing as this may be in a comment.
          c : cs -> (tok $ KJunk [c]) : lexMore 1 cs