{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}{- |
Module : Text.Pandoc.Readers.Markdown
Copyright : Copyright (C) 2006-2010 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of markdown-formatted plain text to 'Pandoc' document.
-}moduleText.Pandoc.Readers.Markdown(readMarkdown)whereimportData.List(transpose,sortBy,findIndex,intercalate)importqualifiedData.MapasMimportData.Ord(comparing)importData.Char(isAlphaNum)importData.MaybeimportText.Pandoc.DefinitionimportText.Pandoc.GenericimportText.Pandoc.SharedimportText.Pandoc.ParsingimportText.Pandoc.Readers.LaTeX(rawLaTeXInline,rawLaTeXEnvironment')importText.Pandoc.Readers.HTML(htmlTag,htmlInBalanced,isInlineTag,isBlockTag,isTextTag,isCommentTag)importText.Pandoc.CharacterReferences(decodeCharacterReferences)importText.ParserCombinators.ParsecimportControl.Monad(when,liftM,guard)importText.HTML.TagSoupimportText.HTML.TagSoup.Match(tagOpen)-- | Read markdown from an input string and return a Pandoc document.readMarkdown::ParserState-- ^ Parser state, including options for parser->String-- ^ String to parse (assuming @'\n'@ line endings)->PandocreadMarkdownstates=(readWithparseMarkdown)state(s++"\n\n")---- Constants and data structure definitions--isBulletListMarker::Char->BoolisBulletListMarker'*'=TrueisBulletListMarker'+'=TrueisBulletListMarker'-'=TrueisBulletListMarker_=FalseisHruleChar::Char->BoolisHruleChar'*'=TrueisHruleChar'-'=TrueisHruleChar'_'=TrueisHruleChar_=FalsesetextHChars::[Char]setextHChars="=-"isBlank::Char->BoolisBlank' '=TrueisBlank'\t'=TrueisBlank'\n'=TrueisBlank_=False---- auxiliary functions--indentSpaces::GenParserCharParserState[Char]indentSpaces=try$dostate<-getStatelettabStop=stateTabStopstatecounttabStop(char' ')<|>string"\t"<?>"indentation"nonindentSpaces::GenParserCharParserState[Char]nonindentSpaces=dostate<-getStatelettabStop=stateTabStopstatesps<-many(char' ')iflengthsps<tabStopthenreturnspselseunexpected"indented line"skipNonindentSpaces::GenParserCharParserState()skipNonindentSpaces=dostate<-getStateatMostSpaces(stateTabStopstate-1)atMostSpaces::Int->GenParserCharParserState()atMostSpaces0=notFollowedBy(char' ')atMostSpacesn=(char' '>>atMostSpaces(n-1))<|>return()-- | Fail unless we're at beginning of a line.failUnlessBeginningOfLine::GenParsertokst()failUnlessBeginningOfLine=dopos<-getPositionifsourceColumnpos==1thenreturn()elsefail"not beginning of line"-- | Parse a sequence of inline elements between square brackets,-- including inlines between balanced pairs of square brackets.inlinesInBalancedBrackets::GenParserCharParserStateInline->GenParserCharParserState[Inline]inlinesInBalancedBracketsparser=try$dochar'['result<-manyTill((dolookAhead$try$do(Strres)<-parserguard(res=="[")bal<-inlinesInBalancedBracketsparserreturn$[Str"["]++bal++[Str"]"])<|>(count1parser))(char']')return$concatresult---- document structure--titleLine::GenParserCharParserState[Inline]titleLine=try$dochar'%'skipSpacesres<-many$(notFollowedBynewline>>inline)<|>try(endline>>whitespace)newlinereturn$normalizeSpacesresauthorsLine::GenParserCharParserState[[Inline]]authorsLine=try$dochar'%'skipSpacesauthors<-sepEndBy(many(notFollowedBy(satisfy$\c->c==';'||c=='\n')>>inline))(char';'<|>try(newline>>notFollowedByblankline>>spaceChar))newlinereturn$filter(not.null)$mapnormalizeSpacesauthorsdateLine::GenParserCharParserState[Inline]dateLine=try$dochar'%'skipSpacesdate<-manyTillinlinenewlinereturn$normalizeSpacesdatetitleBlock::GenParserCharParserState([Inline],[[Inline]],[Inline])titleBlock=try$dofailIfStricttitle<-option[]titleLineauthor<-option[]authorsLinedate<-option[]dateLineoptionalblanklinesreturn(title,author,date)parseMarkdown::GenParserCharParserStatePandocparseMarkdown=do-- markdown allows raw HTMLupdateState(\state->state{stateParseRaw=True})startPos<-getPosition-- go through once just to get list of reference keys and notes-- docMinusKeys is the raw document with blanks where the keys/notes were...st<-getStateletfirstPassParser=referenceKey<|>(ifstateStrictstthenpzeroelsenoteBlock)<|>lineClumpdocMinusKeys<-liftMconcat$manyTillfirstPassParsereofsetInputdocMinusKeyssetPositionstartPosst'<-getStateletreversedNotes=stateNotesst'updateState$\s->s{stateNotes=reversereversedNotes}-- now parse it for real...(title,author,date)<-option([],[],[])titleBlockblocks<-parseBlocksletdoc=Pandoc(Metatitleauthordate)$filter(/=Null)blocks-- if there are labeled examples, change references into numbersexamples<-liftMstateExamplesgetStatelethandleExampleRef::Inline->InlinehandleExampleRefz@(Str('@':xs))=caseM.lookupxsexamplesofJustn->Str(shown)Nothing->zhandleExampleRefz=zifM.nullexamplesthenreturndocelsereturn$bottomUphandleExampleRefdoc-- -- initial pass for references and notes--referenceKey::GenParserCharParserState[Char]referenceKey=try$dostartPos<-getPositionskipNonindentSpaceslab<-referencechar':'skipSpaces>>optionalnewline>>skipSpaces>>notFollowedBy(char'[')letnl=char'\n'>>notFollowedByblankline>>return' 'letsourceURL=liftMunwords$many$try$donotFollowedBy'referenceTitleskipManyspaceCharoptionalnlskipManyspaceCharnotFollowedBy'referencemany1(satisfy$not.isBlank)letbetweenAngles=try$char'<'>>manyTill(noneOf">\n"<|>nl)(char'>')src<-trybetweenAngles<|>sourceURLtit<-option""referenceTitleblanklinesendPos<-getPositionlettarget=(escapeURI$removeTrailingSpacesrc,tit)st<-getStateletoldkeys=stateKeysstupdateState$\s->s{stateKeys=M.insert(toKeylab)targetoldkeys}-- return blanks so line count isn't affectedreturn$replicate(sourceLineendPos-sourceLinestartPos)'\n'referenceTitle::GenParserCharstStringreferenceTitle=try$doskipSpaces>>optionalnewline>>skipSpacestit<-(charsInBalanced'('')'>>=return.unwords.words)<|>dodelim<-char'\''<|>char'"'manyTillanyChar(try(chardelim>>skipSpaces>>notFollowedBy(noneOf")\n")))return$decodeCharacterReferencestitnoteMarker::GenParserCharParserState[Char]noteMarker=string"[^">>many1Till(satisfy$not.isBlank)(char']')rawLine::GenParserCharParserState[Char]rawLine=try$donotFollowedByblanklinenotFollowedBy'$try$skipNonindentSpaces>>noteMarkeroptionalindentSpacesanyLinerawLines::GenParserCharParserState[Char]rawLines=dofirst<-anyLinerest<-manyrawLinereturn$unlines(first:rest)noteBlock::GenParserCharParserState[Char]noteBlock=try$dostartPos<-getPositionskipNonindentSpacesref<-noteMarkerchar':'optionalblanklineoptionalindentSpacesraw<-sepByrawLines(try(blankline>>indentSpaces>>notFollowedByblankline))optionalblanklinesendPos<-getPositionletnewnote=(ref,(intercalate"\n"raw)++"\n\n")st<-getStateletoldnotes=stateNotesstupdateState$\s->s{stateNotes=newnote:oldnotes}-- return blanks so line count isn't affectedreturn$replicate(sourceLineendPos-sourceLinestartPos)'\n'---- parsing blocks--parseBlocks::GenParserCharParserState[Block]parseBlocks=manyTillblockeofblock::GenParserCharParserStateBlockblock=dost<-getStatechoice(ifstateStrictstthen[header,codeBlockIndented,blockQuote,hrule,bulletList,orderedList,htmlBlock,para,plain,nullBlock]else[codeBlockDelimited,macro,header,table,codeBlockIndented,lhsCodeBlock,blockQuote,hrule,bulletList,orderedList,definitionList,rawTeXBlock,para,rawHtmlBlocks,plain,nullBlock])<?>"block"---- header blocks--header::GenParserCharParserStateBlockheader=setextHeader<|>atxHeader<?>"header"atxHeader::GenParserCharParserStateBlockatxHeader=try$dolevel<-many1(char'#')>>=return.lengthnotFollowedBy(char'.'<|>char')')-- this would be a listskipSpacestext<-manyTillinlineatxClosing>>=return.normalizeSpacesreturn$HeaderleveltextatxClosing::GenParserCharst[Char]atxClosing=try$skipMany(char'#')>>blanklinessetextHeader::GenParserCharParserStateBlocksetextHeader=try$do-- This lookahead prevents us from wasting time parsing Inlines-- unless necessary -- it gives a significant performance boost.lookAhead$anyLine>>many1(oneOfsetextHChars)>>blanklinetext<-many1TillinlinenewlineunderlineChar<-oneOfsetextHCharsmany(charunderlineChar)blanklinesletlevel=(fromMaybe0$findIndex(==underlineChar)setextHChars)+1return$Headerlevel(normalizeSpacestext)---- hrule block--hrule::GenParserCharstBlockhrule=try$doskipSpacesstart<-satisfyisHruleCharcount2(skipSpaces>>charstart)skipMany(spaceChar<|>charstart)newlineoptionalblanklinesreturnHorizontalRule---- code blocks--indentedLine::GenParserCharParserState[Char]indentedLine=indentSpaces>>manyTillanyCharnewline>>=return.(++"\n")codeBlockDelimiter::MaybeInt->GenParserCharst(Int,([Char],[[Char]],[([Char],[Char])]))codeBlockDelimiterlen=try$dosize<-caselenofJustl->countl(char'~')>>many(char'~')>>returnlNothing->count3(char'~')>>many(char'~')>>=return.(+3).lengthmanyspaceCharattr<-option([],[],[])attributesblanklinereturn(size,attr)attributes::GenParserCharst([Char],[[Char]],[([Char],[Char])])attributes=try$dochar'{'manyspaceCharattrs<-many(attribute>>~manyspaceChar)char'}'let(ids,classes,keyvals)=unzip3attrsletid'=ifnullidsthen""elseheadidsreturn(id',concatclasses,concatkeyvals)attribute::GenParserCharst([Char],[[Char]],[([Char],[Char])])attribute=identifierAttr<|>classAttr<|>keyValAttridentifier::GenParserCharst[Char]identifier=dofirst<-letterrest<-many$alphaNum<|>oneOf"-_:."return(first:rest)identifierAttr::GenParserCharst([Char],[a],[a1])identifierAttr=try$dochar'#'result<-identifierreturn(result,[],[])classAttr::GenParserCharst([Char],[[Char]],[a])classAttr=try$dochar'.'result<-identifierreturn("",[result],[])keyValAttr::GenParserCharst([Char],[a],[([Char],[Char])])keyValAttr=try$dokey<-identifierchar'='char'"'val<-manyTill(satisfy(/='\n'))(char'"')return("",[],[(key,val)])codeBlockDelimited::GenParserCharstBlockcodeBlockDelimited=try$do(size,attr)<-codeBlockDelimiterNothingcontents<-manyTillanyLine(codeBlockDelimiter(Justsize))blanklinesreturn$CodeBlockattr$intercalate"\n"contentscodeBlockIndented::GenParserCharParserStateBlockcodeBlockIndented=docontents<-many1(indentedLine<|>try(dob<-blanklinesl<-indentedLinereturn$b++l))optionalblanklinesst<-getStatereturn$CodeBlock("",stateIndentedCodeClassesst,[])$stripTrailingNewlines$concatcontentslhsCodeBlock::GenParserCharParserStateBlocklhsCodeBlock=dofailUnlessLHSliftM(CodeBlock("",["sourceCode","literate","haskell"],[]))(lhsCodeBlockBird<|>lhsCodeBlockLaTeX)<|>liftM(CodeBlock("",["sourceCode","haskell"],[]))lhsCodeBlockInverseBirdlhsCodeBlockLaTeX::GenParserCharParserStateStringlhsCodeBlockLaTeX=try$dostring"\\begin{code}"manyTillspaceCharnewlinecontents<-many1TillanyChar(try$string"\\end{code}")blanklinesreturn$stripTrailingNewlinescontentslhsCodeBlockBird::GenParserCharParserStateStringlhsCodeBlockBird=lhsCodeBlockBirdWith'>'lhsCodeBlockInverseBird::GenParserCharParserStateStringlhsCodeBlockInverseBird=lhsCodeBlockBirdWith'<'lhsCodeBlockBirdWith::Char->GenParserCharParserStateStringlhsCodeBlockBirdWithc=try$dopos<-getPositionwhen(sourceColumnpos/=1)$fail"Not in first column"lns<-many1$birdTrackLinec-- if (as is normal) there is always a space after >, drop itletlns'=ifall(\ln->nullln||take1ln==" ")lnsthenmap(drop1)lnselselnsblanklinesreturn$intercalate"\n"lns'birdTrackLine::Char->GenParserCharst[Char]birdTrackLinec=try$docharc-- allow html tags on left margin:when(c=='<')$notFollowedBylettermanyTillanyCharnewline---- block quotes--emailBlockQuoteStart::GenParserCharParserStateCharemailBlockQuoteStart=try$skipNonindentSpaces>>char'>'>>~optional(char' ')emailBlockQuote::GenParserCharParserState[[Char]]emailBlockQuote=try$doemailBlockQuoteStartraw<-sepBy(many(nonEndline<|>(try(endline>>notFollowedByemailBlockQuoteStart>>return'\n'))))(try(newline>>emailBlockQuoteStart))newline<|>(eof>>return'\n')optionalblanklinesreturnrawblockQuote::GenParserCharParserStateBlockblockQuote=doraw<-emailBlockQuote-- parse the extracted block, which may contain various block elements:contents<-parseFromStringparseBlocks$(intercalate"\n"raw)++"\n\n"return$BlockQuotecontents---- list blocks--bulletListStart::GenParserCharParserState()bulletListStart=try$dooptionalnewline-- if preceded by a Plain block in a list contextskipNonindentSpacesnotFollowedBy'hrule-- because hrules start out just like listssatisfyisBulletListMarkerspaceCharskipSpacesanyOrderedListStart::GenParserCharParserState(Int,ListNumberStyle,ListNumberDelim)anyOrderedListStart=try$dooptionalnewline-- if preceded by a Plain block in a list contextskipNonindentSpacesnotFollowedBy$string"p.">>spaceChar>>digit-- page numberstate<-getStateifstateStrictstatethendomany1digitchar'.'spaceCharreturn(1,DefaultStyle,DefaultDelim)elsedo(num,style,delim)<-anyOrderedListMarker-- if it could be an abbreviated first name, insist on more than one spaceifdelim==Period&&(style==UpperAlpha||(style==UpperRoman&&num`elem`[1,5,10,50,100,500,1000]))thenchar'\t'<|>(try$char' '>>spaceChar)elsespaceCharskipSpacesreturn(num,style,delim)listStart::GenParserCharParserState()listStart=bulletListStart<|>(anyOrderedListStart>>return())-- parse a line of a list item (start = parser for beginning of list item)listLine::GenParserCharParserState[Char]listLine=try$donotFollowedBy'listStartnotFollowedByblanklinenotFollowedBy'(doindentSpacesmany(spaceChar)listStart)chunks<-manyTill(liftMsnd(htmlTagisCommentTag)<|>count1anyChar)newlinereturn$concatchunks++"\n"-- parse raw text for one list item, excluding start marker and continuationsrawListItem::GenParserCharParserState[Char]rawListItem=try$dolistStartresult<-many1listLineblanks<-manyblanklinereturn$concatresult++blanks-- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline.-- note: nested lists are parsed as continuationslistContinuation::GenParserCharParserState[Char]listContinuation=try$dolookAheadindentSpacesresult<-many1listContinuationLineblanks<-manyblanklinereturn$concatresult++blankslistContinuationLine::GenParserCharParserState[Char]listContinuationLine=try$donotFollowedByblanklinenotFollowedBy'listStartoptionalindentSpacesresult<-manyTillanyCharnewlinereturn$result++"\n"listItem::GenParserCharParserState[Block]listItem=try$dofirst<-rawListItemcontinuations<-manylistContinuation-- parsing with ListItemState forces markers at beginning of lines to-- count as list item markers, even if not separated by blank space.-- see definition of "endline"state<-getStateletoldContext=stateParserContextstatesetState$state{stateParserContext=ListItemState}-- parse the extracted block, which may contain various block elements:letraw=concat(first:continuations)contents<-parseFromStringparseBlocksrawupdateState(\st->st{stateParserContext=oldContext})returncontentsorderedList::GenParserCharParserStateBlockorderedList=try$do(start,style,delim)<-lookAheadanyOrderedListStartitems<-many1listItemreturn$OrderedList(start,style,delim)$compactifyitemsbulletList::GenParserCharParserStateBlockbulletList=try$dolookAheadbulletListStartmany1listItem>>=return.BulletList.compactify-- definition listsdefListMarker::GenParserCharParserState()defListMarker=dosps<-nonindentSpaceschar':'<|>char'~'st<-getStatelettabStop=stateTabStopstletremaining=tabStop-(lengthsps+1)ifremaining>0thencountremaining(char' ')<|>string"\t"elsepzeroreturn()definitionListItem::GenParserCharParserState([Inline],[[Block]])definitionListItem=try$do-- first, see if this has any chance of being a definition list:lookAhead(anyLine>>optionalblankline>>defListMarker)term<-manyTillinlinenewlineoptionalblanklineraw<-many1defRawBlockstate<-getStateletoldContext=stateParserContextstate-- parse the extracted block, which may contain various block elements:contents<-mapM(parseFromStringparseBlocks)rawupdateState(\st->st{stateParserContext=oldContext})return((normalizeSpacesterm),contents)defRawBlock::GenParserCharParserState[Char]defRawBlock=try$dodefListMarkerfirstline<-anyLinerawlines<-many(notFollowedByblankline>>indentSpaces>>anyLine)trailing<-option""blanklinescont<-liftMconcat$many$dolns<-many1$notFollowedByblankline>>indentSpaces>>anyLinetrl<-option""blanklinesreturn$unlineslns++trlreturn$firstline++"\n"++unlinesrawlines++trailing++contdefinitionList::GenParserCharParserStateBlockdefinitionList=doitems<-many1definitionListItem-- "compactify" the definition list:letdefs=mapsnditemsletdefBlocks=reverse$concat$concatdefsletisPara(Para_)=TrueisPara_=Falseletitems'=casetake1defBlocksof[Parax]->ifnot$anyisPara(drop1defBlocks)thenlet(t,ds)=lastitemslastDef=lastdsds'=initds++[initlastDef++[Plainx]]ininititems++[(t,ds')]elseitems_->itemsreturn$DefinitionListitems'---- paragraph block--isHtmlOrBlank::Inline->BoolisHtmlOrBlank(RawInline"html"_)=TrueisHtmlOrBlank(Space)=TrueisHtmlOrBlank(LineBreak)=TrueisHtmlOrBlank_=Falsepara::GenParserCharParserStateBlockpara=try$doresult<-liftMnormalizeSpaces$many1inlineguard$not.allisHtmlOrBlank$resultoption(Plainresult)$try$donewlineblanklines<|>(getState>>=guard.stateStrict>>lookAhead(blockQuote<|>header)>>return"")return$Pararesultplain::GenParserCharParserStateBlockplain=many1inline>>~spaces>>=return.Plain.normalizeSpaces-- -- raw html--htmlElement::GenParserCharParserState[Char]htmlElement=strictHtmlBlock<|>liftMsnd(htmlTagisBlockTag)htmlBlock::GenParserCharParserStateBlockhtmlBlock=try$dofailUnlessBeginningOfLinefirst<-htmlElementfinalSpace<-manyspaceCharfinalNewlines<-manynewlinereturn$RawBlock"html"$first++finalSpace++finalNewlinesstrictHtmlBlock::GenParserCharParserState[Char]strictHtmlBlock=dofailUnlessBeginningOfLinehtmlInBalanced(not.isInlineTag)rawVerbatimBlock::GenParserCharParserStateStringrawVerbatimBlock=try$do(TagOpentag_,open)<-htmlTag(tagOpen(\t->t=="pre"||t=="style"||t=="script")(constTrue))contents<-manyTillanyChar(htmlTag(~==TagClosetag))return$open++contents++renderTags[TagClosetag]rawTeXBlock::GenParserCharParserStateBlockrawTeXBlock=dofailIfStrictresult<-liftM(RawBlock"latex")rawLaTeXEnvironment'<|>liftM(RawBlock"context")rawConTeXtEnvironment'spacesreturnresultrawHtmlBlocks::GenParserCharParserStateBlockrawHtmlBlocks=dohtmlBlocks<-many1$doblk<-rawVerbatimBlock<|>liftMsnd(htmlTagisBlockTag)sps<-dosp1<-manyspaceCharsp2<-option""(blankline>>return"\n")sp3<-manyspaceCharsp4<-option""blanklinesreturn$sp1++sp2++sp3++sp4-- note: we want raw html to be able to-- precede a code block, when separated-- by a blank linereturn$blk++spsletcombined=concathtmlBlocksletcombined'=iflastcombined=='\n'theninitcombinedelsecombinedreturn$RawBlock"html"combined'---- Tables-- -- Parse a dashed line with optional trailing spaces; return its length-- and the length including trailing space.dashedLine::Char->GenParserCharst(Int,Int)dashedLinech=dodashes<-many1(charch)sp<-manyspaceCharreturn$(lengthdashes,length$dashes++sp)-- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text.simpleTableHeader::Bool-- ^ Headerless table ->GenParserCharParserState([[Block]],[Alignment],[Int])simpleTableHeaderheadless=try$dorawContent<-ifheadlessthenreturn""elseanyLineinitSp<-nonindentSpacesdashes<-many1(dashedLine'-')newlinelet(lengths,lines')=unzipdashesletindices=scanl(+)(lengthinitSp)lines'-- If no header, calculate alignment on basis of first row of textrawHeads<-liftM(tail.splitByIndices(initindices))$ifheadlessthenlookAheadanyLineelsereturnrawContentletaligns=zipWithalignType(map(\a->[a])rawHeads)lengthsletrawHeads'=ifheadlessthenreplicate(lengthdashes)""elserawHeadsheads<-mapM(parseFromString(manyplain))$mapremoveLeadingTrailingSpacerawHeads'return(heads,aligns,indices)-- Parse a table footer - dashed lines followed by blank line.tableFooter::GenParserCharParserState[Char]tableFooter=try$skipNonindentSpaces>>many1(dashedLine'-')>>blanklines-- Parse a table separator - dashed line.tableSep::GenParserCharParserStateChartableSep=try$skipNonindentSpaces>>many1(dashedLine'-')>>char'\n'-- Parse a raw line and split it into chunks by indices.rawTableLine::[Int]->GenParserCharParserState[String]rawTableLineindices=donotFollowedBy'(blanklines<|>tableFooter)line<-many1TillanyCharnewlinereturn$mapremoveLeadingTrailingSpace$tail$splitByIndices(initindices)line-- Parse a table line and return a list of lists of blocks (columns).tableLine::[Int]->GenParserCharParserState[[Block]]tableLineindices=rawTableLineindices>>=mapM(parseFromString(manyplain))-- Parse a multiline table row and return a list of blocks (columns).multilineRow::[Int]->GenParserCharParserState[[Block]]multilineRowindices=docolLines<-many1(rawTableLineindices)letcols=mapunlines$transposecolLinesmapM(parseFromString(manyplain))cols-- Parses a table caption: inlines beginning with 'Table:'-- and followed by blank lines.tableCaption::GenParserCharParserState[Inline]tableCaption=try$doskipNonindentSpacesstring":"<|>string"Table:"result<-many1inlineblanklinesreturn$normalizeSpacesresult-- Parse a simple table with '---' header and one line per row.simpleTable::Bool-- ^ Headerless table->GenParserCharParserStateBlocksimpleTableheadless=doTableca_whl<-tableWith(simpleTableHeaderheadless)tableLine(return())(ifheadlessthentableFooterelsetableFooter<|>blanklines)tableCaption-- Simple tables get 0s for relative column widths (i.e., use default)return$Tableca(replicate(lengtha)0)hl-- Parse a multiline table: starts with row of '-' on top, then header-- (which may be multiline), then the rows,-- which may be multiline, separated by blank lines, and-- ending with a footer (dashed line followed by blank line).multilineTable::Bool-- ^ Headerless table->GenParserCharParserStateBlockmultilineTableheadless=tableWith(multilineTableHeaderheadless)multilineRowblanklinestableFootertableCaptionmultilineTableHeader::Bool-- ^ Headerless table->GenParserCharParserState([[Block]],[Alignment],[Int])multilineTableHeaderheadless=try$doifheadlessthenreturn'\n'elsetableSeprawContent<-ifheadlessthenreturn$repeat""elsemany1(notFollowedBytableSep>>many1TillanyCharnewline)initSp<-nonindentSpacesdashes<-many1(dashedLine'-')newlinelet(lengths,lines')=unzipdashesletindices=scanl(+)(lengthinitSp)lines'rawHeadsList<-ifheadlessthenliftM(map(:[]).tail.splitByIndices(initindices))$lookAheadanyLineelsereturn$transpose$map(\ln->tail$splitByIndices(initindices)ln)rawContentletaligns=zipWithalignTyperawHeadsListlengthsletrawHeads=ifheadlessthenreplicate(lengthdashes)""elsemap(intercalate" ")rawHeadsListheads<-mapM(parseFromString(manyplain))$mapremoveLeadingTrailingSpacerawHeadsreturn(heads,aligns,indices)-- Returns an alignment type for a table, based on a list of strings-- (the rows of the column header) and a number (the length of the-- dashed line under the rows.alignType::[String]->Int->AlignmentalignType[]_=AlignDefaultalignTypestrLstlen=letnonempties=filter(not.null)$mapremoveTrailingSpacestrLst(leftSpace,rightSpace)=casesortBy(comparinglength)nonemptiesof(x:_)->(headx`elem`" \t",lengthx<len)[]->(False,False)incase(leftSpace,rightSpace)of(True,False)->AlignRight(False,True)->AlignLeft(True,True)->AlignCenter(False,False)->AlignDefaultgridTable::Bool-- ^ Headerless table->GenParserCharParserStateBlockgridTable=gridTableWithblocktableCaptiontable::GenParserCharParserStateBlocktable=multilineTableFalse<|>simpleTableTrue<|>simpleTableFalse<|>multilineTableTrue<|>gridTableFalse<|>gridTableTrue<?>"table"-- -- inline--inline::GenParserCharParserStateInlineinline=choiceinlineParsers<?>"inline"inlineParsers::[GenParserCharParserStateInline]inlineParsers=[whitespace,str,endline,code,(fourOrMore'*'<|>fourOrMore'_'),strong,emph,note,link,cite,image,math,strikeout,superscript,subscript,inlineNote-- after superscript because of ^[link](/foo)^,autoLink,rawHtmlInline,rawLaTeXInline',escapedChar,exampleRef,smartPunctuationinline,charRef,symbol,ltSign]inlineNonLink::GenParserCharParserStateInlineinlineNonLink=(choice$map(\parser->try(parser>>=failIfLink))inlineParsers)<?>"inline (non-link)"failIfLink::Inline->GenParsertokstInlinefailIfLink(Link__)=pzerofailIfLinkelt=returneltescapedChar::GenParserCharParserStateInlineescapedChar=try$dochar'\\'state<-getStateresult<-ifstateStrictstatethenoneOf"\\`*_{}[]()>#+-.!~"elsesatisfy(not.isAlphaNum)return$caseresultof' '->Str"\160"-- "\ " is a nonbreaking space'\n'->LineBreak-- "\[newline]" is a linebreak_->Str[result]ltSign::GenParserCharParserStateInlineltSign=dost<-getStateifstateStrictstthenchar'<'elsenotFollowedBy'rawHtmlBlocks>>char'<'-- unless it starts htmlreturn$Str['<']exampleRef::GenParserCharParserStateInlineexampleRef=try$dochar'@'lab<-many1(alphaNum<|>oneOf"-_")-- We just return a Str. These are replaced with numbers-- later. See the end of parseMarkdown.return$Str$'@':labsymbol::GenParserCharParserStateInlinesymbol=doresult<-noneOf"<\\\n\t "<|>try(dolookAhead$char'\\'notFollowedBy'$rawLaTeXEnvironment'<|>rawConTeXtEnvironment'char'\\')return$Str[result]-- parses inline code, between n `s and n `scode::GenParserCharParserStateInlinecode=try$dostarts<-many1(char'`')skipSpacesresult<-many1Till(many1(noneOf"`\n")<|>many1(char'`')<|>(char'\n'>>notFollowedBy'blankline>>return" "))(try(skipSpaces>>count(lengthstarts)(char'`')>>notFollowedBy(char'`')))attr<-option([],[],[])(try$optionalwhitespace>>attributes)return$Codeattr$removeLeadingTrailingSpace$concatresultmathWord::GenParserCharst[Char]mathWord=liftMconcat$many1mathChunkmathChunk::GenParserCharst[Char]mathChunk=dochar'\\'c<-anyCharreturn['\\',c]<|>many1(satisfy$\c->not(isBlankc||c=='\\'||c=='$'))math::GenParserCharParserStateInlinemath=(mathDisplay>>=applyMacros'>>=return.MathDisplayMath)<|>(mathInline>>=applyMacros'>>=return.MathInlineMath)mathDisplay::GenParserCharParserStateStringmathDisplay=try$dofailIfStrictstring"$$"many1Till(noneOf"\n"<|>(newline>>~notFollowedBy'blankline))(try$string"$$")mathInline::GenParserCharParserStateStringmathInline=try$dofailIfStrictchar'$'notFollowedByspacewords'<-sepBy1mathWord(many1(spaceChar<|>(newline>>~notFollowedBy'blankline)))char'$'notFollowedBydigitreturn$intercalate" "words'-- to avoid performance problems, treat 4 or more _ or * in a row as a literal-- rather than attempting to parse for emph/strongfourOrMore::Char->GenParserCharstInlinefourOrMorec=try$count4(charc)>>many(charc)>>=\s->return(Str$replicate4c++s)emph::GenParserCharParserStateInlineemph=((enclosed(char'*')(notFollowedBy'strong>>char'*')inline)<|>(enclosed(char'_')(notFollowedBy'strong>>char'_'>>notFollowedByalphaNum)inline))>>=return.Emph.normalizeSpacesstrong::GenParserCharParserStateInlinestrong=((enclosed(string"**")(try$string"**")inline)<|>(enclosed(string"__")(try$string"__")inline))>>=return.Strong.normalizeSpacesstrikeout::GenParserCharParserStateInlinestrikeout=failIfStrict>>enclosed(string"~~")(try$string"~~")inline>>=return.Strikeout.normalizeSpacessuperscript::GenParserCharParserStateInlinesuperscript=failIfStrict>>enclosed(char'^')(char'^')(notFollowedByspaceChar>>inline)>>=-- may not contain Spacereturn.Superscriptsubscript::GenParserCharParserStateInlinesubscript=failIfStrict>>enclosed(char'~')(char'~')(notFollowedByspaceChar>>inline)>>=-- may not contain Spacereturn.Subscriptwhitespace::GenParserCharParserStateInlinewhitespace=spaceChar>>((spaceChar>>skipManyspaceChar>>optionSpace(endline>>returnLineBreak))<|>(skipManyspaceChar>>returnSpace))<?>"whitespace"nonEndline::GenParserCharstCharnonEndline=satisfy(/='\n')str::GenParserCharParserStateInlinestr=doa<-alphaNumas<-many$alphaNum<|>(try$char'_'>>~lookAheadalphaNum)letresult=a:asstate<-getStateletspacesToNbr=map(\c->ifc==' 'then'\160'elsec)ifstateSmartstatethencaselikelyAbbrevresultof[]->return$Strresultxs->choice(map(\x->try(stringx>>oneOf" \n">>lookAheadalphaNum>>return(Str$result++spacesToNbrx++"\160")))xs)<|>(return$Strresult)elsereturn$Strresult-- | if the string matches the beginning of an abbreviation (before-- the first period, return strings that would finish the abbreviation.likelyAbbrev::String->[String]likelyAbbrevx=letabbrevs=["Mr.","Mrs.","Ms.","Capt.","Dr.","Prof.","Gen.","Gov.","e.g.","i.e.","Sgt.","St.","vol.","vs.","Sen.","Rep.","Pres.","Hon.","Rev.","Ph.D.","M.D.","M.A.","p.","pp.","ch.","sec."]abbrPairs=map(break(=='.'))abbrevsinmapsnd$filter(\(y,_)->y==x)abbrPairs-- an endline character that can be treated as a space, not a structural breakendline::GenParserCharParserStateInlineendline=try$donewlinenotFollowedByblanklinest<-getStatewhen(stateStrictst)$donotFollowedByemailBlockQuoteStartnotFollowedBy(char'#')-- atx header-- parse potential list-starts differently if in a list:when(stateParserContextst==ListItemState)$donotFollowedBy'bulletListStartnotFollowedBy'anyOrderedListStartreturnSpace---- links---- a reference label for a linkreference::GenParserCharParserState[Inline]reference=donotFollowedBy'(string"[^")-- footnote referenceresult<-inlinesInBalancedBracketsinlineNonLinkreturn$normalizeSpacesresult-- source for a link, with optional titlesource::GenParserCharst(String,[Char])source=(try$charsInBalanced'('')'>>=parseFromStringsource')<|>-- the following is needed for cases like: [ref](/url(a).(enclosed(char'(')(char')')anyChar>>=parseFromStringsource')-- auxiliary function for sourcesource'::GenParserCharst(String,[Char])source'=doskipSpacesletnl=char'\n'>>~notFollowedByblanklineletsourceURL=liftMunwords$many$try$donotFollowedBy'linkTitleskipManyspaceCharoptionalnlskipManyspaceCharmany1(satisfy$not.isBlank)letbetweenAngles=try$char'<'>>manyTill(noneOf">\n"<|>nl)(char'>')src<-trybetweenAngles<|>sourceURLtit<-option""linkTitleskipSpaceseofreturn(escapeURI$removeTrailingSpacesrc,tit)linkTitle::GenParserCharstStringlinkTitle=try$do(many1spaceChar>>option'\n'newline)<|>newlineskipSpacesdelim<-oneOf"'\""tit<-manyTill(optional(char'\\')>>anyChar)(try(chardelim>>skipSpaces>>eof))return$decodeCharacterReferencestitlink::GenParserCharParserStateInlinelink=try$dolab<-reference(src,tit)<-source<|>referenceLinklabreturn$Linklab(src,tit)-- a link like [this][ref] or [this][] or [this]referenceLink::[Inline]->GenParserCharParserState(String,[Char])referenceLinklab=doref<-option[](try(optional(char' ')>>optional(newline>>skipSpaces)>>reference))letref'=ifnullrefthenlabelserefstate<-getStatecaselookupKeySrc(stateKeysstate)(toKeyref')ofNothing->fail"no corresponding key"Justtarget->returntargetautoLink::GenParserCharParserStateInlineautoLink=try$dochar'<'(orig,src)<-uri<|>emailAddresschar'>'st<-getStatereturn$ifstateStrictstthenLink[Strorig](src,"")elseLink[Code("",["url"],[])orig](src,"")image::GenParserCharParserStateInlineimage=try$dochar'!'(Linklabsrc)<-linkreturn$Imagelabsrcnote::GenParserCharParserStateInlinenote=try$dofailIfStrictref<-noteMarkerstate<-getStateletnotes=stateNotesstatecaselookuprefnotesofNothing->fail"note not found"Justraw->do-- We temporarily empty the note list while parsing the note,-- so that we don't get infinite loops with notes inside notes...-- Note references inside other notes do not work.updateState$\st->st{stateNotes=[]}contents<-parseFromStringparseBlocksrawupdateState$\st->st{stateNotes=notes}return$NotecontentsinlineNote::GenParserCharParserStateInlineinlineNote=try$dofailIfStrictchar'^'contents<-inlinesInBalancedBracketsinlinereturn$Note[Paracontents]rawLaTeXInline'::GenParserCharParserStateInlinerawLaTeXInline'=try$dofailIfStrictlookAhead$char'\\'notFollowedBy'$rawLaTeXEnvironment'<|>rawConTeXtEnvironment'RawInline_s<-rawLaTeXInlinereturn$RawInline"tex"s-- "tex" because it might be context or latexrawConTeXtEnvironment'::GenParserCharstStringrawConTeXtEnvironment'=try$dostring"\\start"completion<-inBrackets(letter<|>digit<|>spaceChar)<|>(many1letter)contents<-manyTill(rawConTeXtEnvironment'<|>(count1anyChar))(try$string"\\stop">>stringcompletion)return$"\\start"++completion++concatcontents++"\\stop"++completioninBrackets::(GenParserCharstChar)->GenParserCharstStringinBracketsparser=dochar'['contents<-manyparserchar']'return$"["++contents++"]"rawHtmlInline::GenParserCharParserStateInlinerawHtmlInline=dost<-getState(_,result)<-ifstateStrictstthenhtmlTag(not.isTextTag)elsehtmlTagisInlineTagreturn$RawInline"html"result-- Citationscite::GenParserCharParserStateInlinecite=dofailIfStrictcitations<-textualCite<|>normalCitereturn$Citecitations[]spnl::GenParserCharst()spnl=try$doskipSpacesoptionalnewlineskipSpacesnotFollowedBy(char'\n')textualCite::GenParserCharParserState[Citation]textualCite=try$do(_,key)<-citeKeyletfirst=Citation{citationId=key,citationPrefix=[],citationSuffix=[],citationMode=AuthorInText,citationNoteNum=0,citationHash=0}rest<-option[]$try$spnl>>normalCiteifnullrestthenoption[first]$barelocfirstelsereturn$first:restbareloc::Citation->GenParserCharParserState[Citation]barelocc=try$dospnlchar'['suff<-suffixrest<-option[]$try$char';'>>citeListspnlchar']'return$c{citationSuffix=suff}:restnormalCite::GenParserCharParserState[Citation]normalCite=try$dochar'['spnlcitations<-citeListspnlchar']'returncitationsciteKey::GenParserCharParserState(Bool,String)citeKey=try$dosuppress_author<-optionFalse(char'-'>>returnTrue)char'@'first<-letterrest<-many$(noneOf",;!?[]()@ \t\n")letkey=first:restst<-getStateguard$key`elem`stateCitationsstreturn(suppress_author,key)suffix::GenParserCharParserState[Inline]suffix=try$dospnlliftMnormalizeSpaces$many$notFollowedBy(oneOf";]")>>inlineprefix::GenParserCharParserState[Inline]prefix=liftMnormalizeSpaces$manyTillinline(char']'<|>liftM(const']')(lookAheadciteKey))citeList::GenParserCharParserState[Citation]citeList=sepBy1citation(try$char';'>>spnl)citation::GenParserCharParserStateCitationcitation=try$dopref<-prefix(suppress_author,key)<-citeKeysuff<-suffixreturn$Citation{citationId=key,citationPrefix=pref,citationSuffix=suff,citationMode=ifsuppress_authorthenSuppressAuthorelseNormalCitation,citationNoteNum=0,citationHash=0}