{-
Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}{- |
Module : Text.Pandoc.Readers.RST
Copyright : Copyright (C) 2006-8 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion from reStructuredText to 'Pandoc' document.
-}moduleText.Pandoc.Readers.RST(readRST)whereimportText.Pandoc.DefinitionimportText.Pandoc.SharedimportText.ParserCombinators.ParsecimportControl.Monad(when)importData.List(findIndex,delete,intercalate)-- | Parse reStructuredText string and return Pandoc document.readRST::ParserState->String->PandocreadRSTstates=(readWithparseRST)state(s++"\n\n")---- Constants and data structure definitions---bulletListMarkers::[Char]bulletListMarkers="*+-"underlineChars::[Char]underlineChars="!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"-- treat these as potentially non-text when parsing inline:specialChars::[Char]specialChars="\\`|*_<>$:[-"---- parsing documents--isHeader::Int->Block->BoolisHeadern(Headerx_)=x==nisHeader__=False-- | Promote all headers in a list of blocks. (Part of-- title transformation for RST.)promoteHeaders::Int->[Block]->[Block]promoteHeadersnum((Headerleveltext):rest)=(Header(level-num)text):(promoteHeadersnumrest)promoteHeadersnum(other:rest)=other:(promoteHeadersnumrest)promoteHeaders_[]=[]-- | If list of blocks starts with a header (or a header and subheader)-- of level that are not found elsewhere, return it as a title and-- promote all the other headers. titleTransform::[Block]-- ^ list of blocks->([Block],[Inline])-- ^ modified list of blocks, titletitleTransform((Header1head1):(Header2head2):rest)=-- title subtitleif(any(isHeader1)rest)||(any(isHeader2)rest)then((Header1head1):(Header2head2):rest,[])else((promoteHeaders2rest),head1++[Str":",Space]++head2)titleTransform((Header1head1):rest)=-- title, no subtitleif(any(isHeader1)rest)then((Header1head1):rest,[])else((promoteHeaders1rest),head1)titleTransformblocks=(blocks,[])parseRST::GenParserCharParserStatePandocparseRST=dostartPos<-getPosition-- go through once just to get list of reference keys-- docMinusKeys is the raw document with blanks where the keys were...docMinusKeys<-manyTill(referenceKey<|>lineClump)eof>>=return.concatsetInputdocMinusKeyssetPositionstartPosst<-getStateletreversedKeys=stateKeysstupdateState$\s->s{stateKeys=reversereversedKeys}-- now parse it for real...blocks<-parseBlocksletblocks'=filter(/=Null)blocksstate<-getStatelet(blocks'',title)=ifstateStandalonestatethentitleTransformblocks'else(blocks',[])letauthors=stateAuthorsstateletdate=stateDatestatelettitle'=if(nulltitle)then(stateTitlestate)elsetitlereturn$Pandoc(Metatitle'authorsdate)blocks''---- parsing blocks--parseBlocks::GenParserCharParserState[Block]parseBlocks=manyTillblockeofblock::GenParserCharParserStateBlockblock=choice[codeBlock,rawHtmlBlock,rawLaTeXBlock,fieldList,blockQuote,imageBlock,unknownDirective,header,hrule,list,lineBlock,lhsCodeBlock,para,plain,nullBlock]<?>"block"---- field list--fieldListItem::String->GenParserCharst([Char],[Char])fieldListItemindent=try$dostringindentchar':'name<-many1alphaNumstring": "skipSpacesfirst<-manyTillanyCharnewlinerest<-option""$try$lookAhead(stringindent>>oneOf" \t")>>indentedBlockreturn(name,intercalate" "(first:(linesrest)))fieldList::GenParserCharParserStateBlockfieldList=try$doindent<-lookAhead$many(oneOf" \t")items<-many1$fieldListItemindentblanklinesletauthors=caselookup"Authors"itemsofJustauth->[auth]Nothing->mapsnd(filter(\(x,_)->x=="Author")items)ifnullauthorsthenreturn()elseupdateState$\st->st{stateAuthors=authors}case(lookup"Date"items)ofJustdat->updateState$\st->st{stateDate=dat}Nothing->return()case(lookup"Title"items)ofJusttit->parseFromString(manyinline)tit>>=\t->updateState$\st->st{stateTitle=t}Nothing->return()letremaining=filter(\(x,_)->(x/="Authors")&&(x/="Author")&&(x/="Date")&&(x/="Title"))itemsifnullremainingthenreturnNullelsedoterms<-mapM(return.(:[]).Str.fst)remainingdefs<-mapM(parseFromString(manyblock).snd)remainingreturn$DefinitionList$ziptermsdefs---- line block--lineBlockLine::GenParserCharParserState[Inline]lineBlockLine=try$dostring"| "white<-many(oneOf" \t")line<-manyTillinlinenewlinereturn$(ifnullwhitethen[]else[Strwhite])++line++[LineBreak]lineBlock::GenParserCharParserStateBlocklineBlock=try$dolines'<-many1lineBlockLineblanklinesreturn$Para(concatlines')---- paragraph block--para::GenParserCharParserStateBlockpara=paraBeforeCodeBlock<|>paraNormal<?>"paragraph"codeBlockStart::GenParserCharstCharcodeBlockStart=string"::">>blankline>>blankline-- paragraph that ends in a :: starting a code blockparaBeforeCodeBlock::GenParserCharParserStateBlockparaBeforeCodeBlock=try$doresult<-many1(notFollowedBy'codeBlockStart>>inline)lookAhead(string"::")return$Para$iflastresult==SpacethennormalizeSpacesresultelse(normalizeSpacesresult)++[Str":"]-- regular paragraphparaNormal::GenParserCharParserStateBlockparaNormal=try$doresult<-many1inlinenewlineblanklinesreturn$Para$normalizeSpacesresultplain::GenParserCharParserStateBlockplain=many1inline>>=return.Plain.normalizeSpaces---- image block--imageBlock::GenParserCharstBlockimageBlock=try$dostring".. image:: "src<-manyTillanyCharnewlinefields<-option[]$doindent<-lookAhead$many(oneOf" /t")many1$fieldListItemindentoptionalblanklinescaselookup"alt"fieldsofJustalt->return$Plain[Image[Stralt](src,alt)]Nothing->return$Plain[Image[Str"image"](src,"")]---- header blocks--header::GenParserCharParserStateBlockheader=doubleHeader<|>singleHeader<?>"header"-- a header with lines on top and bottomdoubleHeader::GenParserCharParserStateBlockdoubleHeader=try$doc<-oneOfunderlineCharsrest<-many(charc)-- the top lineletlenTop=length(c:rest)skipSpacesnewlinetxt<-many1(notFollowedByblankline>>inline)pos<-getPositionletlen=(sourceColumnpos)-1if(len>lenTop)thenfail"title longer than border"elsereturn()blankline-- spaces and newlinecountlenTop(charc)-- the bottom lineblanklines-- check to see if we've had this kind of header before. -- if so, get appropriate level. if not, add to list.state<-getStateletheaderTable=stateHeaderTablestatelet(headerTable',level)=casefindIndex(==DoubleHeaderc)headerTableofJustind->(headerTable,ind+1)Nothing->(headerTable++[DoubleHeaderc],(lengthheaderTable)+1)setState(state{stateHeaderTable=headerTable'})return$Headerlevel(normalizeSpacestxt)-- a header with line on the bottom onlysingleHeader::GenParserCharParserStateBlocksingleHeader=try$donotFollowedBy'whitespacetxt<-many1(do{notFollowedByblankline;inline})pos<-getPositionletlen=(sourceColumnpos)-1blanklinec<-oneOfunderlineCharscount(len-1)(charc)many(charc)blanklinesstate<-getStateletheaderTable=stateHeaderTablestatelet(headerTable',level)=casefindIndex(==SingleHeaderc)headerTableofJustind->(headerTable,ind+1)Nothing->(headerTable++[SingleHeaderc],(lengthheaderTable)+1)setState(state{stateHeaderTable=headerTable'})return$Headerlevel(normalizeSpacestxt)---- hrule block--hrule::GenParserCharstBlockhrule=try$dochr<-oneOfunderlineCharscount3(charchr)skipMany(charchr)blanklineblanklinesreturnHorizontalRule---- code blocks---- read a line indented by a given stringindentedLine::String->GenParserCharst[Char]indentedLineindents=try$dostringindentsresult<-manyTillanyCharnewlinereturn$result++"\n"-- two or more indented lines, possibly separated by blank lines.-- any amount of indentation will work.indentedBlock::GenParserCharst[Char]indentedBlock=doindents<-lookAhead$many1(oneOf" \t")lns<-many$choice$[indentedLineindents,try$dob<-blanklinesl<-indentedLineindentsreturn(b++l)]optionalblanklinesreturn$concatlnscodeBlock::GenParserCharstBlockcodeBlock=try$docodeBlockStartresult<-indentedBlockreturn$CodeBlock("",[],[])$stripTrailingNewlinesresultlhsCodeBlock::GenParserCharParserStateBlocklhsCodeBlock=try$dofailUnlessLHSpos<-getPositionwhen(sourceColumnpos/=1)$fail"Not in first column"lns<-many1birdTrackLine-- if (as is normal) there is always a space after >, drop itletlns'=ifall(\ln->nullln||take1ln==" ")lnsthenmap(drop1)lnselselnsblanklinesreturn$CodeBlock("",["sourceCode","haskell"],[])$intercalate"\n"lns'birdTrackLine::GenParserCharst[Char]birdTrackLine=dochar'>'manyTillanyCharnewline---- raw html--rawHtmlBlock::GenParserCharstBlockrawHtmlBlock=try$string".. raw:: html">>blanklines>>indentedBlock>>=return.RawHtml---- raw latex--rawLaTeXBlock::GenParserCharstBlockrawLaTeXBlock=try$dostring".. raw:: latex"blanklinesresult<-indentedBlockreturn$Para[(TeXresult)]---- block quotes--blockQuote::GenParserCharParserStateBlockblockQuote=doraw<-indentedBlock-- parse the extracted block, which may contain various block elements:contents<-parseFromStringparseBlocks$raw++"\n\n"return$BlockQuotecontents---- list blocks--list::GenParserCharParserStateBlocklist=choice[bulletList,orderedList,definitionList]<?>"list"definitionListItem::GenParserCharParserState([Inline],[Block])definitionListItem=try$do-- avoid capturing a directive or commentnotFollowedBy(try$char'.'>>char'.')term<-many1Tillinlineendlineraw<-indentedBlock-- parse the extracted block, which may contain various block elements:contents<-parseFromStringparseBlocks$raw++"\n\n"return(normalizeSpacesterm,contents)definitionList::GenParserCharParserStateBlockdefinitionList=many1definitionListItem>>=return.DefinitionList-- parses bullet list start and returns its length (inc. following whitespace)bulletListStart::GenParserCharstIntbulletListStart=try$donotFollowedBy'hrule-- because hrules start out just like listsmarker<-oneOfbulletListMarkerswhite<-many1spaceCharreturn$length(marker:white)-- parses ordered list start and returns its length (inc following whitespace)orderedListStart::ListNumberStyle->ListNumberDelim->GenParserCharstIntorderedListStartstyledelim=try$do(_,markerLen)<-withHorizDisplacement(orderedListMarkerstyledelim)white<-many1spaceCharreturn$markerLen+lengthwhite-- parse a line of a list itemlistLine::Int->GenParserCharParserState[Char]listLinemarkerLength=try$donotFollowedByblanklineindentWithmarkerLengthline<-manyTillanyCharnewlinereturn$line++"\n"-- indent by specified number of spaces (or equiv. tabs)indentWith::Int->GenParserCharParserState[Char]indentWithnum=dostate<-getStatelettabStop=stateTabStopstateif(num<tabStop)thencountnum(char' ')elsechoice[try(countnum(char' ')),(try(char'\t'>>count(num-tabStop)(char' ')))]-- parse raw text for one list item, excluding start marker and continuationsrawListItem::GenParserCharParserStateInt->GenParserCharParserState(Int,[Char])rawListItemstart=try$domarkerLength<-startfirstLine<-manyTillanyCharnewlinerestLines<-many(listLinemarkerLength)return(markerLength,(firstLine++"\n"++(concatrestLines)))-- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. -- Note: nested lists are parsed as continuations.listContinuation::Int->GenParserCharParserState[Char]listContinuationmarkerLength=try$doblanks<-many1blanklineresult<-many1(listLinemarkerLength)return$blanks++concatresultlistItem::GenParserCharParserStateInt->GenParserCharParserState[Block]listItemstart=try$do(markerLength,first)<-rawListItemstartrest<-many(listContinuationmarkerLength)blanks<-choice[try(manyblankline>>~lookAheadstart),many1blankline]-- whole list must end with blank.-- parsing with ListItemState forces markers at beginning of lines to-- count as list item markers, even if not separated by blank space.-- see definition of "endline"state<-getStateletoldContext=stateParserContextstatesetState$state{stateParserContext=ListItemState}-- parse the extracted block, which may itself contain block elementsparsed<-parseFromStringparseBlocks$concat(first:rest)++blanksupdateState(\st->st{stateParserContext=oldContext})returnparsedorderedList::GenParserCharParserStateBlockorderedList=try$do(start,style,delim)<-lookAhead(anyOrderedListMarker>>~spaceChar)items<-many1(listItem(orderedListStartstyledelim))letitems'=compactifyitemsreturn$OrderedList(start,style,delim)items'bulletList::GenParserCharParserStateBlockbulletList=many1(listItembulletListStart)>>=return.BulletList.compactify---- unknown directive (e.g. comment)--unknownDirective::GenParserCharstBlockunknownDirective=try$dostring".."notFollowedBy(noneOf" \t\n")manyTillanyCharnewlinemany$blanklines<|>(oneOf" \t">>manyTillanyCharnewline)returnNull-- -- reference key--quotedReferenceName::GenParserCharParserState[Inline]quotedReferenceName=try$dochar'`'>>notFollowedBy(char'`')-- `` means inline code!label'<-many1Tillinline(char'`')returnlabel'unquotedReferenceName::GenParserCharParserState[Inline]unquotedReferenceName=try$dolabel'<-many1Tillinline(lookAhead$char':')returnlabel'isolated::Char->GenParserCharstCharisolatedch=try$charch>>~notFollowedBy(charch)simpleReferenceName::GenParserCharst[Inline]simpleReferenceName=doraw<-many1(alphaNum<|>isolated'-'<|>isolated'.'<|>(try$char'_'>>~lookAheadalphaNum))return[Strraw]referenceName::GenParserCharParserState[Inline]referenceName=quotedReferenceName<|>(try$simpleReferenceName>>~lookAhead(char':'))<|>unquotedReferenceNamereferenceKey::GenParserCharParserState[Char]referenceKey=dostartPos<-getPositionkey<-choice[imageKey,anonymousKey,regularKey]st<-getStateletoldkeys=stateKeysstupdateState$\s->s{stateKeys=key:oldkeys}optionalblanklinesendPos<-getPosition-- return enough blanks to replace keyreturn$replicate(sourceLineendPos-sourceLinestartPos)'\n'targetURI::GenParserCharst[Char]targetURI=doskipSpacesoptionalnewlinecontents<-many1(try(manyspaceChar>>newline>>many1spaceChar>>noneOf" \t\n")<|>noneOf"\n")blanklinesreturncontentsimageKey::GenParserCharParserState([Inline],(String,[Char]))imageKey=try$dostring".. |"ref<-manyTillinline(char'|')skipSpacesstring"image::"src<-targetURIreturn(normalizeSpacesref,(removeLeadingTrailingSpacesrc,""))anonymousKey::GenParserCharst([Inline],(String,[Char]))anonymousKey=try$dooneOfStrings[".. __:","__"]src<-targetURIreturn([Str"_"],(removeLeadingTrailingSpacesrc,""))regularKey::GenParserCharParserState([Inline],(String,[Char]))regularKey=try$dostring".. _"ref<-referenceNamechar':'src<-targetURIreturn(normalizeSpacesref,(removeLeadingTrailingSpacesrc,""))-- -- inline--inline::GenParserCharParserStateInlineinline=choice[link,str,whitespace,endline,strong,emph,code,image,hyphens,superscript,subscript,escapedChar,symbol]<?>"inline"hyphens::GenParserCharParserStateInlinehyphens=doresult<-many1(char'-')optionSpaceendline-- don't want to treat endline after hyphen or dash as a spacereturn$StrresultescapedChar::GenParserCharstInlineescapedChar=escapedanyCharsymbol::GenParserCharParserStateInlinesymbol=doresult<-oneOfspecialCharsreturn$Str[result]-- parses inline code, between codeStart and codeEndcode::GenParserCharParserStateInlinecode=try$dostring"``"result<-manyTillanyChar(try(string"``"))return$Code$removeLeadingTrailingSpace$intercalate" "$linesresultemph::GenParserCharParserStateInlineemph=enclosed(char'*')(char'*')inline>>=return.Emph.normalizeSpacesstrong::GenParserCharParserStateInlinestrong=enclosed(string"**")(try$string"**")inline>>=return.Strong.normalizeSpacesinterpreted::[Char]->GenParserCharst[Inline]interpretedrole=try$dooptional$try$string"\\ "result<-enclosed(string$":"++role++":`")(char'`')anyChartry(string"\\ ")<|>lookAhead(count1$oneOf" \t\n")<|>(eof>>return"")return[Strresult]superscript::GenParserCharParserStateInlinesuperscript=interpreted"sup">>=(return.Superscript)subscript::GenParserCharParserStateInlinesubscript=interpreted"sub">>=(return.Subscript)whitespace::GenParserCharParserStateInlinewhitespace=many1spaceChar>>returnSpace<?>"whitespace"str::GenParserCharParserStateInlinestr=many1(noneOf(specialChars++"\t\n "))>>=return.Str-- an endline character that can be treated as a space, not a structural breakendline::GenParserCharParserStateInlineendline=try$donewlinenotFollowedByblankline-- parse potential list-starts at beginning of line differently in a list:st<-getStateif(stateParserContextst)==ListItemStatethennotFollowedBy(anyOrderedListMarker>>spaceChar)>>notFollowedBy'bulletListStartelsereturn()returnSpace---- links--link::GenParserCharParserStateInlinelink=choice[explicitLink,referenceLink,autoLink]<?>"link"explicitLink::GenParserCharParserStateInlineexplicitLink=try$dochar'`'notFollowedBy(char'`')-- `` marks start of inline codelabel'<-manyTill(notFollowedBy(char'`')>>inline)(try(spaces>>char'<'))src<-manyTill(noneOf">\n ")(char'>')skipSpacesstring"`_"return$Link(normalizeSpaceslabel')(removeLeadingTrailingSpacesrc,"")referenceLink::GenParserCharParserStateInlinereferenceLink=try$dolabel'<-(quotedReferenceName<|>simpleReferenceName)>>~char'_'key<-optionlabel'(do{char'_';return[Str"_"]})-- anonymous linkstate<-getStateletkeyTable=stateKeysstatesrc<-caselookupKeySrckeyTablekeyofNothing->fail"no corresponding key"Justtarget->returntarget-- if anonymous link, remove first anon key so it won't be used againletkeyTable'=if(key==[Str"_"])-- anonymous link? thendelete([Str"_"],src)keyTable-- remove first anon key elsekeyTablesetState$state{stateKeys=keyTable'}return$Link(normalizeSpaceslabel')srcautoURI::GenParserCharParserStateInlineautoURI=dosrc<-urireturn$Link[Strsrc](src,"")autoEmail::GenParserCharParserStateInlineautoEmail=dosrc<-emailAddressreturn$Link[Strsrc]("mailto:"++src,"")autoLink::GenParserCharParserStateInlineautoLink=autoURI<|>autoEmail-- For now, we assume that all substitution references are for images.image::GenParserCharParserStateInlineimage=try$dochar'|'ref<-manyTillinline(char'|')state<-getStateletkeyTable=stateKeysstatesrc<-caselookupKeySrckeyTablerefofNothing->fail"no corresponding key"Justtarget->returntargetreturn$Image(normalizeSpacesref)src