{-
Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}{- |
Module : Text.Pandoc.Readers.LaTeX
Copyright : Copyright (C) 2006-8 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of LaTeX to 'Pandoc' document.
-}

module Text.Pandoc.Readers.LaTeX
  ( readLaTeX
  , rawLaTeXInline
  , rawLaTeXEnvironment'
  ) where

import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Data.Maybe ( fromMaybe )
import Data.Char ( chr )
import Data.List ( isPrefixOf, isSuffixOf )

-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ParserState   -- ^ Parser state, including options for parser
          -> String        -- ^ String to parse (assumes @'\n'@ line endings)
          -> Pandoc
readLaTeX = readWith parseLaTeX

-- characters with special meaning
specialChars :: [Char]
specialChars = "\\`$%^&_~#{}\n \t|<>'\"-"

--
-- utility functions
--

-- | Returns the text between an opening bracket and its matching closing
-- bracket, with both brackets included in the result.
bracketedText :: Char -> Char -> GenParser Char st [Char]
bracketedText openB closeB = do
  inner <- charsInBalanced' openB closeB
  return (openB : inner ++ [closeB])

-- | Returns an option (@[...]@) or argument (@{...}@) of a LaTeX command,
-- brackets included.
optOrArg :: GenParser Char st [Char]
optOrArg = bracketedText '{' '}'
       <|> bracketedText '[' ']'

-- | True if the string begins with '{' (i.e. it is an argument rather
-- than an option).
isArg :: [Char] -> Bool
isArg s = "{" `isPrefixOf` s

-- | Returns list of options and arguments of a LaTeX command.
commandArgs :: GenParser Char st [[Char]]
commandArgs = many optOrArg

-- | Parses LaTeX command, returns (name, star, list of options or arguments).
command :: GenParser Char st ([Char], [Char], [[Char]])
command = do
  char '\\'
  cmdName  <- many1 letter
  starMark <- option "" (string "*")  -- some commands have starred versions
  cmdArgs  <- commandArgs
  return (cmdName, starMark, cmdArgs)

-- | Parses @\\begin{name}@, any options/arguments that follow it, and
-- trailing whitespace.  Returns the environment name.
begin :: [Char] -> GenParser Char st [Char]
begin name = try $ do
  string ("\\begin{" ++ name ++ "}")
  optional commandArgs
  spaces
  return name

-- | Parses @\\end{name}@ and returns the environment name.
end :: [Char] -> GenParser Char st [Char]
end name = try (string ("\\end{" ++ name ++ "}") >> return name)

-- | Returns a list of block elements containing the contents of an
-- environment.
environment :: [Char] -> GenParser Char ParserState [Block]
environment name = try $ do
  begin name
  spaces
  contents <- manyTill block (end name)
  spaces
  return contents

-- | Parses an arbitrary (possibly starred) environment and wraps the blocks
-- it contains in a 'BlockQuote'.
anyEnvironment :: GenParser Char ParserState Block
anyEnvironment = try $ do
  string "\\begin{"
  envName  <- many letter
  starMark <- option "" (string "*")  -- some environments have starred variants
  char '}'
  optional commandArgs
  spaces
  body <- manyTill block (end (envName ++ starMark))
  spaces
  return (BlockQuote body)

--
-- parsing documents
--

-- | Process LaTeX preamble, extracting metadata.
processLaTeXPreamble :: GenParser Char ParserState ()
processLaTeXPreamble = try $ do
  manyTill preambleItem documentStart
  spaces
  where
    preambleItem  = choice [bibliographic, comment, unknownCommand, nullBlock]
    documentStart = try (string "\\begin{document}")

-- | Parse LaTeX and return 'Pandoc'.
parseLaTeX :: GenParser Char ParserState Pandoc
parseLaTeX = do
  -- preamble might not be present (fragment)
  optional processLaTeXPreamble
  spaces
  blocks <- parseBlocks
  spaces
  -- \end{document} might not be present (fragment); anything after it
  -- is discarded
  optional $ try (string "\\end{document}" >> many anyChar)
  spaces
  eof
  st <- getState
  let meta = Meta (stateTitle st) (stateAuthors st) (stateDate st)
  return $ Pandoc meta (filter (/= Null) blocks)

--
-- parsing blocks
--

-- | Skips leading whitespace, then parses zero or more blocks.
parseBlocks :: GenParser Char ParserState [Block]
parseBlocks = do
  spaces
  many block

-- | Parses a single block element; alternatives are tried in the order
-- listed.
block :: GenParser Char ParserState Block
block = choice
  [ hrule
  , codeBlock
  , header
  , list
  , blockQuote
  , comment
  , bibliographic
  , para
  , itemBlock
  , unknownEnvironment
  , ignore
  , unknownCommand
  ] <?> "block"

--
-- header blocks
--

-- | Parses @\\section@, @\\subsection@, ... (optionally starred).  The
-- header level is one more than the number of @sub@ prefixes.
header :: GenParser Char ParserState Block
header = try $ do
  char '\\'
  subPrefixes <- many (try (string "sub"))
  string "section"
  optional (char '*')
  char '{'
  heading <- manyTill inline (char '}')
  spaces
  let level = 1 + length subPrefixes
  return (Header level (normalizeSpaces heading))

--
-- hrule block
--

-- | Parses either of the two literal strings treated as a horizontal rule.
hrule :: GenParser Char st Block
hrule = do
  oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n"
               , "\\newpage" ]
  spaces
  return HorizontalRule

--
-- code blocks
--

-- | Parses a verbatim-style code block.
-- Note: Verbatim is from fancyvrb.
codeBlock :: GenParser Char ParserState Block
codeBlock = codeBlockWith "verbatim"
        <|> codeBlockWith "Verbatim"
        <|> lhsCodeBlock

-- | Parses the named environment as literal text.  The @code@ environment
-- gets the class @haskell@; all others get no classes.
codeBlockWith :: String -> GenParser Char st Block
codeBlockWith env = try $ do
  -- don't use the begin function here because it gobbles whitespace
  string opening
  -- we want to gobble blank lines, but not leading space
  optional blanklines
  body <- manyTill anyChar (try (string closing))
  spaces
  return $ CodeBlock ("", classes, []) (stripTrailingNewlines body)
  where
    opening = "\\begin{" ++ env ++ "}"
    closing = "\\end{" ++ env ++ "}"
    classes | env == "code" = ["haskell"]
            | otherwise     = []

-- | Parses a literate Haskell @code@ environment; only succeeds when
-- 'failUnlessLHS' does.
lhsCodeBlock :: GenParser Char ParserState Block
lhsCodeBlock = do
  failUnlessLHS
  (CodeBlock (_,_,_) body) <- codeBlockWith "code"
  return $ CodeBlock ("", ["sourceCode","literate","haskell"], []) body

--
-- block quotes
--

-- | Parses a @quote@ or @quotation@ environment.
blockQuote :: GenParser Char ParserState Block
blockQuote = do
  contents <- environment "quote" <|> environment "quotation"
  spaces
  return (BlockQuote contents)

--
-- list blocks
--

-- | Parses a bullet, ordered, or definition list.
list :: GenParser Char ParserState Block
list = bulletList <|> orderedList <|> definitionList <?> "list"

-- | Parses one @\\item@.  Returns the parsed contents of the item's single
-- bracketed option (empty if there is none) together with the item's blocks.
listItem :: GenParser Char ParserState ([Inline], [Block])
listItem = try $ do
  ("item", _, args) <- command
  spaces
  st <- getState
  let savedContext = stateParserContext st
  -- blocks inside the item are parsed in list-item context; the previous
  -- context is restored afterwards
  updateState (\s -> s {stateParserContext = ListItemState})
  blocks <- many block
  updateState (\s -> s {stateParserContext = savedContext})
  label <- case args of
             [x] | "[" `isPrefixOf` x && "]" `isSuffixOf` x
                   -> parseFromString (many inline) (tail (init x))
             _     -> return []
  return (label, blocks)

-- | Parses an @enumerate@ environment, with an optional bracketed list
-- marker and an optional @\\setcounter@ start value (both only outside
-- strict mode).
orderedList :: GenParser Char ParserState Block
orderedList = try $ do
  string "\\begin{enumerate}"
  (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec
  spaces
  option "" itemIndent
  spaces
  start <- option 1 startNumber
  items <- many listItem
  end "enumerate"
  spaces
  return $ OrderedList (start, style, delim) (map snd items)
  where
    -- optional [marker] giving number style and delimiter
    markerSpec = try $ do
      failIfStrict
      char '['
      res <- anyOrderedListMarker
      char ']'
      return res
    -- \setlength{\itemindent}{...} is parsed and ignored
    itemIndent = try $ do
      string "\\setlength{\\itemindent}"
      char '{'
      manyTill anyChar (char '}')
    -- \setcounter{enumX}{n}: the list starts at n + 1
    startNumber = try $ do
      failIfStrict
      string "\\setcounter{enum"
      many1 (oneOf "iv")
      string "}{"
      num <- many1 digit
      char '}'
      spaces
      return (read num + 1)

-- | Parses an @itemize@ environment.
bulletList :: GenParser Char ParserState Block
bulletList = try $ do
  begin "itemize"
  spaces
  items <- many listItem
  end "itemize"
  spaces
  return $ BulletList (map snd items)

-- | Parses a @description@ environment; each item's bracketed option
-- becomes the term and its blocks the single definition.
definitionList :: GenParser Char ParserState Block
definitionList = try $ do
  begin "description"
  spaces
  items <- many listItem
  end "description"
  spaces
  return $ DefinitionList [ (term, [defn]) | (term, defn) <- items ]

--
-- paragraph
-- block
--

-- | Parses a run of one or more inlines as a paragraph.  Returns 'Null'
-- when the run contains nothing but empty strings and spaces.
para :: GenParser Char ParserState Block
para = do
  res <- many1 inline
  spaces
  return $ if null (filter (`notElem` [Str "", Space]) res)
              then Null
              else Para $ normalizeSpaces res

--
-- title authors date
--

-- | Parses one of the bibliographic commands; the metadata commands store
-- their contents in the parser state and all of them yield 'Null'.
bibliographic :: GenParser Char ParserState Block
bibliographic = choice [ maketitle, title, authors, date ]

-- | Parses @\\maketitle@ and discards it.
maketitle :: GenParser Char st Block
maketitle = try (string "\\maketitle") >> spaces >> return Null

-- | Parses @\\title{...}@ and stores the title in the parser state.
title :: GenParser Char ParserState Block
title = try $ do
  string "\\title{"
  tit <- manyTill inline (char '}')
  spaces
  -- normalize spaces, as is done for the date and the authors
  updateState (\state -> state { stateTitle = normalizeSpaces tit })
  return Null

-- | Parses @\\author{...}@; the contents are split into individual authors
-- at line breaks and stored in the parser state.
authors :: GenParser Char ParserState Block
authors = try $ do
  string "\\author{"
  raw <- many1 (notFollowedBy (char '}') >> inline)
  let authors' = map normalizeSpaces $ splitByLineBreak raw
  char '}'
  spaces
  updateState (\s -> s { stateAuthors = authors' })
  return Null

-- | Parses @\\date{...}@ and stores the date in the parser state.
date :: GenParser Char ParserState Block
date = try $ do
  string "\\date{"
  date' <- manyTill inline (char '}')
  spaces
  updateState (\state -> state { stateDate = normalizeSpaces date' })
  return Null

--
-- item block
-- for use in unknown environments that aren't being parsed as raw latex
--

-- this forces items to be parsed in different blocks
itemBlock :: GenParser Char ParserState Block
itemBlock = try $ do
  ("item", _, args) <- command
  state <- getState
  if (stateParserContext state == ListItemState)
     then fail "item should be handled by list block"
     else if null args
             then return Null
             else return $ Plain [Str (stripFirstAndLast (head args))]

--
-- raw LaTeX
--

-- | Parse any LaTeX environment and return a Para block containing
-- the whole literal environment as raw TeX.
rawLaTeXEnvironment :: GenParser Char st Block
rawLaTeXEnvironment = do
  contents <- rawLaTeXEnvironment'
  spaces
  return $ Para [TeX contents]

-- | Parse any LaTeX environment and return a string containing
-- the whole literal environment as raw TeX.
rawLaTeXEnvironment' :: GenParser Char st String
rawLaTeXEnvironment' = try $ do
  string "\\begin{"
  name <- many1 letter
  star <- option "" (string "*") -- for starred variants
  let name' = name ++ star
  char '}'
  args <- option [] commandArgs
  let argStr = concat args
  -- the body is a mix of backslash-free text, nested environments
  -- (handled recursively), and lone backslashes
  contents <- manyTill (choice [ (many1 (noneOf "\\")),
                                 rawLaTeXEnvironment',
                                 string "\\" ])
                       (end name')
  return $ "\\begin{" ++ name' ++ "}" ++ argStr ++
           concat contents ++ "\\end{" ++ name' ++ "}"

-- | Parses an environment not handled by any other block parser.
unknownEnvironment :: GenParser Char ParserState Block
unknownEnvironment = try $ do
  state <- getState
  if stateParseRaw state        -- check whether we should include raw TeX
     then rawLaTeXEnvironment   -- if so, get whole raw environment
     else anyEnvironment        -- otherwise just the contents

-- \ignore{} is used conventionally in literate haskell for definitions
-- that are to be processed by the compiler but not printed.
ignore :: GenParser Char ParserState Block
ignore = try $ do
  ("ignore", _, _) <- command
  spaces
  return Null

-- | Parses a command not handled by any other block parser.  With
-- 'stateParseRaw' set, the command and its arguments are kept as raw TeX;
-- otherwise only the command name (and a directly following @{}@) is
-- skipped.
unknownCommand :: GenParser Char ParserState Block
unknownCommand = try $ do
  -- never swallow the \end of a list or of the document
  notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
                                     "document"]
  state <- getState
  -- inside a list item, \item belongs to the enclosing list parser
  if stateParserContext state == ListItemState
     then notFollowedBy' $ string "\\item"
     else return ()
  if stateParseRaw state
     then do
        (name, star, args) <- command
        spaces
        return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)]
     else do -- skip unknown command, leaving arguments to be parsed
        char '\\'
        letter
        many (letter <|> digit)
        optional (try $ string "{}")
        spaces
        return Null

-- latex comment
-- The comment runs to the next newline, or to the end of input when the
-- last line has no trailing newline.
comment :: GenParser Char st Block
comment = try $ do
  char '%'
  manyTill anyChar (newline <|> (eof >> return '\n'))
  spaces
  return Null

--
-- inline
--

-- | Parses a single inline element; alternatives are tried in the order
-- listed (so, for example, 'emDash' is tried before 'enDash' and 'hyphen').
inline :: GenParser Char ParserState Inline
inline =  choice [ str
                 , endline
                 , whitespace
                 , quoted
                 , apostrophe
                 , spacer
                 , strong
                 , math
                 , ellipses
                 , emDash
                 , enDash
                 , hyphen
                 , emph
                 , strikeout
                 , superscript
                 , subscript
                 , ref
                 , lab
                 , code
                 , url
                 , link
                 , image
                 , footnote
                 , linebreak
                 , accentedChar
                 , specialChar
                 , rawLaTeXInline
                 , escapedChar
                 , unescapedChar
                 ] <?> "inline"

-- | Parses an accented or otherwise special character command.
accentedChar :: GenParser Char st Inline
accentedChar = normalAccentedChar <|> specialAccentedChar

-- | Parses an accent command (@\\'@, @\\`@, @\\^@, @\\"@, @\\~@) applied to
-- a letter, optionally in braces.  Combinations missing from 'accentTable'
-- yield @?@.
normalAccentedChar :: GenParser Char st Inline
normalAccentedChar = try $ do
  char '\\'
  accent <- oneOf "'`^\"~"
  character <- (try $ char '{' >> letter >>~ char '}') <|> letter
  let table = fromMaybe [] $ lookup character accentTable
  let result = case lookup accent table of
                 Just num  -> chr num
                 Nothing   -> '?'
  return $ Str [result]

-- an association list of letters and association list of accents
-- and decimal character numbers.
accentTable :: [(Char, [(Char, Int)])]
accentTable =
  [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
    ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
    ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
    ('N', [('~', 209)]),
    ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
    ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
    -- 226 is a-circumflex and 227 is a-tilde
    ('a', [('`', 224), ('\'', 225), ('^', 226), ('~', 227), ('"', 228)]),
    ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
    ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
    ('n', [('~', 241)]),
    ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
    ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]

-- | Parses one of the special-character commands that are not a plain
-- accent-plus-letter combination.
specialAccentedChar :: GenParser Char st Inline
specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
                               oslash, pound, euro, copyright, sect ]

ccedil :: GenParser Char st Inline
ccedil = try $ do
  char '\\'
  letter' <- oneOfStrings ["cc", "cC"]
  let num = if letter' == "cc" then 231 else 199
  return $ Str [chr num]

aring :: GenParser Char st Inline
aring = try $ do
  char '\\'
  letter' <- oneOfStrings ["aa", "AA"]
  let num = if letter' == "aa" then 229 else 197
  return $ Str [chr num]

-- The whole sequence is wrapped in 'try' so that a @\\"@ that is not
-- followed by a dotless i backtracks instead of failing after having
-- consumed input.
iuml :: GenParser Char st Inline
iuml = try $ string "\\\"" >> oneOfStrings ["\\i", "{\\i}"] >>
       return (Str [chr 239])

szlig :: GenParser Char st Inline
szlig = try (string "\\ss") >> return (Str [chr 223])

oslash :: GenParser Char st Inline
oslash = try $ do
  char '\\'
  letter' <- choice [char 'o', char 'O']
  let num = if letter' == 'o' then 248 else 216
  return $ Str [chr num]

aelig :: GenParser Char st Inline
aelig = try $ do
  char '\\'
  letter' <- oneOfStrings ["ae", "AE"]
  let num = if letter' == "ae" then 230 else 198
  return $ Str [chr num]

pound :: GenParser Char st Inline
pound = try (string "\\pounds") >> return (Str [chr 163])

euro :: GenParser Char st Inline
euro = try (string "\\euro") >> return (Str [chr 8364])

copyright :: GenParser Char st Inline
copyright = try (string "\\copyright") >> return (Str [chr 169])

sect :: GenParser Char st Inline
sect = try (string "\\S") >> return (Str [chr 167])

-- | Parses a backslash-escaped character; an escaped newline becomes a
-- space.
escapedChar :: GenParser Char st Inline
escapedChar = do
  result <- escaped (oneOf " $%&_#{}\n")
  return $ if result == Str "\n" then Str " " else result

-- nonescaped special characters
unescapedChar :: GenParser Char st Inline
unescapedChar = oneOf "`$^&_#{}|<>" >>= return . (\c -> Str [c])

-- | Parses a command (or literal double quote) standing for a single
-- special character.
specialChar :: GenParser Char st Inline
specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ]

backslash :: GenParser Char st Inline
backslash = try (string "\\textbackslash") >> optional (try $ string "{}") >>
            return (Str "\\")

tilde :: GenParser Char st Inline
tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")

caret :: GenParser Char st Inline
caret = try (string "\\^{}") >> return (Str "^")

-- \textbar is a vertical bar
bar :: GenParser Char st Inline
bar = try (string "\\textbar") >> optional (try $ string "{}") >>
      return (Str "|")

lt :: GenParser Char st Inline
lt = try (string "\\textless") >> optional (try $ string "{}") >>
     return (Str "<")

gt :: GenParser Char st Inline
gt = try (string "\\textgreater") >> optional (try $ string "{}") >>
     return (Str ">")

doubleQuote :: GenParser Char st Inline
doubleQuote = char '"' >> return (Str "\"")

-- | Parses inline code: @\\verb@, @\\texttt@, or (in literate Haskell
-- mode) text between vertical bars.
code :: GenParser Char ParserState Inline
code = code1 <|> code2 <|> lhsInlineCode

-- \verb: the character right after \verb is the delimiter
code1 :: GenParser Char st Inline
code1 = try $ do
  string "\\verb"
  marker <- anyChar
  result <- manyTill anyChar (char marker)
  return $ Code $ removeLeadingTrailingSpace result

-- \texttt{...}: only matches when the contents are free of the listed
-- special characters
code2 :: GenParser Char st Inline
code2 = try $ do
  string "\\texttt{"
  result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
  return $ Code result

lhsInlineCode :: GenParser Char ParserState Inline
lhsInlineCode = try $ do
  failUnlessLHS
  char '|'
  result <- manyTill (noneOf "|\n") (char '|')
  return $ Code result

emph :: GenParser Char ParserState Inline
emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
       manyTill inline (char '}') >>= return . Emph

strikeout :: GenParser Char ParserState Inline
strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
            return . Strikeout

superscript :: GenParser Char ParserState Inline
superscript = try $ string "\\textsuperscript{" >>
              manyTill inline (char '}') >>= return . Superscript

-- note: \textsubscript isn't a standard latex command, but we use
-- a defined version in pandoc.
subscript :: GenParser Char ParserState Inline
subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
            return . Subscript

apostrophe :: GenParser Char ParserState Inline
apostrophe = char '\'' >> return Apostrophe

-- | Parses text in LaTeX-style double (@``...''@) or single (@`...'@)
-- quotes.
quoted :: GenParser Char ParserState Inline
quoted = doubleQuoted <|> singleQuoted

singleQuoted :: GenParser Char ParserState Inline
singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
               return . Quoted SingleQuote . normalizeSpaces

doubleQuoted :: GenParser Char ParserState Inline
doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
               return . Quoted DoubleQuote . normalizeSpaces

singleQuoteStart :: GenParser Char st Char
singleQuoteStart = char '`'

-- a closing single quote must not be followed by a letter or digit
singleQuoteEnd :: GenParser Char st ()
singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum

doubleQuoteStart :: CharParser st String
doubleQuoteStart = string "``"

doubleQuoteEnd :: CharParser st String
doubleQuoteEnd = try $ string "''"

ellipses :: GenParser Char st Inline
ellipses = try $ string "\\ldots" >> optional (try $ string "{}") >>
           return Ellipses

enDash :: GenParser Char st Inline
enDash = try (string "--") >> return EnDash

emDash :: GenParser Char st Inline
emDash = try (string "---") >> return EmDash

hyphen :: GenParser Char st Inline
hyphen = char '-' >> return (Str "-")

-- \label{x} is rendered as the text "(x)"
lab :: GenParser Char st Inline
lab = try $ do
  string "\\label{"
  result <- manyTill anyChar (char '}')
  return $ Str $ "(" ++ result ++ ")"

-- \ref{x} is rendered as the text "x"
ref :: GenParser Char st Inline
ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str

strong :: GenParser Char ParserState Inline
strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
         return . Strong

-- a run of spaces, tabs and tildes counts as a single space
whitespace :: GenParser Char st Inline
whitespace = many1 (oneOf "~ \t") >> return Space

-- hard line
-- break
linebreak :: GenParser Char st Inline
linebreak = do
  try (string "\\\\")
  return LineBreak

-- | Parses the thin-space command @\\,@ and yields an empty string.
spacer :: GenParser Char st Inline
spacer = do
  try (string "\\,")
  return (Str "")

-- | Parses a run of one or more characters that are not in 'specialChars'.
str :: GenParser Char st Inline
str = do
  chars <- many1 (noneOf specialChars)
  return (Str chars)

-- endline internal to paragraph
endline :: GenParser Char st Inline
endline = try $ do
  newline
  notFollowedBy blankline
  return Space

-- math
-- Alternatives are tried in the order listed; @$$...$$@ ('math3') is tried
-- before @$...$@ ('math1').
math :: GenParser Char st Inline
math = display math3
   <|> inlined math1
   <|> inlined math2
   <|> display math4
   <|> display math5
   <|> display math6
   <?> "math"
  where
    display p = do { tex <- p; return (Math DisplayMath tex) }
    inlined p = do { tex <- p; return (Math InlineMath tex) }

-- $...$
math1 :: GenParser Char st String
math1 = try $ do
  char '$'
  manyTill anyChar (char '$')

-- \(...\)
math2 :: GenParser Char st String
math2 = try $ do
  string "\\("
  manyTill anyChar (try $ string "\\)")

-- $$...$$
math3 :: GenParser Char st String
math3 = try $ do
  char '$'
  tex <- math1
  char '$'
  return tex

-- display environments whose contents are returned unchanged
math4 :: GenParser Char st String
math4 = try $ do
  name <- choice $ map begin
            [ "displaymath", "equation", "equation*", "gather", "gather*"
            , "gathered", "multline", "multline*" ]
  spaces
  manyTill anyChar (end name)

-- \[...\]
math5 :: GenParser Char st String
math5 = try $ do
  string "\\["
  spaces
  manyTill anyChar (try $ string "\\]")

-- alignment environments; every '&' is dropped from the contents
math6 :: GenParser Char st String
math6 = try $ do
  name <- choice $ map begin
            [ "eqnarray", "eqnarray*", "align", "align*", "alignat"
            , "alignat*", "split", "aligned", "alignedat" ]
  spaces
  res <- manyTill anyChar (end name)
  return [ c | c <- res, c /= '&' ]  -- remove alignment codes

--
-- links and images
--

-- | Parses @\\url{...}@ as a link whose label is the URL shown as code.
url :: GenParser Char ParserState Inline
url = try $ do
  string "\\url"
  target <- charsInBalanced '{' '}'
  return $ Link [Code target] (target, "")

-- | Parses @\\href{url}{label}@.
link :: GenParser Char ParserState Inline
link = try $ do
  string "\\href{"
  target <- manyTill anyChar (char '}')
  char '{'
  labelInlines <- manyTill inline (char '}')
  return $ Link (normalizeSpaces labelInlines) (target, "")

-- | Parses @\\includegraphics@; the first braced argument (if any) is the
-- image source, and bracketed options are ignored.
image :: GenParser Char ParserState Inline
image = try $ do
  ("includegraphics", _, args) <- command
  let src = case filter isArg args of  -- filter out options
              []        -> ("", "")
              (first:_) -> (stripFirstAndLast first, "")
  return $ Image [Str "image"] src

-- | Parses @\\footnote@ or @\\thanks@ with exactly one option or argument,
-- whose contents are parsed as blocks.
footnote :: GenParser Char ParserState Inline
footnote = try $ do
  (name, _, (contents:[])) <- command
  if name `notElem` ["footnote", "thanks"]
     then fail "not a footnote or thanks command"
     else string ""
  -- parse the extracted block, which may contain various block elements:
  -- temporarily swap the parser input for the note text, then restore it
  remaining <- getInput
  setInput (stripFirstAndLast contents)
  blocks <- parseBlocks
  setInput remaining
  return $ Note blocks

-- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline
rawLaTeXInline = try $ do
  notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore"]
  state <- getState
  if stateParseRaw state then keepRaw else skipName
  where
    -- keep the command and its arguments verbatim
    keepRaw = do
      (name, star, args) <- command
      return $ TeX ("\\" ++ name ++ star ++ concat args)
    -- skip unknown command, leaving arguments to be parsed
    skipName = do
      char '\\'
      letter
      many (letter <|> digit)
      optional (try $ string "{}")
      return $ Str ""