{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
FlexibleContexts #-}{-
Copyright (C) 2006-2013 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}{- |
Module : Text.Pandoc.Shared
Copyright : Copyright (C) 2006-2013 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Utility functions and definitions used by the various Pandoc modules.
-}
module Text.Pandoc.Shared (
                     -- * List processing
                     splitBy,
                     splitByIndices,
                     splitStringByIndices,
                     substitute,
                     -- * Text processing
                     backslashEscapes,
                     escapeStringUsing,
                     stripTrailingNewlines,
                     trim,
                     triml,
                     trimr,
                     stripFirstAndLast,
                     camelCaseToHyphenated,
                     toRomanNumeral,
                     escapeURI,
                     tabFilter,
                     -- * Date/time
                     normalizeDate,
                     -- * Pandoc block and inline list processing
                     orderedListMarkers,
                     normalizeSpaces,
                     normalize,
                     stringify,
                     compactify,
                     compactify',
                     Element (..),
                     hierarchicalize,
                     uniqueIdent,
                     isHeaderBlock,
                     headerShift,
                     isTightList,
                     addMetaField,
                     makeMeta,
                     -- * TagSoup HTML handling
                     renderTags',
                     -- * File handling
                     inDirectory,
                     readDataFile,
                     readDataFileUTF8,
                     fetchItem,
                     openURL,
                     -- * Error handling
                     err,
                     warn,
                     -- * Safe read
                     safeRead
                    ) where

import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.Generic
import Text.Pandoc.Builder (Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
import System.Environment (getProgName)
import System.Exit (exitWith, ExitCode(..))
import Data.Char ( toLower, isLower, isUpper, isAlpha,
                   isLetter, isDigit, isSpace )
import Data.List ( find, isPrefixOf, intercalate )
import qualified Data.Map as M
import Network.URI ( escapeURIString, isURI, unEscapeString )
import System.Directory
import Text.Pandoc.MIME (getMimeType)
import System.FilePath ( (</>), takeExtension, dropExtension )
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import qualified Control.Exception as E
import Control.Monad (msum, unless)
import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
import Data.Time
import System.IO (stderr)
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
         renderOptions)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
import Text.Pandoc.Compat.Monoid
#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
import System.FilePath ( joinPath, splitDirectories )
#else
import Paths_pandoc (getDataFileName)
#endif
#ifdef HTTP_CONDUIT
import Data.ByteString.Lazy (toChunks)
import Network.HTTP.Conduit (httpLbs, parseUrl, withManager,
                             responseBody, responseHeaders)
import Network.HTTP.Types.Header ( hContentType)
#else
import Network.URI (parseURI)
import Network.HTTP (findHeader, rspBody,
                     RequestMethod(..), HeaderName(..), mkRequest)
import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
#endif

--
-- List processing
--

-- | Split list by groups of one or more sep.
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy _ [] = []
splitBy isSep lst =
  let (first, rest) = break isSep lst
      rest'         = dropWhile isSep rest
  in  first:(splitBy isSep rest')

-- | Split list into chunks divided at specified indices.
splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices [] lst = [lst]
splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest)
  where (first, rest) = splitAt x lst

-- | Split string into chunks divided at specified indices.
-- Indices count display width (wide characters count 2), via 'splitAt''.
splitStringByIndices :: [Int] -> [Char] -> [[Char]]
splitStringByIndices [] lst = [lst]
splitStringByIndices (x:xs) lst =
  let (first, rest) = splitAt' x lst in
  first : (splitStringByIndices (map (\y -> y - x) xs) rest)

-- | Like 'splitAt', but the count is decremented by each character's
-- display width ('charWidth') rather than by 1.
splitAt' :: Int -> [Char] -> ([Char],[Char])
splitAt' _ []          = ([],[])
splitAt' n xs | n <= 0 = ([],xs)
splitAt' n (x:xs)      = (x:ys,zs)
  where (ys,zs) = splitAt' (n - charWidth x) xs

-- | Replace each occurrence of one sublist in a list with another.
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
substitute [] _ xs = xs
substitute target replacement lst@(x:xs) =
    if target `isPrefixOf` lst
       then replacement ++ substitute target replacement
                             (drop (length target) lst)
       else x : substitute target replacement xs

--
-- Text processing
--

-- | Returns an association list of backslash escapes for the
-- designated characters.
backslashEscapes :: [Char]    -- ^ list of special characters to escape
                 -> [(Char, String)]
backslashEscapes = map (\ch -> (ch, ['\\',ch]))

-- | Escape a string of characters, using an association list of
-- characters and strings.
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing _ [] = ""
escapeStringUsing escapeTable (x:xs) =
  case (lookup x escapeTable) of
       Just str -> str ++ rest
       Nothing  -> x:rest
  where rest = escapeStringUsing escapeTable xs

-- | Strip trailing newlines from string.
stripTrailingNewlines :: String -> String
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse

-- | Remove leading and trailing space (including newlines) from string.
trim :: String -> String
trim = triml . trimr

-- | Remove leading space (including newlines) from string.
triml :: String -> String
triml = dropWhile (`elem` " \r\n\t")

-- | Remove trailing space (including newlines) from string.
trimr :: String -> String
trimr = reverse . triml . reverse

-- | Strip leading and trailing characters from string
stripFirstAndLast :: String -> String
stripFirstAndLast str =
  drop 1 $ take ((length str) - 1) str

-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated [] = ""
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
  a:'-':(toLower b):(camelCaseToHyphenated rest)
camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)

-- | Convert number < 4000 to uppercase roman numeral.
-- Returns \"?\" for numbers outside the range [0, 3999].
toRomanNumeral :: Int -> String
toRomanNumeral x =
  if x >= 4000 || x < 0
     then "?"
     else case x of
              _ | x >= 1000 -> "M"  ++ toRomanNumeral (x - 1000)
              _ | x >= 900  -> "CM" ++ toRomanNumeral (x - 900)
              _ | x >= 500  -> "D"  ++ toRomanNumeral (x - 500)
              _ | x >= 400  -> "CD" ++ toRomanNumeral (x - 400)
              _ | x >= 100  -> "C"  ++ toRomanNumeral (x - 100)
              _ | x >= 90   -> "XC" ++ toRomanNumeral (x - 90)
              _ | x >= 50   -> "L"  ++ toRomanNumeral (x - 50)
              _ | x >= 40   -> "XL" ++ toRomanNumeral (x - 40)
              _ | x >= 10   -> "X"  ++ toRomanNumeral (x - 10)
              -- was (x - 5), which produced "IXIV" for 9; must subtract 9
              _ | x >= 9    -> "IX" ++ toRomanNumeral (x - 9)
              _ | x >= 5    -> "V"  ++ toRomanNumeral (x - 5)
              _ | x >= 4    -> "IV" ++ toRomanNumeral (x - 4)
              _ | x >= 1    -> "I"  ++ toRomanNumeral (x - 1)
              _             -> ""

-- | Escape whitespace in URI.
escapeURI :: String -> String
escapeURI = escapeURIString (not . isSpace)

-- | Convert tabs to spaces and filter out DOS line endings.
-- Tabs will be preserved if tab stop is set to 0.
tabFilter :: Int       -- ^ Tab stop
          -> String    -- ^ Input
          -> String
tabFilter tabStop =
  let go _ [] = ""
      go _ ('\n':xs) = '\n' : go tabStop xs
      go _ ('\r':'\n':xs) = '\n' : go tabStop xs
      go _ ('\r':xs) = '\n' : go tabStop xs
      go spsToNextStop ('\t':xs) =
        if tabStop == 0
           then '\t' : go tabStop xs
           else replicate spsToNextStop ' ' ++ go tabStop xs
      go 1 (x:xs) =
        x : go tabStop xs
      go spsToNextStop (x:xs) =
        x : go (spsToNextStop - 1) xs
  in  go tabStop

--
-- Date/time
--

-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format.
normalizeDate :: String -> Maybe String
normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
  (msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day)
   where parsetimeWith = parseTime defaultTimeLocale
         formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", "%d %B %Y",
                    "%b. %d, %Y", "%B %d, %Y"]

--
-- Pandoc block and inline list processing
--

-- | Generate infinite lazy list of markers for an ordered list,
-- depending on list attributes.
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
orderedListMarkers (start, numstyle, numdelim) =
  let singleton c = [c]
      nums = case numstyle of
                     DefaultStyle -> map show [start..]
                     Example      -> map show [start..]
                     Decimal      -> map show [start..]
                     UpperAlpha   -> drop (start - 1) $ cycle $
                                     map singleton ['A'..'Z']
                     LowerAlpha   -> drop (start - 1) $ cycle $
                                     map singleton ['a'..'z']
                     UpperRoman   -> map toRomanNumeral [start..]
                     LowerRoman   -> map (map toLower . toRomanNumeral) [start..]
      inDelim str = case numdelim of
                           DefaultDelim -> str ++ "."
                           Period       -> str ++ "."
                           OneParen     -> str ++ ")"
                           TwoParens    -> "(" ++ str ++ ")"
  in  map inDelim nums

-- | Normalize a list of inline elements: remove leading and trailing
-- @Space@ elements, collapse double @Space@s into singles, and
-- remove empty Str elements.
normalizeSpaces :: [Inline] -> [Inline]
normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
  where cleanup []              = []
        cleanup (Space:rest)    = case dropWhile isSpaceOrEmpty rest of
                                       []     -> []
                                       (x:xs) -> Space : x : cleanup xs
        cleanup ((Str ""):rest) = cleanup rest
        cleanup (x:rest)        = x : cleanup rest

isSpaceOrEmpty :: Inline -> Bool
isSpaceOrEmpty Space = True
isSpaceOrEmpty (Str "") = True
isSpaceOrEmpty _ = False

-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
-- combining adjacent 'Str's and 'Emph's, remove 'Null's and
-- empty elements, etc.
normalize :: (Eq a, Data a) => a -> a
normalize = topDown removeEmptyBlocks .
            topDown consolidateInlines .
            bottomUp (removeEmptyInlines . removeTrailingInlineSpaces)

removeEmptyBlocks :: [Block] -> [Block]
removeEmptyBlocks (Null : xs) = removeEmptyBlocks xs
removeEmptyBlocks (BulletList [] : xs) = removeEmptyBlocks xs
removeEmptyBlocks (OrderedList _ [] : xs) = removeEmptyBlocks xs
removeEmptyBlocks (DefinitionList [] : xs) = removeEmptyBlocks xs
removeEmptyBlocks (RawBlock _ [] : xs) = removeEmptyBlocks xs
removeEmptyBlocks (x:xs) = x : removeEmptyBlocks xs
removeEmptyBlocks [] = []

removeEmptyInlines :: [Inline] -> [Inline]
removeEmptyInlines (Emph [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Strong [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Subscript [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Superscript [] : zs) = removeEmptyInlines zs
removeEmptyInlines (SmallCaps [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Strikeout [] : zs) = removeEmptyInlines zs
removeEmptyInlines (RawInline _ [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Code _ [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Str "" : zs) = removeEmptyInlines zs
removeEmptyInlines (x : xs) = x : removeEmptyInlines xs
removeEmptyInlines [] = []

removeTrailingInlineSpaces :: [Inline] -> [Inline]
removeTrailingInlineSpaces = reverse . removeLeadingInlineSpaces . reverse

removeLeadingInlineSpaces :: [Inline] -> [Inline]
removeLeadingInlineSpaces = dropWhile isSpaceOrEmpty

-- | Merge adjacent 'Str's, collapse runs of 'Space', and merge adjacent
-- identical formatting wrappers (Emph, Strong, etc.).
consolidateInlines :: [Inline] -> [Inline]
consolidateInlines (Str x : ys) =
  case concat (x : map fromStr strs) of
        "" -> consolidateInlines rest
        n  -> Str n : consolidateInlines rest
   where
     (strs, rest)  = span isStr ys
     isStr (Str _) = True
     isStr _       = False
     fromStr (Str z) = z
     fromStr _       = error "consolidateInlines - fromStr - not a Str"
consolidateInlines (Space : ys) = Space : rest
   where isSp Space = True
         isSp _     = False
         rest       = consolidateInlines $ dropWhile isSp ys
consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
  Emph (xs ++ ys) : zs
consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
  Strong (xs ++ ys) : zs
consolidateInlines (Subscript xs : Subscript ys : zs) = consolidateInlines $
  Subscript (xs ++ ys) : zs
consolidateInlines (Superscript xs : Superscript ys : zs) = consolidateInlines $
  Superscript (xs ++ ys) : zs
consolidateInlines (SmallCaps xs : SmallCaps ys : zs) = consolidateInlines $
  SmallCaps (xs ++ ys) : zs
consolidateInlines (Strikeout xs : Strikeout ys : zs) = consolidateInlines $
  Strikeout (xs ++ ys) : zs
consolidateInlines (RawInline f x : RawInline f' y : zs) | f == f' =
  consolidateInlines $ RawInline f (x ++ y) : zs
consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 =
  consolidateInlines $ Code a1 (x ++ y) : zs
consolidateInlines (x : xs) = x : consolidateInlines xs
consolidateInlines [] = []

-- | Convert pandoc structure to a string with formatting removed.
-- Footnotes are skipped (since we don't want their contents in link
-- labels).
stringify :: Walkable Inline a => a -> String
stringify = query go . walk deNote
  where go :: Inline -> [Char]
        go Space = " "
        go (Str x) = x
        go (Code _ x) = x
        go (Math _ x) = x
        go LineBreak = " "
        go _ = ""
        deNote (Note _) = Str ""
        deNote x = x

-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.
compactify :: [[Block]]  -- ^ List of list items (each a list of blocks)
           -> [[Block]]
compactify [] = []
compactify items =
  case (init items, last items) of
       (_,[])          -> items
       (others, final) ->
            case last final of
                 Para a -> case (filter isPara $ concat items) of
                                -- if this is only Para, change to Plain
                                [_] -> others ++ [init final ++ [Plain a]]
                                _   -> items
                 _      -> items

-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.  Like compactify, but operates on @Blocks@ rather
-- than @[Block]@.
compactify' :: [Blocks]  -- ^ List of list items (each a list of blocks)
            -> [Blocks]
compactify' [] = []
compactify' items =
  let (others, final) = (init items, last items)
  in  case reverse (B.toList final) of
           (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
                            -- if this is only Para, change to Plain
                            [_] -> others ++
                                   [B.fromList (reverse $ Plain a : xs)]
                            _   -> items
           _           -> items

isPara :: Block -> Bool
isPara (Para _) = True
isPara _        = False

-- | Data structure for defining hierarchical Pandoc documents
data Element = Blk Block
             | Sec Int [Int] Attr [Inline] [Element]
             --    lvl  num attributes label    contents
             deriving (Eq, Read, Show, Typeable, Data)

instance Walkable Inline Element where
  walk f (Blk x) = Blk (walk f x)
  walk f (Sec lev nums attr ils elts) =
    Sec lev nums attr (walk f ils) (walk f elts)
  walkM f (Blk x) = Blk `fmap` walkM f x
  walkM f (Sec lev nums attr ils elts) = do
    ils' <- walkM f ils
    elts' <- walkM f elts
    return $ Sec lev nums attr ils' elts'
  query f (Blk x) = query f x
  query f (Sec _ _ _ ils elts) = query f ils <> query f elts

instance Walkable Block Element where
  walk f (Blk x) = Blk (walk f x)
  walk f (Sec lev nums attr ils elts) =
    Sec lev nums attr (walk f ils) (walk f elts)
  walkM f (Blk x) = Blk `fmap` walkM f x
  walkM f (Sec lev nums attr ils elts) = do
    ils' <- walkM f ils
    elts' <- walkM f elts
    return $ Sec lev nums attr ils' elts'
  query f (Blk x) = query f x
  query f (Sec _ _ _ ils elts) = query f ils <> query f elts

-- | Convert Pandoc inline list to plain text identifier.  HTML
-- identifiers must start with a letter, and may contain only
-- letters, digits, and the characters _-.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier =
  dropWhile (not . isAlpha) . intercalate "-" . words .
    map (nbspToSp . toLower) .
    filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
    stringify
 where nbspToSp '\160' = ' '
       nbspToSp x      = x

-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []

-- | Worker for 'hierarchicalize'; the state holds the current section
-- number as a list of level numbers.
hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
hierarchicalizeWithIds [] = return []
hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
  lastnum <- S.get
  let lastnum' = take level lastnum
  let newnum = case length lastnum' of
                    x | "unnumbered" `elem` classes -> []
                      | x >= level -> init lastnum' ++ [last lastnum' + 1]
                      | otherwise -> lastnum ++
                           replicate (level - length lastnum - 1) 0 ++ [1]
  unless (null newnum) $ S.put newnum
  let (sectionContents, rest) = break (headerLtEq level) xs
  sectionContents' <- hierarchicalizeWithIds sectionContents
  rest' <- hierarchicalizeWithIds rest
  return $ Sec level newnum attr title' sectionContents' : rest'
hierarchicalizeWithIds (x:rest) = do
  rest' <- hierarchicalizeWithIds rest
  return $ (Blk x) : rest'

headerLtEq :: Int -> Block -> Bool
headerLtEq level (Header l _ _) = l <= level
headerLtEq _ _ = False

-- | Generate a unique identifier from a list of inlines.
-- Second argument is a list of already used identifiers.
uniqueIdent :: [Inline] -> [String] -> String
uniqueIdent title' usedIdents =
  let baseIdent = case inlineListToIdentifier title' of
                        ""   -> "section"
                        x    -> x
      numIdent n = baseIdent ++ "-" ++ show n
  in  if baseIdent `elem` usedIdents
        then case find (\x -> numIdent x `notElem` usedIdents)
                  ([1..60000] :: [Int]) of
                  Just x  -> numIdent x
                  Nothing -> baseIdent   -- if we have more than 60,000, allow repeats
        else baseIdent

-- | True if block is a Header block.
isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _ _) = True
isHeaderBlock _ = False

-- | Shift header levels up or down.
headerShift :: Int -> Pandoc -> Pandoc
headerShift n = walk shift
  where shift :: Block -> Block
        shift (Header level attr inner) = Header (level + n) attr inner
        shift x                         = x

-- | Detect if a list is tight.
isTightList :: [[Block]] -> Bool
isTightList = and . map firstIsPlain
  where firstIsPlain (Plain _ : _) = True
        firstIsPlain _             = False

-- | Set a field of a 'Meta' object.  If the field already has a value,
-- convert it into a list with the new value appended to the old value(s).
addMetaField :: ToMetaValue a
             => String
             -> a
             -> Meta
             -> Meta
addMetaField key val (Meta meta) =
  Meta $ M.insertWith combine key (toMetaValue val) meta
  where combine newval (MetaList xs) = MetaList (xs ++ [newval])
        combine newval x             = MetaList [x, newval]

-- | Create 'Meta' from old-style title, authors, date.  This is
-- provided to ease the transition from the old API.
makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
makeMeta title authors date =
      addMetaField "title" (B.fromList title)
    $ addMetaField "author" (map B.fromList authors)
    $ addMetaField "date" (B.fromList date)
    $ nullMeta

--
-- TagSoup HTML handling
--

-- | Render HTML tags.
renderTags' :: [Tag String] -> String
renderTags' = renderTagsOptions
               renderOptions{ optMinimize = \x ->
                                 let y = map toLower x
                                 in  y == "hr" || y == "br" ||
                                     y == "img" || y == "meta" ||
                                     y == "link"
                            , optRawTag = \x ->
                                 let y = map toLower x
                                 in  y == "script" || y == "style" }

--
-- File handling
--

-- | Perform an IO action in a directory, returning to starting directory.
-- The starting directory is restored even if the action throws an
-- exception (the original version leaked the directory change on error).
inDirectory :: FilePath -> IO a -> IO a
inDirectory path action = E.bracket
                             getCurrentDirectory
                             setCurrentDirectory
                             (const $ setCurrentDirectory path >> action)

-- | Read a data file from the embedded store or the Cabal data directory;
-- exits with code 97 if the file cannot be found.
readDefaultDataFile :: FilePath -> IO BS.ByteString
readDefaultDataFile fname =
#ifdef EMBED_DATA_FILES
  case lookup (makeCanonical fname) dataFiles of
    Nothing       -> err 97 $ "Could not find data file " ++ fname
    Just contents -> return contents
   where makeCanonical = joinPath . transformPathParts . splitDirectories
         transformPathParts = reverse . foldl go []
         go as     "."  = as
         go (_:as) ".." = as
         go as     x    = x : as
#else
  getDataFileName ("data" </> fname) >>= checkExistence >>= BS.readFile
    where checkExistence fn = do
            exists <- doesFileExist fn
            if exists
               then return fn
               else err 97 ("Could not find data file " ++ fname)
#endif

-- | Read file from specified user data directory or, if not found there, from
-- Cabal data directory.
readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
readDataFile Nothing fname = readDefaultDataFile fname
readDataFile (Just userDir) fname = do
  exists <- doesFileExist (userDir </> fname)
  if exists
     then BS.readFile (userDir </> fname)
     else readDefaultDataFile fname

-- | Same as 'readDataFile' but returns a String instead of a ByteString.
readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
readDataFileUTF8 userDir fname =
  UTF8.toString `fmap` readDataFile userDir fname

-- | Fetch an image or other item from the local filesystem or the net.
-- Returns raw content and maybe mime type.
fetchItem :: Maybe String -> String
          -> IO (Either E.SomeException (BS.ByteString, Maybe String))
fetchItem sourceURL s
  | isURI s   = openURL s
  | otherwise = case sourceURL of
                     Just u  -> openURL (u ++ "/" ++ s)
                     Nothing -> E.try readLocalFile
  where readLocalFile = do
          let mime = case takeExtension s of
                          ".gz" -> getMimeType $ dropExtension s
                          x     -> getMimeType x
          cont <- BS.readFile s
          return (cont, mime)

-- | Read from a URL and return raw data and maybe mime type.
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
openURL u
  | "data:" `isPrefixOf` u =
    let mime     = takeWhile (/=',') $ drop 5 u
        contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u
    in  return $ Right (contents, Just mime)
#ifdef HTTP_CONDUIT
  | otherwise = E.try $ do
      req <- parseUrl u
      resp <- withManager $ httpLbs req
      return (BS.concat $ toChunks $ responseBody resp,
              UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
#else
  | otherwise = E.try $ getBodyAndMimeType `fmap` browse
              (do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
                  setOutHandler $ const (return ())
                  setAllowRedirects True
                  request (getRequest' u'))
  where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
        getRequest' uriString = case parseURI uriString of
                                  Nothing -> error ("Not a valid URL: " ++
                                                       uriString)
                                  Just v  -> mkRequest GET v
        u' = escapeURIString (/= '|') u  -- pipes are rejected by Network.URI
#endif

--
-- Error reporting
--

-- | Print an error message to stderr and exit with the given code.
err :: Int -> String -> IO a
err exitCode msg = do
  name <- getProgName
  UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
  exitWith $ ExitFailure exitCode
  return undefined

-- | Print a warning to stderr, prefixed with the program name.
warn :: String -> IO ()
warn msg = do
  name <- getProgName
  UTF8.hPutStrLn stderr $ name ++ ": " ++ msg

--
-- Safe read
--

-- | Like 'read', but fails in the monad (instead of erroring) when the
-- string does not parse completely (trailing whitespace is allowed).
safeRead :: (Monad m, Read a) => String -> m a
safeRead s = case reads s of
                  (d,x):_
                    | all isSpace x -> return d
                  _                 -> fail $ "Could not read `" ++ s ++ "'"