{-
Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}{- |
Module : Text.Pandoc.Writers.EPUB
Copyright : Copyright (C) 2010 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of 'Pandoc' documents to EPUB.
-}
module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
import Data.IORef
import Data.Maybe ( fromMaybe, isNothing )
import Data.List ( isInfixOf, intercalate )
import System.Environment ( getEnv )
import Text.Printf (printf)
import System.FilePath ( (</>), takeBaseName, takeExtension, takeFileName )
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8
import Text.Pandoc.UTF8 ( fromStringLazy, toString )
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
import Data.Time
import System.Locale
import Text.Pandoc.Shared hiding ( Element )
import qualified Text.Pandoc.Shared as Shared
import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Control.Monad.State
import Text.XML.Light hiding (ppTopElement)
import Text.Pandoc.UUID
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.Markdown ( writePlain )
import Data.Char ( toLower )
import Network.URI ( unEscapeString )
import Text.Pandoc.MIME (getMimeType)
import Prelude hiding (catch)
import Control.Exception (catch, SomeException)
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)

-- | Produce an EPUB file from a Pandoc document.
--
-- The result is the serialized zip archive.  It contains, in this order of
-- the entry list built at the end of the function: @mimetype@,
-- @META-INF/container.xml@, the iBooks display-options file,
-- @stylesheet.css@, @title_page.xhtml@, @content.opf@, @toc.ncx@, then
-- (EPUB3 only) @nav.xhtml@, the pictures, the optional cover image and cover
-- page, one XHTML file per chapter, and the embedded fonts.
--
-- Calls 'error' if a section identifier produced by 'hierarchicalize' is
-- missing from the reference table built by 'correlateRefs'.
writeEPUB :: WriterOptions  -- ^ Writer options
          -> Pandoc         -- ^ Document to convert
          -> IO B.ByteString
writeEPUB opts doc@(Pandoc meta _) = do
  -- EPUB2 is the default when no version was requested.
  let version = fromMaybe EPUB2 (writerEpubVersion opts)
  let epub3 = version == EPUB3
  epochtime <- floor `fmap` getPOSIXTime
  -- Every generated entry carries the same modification time.
  let mkEntry path content = toEntry path epochtime content
  let vars = ("epub3", if epub3 then "true" else "false")
           : ("css", "stylesheet.css")
           : writerVariables opts
  let opts' = opts{ writerEmailObfuscation = NoObfuscation
                  , writerStandalone = True
                  , writerSectionDivs = True
                  , writerHtml5 = epub3
                  , writerTableOfContents = False -- we always have one in epub
                  , writerVariables = vars
                  , writerHTMLMathMethod =
                       if epub3
                          then MathML Nothing
                          else writerHTMLMathMethod opts
                  , writerWrapText = False }
  let sourceDir = writerSourceDirectory opts'
  let mbCoverImage = lookup "epub-cover-image" vars

  -- cover page (both lists are empty when no cover image was given)
  (cpgEntry, cpicEntry) <-
                case mbCoverImage of
                     Nothing   -> return ([],[])
                     Just img  -> do
                       let coverImage = "cover-image" ++ takeExtension img
                       let cpContent = renderHtml $ writeHtml opts'
                                        (Pandoc meta [RawBlock "html" $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
                       imgContent <- B.readFile img
                       return ( [mkEntry "cover.xhtml" cpContent]
                              , [mkEntry coverImage imgContent] )

  -- title page
  let tpContent = renderHtml $ writeHtml opts'{
                      writerVariables = ("titlepage","true"):vars }
                      (Pandoc meta [])
  let tpEntry = mkEntry "title_page.xhtml" tpContent

  -- handle pictures: transformInlines rewrites image sources and records
  -- each (old path, new path) pair in picsRef
  picsRef <- newIORef []
  Pandoc _ blocks <- bottomUpM
       (transformInlines (writerHTMLMathMethod opts') sourceDir picsRef) doc
  pics <- readIORef picsRef
  let readPicEntry (oldsrc, newsrc) = do
        (img,_) <- fetchItem sourceDir oldsrc
        return $ toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img
  picEntries <- mapM readPicEntry pics

  -- handle fonts
  let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
  fontEntries <- mapM mkFontEntry $ writerEpubFonts opts'

  -- body pages

  -- add level 1 header to beginning if none there
  let blocks' = addIdentifiers
                $ case blocks of
                      (Header 1 _ _ : _) -> blocks
                      _                  -> Header 1 ("",[],[]) (docTitle meta) : blocks

  let chapterHeaderLevel = writerEpubChapterLevel opts
  -- internal reference IDs change when we chunk the file,
  -- so that '#my-header-1' might turn into 'ch004.xhtml#my-header'.
  -- the next two lines fix that:
  let reftable = correlateRefs chapterHeaderLevel blocks'
  let blocks'' = replaceRefs reftable blocks'

  let isChapterHeader (Header n _ _) = n <= chapterHeaderLevel
      isChapterHeader _ = False

  -- Split the block list so that every chunk after the first begins with a
  -- chapter header.
  let toChunks :: [Block] -> [[Block]]
      toChunks [] = []
      toChunks (b:bs) = (b:xs) : toChunks ys
        where (xs,ys) = break isChapterHeader bs

  let chunks = toChunks blocks''

  -- A chunk's leading header text becomes the title of its XHTML page.
  let chapToEntry :: Int -> [Block] -> Entry
      chapToEntry num bs = mkEntry (showChapter num)
        $ renderHtml
        $ writeHtml opts'
        $ case bs of
              (Header _ _ xs : _) -> Pandoc (Meta xs [] []) bs
              _                   -> Pandoc (Meta [] [] []) bs

  let chapterEntries = zipWith chapToEntry [1..] chunks

  -- incredibly inefficient (TODO):
  let containsMathML ent = "<math" `isInfixOf` (B8.unpack $ fromEntry ent)

  -- contents.opf
  -- Language falls back to $LANG (e.g. "en_US.UTF-8" becomes "en-US"), and
  -- to "en-US" when reading the environment variable fails.
  localeLang <- catch (liftM (map (\c -> if c == '_' then '-' else c) .
                takeWhile (/='.')) $ getEnv "LANG")
        (\e -> let _ = (e :: SomeException) in return "en-US")
  let lang = fromMaybe localeLang $ lookup "lang" (writerVariables opts')
  uuid <- getRandomUUID
  let chapterNode ent = unode "item" !
                           ([("id", takeBaseName $ eRelativePath ent),
                             ("href", eRelativePath ent),
                             ("media-type", "application/xhtml+xml")]
                            ++ [("properties","mathml") | epub3 &&
                                                           containsMathML ent])
                        $ ()
  let chapterRefNode ent = unode "itemref" !
                             [("idref", takeBaseName $ eRelativePath ent)] $ ()
  let pictureNode ent = unode "item" !
                           [("id", takeBaseName $ eRelativePath ent),
                            ("href", eRelativePath ent),
                            ("media-type", fromMaybe "application/octet-stream"
                               $ imageTypeOf $ eRelativePath ent)] $ ()
  -- An empty media-type is not a valid manifest value, so unknown font
  -- types get the same generic fallback that pictureNode uses.
  let fontNode ent = unode "item" !
                           [("id", takeBaseName $ eRelativePath ent),
                            ("href", eRelativePath ent),
                            ("media-type", fromMaybe "application/octet-stream"
                               $ getMimeType $ eRelativePath ent)] $ ()
  let plainify t = trimr $
                    writePlain opts'{ writerStandalone = False } $
                    Pandoc meta [Plain t]
  let plainTitle = plainify $ docTitle meta
  let plainAuthors = map plainify $ docAuthors meta
  currentTime <- getCurrentTime
  -- Use the current time when the document date cannot be normalized.
  let plainDate = fromMaybe (showDateTimeISO8601 currentTime)
                   $ normalizeDate $ stringify $ docDate meta
  -- Spine reference to the cover page; bound separately so that the
  -- concatenation with the chapter references below is explicit.
  let coverRefNodes = case mbCoverImage of
                           Nothing -> []
                           Just _  -> [ unode "itemref" !
                                          [("idref", "cover"),("linear","no")] $ () ]
  let contentsData = fromStringLazy $ ppTopElement $
        unode "package" ! [("version", case version of
                                             EPUB2 -> "2.0"
                                             EPUB3 -> "3.0")
                          ,("xmlns","http://www.idpf.org/2007/opf")
                          ,("unique-identifier","BookId")] $
          [ metadataElement version (writerEpubMetadata opts') uuid lang plainTitle
               plainAuthors plainDate currentTime mbCoverImage
          , unode "manifest" $
             [ unode "item" ! [("id","ncx"), ("href","toc.ncx")
                              ,("media-type","application/x-dtbncx+xml")] $ ()
             , unode "item" ! [("id","style"), ("href","stylesheet.css")
                              ,("media-type","text/css")] $ ()
             ] ++
             [ unode "item" ! [("id","nav"), ("href","nav.xhtml")
                              ,("properties","nav")
                              ,("media-type","application/xhtml+xml")] $ ()
                              | version == EPUB3 ] ++
             map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
             map pictureNode (cpicEntry ++ picEntries) ++
             map fontNode fontEntries
          , unode "spine" ! [("toc","ncx")] $
              coverRefNodes ++ map chapterRefNode (tpEntry : chapterEntries)
          ]
  let contentsEntry = mkEntry "content.opf" contentsData

  -- toc.ncx
  let secs = hierarchicalize blocks''

  let tocLevel = writerTOCDepth opts

  -- Build one navigation node per section.  The Int state is the running
  -- node number handed to the formatter; the formatter decides the markup
  -- (NCX navPoint or XHTML list item).
  let navPointNode :: (Int -> String -> String -> [Element] -> Element)
                   -> Shared.Element -> State Int Element
      navPointNode formatter (Sec _ nums ident ils children) = do
        n <- get
        modify (+1)
        let showNums :: [Int] -> String
            showNums = intercalate "." . map show
        let tit' = plainify ils
        let tit = if writerNumberSections opts
                     then showNums nums ++ " " ++ tit'
                     else tit'
        let src = case lookup ident reftable of
                       Just x  -> x
                       Nothing -> error (ident ++ " not found in reftable")
        let isSec (Sec lev _ _ _ _) = lev <= tocLevel
            isSec _                 = False
        let subsecs = filter isSec children
        subs <- mapM (navPointNode formatter) subsecs
        return $ formatter n tit src subs
      navPointNode _ (Blk _) = error "navPointNode encountered Blk"

  let navMapFormatter :: Int -> String -> String -> [Element] -> Element
      navMapFormatter n tit src subs = unode "navPoint" !
               [("id", "navPoint-" ++ show n)
               ,("playOrder", show n)] $
                  [ unode "navLabel" $ unode "text" tit
                  , unode "content" ! [("src", src)] $ ()
                  ] ++ subs

  let tpNode = unode "navPoint" !  [("id", "navPoint-0")] $
                  [ unode "navLabel" $ unode "text" (plainify $ docTitle meta)
                  , unode "content" ! [("src","title_page.xhtml")] $ () ]

  -- Extra NCX head entry pointing at the cover image, when there is one.
  let coverMetaNodes = case mbCoverImage of
                            Nothing -> []
                            Just _  -> [ unode "meta" ! [("name","cover"),
                                             ("content","cover-image")] $ () ]

  let tocData = fromStringLazy $ ppTopElement $
        unode "ncx" ! [("version","2005-1")
                       ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
          [ unode "head" $
             [ unode "meta" ! [("name","dtb:uid")
                              ,("content", show uuid)] $ ()
             , unode "meta" ! [("name","dtb:depth")
                              ,("content", "1")] $ ()
             , unode "meta" ! [("name","dtb:totalPageCount")
                              ,("content", "0")] $ ()
             , unode "meta" ! [("name","dtb:maxPageNumber")
                              ,("content", "0")] $ ()
             ] ++ coverMetaNodes
          , unode "docTitle" $ unode "text" $ plainTitle
          , unode "navMap" $
              tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1
          ]
  let tocEntry = mkEntry "toc.ncx" tocData

  let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element
      navXhtmlFormatter n tit src subs = unode "li" !
                                       [("id", "toc-li-" ++ show n)] $
                                            (unode "a" ! [("href",src)]
                                             $ (unode "span" tit))
                                            : case subs of
                                                 []    -> []
                                                 (_:_) -> [unode "ol" subs]

  let navData = fromStringLazy $ ppTopElement $
        unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml")
                       ,("xmlns:epub","http://www.idpf.org/2007/ops")] $
          [ unode "head" $ unode "title" plainTitle
          , unode "body" $
              unode "nav" ! [("epub:type","toc")] $
                [ unode "h1" plainTitle
                , unode "ol" $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]
          ]
  let navEntry = mkEntry "nav.xhtml" navData

  -- mimetype
  let mimetypeEntry = mkEntry "mimetype" $ fromStringLazy "application/epub+zip"

  -- container.xml
  let containerData = fromStringLazy $ ppTopElement $
       unode "container" ! [("version","1.0")
              ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
         unode "rootfiles" $
           unode "rootfile" ! [("full-path","content.opf")
               ,("media-type","application/oebps-package+xml")] $ ()
  let containerEntry = mkEntry "META-INF/container.xml" containerData

  -- com.apple.ibooks.display-options.xml
  let apple = fromStringLazy $ ppTopElement $
        unode "display_options" $
          unode "platform" ! [("name","*")] $
            unode "option" ! [("name","specified-fonts")] $ "true"
  let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple

  -- stylesheet: user-supplied one if given, otherwise the default epub.css
  stylesheet <- case writerEpubStylesheet opts of
                   Just s  -> return s
                   Nothing -> toString `fmap`
                              readDataFile (writerUserDataDir opts) "epub.css"
  let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet

  -- construct archive
  -- NOTE(review): the EPUB container format expects @mimetype@ to be the
  -- first archive member; keep it at the head of this list and do not
  -- reorder without confirming how addEntryToArchive orders entries.
  let archive = foldr addEntryToArchive emptyArchive
                 (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry :
                  contentsEntry : tocEntry :
                  ([navEntry | version == EPUB3] ++ picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries) )
  return $ fromArchive archive

-- | Build the OPF @metadata@ element.
--
-- The user-supplied XML fragment is parsed and filtered down to @dc:@
-- elements with a Dublin Core name plus unprefixed @link@ and @meta@
-- elements.  Title, language, identifier and date are then appended only
-- when the user did not already provide the corresponding @dc:@ element;
-- one @dc:creator@ per author is always appended.  EPUB3 additionally gets
-- a @dcterms:modified@ meta, and a cover meta is added when a cover image
-- is present (only the presence of the 'Maybe' is inspected).
metadataElement :: EPUBVersion -> String -> UUID -> String -> String -> [String]
                -> String -> UTCTime -> Maybe a -> Element
metadataElement version metadataXML uuid lang title authors date currentTime mbCoverImage =
  let userNodes = parseXML metadataXML
      elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
                               ,("xmlns:opf","http://www.idpf.org/2007/opf")] $
            filter isMetadataElement $ onlyElems userNodes
      dublinElements = ["contributor","coverage","creator","date",
                        "description","format","identifier","language","publisher",
                        "relation","rights","source","subject","title","type"]
      isMetadataElement e = (qPrefix (elName e) == Just "dc" &&
                             qName (elName e) `elem` dublinElements) ||
                            (qPrefix (elName e) == Nothing &&
                             qName (elName e) `elem` ["link","meta"])
      contains e n = not (null (findElements (QName n Nothing (Just "dc")) e))
      newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++
           [ unode "dc:language" lang | not (elt `contains` "language") ] ++
           [ unode "dc:identifier" ! [("id","BookId")] $ show uuid |
               not (elt `contains` "identifier") ] ++
           [ unode "dc:creator" ! [("opf:role","aut") | version == EPUB2]
                     $ a | a <- authors ] ++
           [ unode "dc:date" date | not (elt `contains` "date") ] ++
           [ unode "meta" ! [("property", "dcterms:modified")] $
               (showDateTimeISO8601 currentTime) | version == EPUB3] ++
           [ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () |
               not (isNothing mbCoverImage) ]
  in  elt{ elContent = elContent elt ++ map Elem newNodes }

-- | Format a time with the pattern @%FT%TZ@ in the default time locale.
showDateTimeISO8601 :: UTCTime -> String
showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"

-- | Rewrite the head of an inline list for inclusion in the EPUB.
--
-- * An image whose extension 'imageTypeOf' does not recognise is replaced
--   by its label wrapped in 'Emph'.
--
-- * Any other image gets a new source @images/img\<N\>\<ext\>@, where @N@ is
--   the number of pictures recorded so far; the (old, new) pair is pushed
--   onto the 'IORef'.  An image already recorded reuses its earlier name.
--
-- * When the math method is 'MathML', a 'Math' inline is replaced by raw
--   HTML rendered with MathML.
--
-- * Every other list is returned unchanged.
transformInlines :: HTMLMathMethod
                 -> FilePath
                 -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
                 -> [Inline]
                 -> IO [Inline]
transformInlines _ _ _ (Image lab (src,_) : xs)
  | isNothing (imageTypeOf src) = return $ Emph lab : xs
transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do
  let src' = unEscapeString src
  pics <- readIORef picsRef
  let oldsrc = sourceDir </> src'
  let ext = takeExtension src'
  newsrc <- case lookup oldsrc pics of
                  Just n  -> return n
                  Nothing -> do
                        let new = "images/img" ++ show (length pics) ++ ext
                        modifyIORef picsRef ( (oldsrc, new): )
                        return new
  return $ Image lab (newsrc, tit) : xs
transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do
  -- note:  ideally we'd use a switch statement to provide a fallback
  -- but switch does not seem to be widely implemented yet, so we just
  -- provide the mathml
  let writeHtmlInline opts z = trimr $
         writeHtmlString opts $ Pandoc (Meta [] [] []) [Plain [z]]
      result = writeHtmlInline def{writerHTMLMathMethod = MathML Nothing } x
  return $ RawInline "html" result : xs
transformInlines _ _ _ xs = return xs

-- | Attach unqualified attributes to the element produced by a node
-- constructor, e.g. @unode \"item\" ! [(\"id\",\"x\")] $ ()@.
(!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element
(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)

-- | Version of 'ppTopElement' that specifies UTF-8 encoding.
ppTopElement :: Element -> String
ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
  -- unEntity removes numeric entities introduced by ppElement
  -- (kindlegen seems to choke on these).
  -- The digits between "&#" and ";" are read back as a Haskell character
  -- literal; when that fails the "&#" is kept and scanning continues.
  where unEntity [] = ""
        unEntity ('&':'#':xs) =
                   let (ds,ys) = break (==';') xs
                       rest = drop 1 ys
                   in  case safeRead ('\'':'\\':ds ++ "'") of
                          Just x   -> x : unEntity rest
                          Nothing  -> '&':'#':unEntity xs
        unEntity (x:xs) = x : unEntity xs

-- | MIME type for an image path, chosen from its lower-cased extension;
-- 'Nothing' for any extension not listed here.
imageTypeOf :: FilePath -> Maybe String
imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
                     "jpg"   -> Just "image/jpeg"
                     "jpeg"  -> Just "image/jpeg"
                     "jfif"  -> Just "image/jpeg"
                     "png"   -> Just "image/png"
                     "gif"   -> Just "image/gif"
                     "svg"   -> Just "image/svg+xml"
                     _       -> Nothing

-- | State threaded through 'correlateRefs': the number of the chapter
-- currently being scanned and the (identifier, new URL) pairs found so far.
data IdentState = IdentState{
       chapterNumber :: Int,
       identTable    :: [(String,String)]
       } deriving (Read, Show)

-- Returns filename for chapter number.
-- The number is zero-padded to three digits, e.g. 4 gives "ch004.xhtml".
showChapter :: Int -> String
showChapter = printf "ch%03d.xhtml"

-- Add identifiers to any headers without
-- them.
--
-- The state is the list of identifiers handed out so far.  A header that
-- already has an identifier keeps it; an empty one is replaced by the
-- result of 'uniqueIdent' (presumably an identifier not yet in that list;
-- confirm against its definition).  Non-header blocks pass through.
addIdentifiers :: [Block] -> [Block]
addIdentifiers blocks = flip evalState [] $ forM blocks assign
  where
    assign (Header level (ident, classes, kvs) ils) = do
      seen <- get
      let ident' | null ident = uniqueIdent ils seen
                 | otherwise  = ident
      put (ident' : seen)
      return (Header level (ident', classes, kvs) ils)
    assign other = return other

-- Go through a block list and construct a table
-- correlating the automatically constructed references
-- that would be used in a normal pandoc document with
-- new URLs to be used in the EPUB.  For example, what
-- was "header-1" might turn into "ch006.xhtml#header".
-- Entries are prepended, so the table is in reverse document order.
correlateRefs :: Int -> [Block] -> [(String,String)]
correlateRefs chapterHeaderLevel blocks =
  identTable (execState (forM_ blocks visit) start)
  where
    start = IdentState{ chapterNumber = 0, identTable = [] }

    visit :: Block -> State IdentState ()
    visit (Header level (ident, _, _) _) = do
      let startsChapter = level <= chapterHeaderLevel
      -- a chapter-level header opens a new chapter file
      when startsChapter $
        modify $ \s -> s{ chapterNumber = chapterNumber s + 1 }
      chapNum <- gets chapterNumber
      -- chapter headers map to the file itself, deeper ones to an anchor
      let target | startsChapter = showChapter chapNum ++ ""
                 | otherwise     = showChapter chapNum ++ '#' : ident
      modify $ \s -> s{ identTable = (ident, target) : identTable s }
    visit _ = return ()

-- Replace internal link references using the table produced
-- by correlateRefs.  Links whose anchor is not in the table, and links
-- that do not start with '#', are left untouched.
replaceRefs :: [(String,String)] -> [Block] -> [Block]
replaceRefs refTable = bottomUp rewrite
  where
    rewrite link@(Link label ('#':anchor, title)) =
      maybe link (\url -> Link label (url, title)) (lookup anchor refTable)
    rewrite other = other