{-# LANGUAGE OverloadedStrings #-}
{-
Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}

{- |
   Module      : Text.Pandoc.SelfContained
   Copyright   : Copyright (C) 2011 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Functions for converting an HTML file into one that can be viewed
offline, by incorporating linked images, CSS, and scripts into
the HTML using data URIs.
-}
module Text.Pandoc.SelfContained ( makeSelfContained ) where
import Text.HTML.TagSoup
import Network.URI (isAbsoluteURI, parseURI, escapeURIString)
import Network.HTTP
import Data.ByteString.Base64
import qualified Data.ByteString.Char8 as B
import Data.ByteString (ByteString)
import Data.ByteString.UTF8 (toString, fromString)
import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum, isSpace)
import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
import Text.Pandoc.Shared (findDataFile)
import Text.Pandoc.MIME (getMimeType)
import System.Directory (doesFileExist)

-- | Fetch the raw bytes of a resource together with a MIME type guess.
--
-- Absolute URIs are retrieved with 'openURL'.  Anything else is treated
-- as a file path: it is read directly if it exists, otherwise the path
-- returned by 'findDataFile' is tried.  For local files the MIME type is
-- derived from the extension; for a @.gz@ file (any letter case) the
-- extension underneath the @.gz@ is used instead.
--
-- Calls 'error' when the resource cannot be found in either location.
getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String)
getItem userdata f
  | isAbsoluteURI f = openURL f
  | otherwise = do
      let ext = takeExtension f
          -- Lower-case only for the comparison, so that ".GZ" is treated
          -- the same way 'getRaw' treats it when deciding to decompress.
          mime | map toLower ext == ".gz" = getMimeType $ dropExtension f
               | otherwise                = getMimeType ext
          readAs path = do
            cont <- B.readFile path
            return (cont, mime)
      exists <- doesFileExist f
      if exists
         then readAs f
         else do
           res <- findDataFile userdata f
           exists' <- doesFileExist res
           if exists'
              then readAs res
              else error $ "Could not find `" ++ f ++ "'"

-- | Retrieve an absolute URI with a GET request, returning the response
-- body and the value of its @Content-Type@ header, if there is one.
--
-- Calls 'error' if the string cannot be parsed as a URI, and 'fail's
-- with the shown connection error if the request itself fails.
openURL :: String -> IO (ByteString, Maybe String)
openURL u = getBodyAndMimeType =<< simpleHTTP (getReq u)
  where getReq v = case parseURI v of
                     Nothing -> error $ "Could not parse URI: " ++ v
                     Just u' -> mkRequest GET u'
        getBodyAndMimeType (Left err) = fail (show err)
        getBodyAndMimeType (Right r)  =
          return (rspBody r, findHeader HdrContentType r)

-- | Characters left unescaped when a resource is percent-encoded into a
-- @data:@ URI: ASCII letters and digits only.
isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c

-- | Build a base64-encoded @data:@ URI from a MIME type and raw bytes.
base64DataURI :: String -> ByteString -> String
base64DataURI mime raw = "data:" ++ mime ++ ";base64," ++ toString (encode raw)

-- | Build a percent-escaped @data:@ URI from a MIME type and raw bytes.
escapedDataURI :: String -> ByteString -> String
escapedDataURI mime raw =
  "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)

-- | Replace the given attribute of an opening tag with a @data:@ URI
-- holding the content the attribute referred to.  The tag's @type@
-- attribute is passed to 'getRaw' as the fallback MIME type.  Tags that
-- lack the attribute (or have it empty) are returned unchanged.
inlineAttr :: Maybe FilePath
           -> (String -> ByteString -> String)  -- ^ data URI builder
           -> String                            -- ^ attribute to rewrite
           -> Tag String
           -> IO (Tag String)
inlineAttr userdata mkURI attr t@(TagOpen name attrs) =
  case fromAttrib attr t of
    []  -> return t
    src -> do
      (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
      -- The rewritten attribute goes first; every other attribute keeps
      -- its original relative order.
      return $ TagOpen name
                 ((attr, mkURI mime raw) : [(k, v) | (k, v) <- attrs, k /= attr])
inlineAttr _ _ _ t = return t

-- | Inline the external resource referenced by a single tag.
--
-- @img@ and @video@ have @src@ replaced by a base64 data URI; @script@
-- (@src@) and @link@ (@href@) get a percent-escaped data URI.  Every
-- other tag is passed through untouched.
convertTag :: Maybe FilePath -> Tag String -> IO (Tag String)
convertTag userdata t@(TagOpen name _)
  | name == "img" || name == "video" = inlineAttr userdata base64DataURI "src" t
  | name == "script"                 = inlineAttr userdata escapedDataURI "src" t
  | name == "link"                   = inlineAttr userdata escapedDataURI "href" t
convertTag _ t = return t

-- | Extract the target of a CSS @url(...)@ argument: surrounding
-- whitespace is dropped and, if the argument starts with a single or
-- double quote, the text up to the matching quote is returned.
unquoteCssUrl :: ByteString -> ByteString
unquoteCssUrl u =
  case B.uncons trimmed of
    Just (q, inner) | q == '"' || q == '\'' -> B.takeWhile (/= q) inner
    _                                       -> trimmed
  where trimmed = fst $ B.spanEnd isSpace $ B.dropWhile isSpace u

-- | Rewrite every @url(...)@ reference in a stylesheet as a base64
-- @data:@ URI.
--
-- Relative references are resolved against @d@ (the directory of the
-- stylesheet); absolute URIs are fetched as they are.  References that
-- are already @data:@ URIs, or that are bare @#fragment@s, cannot be
-- fetched and are copied through unchanged.
cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString
cssURLs userdata d orig =
  case B.breakSubstring "url(" orig of
    (x, y)
      | B.null y  -> return orig
      | otherwise -> do
          -- u is the text between "url(" and the next ")"; v starts at
          -- that ")" (or is empty when there is none).
          let (u, v) = B.breakSubstring ")" $ B.drop 4 y
              target = unquoteCssUrl u
          if "data:" `B.isPrefixOf` target || "#" `B.isPrefixOf` target
             then do
               rest <- cssURLs userdata d v
               return $ B.concat [x, "url(", u, rest]
             else do
               let url  = toString target
                   path = if isAbsoluteURI url then url else d </> url
               (raw, mime) <- getRaw userdata "" path
               rest <- cssURLs userdata d v
               return $ B.concat
                 [x, "url(", "data:", fromString mime, ";base64,", encode raw, rest]

-- | True when a MIME type string denotes CSS, ignoring letter case and
-- any parameters such as @; charset=utf-8@.
isCssMime :: String -> Bool
isCssMime mime = map toLower mediaType == "text/css"
  where mediaType = takeWhile (\c -> c /= ';' && not (isSpace c))
                  $ dropWhile isSpace mime

-- | Fetch a resource and work out its MIME type.
--
-- Content whose name ends in @.gz@ (any letter case) is gunzipped.  The
-- MIME type reported by 'getItem' wins; @mimetype@ is used only when
-- 'getItem' reports none, and 'error' is called when both are missing.
-- CSS content has its own @url(...)@ references inlined via 'cssURLs'.
getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String)
getRaw userdata mimetype src = do
  let ext = map toLower $ takeExtension src
  (raw, respMime) <- getItem userdata src
  let raw' = if ext == ".gz"
                then B.concat $ L.toChunks $ Gzip.decompress
                       $ L.fromChunks [raw]
                else raw
  let mime = case (mimetype, respMime) of
               ("", Nothing) -> error
                 $ "Could not determine mime type for `" ++ src ++ "'"
               (x, Nothing)  -> x
               (_, Just x)   -> x
  result <- if isCssMime mime
               then cssURLs userdata (takeDirectory src) raw'
               else return raw'
  return (result, mime)

-- | Convert HTML into self-contained HTML, incorporating images,
-- scripts, and CSS using data: URIs.  Items specified using absolute
-- URLs will be downloaded; those specified using relative URLs will
-- be sought first relative to the working directory, then relative
-- to the user data directory (if the first parameter is 'Just'
-- a directory), and finally relative to pandoc's default data
-- directory.
makeSelfContained :: Maybe FilePath -> String -> IO String
makeSelfContained userdata inp = do
  let tags = parseTags inp
  out' <- mapM (convertTag userdata) tags
  return $ renderTagsOptions renderOptions{ optMinimize = (\t -> t == "br"
                                           || t == "img" || t == "meta"
                                           || t == "link" ) } out'