{- |
Module : Data.GraphViz.PreProcessing
Description : Pre-process imported Dot code.
Copyright : (c) Ivan Lazar Miljenovic
License : 3-Clause BSD-style
Maintainer : Ivan.Miljenovic@gmail.com
\"Real life\" Dot code contains various items that are not directly
parseable by this library. This module defines the 'preProcess'
function to remove these components, which include:

* Comments (both @\/* ... *\/@ style and @\/\/ ... @ style);

* Pre-processor lines (lines starting with a @#@);

* Split lines (a @\\@ at the end of a line means that the \"line\"
  is continued on the next line);

* 'Text's concatenated together using @\"...\" + \"...\"@; these
  are joined into one big 'Text'.
-}
module Data.GraphViz.PreProcessing(preProcess) where

import Data.GraphViz.Parsing
import Data.GraphViz.Exception(GraphvizException(NotDotCode), throw)

import qualified Data.Text.Lazy as T
import Data.Text.Lazy(Text)
import qualified Data.Text.Lazy.Builder as B
import Data.Text.Lazy.Builder(Builder)
import Data.Monoid(Monoid(..), mconcat)
import Control.Monad(liftM)

-- -----------------------------------------------------------------------------
-- Filtering out unwanted Dot items such as comments

-- | Remove unparseable features of Dot, such as comments and
--   multi-line strings (which are converted to single-line strings).
--
--   Throws a 'NotDotCode' exception (from pure code) if the
--   pre-processing parser reports a failure.
preProcess   :: Text -> Text
preProcess t = case fst $ runParser parseOutUnwanted t of
                 Right r -> B.toLazyText r
                 Left l  -> throw (NotDotCode l)
               -- snd should be null

-- | Parse out comments and make quoted strings spread over multiple
--   lines only over a single line.  Should parse the /entire/ input
--   'Text'.
--
--   The input is processed as a sequence of chunks; each chunk is the
--   first of the following that succeeds:
--
--   1. a run of characters that cannot start anything special;
--
--   2. one or more quoted strings joined by @+@;
--
--   3. an angle-bracketed (HTML-like) section;
--
--   4. an unwanted item (which produces no output);
--
--   5. any single character, passed through unchanged.
parseOutUnwanted :: Parse Builder
parseOutUnwanted = liftM mconcat (many getNext)
  where
    getNext = parseOK
              `onFail`
              parseConcatStrings
              `onFail`
              parseHTML
              `onFail`
              parseUnwanted
              `onFail`
              liftM B.singleton next

    -- Fast path: everything up to the next character that might begin
    -- a newline, a split line, a comment, a string or an HTML section.
    parseOK = liftM B.fromLazyText
              $ many1Satisfy (`notElem` ['\n', '\r', '\\', '/', '"', '<'])

-- | Parses an unwanted part of the Dot code (comments and
--   pre-processor lines; also un-splits lines).  Always produces
--   'mempty'.
parseUnwanted :: (Monoid m) => Parse m
parseUnwanted = oneOf [ parseLineComment
                      , parseMultiLineComment
                      , parsePreProcessor
                      , parseSplitLine
                      ]

-- | Remove pre-processor lines (that is, those that start with a
--   @#@).  Will consume the newline that precedes the pre-processor
--   line (i.e. the one ending the previous line), but will leave the
--   one at the end of the pre-processor line there (so in the end it
--   just removes the line).
--
--   Note that because a preceding newline is required, a @#@ line at
--   the very start of the input is not matched by this parser.
parsePreProcessor :: (Monoid m) => Parse m
parsePreProcessor = newline >> character '#' >> consumeLine >> return mempty

-- | Parse @//@-style comments.
parseLineComment :: (Monoid m) => Parse m
parseLineComment = string "//"
                   -- Note: do /not/ consume the newlines, as they're
                   -- needed in case the next line is a pre-processor
                   -- line.
                   >> consumeLine
                   >> return mempty

-- | Parse @/* ... */@-style comments.
--
--   The body of the comment is consumed as a sequence of pieces, each
--   being either a run of non-@*@ characters, or a run of @*@
--   characters followed by a single character that is not @/@.  The
--   terminator is a run of one or more @*@ characters followed by
--   @/@, so comments ending in @**/@ (or of the form @/***/@) are
--   closed at the right place, and a lone @*@ shortly before the
--   terminator (as in @/* a *b*/@) no longer prevents the comment
--   from being recognised.
parseMultiLineComment :: (Monoid m) => Parse m
parseMultiLineComment = bracket start end (many inner) >> return mempty
  where
    start = string "/*"

    -- Any stars directly in front of the closing slash belong to the
    -- terminator; 'inner' backtracks rather than swallowing them.
    end = many1Satisfy ('*' ==) >> character '/'

    inner = (many1Satisfy ('*' /=) >> return ())
            `onFail`
            -- A run of stars that does /not/ close the comment.  The
            -- whole run is taken at once so that the character checked
            -- against @/@ is never itself a star.
            (many1Satisfy ('*' ==) >> satisfy ('/' /=) >> return ())

-- | Parse one or more double-quoted strings separated by @+@ (with
--   optional whitespace and unwanted items around the @+@) and emit
--   them as a single double-quoted string.  Split lines inside a
--   string are removed; escaped quotes and escaped backslashes are
--   kept as-is.
parseConcatStrings :: Parse Builder
parseConcatStrings = liftM (wrapQuotes . mconcat)
                     $ sepBy1 parseString parseConcat
  where
    qParse = bracket (character '"') (commit $ character '"')

    parseString = qParse (liftM mconcat $ many parseInner)

    parseInner = (string "\\\"" >> return (B.fromLazyText $ T.pack "\\\""))
                 `onFail`
                 -- Need to parse an explicit `\', in case it ends the
                 -- string (and thus the next step would get parsed by the
                 -- previous option).
                 (string "\\\\" >> return (B.fromLazyText $ T.pack "\\\\"))
                 `onFail`
                 parseSplitLine -- in case there's a split mid-quote
                 `onFail`
                 liftM B.singleton (satisfy (quoteChar /=))

    parseConcat = parseSep >> character '+' >> parseSep

    parseSep = many $ allWhitespace `onFail` parseUnwanted

    wrapQuotes str = qc `mappend` str `mappend` qc

    qc = B.singleton '"'

-- | Lines can be split with a @\\@ at the end of the line.
parseSplitLine :: (Monoid m) => Parse m
parseSplitLine = character '\\' >> newline >> return mempty

-- | Parse an angle-bracketed section (as used for HTML-like labels),
--   including any nested angle-bracketed sections, and emit it
--   wrapped in @\<@ and @>@ with its contents untouched.  This keeps
--   the other clean-up parsers from being applied inside such a
--   section.
--
--   NOTE(review): relies on 'parseAngled' consuming the surrounding
--   @\<@ and @>@ -- confirm against "Data.GraphViz.Parsing".
parseHTML :: Parse Builder
parseHTML = liftM (addAngled . mconcat)
            . parseAngled $ many inner
  where
    inner = parseHTML
            `onFail`
            (liftM B.fromLazyText $ many1Satisfy (\c -> c /= open && c /= close))

    addAngled str = B.singleton open `mappend` str `mappend` B.singleton close

    open = '<'

    close = '>'