------------------------------------------------------------------------------- |-- Module : Distribution.Simple.PreProcess.Unlit-- Copyright : ...---- Maintainer : cabal-devel@haskell.org-- Portability : portable---- Remove the \"literal\" markups from a Haskell source file, including-- \"@>@\", \"@\\begin{code}@\", \"@\\end{code}@\", and \"@#@\"-- This version is interesting because instead of striping comment lines, it-- turns them into "-- " style comments. This allows using haddock markup-- in literate scripts without having to use "> --" prefix.moduleDistribution.Simple.PreProcess.Unlit(unlit,plain)whereimportData.CharimportData.ListdataClassified=BirdTrackString|BlankString|OrdinaryString|Line!IntString|CPPString|BeginCode|EndCode-- output only:|ErrorString|CommentString-- | No unliteration.plain::String->String->Stringplain_hs=hsclassify::String->Classifiedclassify('>':s)=BirdTracksclassify('#':s)=casetokenssof(line:file:_)|allisDigitline&&lengthfile>=2&&headfile=='"'&&lastfile=='"'->Line(readline)(tail(initfile))_->CPPswheretokens=unfoldr$\str->caselexstrof(t@(_:_),str'):_->Just(t,str')_->Nothingclassify('\\':s)|"begin{code}"`isPrefixOf`s=BeginCode|"end{code}"`isPrefixOf`s=EndCodeclassifys|allisSpaces=Blanksclassifys=Ordinarys-- So the weird exception for comment indenting is to make things work with-- haddock, see classifyAndCheckForBirdTracks below.unclassify::Bool->Classified->Stringunclassify_(BirdTracks)=' ':sunclassify_(Blanks)=sunclassify_(Ordinarys)=sunclassify_(Linenfile)="# "++shown++" "++showfileunclassify_(CPPs)='#':sunclassifyTrue(Comment"")=" --"unclassifyTrue(Comments)=" -- "++sunclassifyFalse(Comment"")="--"unclassifyFalse(Comments)="-- "++sunclassify__=internalError-- | 'unlit' takes a filename (for error reports), and transforms the-- given string, to eliminate the literate comments from the program text.unlit::FilePath->String->EitherStringStringunlitfileinput=let(usesBirdTracks,classified)=classifyAndCheckForBirdTracks.inlines$inputineither(Left.unlines.map(unclassifyusesBirdTracks))Right.checkErrors.reclassify$classifiedwhere-- So haddock requires comments and code to align, since it treats comments-- as following the layout rule. This is a pain for us since bird track-- style literate code typically gets indented by two since ">" is replaced-- by " " and people usually use one additional space of indent ie-- "> then the code". On the other hand we cannot just go and indent all-- the comments by two since that does not work for latex style literate-- code. So the hacky solution we use here is that if we see any bird track-- style code then we'll indent all comments by two, otherwise by none.-- Of course this will not work for mixed latex/bird track .lhs files but-- nobody does that, it's silly and specifically recommended against in the-- H98 unlit spec.--classifyAndCheckForBirdTracks=flipmapAccumLFalse$\seenBirdTrackline->letclassification=classifylinein(seenBirdTrack||isBirdTrackclassification,classification)isBirdTrack(BirdTrack_)=TrueisBirdTrack_=FalsecheckErrorsls=case[e|Errore<-ls]of[]->Leftls(message:_)->Right(f++":"++shown++": "++message)where(f,n)=errorPosfile1lserrorPosfn[]=(f,n)errorPosfn(Error_:_)=(f,n)errorPos__(Linen'f':ls)=errorPosf'n'lserrorPosfn(_:ls)=errorPosf(n+1)ls-- Here we model a state machine, with each state represented by-- a local function. We only have four states (well, five,-- if you count the error state), but the rules-- to transition between then are not so simple.-- Would it be simpler to have more states?---- Each state represents the type of line that was last read-- i.e. are we in a comment section, or a latex-code section,-- or a bird-code section, etc?reclassify::[Classified]->[Classified]reclassify=blank-- begin in blank statewherelatex[]=[]latex(EndCode:ls)=Blank"":commentlslatex(BeginCode:_)=[Error"\\begin{code} in code section"]latex(BirdTrackl:ls)=Ordinary('>':l):latexlslatex(l:ls)=l:latexlsblank[]=[]blank(EndCode:_)=[Error"\\end{code} without \\begin{code}"]blank(BeginCode:ls)=Blank"":latexlsblank(BirdTrackl:ls)=BirdTrackl:birdlsblank(Ordinaryl:ls)=Commentl:commentlsblank(l:ls)=l:blanklsbird[]=[]bird(EndCode:_)=[Error"\\end{code} without \\begin{code}"]bird(BeginCode:ls)=Blank"":latexlsbird(Blankl:ls)=Blankl:blanklsbird(Ordinary_:_)=[Error"program line before comment line"]bird(l:ls)=l:birdlscomment[]=[]comment(EndCode:_)=[Error"\\end{code} without \\begin{code}"]comment(BeginCode:ls)=Blank"":latexlscomment(CPPl:ls)=CPPl:commentlscomment(BirdTrack_:_)=[Error"comment line before program line"]-- a blank line and another ordinary line following a comment-- will be treated as continuing the comment. Otherwise it's-- then end of the comment, with a blank line.comment(Blankl:ls@(Ordinary_:_))=Commentl:commentlscomment(Blankl:ls)=Blankl:blanklscomment(Linenf:ls)=Linenf:commentlscomment(Ordinaryl:ls)=Commentl:commentlscomment(Comment_:_)=internalErrorcomment(Error_:_)=internalError-- Re-implementation of 'lines', for better efficiency (but decreased laziness).-- Also, importantly, accepts non-standard DOS and Mac line ending characters.inlines::String->[String]inlinesxs=lines'xsidwherelines'[]acc=[acc[]]lines'('\^M':'\n':s)acc=acc[]:lines'sid-- DOSlines'('\^M':s)acc=acc[]:lines'sid-- MacOSlines'('\n':s)acc=acc[]:lines'sid-- Unixlines'(c:s)acc=lines's(acc.(c:))internalError::ainternalError=error"unlit: internal error"