{-# LANGUAGE CPP, MultiParamTypeClasses, FlexibleInstances #-}
-- Consider splitting off as a separate package
-- Copyright (c) 2008 Gustav Munkby
-- Copyright (c) 2008 Jean-Philippe Bernardy

-- | This module defines a Rope representation.
--
-- While the representation is a sequence of ByteStrings stored in a finger
-- tree, the indices are actually in number of characters.
--
-- This is currently based on utf8-string, but a couple of other packages
-- might be better: text, compact-string.
--
-- At the moment none of them has a lazy implementation, which forces us to
-- always export plain Strings.
-- (Utf8-string does not have a proper newtype)
module Data.Rope
  ( Rope

    -- * Conversions to Rope
  , fromString

    -- * Conversions from Rope
  , toString
  , toReverseString

    -- * List-like functions
  , null
  , empty
  , take
  , drop
  , append
  , splitAt
  , splitAtLine
  , length
  , reverse
  , countNewLines

    -- * IO
  , readFile
  , writeFile

    -- * Low level functions
  , splitAtChunkBefore
  ) where

import Prelude hiding (null, head, tail, length, take, drop, splitAt, foldl, reverse, readFile, writeFile)
import qualified Data.List as L

import qualified Data.ByteString.UTF8 as B
import qualified Data.ByteString as B (append, concat)
import qualified Data.ByteString as Byte
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as LB (toChunks, fromChunks, null, readFile)
import qualified Data.ByteString.Lazy.UTF8 as LB

import qualified Data.FingerTree as T
import Data.FingerTree hiding (null, empty, reverse)

import Data.Binary
import Data.Char (ord)
import Data.Monoid

import System.IO.Cautious (writeFileL)

-- | Number of characters stored in a freshly built chunk.
--
-- This is counted in characters, not bytes. 'chunkSize' is a 'Word8', so
-- this value must be <= 255 for the cached chunk length to be representable.
defaultChunkSize :: Int
defaultChunkSize = 128

-- | A piece of UTF-8 encoded text together with its cached length in
-- characters.
--
-- The FingerTree does not store measurements for single chunks, which
-- means that the length of chunks often has to be recomputed; caching it
-- in the chunk avoids that.
data Chunk = Chunk
  { chunkSize :: {-# UNPACK #-} !Word8
  , fromChunk :: {-# UNPACK #-} !ByteString
  }
  deriving (Eq, Show)

-- | Build a chunk from UTF-8 encoded bytes, computing its character count.
-- The count is narrowed to 'Word8', so the input must not hold more than
-- 255 characters.
mkChunk :: ByteString -> Chunk
mkChunk s = Chunk (fromIntegral $ B.length s) s

-- | The measure cached in the finger tree: a character count and a newline
-- count.
--
-- NOTE(review): an earlier comment here described 'lineIndex' as lazy
-- (because the line count is not often wanted), yet the field is declared
-- strict. Presumably the strictness is what avoids stack overflows on large
-- files -- confirm before changing it.
data Size = Indices
  { charIndex :: {-# UNPACK #-} !Int
  , lineIndex :: {-# UNPACK #-} !Int
  }
  deriving Show

-- | Sizes combine by component-wise addition.
instance Monoid Size where
  mempty = Indices 0 0
  mappend (Indices c1 l1) (Indices c2 l2) = Indices (c1 + c2) (l1 + l2)

-- | A string represented as a finger tree of UTF-8 encoded chunks.
newtype Rope = Rope { fromRope :: FingerTree Size Chunk }
  deriving (Eq, Show)

-- | Prepend a chunk to a tree, dropping the chunk if it is empty.
(-|) :: Chunk -> FingerTree Size Chunk -> FingerTree Size Chunk
b -| t
  | chunkSize b == 0 = t
  | otherwise        = b <| t

-- | Append a chunk to a tree, dropping the chunk if it is empty.
(|-) :: FingerTree Size Chunk -> Chunk -> FingerTree Size Chunk
t |- b
  | chunkSize b == 0 = t
  | otherwise        = t |> b

-- | The newline byte. Newlines are preserved by UTF8 encoding and decoding,
-- so they can be counted directly on the encoded bytes.
newline :: Word8
newline = fromIntegral (ord '\n')

instance Measured Size Chunk where
  measure (Chunk l s) =
    Indices
      (fromIntegral l)        -- note that this is the length in characters, not bytes.
      (Byte.count newline s)

-- | The 'Foldable' instance of 'FingerTree' only defines 'foldMap', so the
-- 'foldr' needed for 'toList' is inefficient, and can cause stack overflows.
-- So, we roll our own (somewhat inefficient) version of 'toList' to avoid this.
toList :: Measured v a => FingerTree v a -> [a]
toList t = case viewl t of
  c :< cs -> c : toList cs
  EmptyL  -> []

-- | Concatenate the encoded chunks of a rope into a lazy ByteString.
toLazyByteString :: Rope -> LB.ByteString
toLazyByteString = LB.fromChunks . fmap fromChunk . toList . fromRope

-- | Reverse a rope: the chunk order is reversed and each chunk is decoded,
-- reversed character by character, and re-encoded.
reverse :: Rope -> Rope
reverse = Rope . fmap' reverseChunk . T.reverse . fromRope
  where
    reverseChunk = mkChunk . B.fromString . L.reverse . B.toString . fromChunk

-- | Convert a rope to a 'String' holding its characters in reverse order.
toReverseString :: Rope -> String
toReverseString =
  L.concatMap (L.reverse . B.toString . fromChunk) . toList . T.reverse . fromRope

-- | Convert a rope to a 'String'.
toString :: Rope -> String
toString = LB.toString . toLazyByteString

-- | Build a rope from UTF-8 encoded lazy bytes, cutting it into chunks of
-- 'defaultChunkSize' characters.
fromLazyByteString :: LB.ByteString -> Rope
fromLazyByteString = Rope . toTree T.empty
  where
    toTree acc b
      | LB.null b = acc
      | otherwise =
          let (h, t) = LB.splitAt (fromIntegral defaultChunkSize) b
              chunk  = mkChunk $ B.concat $ LB.toChunks $ h
          -- Force the accumulator and the chunk before recursing.
          in acc `seq` chunk `seq` toTree (acc |> chunk) t

-- | Build a rope from a 'String', cutting it into chunks of
-- 'defaultChunkSize' characters.
fromString :: String -> Rope
fromString = Rope . toTree T.empty
  where
    toTree acc [] = acc
    toTree acc b  =
      let (h, t) = L.splitAt defaultChunkSize b
          chunk  = mkChunk $ B.fromString h
      -- Force the accumulator and the chunk before recursing.
      in acc `seq` chunk `seq` toTree (acc |> chunk) t

-- | Test whether the rope contains no chunks.
null :: Rope -> Bool
null (Rope a) = T.null a

-- | The empty rope.
empty :: Rope
empty = Rope T.empty

-- | Get the length of the string. (This information is cached, so O(1)
-- amortized runtime.)
length :: Rope -> Int
length = charIndex . measure . fromRope

-- | Count the number of newlines in the string. (This information is
-- cached, so O(1) amortized runtime.)
countNewLines :: Rope -> Int
countNewLines = lineIndex . measure . fromRope

-- | Append two strings by merging the two finger trees.
--
-- When the last chunk of the first rope and the first chunk of the second
-- rope together hold fewer than 'defaultChunkSize' characters, they are
-- fused into a single chunk.
append :: Rope -> Rope -> Rope
append (Rope a) (Rope b) = Rope $
  case T.viewr a of
    EmptyR -> b
    l :> Chunk len x ->
      case T.viewl b of
        EmptyL -> a
        Chunk len' x' :< r
          | fromIntegral len + fromIntegral len' < defaultChunkSize ->
              l >< singleton (Chunk (len + len') (x `B.append` x')) >< r
          | otherwise -> a >< b

-- | @take n@ is the first component of @splitAt n@;
-- @drop n@ is the second component.
take, drop :: Int -> Rope -> Rope
take n = fst . splitAt n
drop n = snd . splitAt n

-- | Split the string at the specified position (counted in characters).
--
-- A position that is zero or negative yields @(empty, rope)@; a position at
-- or past the end yields @(rope, empty)@.
splitAt :: Int -> Rope -> (Rope, Rope)
splitAt n (Rope t) =
  case T.viewl c of
    Chunk len x :< r
      -- Only cut inside the chunk for a strictly positive offset. A
      -- negative offset (from a negative @n@) must not reach the branch
      -- below: narrowing it to 'Word8' would wrap around and record bogus
      -- chunk sizes.
      | n' > 0 ->
          let (lx, rx) = B.splitAt n' x
          in ( Rope $ l |> Chunk (fromIntegral n') lx
             , Rope $ Chunk (len - fromIntegral n') rx -| r
             )
    _ -> (Rope l, Rope c)
  where
    -- l: longest prefix of whole chunks holding at most n characters.
    (l, c) = T.split ((> n) . charIndex) t
    -- Offset of the split point inside the first chunk of c.
    n' = n - charIndex (measure l)

-- | Split the rope on a chunk, so that the desired
-- position lies within the first chunk of the second rope.
splitAtChunkBefore :: Int -> Rope -> (Rope, Rope)
splitAtChunkBefore n (Rope t) = (Rope l, Rope c)
  where
    (l, c) = T.split ((> n) . charIndex) t

-- | Split before the specified line. Lines are indexed from 0.
splitAtLine :: Int -> Rope -> (Rope, Rope)
splitAtLine n
  | n <= 0    = \r -> (empty, r)
  | otherwise = splitAtLine' (n - 1)

-- | Split after the specified line. Lines are indexed from 0.
splitAtLine' :: Int -> Rope -> (Rope, Rope)
splitAtLine' n (Rope t) =
  case T.viewl c of
    ch@(Chunk _ x) :< r ->
      let -- Newlines in (l + ch) beyond the n + 1 that belong on the left.
          excess   = lineIndex (measure l) + lineIndex (measure ch) - n - 1
          (lx, rx) = cutExcess excess x
      in (Rope $ l |- mkChunk lx, Rope $ mkChunk rx -| r)
    _ -> (Rope l, Rope c)
  where
    -- l: longest prefix of whole chunks holding at most n newlines.
    (l, c) = T.split ((n <) . lineIndex) t

-- | @cutExcess i s@ splits @s@ just after a newline so that exactly @i@
-- newlines end up in the right part. When @s@ holds @i@ or fewer newlines,
-- the whole of @s@ is returned on the left.
cutExcess :: Int -> ByteString -> (ByteString, ByteString)
cutExcess i s =
  let idx = gt i $ L.reverse $ Byte.elemIndices newline s
  in Byte.splitAt (idx + 1) s -- take one extra byte so that the newline is found on the left.
  where
    -- Pick the i-th newline position counting from the end of the chunk.
    gt _ []       = Byte.length s
    gt 0 (x : _)  = x
    gt k (_ : xs) = gt (k - 1) xs

-- | Ropes are serialised through their 'String' representation.
instance Binary Rope where
  put = put . toString
  get = fromString `fmap` get

-- | Write a rope to a file as UTF-8, using 'writeFileL'.
writeFile :: FilePath -> Rope -> IO ()
writeFile f = writeFileL f . toLazyByteString

-- | Read a file of UTF-8 encoded text into a rope.
readFile :: FilePath -> IO Rope
readFile f = fromLazyByteString `fmap` LB.readFile f