{-# LANGUAGE CPP, BangPatterns, Rank2Types #-}#ifdef USE_MONO_PAT_BINDS{-# LANGUAGE MonoPatBinds #-}#endif-- |-- Module : Blaze.ByteString.Builder.Internal-- Copyright : (c) 2010 Simon Meier-- License : BSD3-style (see LICENSE)---- Maintainer : Simon Meier <iridcode@gmail.com>-- Stability : experimental-- Portability : tested on GHC only---- Core types and functions for the 'Builder' monoid and the 'Put' monad.--moduleBlaze.ByteString.Builder.Internal(-- * Build StepsBufRange(..),BuildSignal,BuildStep,done,bufferFull,insertByteString-- * Builder,Builder,fromBuildStepCont,fromPut,flush-- * Put,Put,putBuilder,putBuildStepCont,putLiftIO-- * Writes,moduleBlaze.ByteString.Builder.Internal.Write,writeToByteString-- * Execution,toLazyByteString,toLazyByteStringWith,toByteString,toByteStringIO,toByteStringIOWith-- * Deafult Sizes,defaultFirstBufferSize,defaultMinimalBufferSize,defaultBufferSize,defaultMaximalCopySize)where#ifdef HAS_FOREIGN_UNSAFE_MODULEimportForeign(withForeignPtr,sizeOf,copyBytes,plusPtr,minusPtr)importForeign.ForeignPtr.Unsafe(unsafeForeignPtrToPtr)#elseimportForeign(unsafeForeignPtrToPtr,withForeignPtr,sizeOf,copyBytes,plusPtr,minusPtr)#endifimportControl.Monad(unless)importSystem.IO.Unsafe(unsafeDupablePerformIO)importqualifiedData.ByteStringasSimportqualifiedData.ByteString.InternalasSimportqualifiedData.ByteString.LazyasLimportqualifiedData.ByteString.Lazy.InternalasLimportBlaze.ByteString.Builder.Internal.TypesimportBlaze.ByteString.Builder.Internal.Write-------------------------------------------------------------------------------- Internal global constants.-------------------------------------------------------------------------------- | Default size (~32kb) for the buffer that becomes a chunk of the output-- stream once it is filled.--defaultBufferSize::IntdefaultBufferSize=32*1024-overhead-- Copied from Data.ByteString.Lazy.whereoverhead=2*sizeOf(undefined::Int)-- | The minimal length (~4kb) a buffer must have before filling it and-- outputting it as a chunk of the output stream.---- This size determines when a buffer is spilled after a 'flush' or a direct-- bytestring insertion. It is also the size of the first chunk generated by-- 'toLazyByteString'.defaultMinimalBufferSize::IntdefaultMinimalBufferSize=4*1024-overheadwhereoverhead=2*sizeOf(undefined::Int)-- | The default length (64) for the first buffer to be allocated when-- converting a 'Builder' to a lazy bytestring.---- See 'toLazyByteStringWith' for further explanation.defaultFirstBufferSize::IntdefaultFirstBufferSize=64-- | The maximal number of bytes for that copying is cheaper than direct-- insertion into the output stream. This takes into account the fragmentation-- that may occur in the output buffer due to the early 'flush' implied by the-- direct bytestring insertion.---- @'defaultMaximalCopySize' = 2 * 'defaultMinimalBufferSize'@--defaultMaximalCopySize::IntdefaultMaximalCopySize=2*defaultMinimalBufferSize-------------------------------------------------------------------------------- Flushing and running a Builder-------------------------------------------------------------------------------- | Prepend the chunk if it is non-empty.{-# INLINE nonEmptyChunk #-}nonEmptyChunk::S.ByteString->L.ByteString->L.ByteStringnonEmptyChunkbslbs|S.nullbs=lbs|otherwise=L.Chunkbslbs-- | Output all data written in the current buffer and start a new chunk.---- The use of this function depends on how the resulting bytestrings are-- consumed. 'flush' is possibly not very useful in non-interactive scenarios.-- However, it is kept for compatibility with the builder provided by-- Data.Binary.Builder.---- When using 'toLazyByteString' to extract a lazy 'L.ByteString' from a-- 'Builder', this means that a new chunk will be started in the resulting lazy-- 'L.ByteString'. The remaining part of the buffer is spilled, if the-- reamining free space is smaller than the minimal desired buffer size.--{-# INLINE flush #-}flush::Builderflush=fromBuildStepContstepwherestepk!(BufRangeop_)=return$insertByteStringopS.emptyk-- | Run a 'Builder' with the given buffer sizes.---- Use this function for integrating the 'Builder' type with other libraries-- that generate lazy bytestrings.---- Note that the builders should guarantee that on average the desired chunk-- size is attained. Builders may decide to start a new buffer and not-- completely fill the existing buffer, if this is faster. However, they should-- not spill too much of the buffer, if they cannot compensate for it.---- A call @toLazyByteStringWith bufSize minBufSize firstBufSize@ will generate-- a lazy bytestring according to the following strategy. First, we allocate-- a buffer of size @firstBufSize@ and start filling it. If it overflows, we-- allocate a buffer of size @minBufSize@ and copy the first buffer to it in-- order to avoid generating a too small chunk. Finally, every next buffer will-- be of size @bufSize@. This, slow startup strategy is required to achieve-- good speed for short (<200 bytes) resulting bytestrings, as for them the-- allocation cost is of a large buffer cannot be compensated. Moreover, this-- strategy also allows us to avoid spilling too much memory for short-- resulting bytestrings.---- Note that setting @firstBufSize >= minBufSize@ implies that the first buffer-- is no longer copied but allocated and filled directly. Hence, setting-- @firstBufSize = bufSize@ means that all chunks will use an underlying buffer-- of size @bufSize@. This is recommended, if you know that you always output-- more than @minBufSize@ bytes.toLazyByteStringWith::Int-- ^ Buffer size (upper-bounds the resulting chunk size).->Int-- ^ Minimal free buffer space for continuing filling-- the same buffer after a 'flush' or a direct bytestring-- insertion. This corresponds to the minimal desired-- chunk size.->Int-- ^ Size of the first buffer to be used and copied for-- larger resulting sequences->Builder-- ^ Builder to run.->L.ByteString-- ^ Lazy bytestring to output after the builder is-- finished.->L.ByteString-- ^ Resulting lazy bytestringtoLazyByteStringWithbufSizeminBufSizefirstBufSize(Builderb)k=S.inlinePerformIO$fillFirstBuffer(b(buildStepfinalStep))wherefinalStep(BufRangepf_)=return$Donepf()-- fill a first very small buffer, if we need more space then copy it-- to the new buffer of size 'minBufSize'. This way we don't pay the-- allocation cost of the big 'bufSize' buffer, when outputting only-- small sequences.fillFirstBuffer!step0|minBufSize<=firstBufSize=fillNewBufferfirstBufSizestep0|otherwise=dofpbuf<-S.mallocByteStringfirstBufSizewithForeignPtrfpbuf$\pf->dolet!pe=pf`plusPtr`firstBufSizemkbspf'=S.PSfpbuf0(pf'`minusPtr`pf){-# INLINE mkbs #-}next<-runBuildStepstep0(BufRangepfpe)casenextofDonepf'_|pf'==pf->returnk|otherwise->return$L.Chunk(mkbspf')kBufferFullnewSizepf'nextStep->dolet!l=pf'`minusPtr`pffillNewBuffer(max(l+newSize)minBufSize)$buildStep$\(BufRangepfNewpeNew)->docopyBytespfNewpfllet!br'=BufRange(pfNew`plusPtr`l)peNewrunBuildStepnextStepbr'InsertByteStringpf'bsnextStep|pf'==pf->return$nonEmptyChunkbs(S.inlinePerformIO$fillNewBufferbufSizenextStep)|otherwise->return$L.Chunk(mkbspf')(nonEmptyChunkbs(S.inlinePerformIO$fillNewBufferbufSizenextStep))-- allocate and fill a new bufferfillNewBuffer!size!step0=dofpbuf<-S.mallocByteStringsizewithForeignPtrfpbuf$fillBufferfpbufwherefillBufferfpbuf!pbuf=fillpbufstep0where!pe=pbuf`plusPtr`sizefill!pf!step=donext<-runBuildStepstep(BufRangepfpe)letmkbspf'=S.PSfpbuf(pf`minusPtr`pbuf)(pf'`minusPtr`pf){-# INLINE mkbs #-}casenextofDonepf'_|pf'==pf->returnk|otherwise->return$L.Chunk(mkbspf')kBufferFullnewSizepf'nextStep|pf'==pf->fillNewBuffer(maxnewSizebufSize)nextStep|otherwise->return$L.Chunk(mkbspf')(S.inlinePerformIO$fillNewBuffer(maxnewSizebufSize)nextStep)InsertByteStringpf'bsnextStep|pf'==pf->return$nonEmptyChunkbs(S.inlinePerformIO$fillpf'nextStep)|minBufSize<pe`minusPtr`pf'->return$L.Chunk(mkbspf')(nonEmptyChunkbs(S.inlinePerformIO$fillpf'nextStep))|otherwise->return$L.Chunk(mkbspf')(nonEmptyChunkbs(S.inlinePerformIO$fillNewBufferbufSizenextStep))-- | Extract the lazy 'L.ByteString' from the builder by running it with default-- buffer sizes. Use this function, if you do not have any special-- considerations with respect to buffer sizes.---- @ 'toLazyByteString' b = 'toLazyByteStringWith' 'defaultBufferSize' 'defaultMinimalBufferSize' 'defaultFirstBufferSize' b L.empty@---- Note that @'toLazyByteString'@ is a 'Monoid' homomorphism.---- > toLazyByteString mempty == mempty-- > toLazyByteString (x `mappend` y) == toLazyByteString x `mappend` toLazyByteString y---- However, in the second equation, the left-hand-side is generally faster to-- execute.--toLazyByteString::Builder->L.ByteStringtoLazyByteStringb=toLazyByteStringWithdefaultBufferSizedefaultMinimalBufferSizedefaultFirstBufferSizebL.empty{-# INLINE toLazyByteString #-}-- | Pack the chunks of a lazy bytestring into a single strict bytestring.packChunks::L.ByteString->S.ByteStringpackChunkslbs=doS.unsafeCreate(fromIntegral$L.lengthlbs)(copyChunkslbs)wherecopyChunks!L.Empty!_pf=return()copyChunks!(L.Chunk(S.PSfpbufol)lbs')!pf=dowithForeignPtrfpbuf$\pbuf->copyBytespf(pbuf`plusPtr`o)lcopyChunkslbs'(pf`plusPtr`l)-- | Run the builder to construct a strict bytestring containing the sequence-- of bytes denoted by the builder. This is done by first serializing to a lazy bytestring and then packing its-- chunks to a appropriately sized strict bytestring.---- > toByteString = packChunks . toLazyByteString---- Note that @'toByteString'@ is a 'Monoid' homomorphism.---- > toByteString mempty == mempty-- > toByteString (x `mappend` y) == toByteString x `mappend` toByteString y---- However, in the second equation, the left-hand-side is generally faster to-- execute.--toByteString::Builder->S.ByteStringtoByteString=packChunks.toLazyByteString-- | @toByteStringIOWith bufSize io b@ runs the builder @b@ with a buffer of-- at least the size @bufSize@ and executes the 'IO' action @io@ whenever the-- buffer is full.---- Compared to 'toLazyByteStringWith' this function requires less allocation,-- as the output buffer is only allocated once at the start of the-- serialization and whenever something bigger than the current buffer size has-- to be copied into the buffer, which should happen very seldomly for the-- default buffer size of 32kb. Hence, the pressure on the garbage collector is-- reduced, which can be an advantage when building long sequences of bytes.--toByteStringIOWith::Int-- ^ Buffer size (upper bounds-- the number of bytes forced-- per call to the 'IO' action).->(S.ByteString->IO())-- ^ 'IO' action to execute per-- full buffer, which is-- referenced by a strict-- 'S.ByteString'.->Builder-- ^ 'Builder' to run.->IO()-- ^ Resulting 'IO' action.toByteStringIOWithbufSizeio(Builderb)=fillBufferbufSize(b(buildStepfinalStep))wherefinalStep!(BufRangepf_)=return$Donepf()fillBuffer!sizestep=doS.mallocByteStringsize>>=fillwherefillfpbuf=dolet!pf=unsafeForeignPtrToPtrfpbuf!br=BufRangepf(pf`plusPtr`size)-- safe due to later reference of fpbuf-- BETTER than withForeignPtr, as we lose a tail call otherwisesignal<-runBuildStepstepbrcasesignalofDonepf'_->io$S.PSfpbuf0(pf'`minusPtr`pf)BufferFullminSizepf'nextStep->doio$S.PSfpbuf0(pf'`minusPtr`pf)fillBuffer(maxbufSizeminSize)nextStepInsertByteStringpf'bsnextStep->doio$S.PSfpbuf0(pf'`minusPtr`pf)unless(S.nullbs)(iobs)fillBufferbufSizenextStep-- | Run the builder with a 'defaultBufferSize'd buffer and execute the given-- 'IO' action whenever the buffer is full or gets flushed.---- @ 'toByteStringIO' = 'toByteStringIOWith' 'defaultBufferSize'@---- This is a 'Monoid' homomorphism in the following sense.---- > toByteStringIO io mempty == return ()-- > toByteStringIO io (x `mappend` y) == toByteStringIO io x >> toByteStringIO io y--toByteStringIO::(S.ByteString->IO())->Builder->IO()toByteStringIO=toByteStringIOWithdefaultBufferSize{-# INLINE toByteStringIO #-}-- | Run a 'Write' to produce a strict 'S.ByteString'.-- This is equivalent to @('toByteString' . 'fromWrite')@, but is more-- efficient because it uses just one appropriately-sized buffer.writeToByteString::Write->S.ByteStringwriteToByteString!w=unsafeDupablePerformIO$dofptr<-S.mallocByteString(getBoundw)len<-withForeignPtrfptr$\ptr->doend<-runWritewptrreturn$!end`minusPtr`ptrreturn$!S.fromForeignPtrfptr0len{-# INLINE writeToByteString #-}-------------------------------------------------------------------------------- Draft of new builder/put execution code------------------------------------------------------------------------------{- FIXME: Generalize this code such that it can replace the above clunky
- implementations.
-- | A monad for lazily composing lazy bytestrings using continuations.
newtype LBSM a = LBSM { unLBSM :: (a, L.ByteString -> L.ByteString) }
instance Monad LBSM where
return x = LBSM (x, id)
(LBSM (x,k)) >>= f = let LBSM (x',k') = f x in LBSM (x', k . k')
(LBSM (_,k)) >> (LBSM (x',k')) = LBSM (x', k . k')
-- | Execute a put and return the written buffers as the chunks of a lazy
-- bytestring.
toLazyByteString :: Put a -> (a, L.ByteString)
toLazyByteString put =
(fst result, k (bufToLBSCont (snd result) L.empty))
where
-- FIXME: Check with ByteString guys why allocation in inlinePerformIO is
-- bad.
-- initial buffer
buf0 = S.inlinePerformIO $ allocBuffer defaultBufferSize
-- run put, but don't force result => we're lazy enough
LBSM (result, k) = runPut liftIO outputBuf outputBS put buf0
-- convert a buffer to a lazy bytestring continuation
bufToLBSCont = maybe id L.Chunk . unsafeFreezeNonEmptyBuffer
-- lifting an io putsignal to a lazy bytestring monad
liftIO io = LBSM (S.inlinePerformIO io, id)
-- add buffer as a chunk prepare allocation of new one
outputBuf minSize buf = LBSM
( S.inlinePerformIO $ allocBuffer (max minSize defaultBufferSize)
, bufToLBSCont buf )
-- add bytestring directly as a chunk; exploits postcondition of runPut
-- that bytestrings are non-empty
outputBS bs = LBSM ((), L.Chunk bs)
{-
-- | A Builder that traces a message
traceBuilder :: String -> Builder
traceBuilder msg = fromBuildStepCont $ \k br@(BufRange op ope) -> do
putStrLn $ "traceBuilder " ++ show (op, ope) ++ ": " ++ msg
k br
test2 :: Word8 -> [S.ByteString]
test2 x = L.toChunks $ toLazyByteString2 $ fromBuilder $ mconcat
[ traceBuilder "before flush"
, fromWord8 48
, flushBuilder
, flushBuilder
, traceBuilder "after flush"
, fromWord8 x
]
-}
-}