{-# LANGUAGE DeriveDataTypeable, RankNTypes #-}-- |-- Copyright: 2011 Michael Snoyman, 2010-2011 John Millikin-- License: MIT---- Handle streams of text.---- Parts of this code were taken from enumerator and adapted for conduits.moduleData.Conduit.Text(-- * Text codecsCodec,encode,decode,utf8,utf16_le,utf16_be,utf32_le,utf32_be,ascii,iso8859_1,lines,linesBounded,TextException(..),takeWhile,dropWhile,take,drop,foldLines,withLine)whereimportqualifiedPreludeimportPreludehiding(head,drop,takeWhile,lines,zip,zip3,zipWith,zipWith3,take,dropWhile)importControl.Arrow(first)importqualifiedControl.ExceptionasExcimportData.Bits((.&.),(.|.),shiftL)importqualifiedData.ByteStringasBimportqualifiedData.ByteString.Char8asB8importData.Char(ord)importData.Maybe(catMaybes)importqualifiedData.TextasTimportqualifiedData.Text.EncodingasTEimportData.Word(Word8,Word16)importSystem.IO.Unsafe(unsafePerformIO)importData.Typeable(Typeable)importData.ConduitimportqualifiedData.Conduit.ListasCLimportControl.Monad.Trans.Class(lift)importControl.Monad(unless,when)-- | A specific character encoding.---- Since 0.3.0dataCodec=Codec{codecName::T.Text,codecEncode::T.Text->(B.ByteString,Maybe(TextException,T.Text)),codecDecode::B.ByteString->(T.Text,Either(TextException,B.ByteString)B.ByteString)}instanceShowCodecwhereshowsPrecdc=showParen(d>10)$showString"Codec ".shows(codecNamec)-- | Emit each line separately---- Since 0.4.1lines::Monadm=>ConduitT.TextmT.Textlines=loopidwhereloopfront=await>>=maybe(finishfront)(gofront)finishfront=letfinal=frontT.emptyinunless(T.nullfinal)(yieldfinal)gosofarmore=caseT.unconssecondofJust(_,second')->yield(sofarfirst')>>goidsecond'Nothing->letrest=sofarmoreinloop$T.appendrestwhere(first',second)=T.break(=='\n')more-- | Variant of the lines function with an integer parameter.-- The text length of any emitted line-- never exceeds the value of the paramater. Whenever-- this is about to happen a LengthExceeded exception-- is thrown. This function should be used instead-- of the lines function whenever we are dealing with-- user input (e.g. a file upload) because we can't be sure that-- user input won't have extraordinarily large lines which would-- require large amounts of memory if consumed.linesBounded::MonadThrowm=>Int->ConduitT.TextmT.TextlinesBoundedmaxLineLen=loop0idwherelooplenfront=await>>=maybe(finishfront)(golenfront)finishfront=letfinal=frontT.emptyinunless(T.nullfinal)(yieldfinal)golensofarmore=caseT.unconssecondofJust(_,second')->dolettoYield=sofarfirst'len'=len+T.lengthfirst'when(len'>maxLineLen)(lift$monadThrow(LengthExceededmaxLineLen))yieldtoYieldgo0idsecond'Nothing->doletlen'=len+T.lengthmorewhen(len'>maxLineLen)$(lift$monadThrow(LengthExceededmaxLineLen))letrest=sofarmorelooplen'$T.appendrestwhere(first',second)=T.break(=='\n')more-- | Convert text into bytes, using the provided codec. If the codec is-- not capable of representing an input character, an exception will be thrown.---- Since 0.3.0encode::MonadThrowm=>Codec->ConduitT.TextmB.ByteStringencodecodec=CL.mapM$\t->dolet(bs,mexc)=codecEncodecodectmaybe(returnbs)(monadThrow.fst)mexc-- | Convert bytes into text, using the provided codec. If the codec is-- not capable of decoding an input byte sequence, an exception will be thrown.---- Since 0.3.0decode::MonadThrowm=>Codec->ConduitB.ByteStringmT.Textdecodecodec=loopidwhereloopfront=await>>=maybe(finishfront)(gofront)finishfront=caseB.uncons$frontB.emptyofNothing->return()Just(w,_)->lift$monadThrow$DecodeExceptioncodecwgofrontbs'=caseextraofLeft(exc,_)->lift$monadThrowexcRightbs''->yieldtext>>loop(B.appendbs'')where(text,extra)=codecDecodecodecbsbs=frontbs'-- |-- Since 0.3.0dataTextException=DecodeExceptionCodecWord8|EncodeExceptionCodecChar|LengthExceededInt|TextExceptionExc.SomeExceptionderiving(Show,Typeable)instanceExc.ExceptionTextExceptionbyteSplits::B.ByteString->[(B.ByteString,B.ByteString)]byteSplitsbytes=loop(B.lengthbytes)whereloop0=[(B.empty,bytes)]loopn=B.splitAtnbytes:loop(n-1)splitSlowly::(B.ByteString->T.Text)->B.ByteString->(T.Text,Either(TextException,B.ByteString)B.ByteString)splitSlowlydecbytes=validwherevalid=firstValid(Prelude.mapdecFirstsplits)splits=byteSplitsbytesfirstValid=Prelude.head.catMaybestryDec=tryEvaluate.decdecFirst(a,b)=casetryDecaofLeft_->NothingRighttext->Just(text,casetryDecbofLeftexc->Left(TextExceptionexc,b)-- this case shouldn't occur, since splitSlowly-- is only called when parsing failed somewhereRight_->RightB.empty)-- |-- Since 0.3.0utf8::Codecutf8=Codecnameencdecwherename=T.pack"UTF-8"enctext=(TE.encodeUtf8text,Nothing)decbytes=casesplitQuicklybytes>>=maybeDecodeofJust(text,extra)->(text,Rightextra)Nothing->splitSlowlyTE.decodeUtf8bytes-- Whether the given byte is a continuation byte.isContinuationbyte=byte.&.0xC0==0x80-- The number of continuation bytes needed by the given-- non-continuation byte. Returns -1 for an illegal UTF-8-- non-continuation byte and the whole split quickly must fail so-- as the input is passed to TE.decodeUtf8, which will issue a-- suitable error.requiredx0|x0.&.0x80==0x00=0|x0.&.0xE0==0xC0=1|x0.&.0xF0==0xE0=2|x0.&.0xF8==0xF0=3|otherwise=-1splitQuicklybytes|B.nulll||req==-1=Nothing|req==B.lengthr=Just(TE.decodeUtf8bytes,B.empty)|otherwise=Just(TE.decodeUtf8l',r')where(l,r)=B.spanEndisContinuationbytesreq=required(B.lastl)l'=B.initlr'=B.cons(B.lastl)r-- |-- Since 0.3.0utf16_le::Codecutf16_le=Codecnameencdecwherename=T.pack"UTF-16-LE"enctext=(TE.encodeUtf16LEtext,Nothing)decbytes=casesplitQuicklybytesofJust(text,extra)->(text,Rightextra)Nothing->splitSlowlyTE.decodeUtf16LEbytessplitQuicklybytes=maybeDecode(loop0)wheremaxN=B.lengthbytesloopn|n==maxN=decodeAll|(n+1)==maxN=decodeTonloopn=letreq=utf16Required(B.indexbytesn)(B.indexbytes(n+1))decodeMore=loop$!n+reqinifn+req>maxNthendecodeTonelsedecodeMoredecodeTon=firstTE.decodeUtf16LE(B.splitAtnbytes)decodeAll=(TE.decodeUtf16LEbytes,B.empty)-- |-- Since 0.3.0utf16_be::Codecutf16_be=Codecnameencdecwherename=T.pack"UTF-16-BE"enctext=(TE.encodeUtf16BEtext,Nothing)decbytes=casesplitQuicklybytesofJust(text,extra)->(text,Rightextra)Nothing->splitSlowlyTE.decodeUtf16BEbytessplitQuicklybytes=maybeDecode(loop0)wheremaxN=B.lengthbytesloopn|n==maxN=decodeAll|(n+1)==maxN=decodeTonloopn=letreq=utf16Required(B.indexbytes(n+1))(B.indexbytesn)decodeMore=loop$!n+reqinifn+req>maxNthendecodeTonelsedecodeMoredecodeTon=firstTE.decodeUtf16BE(B.splitAtnbytes)decodeAll=(TE.decodeUtf16BEbytes,B.empty)utf16Required::Word8->Word8->Intutf16Requiredx0x1=requiredwhererequired=ifx>=0xD800&&x<=0xDBFFthen4else2x::Word16x=(fromIntegralx1`shiftL`8).|.fromIntegralx0-- |-- Since 0.3.0utf32_le::Codecutf32_le=Codecnameencdecwherename=T.pack"UTF-32-LE"enctext=(TE.encodeUtf32LEtext,Nothing)decbs=caseutf32SplitBytesTE.decodeUtf32LEbsofJust(text,extra)->(text,Rightextra)Nothing->splitSlowlyTE.decodeUtf32LEbs-- |-- Since 0.3.0utf32_be::Codecutf32_be=Codecnameencdecwherename=T.pack"UTF-32-BE"enctext=(TE.encodeUtf32BEtext,Nothing)decbs=caseutf32SplitBytesTE.decodeUtf32BEbsofJust(text,extra)->(text,Rightextra)Nothing->splitSlowlyTE.decodeUtf32BEbsutf32SplitBytes::(B.ByteString->T.Text)->B.ByteString->Maybe(T.Text,B.ByteString)utf32SplitBytesdecbytes=splitwheresplit=maybeDecode(dectoDecode,extra)len=B.lengthbyteslenExtra=modlen4lenToDecode=len-lenExtra(toDecode,extra)=iflenExtra==0then(bytes,B.empty)elseB.splitAtlenToDecodebytes-- |-- Since 0.3.0ascii::Codecascii=Codecnameencdecwherename=T.pack"ASCII"enctext=(bytes,extra)where(safe,unsafe)=T.span(\c->ordc<=0x7F)textbytes=B8.pack(T.unpacksafe)extra=ifT.nullunsafethenNothingelseJust(EncodeExceptionascii(T.headunsafe),unsafe)decbytes=(text,extra)where(safe,unsafe)=B.span(<=0x7F)bytestext=T.pack(B8.unpacksafe)extra=ifB.nullunsafethenRightB.emptyelseLeft(DecodeExceptionascii(B.headunsafe),unsafe)-- |-- Since 0.3.0iso8859_1::Codeciso8859_1=Codecnameencdecwherename=T.pack"ISO-8859-1"enctext=(bytes,extra)where(safe,unsafe)=T.span(\c->ordc<=0xFF)textbytes=B8.pack(T.unpacksafe)extra=ifT.nullunsafethenNothingelseJust(EncodeExceptioniso8859_1(T.headunsafe),unsafe)decbytes=(T.pack(B8.unpackbytes),RightB.empty)tryEvaluate::a->EitherExc.SomeExceptionatryEvaluate=unsafePerformIO.Exc.try.Exc.evaluatemaybeDecode::(a,b)->Maybe(a,b)maybeDecode(a,b)=casetryEvaluateaofLeft_->NothingRight_->Just(a,b)-- |---- Since 1.0.8takeWhile::Monadm=>(Char->Bool)->ConduitT.TextmT.TexttakeWhilep=loopwhereloop=await>>=maybe(return())gogot=caseT.spanptof(x,y)|T.nully->yieldx>>loop|otherwise->yieldx>>leftovery-- |---- Since 1.0.8dropWhile::Monadm=>(Char->Bool)->ConsumerT.Textm()dropWhilep=loopwhereloop=await>>=maybe(return())gogot|T.nullx=loop|otherwise=leftoverxwherex=T.dropWhilept-- |---- Since 1.0.8take::Monadm=>Int->ConduitT.TextmT.Texttake=loopwhereloopi=await>>=maybe(return())(goi)goit|diff==0=yieldt|diff<0=let(x,y)=T.splitAtitinyieldx>>leftovery|otherwise=yieldt>>loopdiffwherediff=i-T.lengtht-- |---- Since 1.0.8drop::Monadm=>Int->ConsumerT.Textm()drop=loopwhereloopi=await>>=maybe(return())(goi)goit|diff==0=return()|diff<0=leftover$T.dropit|otherwise=loopdiffwherediff=i-T.lengtht-- |---- Since 1.0.8foldLines::Monadm=>(a->ConduitMT.Textoma)->a->ConduitMT.TextomafoldLinesf=startwherestarta=CL.peek>>=maybe(returna)(const$loop$fa)loopconsumer=doa<-takeWhile(/='\n')=$=doa<-CL.map(T.filter(/='\r'))=$=consumerCL.sinkNullreturnadrop1starta-- |---- Since 1.0.8withLine::Monadm=>SinkT.Textma->ConsumerT.Textm(Maybea)withLineconsumer=toConsumer$domx<-CL.peekcasemxofNothing->returnNothingJust_->dox<-takeWhile(/='\n')=$dox<-CL.map(T.filter(/='\r'))=$consumerCL.sinkNullreturnxdrop1return$Justx