-----------------------------------------------------------------------------
-- |
-- Module: Data.Enumerator.Text
-- Copyright: 2010 John Millikin
-- License: MIT
--
-- Maintainer: jmillikin@gmail.com
-- Portability: portable
--
-- Enumerator-based text IO
--
-----------------------------------------------------------------------------
module Data.Enumerator.Text
    ( -- * Enumerators and iteratees
      enumHandle
    , enumFile
    , iterHandle

      -- * Codecs
    , Codec
    , encode
    , decode
    , utf8
    , utf16_le
    , utf16_be
    , utf32_le
    , utf32_be
    , ascii
    , iso8859_1
    ) where

import qualified Control.Exception as E
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified System.IO as IO
import System.IO.Error (isEOFError)
import Control.Arrow (first)
import Data.Bits ((.&.))
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.Text.Encoding as TE
import Data.Bits ((.|.), shiftL)
import Data.Word (Word16)
import Prelude as Prelude
import Numeric (showIntAtBase)
import Data.Char (toUpper, intToDigit, ord)
import Data.Word (Word8)
import System.IO.Unsafe (unsafePerformIO)
import Data.Enumerator
import Data.Enumerator.Util

-- | Read lines of text from the handle, and stream them to an 'Iteratee'.
-- If an exception occurs during file IO, enumeration will stop and 'Error'
-- will be returned. Exceptions from the iteratee are not caught.
--
-- Each line obtained from 'T.hGetLine' is fed to the iteratee as its own
-- single-element chunk. When the handle reports end-of-file, the current
-- continuation is returned as-is (no 'EOF' is sent to the iteratee).
--
-- The handle should be opened with an appropriate text encoding, and
-- in 'IO.ReadMode' or 'IO.ReadWriteMode'.
enumHandle :: IO.Handle -> Enumerator T.Text IO b
enumHandle h = Iteratee . loop where
    -- Keep feeding lines for as long as the iteratee wants more input.
    loop (Continue k) = withText $ \maybeText -> case maybeText of
        Nothing -> return $ Continue k
        Just text -> runIteratee (k (Chunks [text])) >>= loop
    -- The iteratee has finished or failed; pass its step through untouched.
    loop step = return step

    -- Read one line; an EOF error becomes 'Nothing', any other IO error is
    -- re-thrown so that 'tryStep' can handle it.
    withText = tryStep $ E.catch
        (Just `fmap` T.hGetLine h)
        (\err -> if isEOFError err
            then return Nothing
            else E.throwIO err)

-- | Opens a file path in text mode, and passes the handle to 'enumHandle'.
-- The file will be closed when the 'Iteratee' finishes.
--
-- The handle is closed via 'E.finally', so it is also released if running
-- the iteratee throws.
enumFile :: FilePath -> Enumerator T.Text IO b
enumFile path s = Iteratee io where
    withHandle = tryStep (IO.openFile path IO.ReadMode)
    io = withHandle $ \h -> E.finally
        (runIteratee (enumHandle h s))
        (IO.hClose h)

-- | Read text from a stream and write it to a handle. If an exception
-- occurs during file IO, enumeration will stop and 'Error' will be
-- returned.
--
-- Chunks are written with 'T.hPutStr' in the order received; the iteratee
-- yields @()@ once 'EOF' is seen.
--
-- The handle should be opened with an appropriate text encoding, and
-- in 'IO.WriteMode' or 'IO.ReadWriteMode'.
iterHandle :: IO.Handle -> Iteratee T.Text IO ()
iterHandle h = continue step where
    step EOF = yield () EOF
    -- Nothing to write; just ask for more input.
    step (Chunks []) = continue step
    step (Chunks chunks) = Iteratee io where
        put = mapM_ (T.hPutStr h) chunks
        io = tryStep put (\_ -> return $ Continue step)

-- | A named pair of encoding and decoding functions between 'T.Text' and
-- 'B.ByteString'.
data Codec = Codec
    { -- Human-readable codec name; used by 'Show' and in error messages.
      codecName :: T.Text
      -- Encode a list of text chunks, or report why encoding failed.
    , codecEncode :: [T.Text] -> Either E.SomeException [B.ByteString]
      -- Decode as much of the input as possible. The second component of
      -- the result holds trailing bytes that were not decoded; 'decode'
      -- prepends them to the next chunk.
    , codecDecode :: B.ByteString -> Either E.SomeException (T.Text, B.ByteString)
    }

-- | Shows a codec as @Codec \"name\"@, parenthesised when the surrounding
-- precedence is above 10.
instance Show Codec where
    showsPrec d c = showParen (d > 10) $
        showString "Codec " . shows (codecName c)

-- | Convert a stream of text into a stream of bytes using the given codec.
-- If the codec reports a failure, it is raised with 'throwError'.
encode :: Monad m => Codec -> Enumeratee T.Text B.ByteString m b
encode codec = loop where
    loop = checkDone $ continue . step

    step k EOF = yield (Continue k) EOF
    step k (Chunks []) = continue $ step k
    step k (Chunks xs) = case codecEncode codec xs of
        Left err -> throwError err
        Right byteChunks -> k (Chunks byteChunks) >>== loop

-- | Convert a stream of bytes into a stream of text using the given codec.
-- If the codec reports a failure, it is raised with 'throwError'.
--
-- Bytes that the codec leaves undecoded are carried over and prepended to
-- the next chunk. If undecoded bytes remain when 'EOF' arrives, they are
-- yielded back as the leftover input.
decode :: Monad m => Codec -> Enumeratee B.ByteString T.Text m b
decode codec = loop B.empty where
    dec = codecDecode codec

    -- 'acc' holds the undecoded tail of the previous chunk.
    loop acc = checkDone $ continue . step acc

    step acc k EOF = yield (Continue k) $ if B.null acc
        then EOF
        else Chunks [acc]
    step acc k (Chunks []) = continue $ step acc k
    step acc k (Chunks xs) = case dec (B.concat (acc : xs)) of
        Left err -> throwError err
        Right (text, extra) -> if T.null text
            -- Nothing decodable yet: keep accumulating without bothering
            -- the inner iteratee.
            then continue $ step extra k
            else k (Chunks [text]) >>== loop extra

-- | UTF-8 codec. Decoding splits the input just before a trailing sequence
-- whose lead byte announces more bytes than are available; that tail is
-- returned undecoded.
utf8 :: Codec
utf8 = Codec name enc dec where
    name = T.pack "UTF-8"
    enc = Right . Prelude.map TE.encodeUtf8
    dec = unsafeTryDec . splitBytes

    splitBytes bytes = loop 0 where
        maxN = B.length bytes

        -- Sequence length announced by a lead byte.
        required x0
            | x0 .&. 0x80 == 0x00 = 1
            | x0 .&. 0xE0 == 0xC0 = 2
            | x0 .&. 0xF0 == 0xE0 = 3
            | x0 .&. 0xF8 == 0xF0 = 4
            -- Invalid input; let Text figure it out
            | otherwise = 1

        -- Walk the input one sequence at a time, starting at offset 'n'.
        loop n
            | n == maxN = (TE.decodeUtf8 bytes, B.empty)
            -- The sequence starting at 'n' runs past the end of the input
            -- (measured from 'n', not from the start of the buffer):
            -- decode what precedes it and hand the tail back.
            | n + req > maxN = first TE.decodeUtf8 (B.splitAt n bytes)
            | otherwise = loop $! n + req
          where
            req = required (B.index bytes n)

-- | UTF-16 little-endian codec. Decoding holds back a trailing odd byte or
-- an incomplete surrogate pair as undecoded input.
utf16_le :: Codec
utf16_le = Codec name enc dec where
    name = T.pack "UTF-16-LE"
    enc = Right . Prelude.map TE.encodeUtf16LE
    -- Little-endian: the first byte of a code unit is the low byte.
    dec = unsafeTryDec . utf16SplitBytes TE.decodeUtf16LE utf16Required

-- | UTF-16 big-endian codec. Decoding holds back a trailing odd byte or
-- an incomplete surrogate pair as undecoded input.
utf16_be :: Codec
utf16_be = Codec name enc dec where
    name = T.pack "UTF-16-BE"
    enc = Right . Prelude.map TE.encodeUtf16BE
    -- Big-endian: the first byte of a code unit is the high byte, so the
    -- arguments are swapped before calling 'utf16Required'.
    dec = unsafeTryDec . utf16SplitBytes TE.decodeUtf16BE
        (\hi lo -> utf16Required lo hi)

-- | Split UTF-16 input into a decodable prefix and an undecoded tail.
--
-- @required@ receives the two bytes of the code unit at the current offset,
-- in stream order, and returns how many bytes the character starting there
-- occupies. The prefix is passed to @dec@; the tail is returned untouched.
utf16SplitBytes
    :: (B.ByteString -> a)
    -> (Word8 -> Word8 -> Int)
    -> B.ByteString
    -> (a, B.ByteString)
utf16SplitBytes dec required bytes = loop 0 where
    maxN = B.length bytes

    decodeTo n = first dec (B.splitAt n bytes)

    loop n
        | n == maxN = (dec bytes, B.empty)
        -- A single dangling byte cannot form a code unit.
        | n + 1 == maxN = decodeTo n
        -- The character starting at 'n' is not fully available yet.
        | n + req > maxN = decodeTo n
        | otherwise = loop $! n + req
      where
        -- Inspect the code unit at the current offset, not at the start
        -- of the buffer.
        req = required (B.index bytes n) (B.index bytes (n + 1))

-- | Number of bytes occupied by the UTF-16 character whose first code unit
-- has low byte @x0@ and high byte @x1@: 4 when the code unit lies in
-- @0xD800@-@0xDBFF@ (a high surrogate), otherwise 2.
utf16Required :: Word8 -> Word8 -> Int
utf16Required x0 x1 = required where
    required = if x >= 0xD800 && x <= 0xDBFF
        then 4
        else 2

    x :: Word16
    x = (fromIntegral x1 `shiftL` 8) .|. fromIntegral x0

-- | UTF-32 little-endian codec. Decoding holds back any trailing bytes that
-- do not make up a whole 4-byte unit.
utf32_le :: Codec
utf32_le = Codec name enc dec where
    name = T.pack "UTF-32-LE"
    enc = Right . Prelude.map TE.encodeUtf32LE
    dec = unsafeTryDec . utf32SplitBytes TE.decodeUtf32LE

-- | UTF-32 big-endian codec. Decoding holds back any trailing bytes that
-- do not make up a whole 4-byte unit.
utf32_be :: Codec
utf32_be = Codec name enc dec where
    name = T.pack "UTF-32-BE"
    enc = Right . Prelude.map TE.encodeUtf32BE
    dec = unsafeTryDec . utf32SplitBytes TE.decodeUtf32BE

-- | Split UTF-32 input at the last multiple of 4 bytes. The prefix is
-- passed to @dec@; the remaining 0-3 bytes are returned untouched.
utf32SplitBytes :: (B.ByteString -> a) -> B.ByteString -> (a, B.ByteString)
utf32SplitBytes dec bytes = (dec toDecode, extra) where
    len = B.length bytes
    lenExtra = mod len 4
    lenToDecode = len - lenExtra

    (toDecode, extra) = if lenExtra == 0
        then (bytes, B.empty)
        else B.splitAt lenToDecode bytes

-- | ASCII codec. Encoding fails on the first character above @U+007F@;
-- decoding fails on the first byte above @0x7F@. Decoding never leaves
-- undecoded bytes.
ascii :: Codec
ascii = Codec name (mapEither enc) dec where
    name = T.pack "ASCII"

    enc t = case T.findBy (\c -> ord c > 0x7F) t of
        Nothing -> Right . B8.pack . T.unpack $ t
        Just c -> illegalEnc name c

    dec bytes = case B.find (\w -> w > 0x7F) bytes of
        Nothing -> Right (T.pack (B8.unpack bytes), B.empty)
        Just w -> illegalDec name w

-- | ISO-8859-1 codec. Encoding fails on the first character above
-- @U+00FF@; decoding accepts every byte and never leaves undecoded bytes.
iso8859_1 :: Codec
iso8859_1 = Codec name (mapEither enc) dec where
    name = T.pack "ISO-8859-1"

    enc t = case T.findBy (\c -> ord c > 0xFF) t of
        Nothing -> Right . B8.pack . T.unpack $ t
        Just c -> illegalEnc name c

    dec bytes = Right (T.pack (B8.unpack bytes), B.empty)

-- | Build the failure reported when codec @name@ cannot encode @c@. The
-- message names the character as @U+XXXX@, upper-case hex, zero-padded to
-- at least four digits.
illegalEnc :: T.Text -> Char -> Either E.SomeException a
illegalEnc name c = Left . E.SomeException . E.ErrorCall $ msg "" where
    len = Prelude.length

    pad str
        | len str < 4 = replicate (4 - len str) '0' ++ str
        | otherwise = str

    hex = "U+" ++ pad (showIntAtBase 16 (toUpper . intToDigit) (ord c) "")
    msg = (s "Codec " . shows name . s " can't encode character " . s hex)
    s = showString

-- | Build the failure reported when codec @name@ cannot decode byte @w@.
-- The message names the byte as @0xXX@, upper-case hex, zero-padded to at
-- least two digits.
illegalDec :: T.Text -> Word8 -> Either E.SomeException a
illegalDec name w = Left . E.SomeException . E.ErrorCall $ msg "" where
    len = Prelude.length

    pad str
        | len str < 2 = replicate (2 - len str) '0' ++ str
        | otherwise = str

    hex = "0x" ++ pad (showIntAtBase 16 (toUpper . intToDigit) w "")
    msg = (s "Codec " . shows name . s " can't decode byte " . s hex)
    s = showString

-- | Force the first component of the pair and turn any exception raised
-- while evaluating it into a 'Left'. Used to capture failures of the lazy
-- decoding thunks built by the codecs in this module.
--
-- Only the first component is evaluated (to weak head normal form); the
-- second is returned as given.
unsafeTryDec :: (a, b) -> Either E.SomeException (a, b)
unsafeTryDec (a, b) = unsafePerformIO $ do
    tried <- E.try $ E.evaluate a
    return $ case tried of
        Left err -> Left err
        Right _ -> Right (a, b)