{-# OPTIONS_GHC -XNoImplicitPrelude #-}------------------------------------------------------------------------------- |-- Module : GHC.IO.Encoding.Iconv-- Copyright : (c) The University of Glasgow, 2008-2009-- License : see libraries/base/LICENSE-- -- Maintainer : libraries@haskell.org-- Stability : internal-- Portability : non-portable---- This module provides text encoding/decoding using iconv--------------------------------------------------------------------------------- #hidemoduleGHC.IO.Encoding.Iconv(#if !defined(mingw32_HOST_OS)mkTextEncoding,latin1,utf8,utf16,utf16le,utf16be,utf32,utf32le,utf32be,localeEncoding#endif)where#include "MachDeps.h"#include "HsBaseConfig.h"#if !defined(mingw32_HOST_OS)#undef DEBUG_DUMPimportForeignimportForeign.CimportData.MaybeimportGHC.BaseimportGHC.IO.BufferimportGHC.IO.Encoding.TypesimportGHC.NumimportGHC.ShowimportGHC.Real#ifdef DEBUG_DUMPimportSystem.Posix.Internals#endificonv_trace::String->IO()#ifdef DEBUG_DUMPiconv_traces=putssputs::String->IO()putss=dowithCStringLen(s++"\n")$\(p,len)->c_write1(castPtrp)(fromIntegrallen)return()#elseiconv_trace_=return()#endif-- ------------------------------------------------------------------------------- iconv encoders/decoders{-# NOINLINE latin1 #-}latin1::TextEncodinglatin1=unsafePerformIO(mkTextEncoding"Latin1"){-# NOINLINE utf8 #-}utf8::TextEncodingutf8=unsafePerformIO(mkTextEncoding"UTF8"){-# NOINLINE utf16 #-}utf16::TextEncodingutf16=unsafePerformIO(mkTextEncoding"UTF16"){-# NOINLINE utf16le #-}utf16le::TextEncodingutf16le=unsafePerformIO(mkTextEncoding"UTF16LE"){-# NOINLINE utf16be #-}utf16be::TextEncodingutf16be=unsafePerformIO(mkTextEncoding"UTF16BE"){-# NOINLINE utf32 #-}utf32::TextEncodingutf32=unsafePerformIO(mkTextEncoding"UTF32"){-# NOINLINE utf32le #-}utf32le::TextEncodingutf32le=unsafePerformIO(mkTextEncoding"UTF32LE"){-# NOINLINE utf32be #-}utf32be::TextEncodingutf32be=unsafePerformIO(mkTextEncoding"UTF32BE"){-# NOINLINE localeEncoding #-}localeEncoding::TextEncodinglocaleEncoding=unsafePerformIO$do#if HAVE_LANGINFO_Hcstr<-c_localeEncoding-- use nl_langinfo(CODESET) to get the encoding-- if we have itr<-peekCStringcstrmkTextEncodingr#elsemkTextEncoding""-- GNU iconv accepts "" to mean the -- locale encoding.#endif-- We hope iconv_t is a storable type. It should be, since it has at least the-- value -1, which is a possible return value from iconv_open.typeIConv=CLong-- ToDo: (#type iconv_t)foreignimportccallunsafe"hs_iconv_open"hs_iconv_open::CString->CString->IOIConvforeignimportccallunsafe"hs_iconv_close"hs_iconv_close::IConv->IOCIntforeignimportccallunsafe"hs_iconv"hs_iconv::IConv->PtrCString->PtrCSize->PtrCString->PtrCSize->IOCSizeforeignimportccallunsafe"localeEncoding"c_localeEncoding::IOCStringhaskellChar::String#ifdef WORDS_BIGENDIANhaskellChar|charSize==2="UTF-16BE"|otherwise="UTF-32BE"#elsehaskellChar|charSize==2="UTF-16LE"|otherwise="UTF-32LE"#endifchar_shift::Intchar_shift|charSize==2=1|otherwise=2mkTextEncoding::String->IOTextEncodingmkTextEncodingcharset=doreturn(TextEncoding{mkTextDecoder=newIConvcharsethaskellChariconvDecode,mkTextEncoder=newIConvhaskellCharcharseticonvEncode})newIConv::String->String->(IConv->Buffera->Bufferb->IO(Buffera,Bufferb))->IO(BufferCodecab())newIConvfromtofn=withCStringfrom$\from_str->withCStringto$\to_str->doiconvt<-throwErrnoIfMinus1"mkTextEncoding"$hs_iconv_opento_strfrom_strleticlose=throwErrnoIfMinus1_"Iconv.close"$hs_iconv_closeiconvtreturnBufferCodec{encode=fniconvt,close=iclose,-- iconv doesn't supply a way to save/restore the stategetState=return(),setState=const$return()}iconvDecode::IConv->BufferWord8->BufferCharBufElem->IO(BufferWord8,BufferCharBufElem)iconvDecodeiconv_tibufobuf=iconvRecodeiconv_tibuf0obufchar_shifticonvEncode::IConv->BufferCharBufElem->BufferWord8->IO(BufferCharBufElem,BufferWord8)iconvEncodeiconv_tibufobuf=iconvRecodeiconv_tibufchar_shiftobuf0iconvRecode::IConv->Buffera->Int->Bufferb->Int->IO(Buffera,Bufferb)iconvRecodeiconv_tinput@Buffer{bufRaw=iraw,bufL=ir,bufR=iw,bufSize=_}iscaleoutput@Buffer{bufRaw=oraw,bufL=_,bufR=ow,bufSize=os}oscale=doiconv_trace("haskelChar="++showhaskellChar)iconv_trace("iconvRecode before, input="++show(summaryBufferinput))iconv_trace("iconvRecode before, output="++show(summaryBufferoutput))withRawBufferiraw$\piraw->dowithRawBufferoraw$\poraw->dowith(piraw`plusPtr`(ir`shiftL`iscale))$\p_inbuf->dowith(poraw`plusPtr`(ow`shiftL`oscale))$\p_outbuf->dowith(fromIntegral((iw-ir)`shiftL`iscale))$\p_inleft->dowith(fromIntegral((os-ow)`shiftL`oscale))$\p_outleft->dores<-hs_iconviconv_tp_inbufp_inleftp_outbufp_outleftnew_inleft<-peekp_inleftnew_outleft<-peekp_outleftletnew_inleft'=fromIntegralnew_inleft`shiftR`iscalenew_outleft'=fromIntegralnew_outleft`shiftR`oscalenew_input|new_inleft==0=input{bufL=0,bufR=0}|otherwise=input{bufL=iw-new_inleft'}new_output=output{bufR=os-new_outleft'}iconv_trace("iconv res="++showres)iconv_trace("iconvRecode after, input="++show(summaryBuffernew_input))iconv_trace("iconvRecode after, output="++show(summaryBuffernew_output))if(res/=-1)thendo-- all input translatedreturn(new_input,new_output)elsedoerrno<-getErrnocaseerrnoofe|e==eINVAL||(e==e2BIG||e==eILSEQ)&&new_inleft'/=(iw-ir)->doiconv_trace("iconv ignoring error: "++show(errnoToIOError"iconv"eNothingNothing))-- Output overflow is relatively harmless, unless-- we made no progress at all. ---- Similarly, we ignore EILSEQ unless we converted no-- characters. Sometimes iconv reports EILSEQ for a-- character in the input even when there is no room-- in the output; in this case we might be about to-- change the encoding anyway, so the following bytes-- could very well be in a different encoding.-- This also helps with pinpointing EILSEQ errors: we-- don't report it until the rest of the characters in-- the buffer have been drained.return(new_input,new_output)_other->throwErrno"iconvRecoder"-- illegal sequence, or some other error#endif /* !mingw32_HOST_OS */