{- arch-tag: GZip file support in Haskell
Copyright (c) 2004-2011 John Goerzen <jgoerzen@complete.org>
All rights reserved.
For license and copyright information, see the file LICENSE
-}{- |
Module : System.FileArchive.GZip
Copyright : Copyright (C) 2004-2011 John Goerzen
License : BSD3
Maintainer : John Goerzen <jgoerzen@complete.org>
Stability : provisional
Portability: portable
GZip file decompression
Copyright (c) 2004 John Goerzen, jgoerzen\@complete.org
The GZip format is described in RFC1952.
-}
module System.FileArchive.GZip
    ( -- * GZip Files
      -- $gzipfiles

      -- * Types
      Header(..)
    , Section
    , GZipError(..)
    , Footer(..)
      -- * Whole-File Processing
    , decompress
    , hDecompress
    , read_sections
      -- * Section Processing
    , read_header
    , read_section
    ) where

import Data.Compression.Inflate (inflate_string_remainder)
import Data.Hash.CRC32.GZip (update_crc)
import Data.Bits ((.&.))
import Control.Monad.Error -- (Error(), strMsg, throwError)
import Data.Char (ord)
import Data.Word (Word32())
import Data.Bits.Utils (fromBytes)
import System.IO (hGetContents, hPutStr, Handle())

-- | The ways in which reading a GZip stream can fail.
data GZipError
    = CRCError             -- ^ The CRC-32 check did not succeed
    | NotGZIPFile          -- ^ No GZip header could be found
    | UnknownMethod        -- ^ The compression method is not method 8 (deflate)
    | UnknownError String  -- ^ Any other problem, with a description
    deriving (Eq, Show)

-- Makes 'GZipError' usable as the error type with 'throwError'; a free-form
-- message (or no message at all) is wrapped in 'UnknownError'.
instance Error GZipError where
    noMsg = UnknownError ""
    strMsg msg = UnknownError msg

-- | The two bytes every GZip header starts with.
magic :: String
magic = "\x1f\x8b"

-- | Bit masks tested against the flag byte of the header.
fFHCRC, fFEXTRA, fFNAME, fFCOMMENT :: Int
-- fFTEXT = 1 :: Int
fFHCRC    = 2
fFEXTRA   = 4
fFNAME    = 8
fFCOMMENT = 16

{- | The data structure representing the GZip header. This occurs
at the beginning of each 'Section' on disk. -}
data Header = Header
    { method   :: Int           -- ^ Compression method; 8 is the only value defined so far.
    , flags    :: Int           -- ^ The flag byte exactly as read from the header.
    , extra    :: Maybe String  -- ^ Contents of the extra field, if the header has one.
    , filename :: Maybe String  -- ^ Stored file name, if the header has one.
    , comment  :: Maybe String  -- ^ Stored comment, if the header has one.
    , mtime    :: Word32        -- ^ Modification time of the original file
    , xfl      :: Int           -- ^ Extra flags
    , os       :: Int           -- ^ Operating system the file was created on
    }
    deriving (Eq, Show)

{- | Found on disk at the very end of each section. -}
data Footer = Footer
    { size       :: Word32  -- ^ Size of the original, decompressed data
    , crc32      :: Word32  -- ^ GZip CRC-32 of the original, decompressed data, as stored in the file
    , crc32valid :: Bool    -- ^ True when the stored CRC-32 equals the CRC-32 calculated from the data
    }

{- | A section represents a compressed component in a GZip file.
Every GZip file has at least one. -}
type Section = (Header, String, Footer)

-- | Split a string into its first character and everything after it.
-- Both halves are produced lazily with 'head' and 'tail', so an empty
-- argument only fails once one of the halves is demanded.
split1 :: String -> (Char, String)
split1 str = (firstChar, others)
  where
    firstChar = head str
    others    = tail str

{- | Read a GZip file, decompressing all sections found.
Writes the decompressed data stream to the given output handle.
Returns Nothing if the action was successful, or Just GZipError if there
was a problem. If there was a problem, the data written to the output
handle should be discarded.
-}
hDecompress :: Handle                   -- ^ Input handle
            -> Handle                   -- ^ Output handle
            -> IO (Maybe GZipError)
hDecompress infd outfd =
    hGetContents infd >>= \compressed ->
        -- The lazy binding keeps the error component unevaluated until the
        -- caller inspects the result, after the output has been written.
        let (plain, problem) = decompress compressed
        in hPutStr outfd plain >> return problem

{- | Read a GZip file, decompressing all sections that are found.
Returns a decompressed data stream and Nothing, or an unreliable string
and Just (error). If you get anything other than Nothing, the String
returned should be discarded.
-}
decompress :: String -> (String, Maybe GZipError)
{-
decompress s =
    do x <- read_header s
       let rem = snd x
       return $ inflate_string rem
-}
decompress s = either failed succeeded (read_sections s)
  where
    -- The sections could not be read at all: no usable output.
    failed err = ("", Just err)

    -- Concatenate the contents of every section and report 'CRCError' if
    -- any section's checksum did not match.
    succeeded sections =
        let (plain, crcsOk) = foldr joinSection ([], True) sections
        in (plain, if crcsOk then Nothing else Just CRCError)

    -- The lazy pattern lets the contents of a section be consumed before
    -- the later sections (and their checksum flags) are evaluated.
    joinSection :: Section -> (String, Bool) -> (String, Bool)
    joinSection (_, content, foot) ~(restPlain, restOk) =
        (content ++ restPlain, crc32valid foot && restOk)
{-
decompress s = do x <- read_sections s
return $ concatMap (\(_, x, _) -> x) x
-}
-- | Read every section of the input, one after another, until the input
-- is used up. The first section that cannot be read aborts the whole result.
read_sections :: String -> Either GZipError [Section]
read_sections [] = Right []
read_sections input = do
    (section, rest) <- read_section input
    others <- read_sections rest
    return (section : others)

-- | Build a 'Word32' from a string of bytes: the characters are reversed,
-- turned into their numeric codes and handed to 'fromBytes'.
parseword :: String -> Word32
parseword = fromBytes . map (fromIntegral . ord) . reverse

-- | Read a single section. The result pairs the section with whatever input
-- follows it.
read_section :: String -> Either GZipError (Section, String)
read_section s = do
    (header, body) <- read_header s
    let (plain, computedCrc, afterData) = read_data body
        -- The footer is two 4-byte words: the stored CRC-32, then the size.
        (crcBytes, afterCrc)  = splitAt 4 afterData
        (sizeBytes, leftover) = splitAt 4 afterCrc
        storedCrc = parseword crcBytes
        footer = Footer { size = parseword sizeBytes
                        , crc32 = storedCrc
                        , crc32valid = storedCrc == computedCrc
                        }
    return ((header, plain, footer), leftover)

-- | Inflate the compressed data at the start of the input, returning
-- (Decompressed, Calculated CRC32, Remainder).
read_data :: String -> (String, Word32, String)
read_data x = (decompressed, checksum, remainder)
  where
    (inflated, remainder)    = inflate_string_remainder x
    (decompressed, checksum) = crcWalk inflated 0

    -- Pass the characters through one by one while folding each of them
    -- into the running CRC. The 'seq' forces every intermediate CRC before
    -- the walk moves on to the following character.
    crcWalk [] acc = ([], acc)
    crcWalk (c:cs) acc =
        let acc' = update_crc acc c
            rest = acc' `seq` crcWalk cs acc'
        in (c : fst rest, snd rest)

{- | Read the GZip header. Return (Header, Remainder).
-}
read_header :: String -> Either GZipError (Header, String)
-- Fields are consumed in this order: 2 magic bytes, 1 method byte, 1 flag
-- byte, 4 bytes of modification time, 1 extra-flags byte and 1 OS byte.
-- After those come the optional fields selected by the flag byte: the extra
-- field (a 2-byte length, low byte first, followed by that many bytes), the
-- NUL-terminated file name, the NUL-terminated comment, and a 2-byte header
-- CRC, which is skipped without being checked.
--
-- Fails with 'NotGZIPFile' when the magic bytes do not match and with
-- 'UnknownMethod' when the method byte is not 8.
--
-- NOTE: input that ends inside the header is not reported as an error
-- here; the 'head' / 'tail' calls made through 'split1' fail when the
-- missing field is demanded.
read_header s = do
    let (mag, afterMagic) = splitAt 2 s
    if mag /= magic
        then throwError NotGZIPFile
        else return ()
    let (methodC, afterMethod) = split1 afterMagic
    if ord methodC /= 8
        then throwError UnknownMethod
        else return ()
    let (flagC, afterFlags) = split1 afterMethod
        flag = ord flagC
        hasFlag bit = flag .&. bit /= 0

        (mtimeBytes, afterMtime) = splitAt 4 afterFlags
        (xflC, afterXfl) = split1 afterMtime
        (osC, _) = split1 afterXfl
        -- Step over modification time (4), extra flags (1) and OS (1).
        afterFixed = drop 6 afterFlags

        (extraField, afterExtra) =
            if hasFlag fFEXTRA
                -- The length is two bytes, low byte first. The second byte
                -- must be taken from what follows the first one; reading
                -- both from the same position duplicates the low byte and
                -- leaves the high byte in front of the field contents.
                then let (lenLo, afterLo)  = split1 afterFixed
                         (lenHi, afterLen) = split1 afterLo
                         xlen = ord lenLo + 256 * ord lenHi
                         (ex, rest) = splitAt xlen afterLen
                     in (Just ex, rest)
                else (Nothing, afterFixed)

        -- A NUL-terminated field that is only present when its flag is set.
        -- The terminating NUL is dropped from the remainder.
        optionalCString bit input
            | hasFlag bit =
                let (field, rest) = break (== '\x00') input
                in (Just field, drop 1 rest)
            | otherwise = (Nothing, input)

        (fileName, afterName)       = optionalCString fFNAME afterExtra
        (commentText, afterComment) = optionalCString fFCOMMENT afterName

    -- Skip past the header CRC when there is one.
    afterCrc <- if hasFlag fFHCRC
                    then return (drop 2 afterComment)
                    else return afterComment

    return ( Header { method = ord methodC
                    , flags = flag
                    , extra = extraField
                    , filename = fileName
                    , comment = commentText
                    , mtime = parseword mtimeBytes
                    , xfl = ord xflC
                    , os = ord osC
                    }
           , afterCrc )

----------------------------------------------------------------------
-- Documentation
----------------------------------------------------------------------

{- $gzipfiles
GZip files contain one or more 'Section's. Each 'Section', on disk, begins
with a GZip 'Header', then stores the compressed data itself, and finally
stores a GZip 'Footer'.
The 'Header' identifies the file as a GZip file, records the original
modification date and time, and, in some cases, also records the original
filename and comments.
The 'Footer' contains a GZip CRC32 checksum over the decompressed data as
well as a 32-bit length of the decompressed data. The module
'Data.Hash.CRC32.GZip' is used to validate stored CRC32 values.
The vast majority of GZip files contain only one 'Section'. Standard tools
that work with GZip files create single-section files by default.
Multi-section files can be created by simply concatenating two existing
GZip files together. The standard gunzip and zcat tools will simply
concatenate the decompressed data when reading these files back. The
'decompress' function in this module will do the same.
When reading data from this module, please use caution regarding how you access
it. For instance, if you are wanting to write the decompressed stream
to disk and validate its CRC32 value, you could use the 'decompress'
function. However, you should process the entire stream before you check
the value of the 'Maybe' 'GZipError' it returns. Otherwise, you will force Haskell to buffer
the entire file in memory just so it can check the CRC32.
-}