------------------------------------------------------------------------------- |-- Module : Codec.Archive.Tar.Read-- Copyright : (c) 2007 Bjorn Bringert,-- 2008 Andrea Vezzosi,-- 2008-2009 Duncan Coutts,-- 2011 Max Bolingbroke-- License : BSD3---- Maintainer : duncan@community.haskell.org-- Portability : portable-------------------------------------------------------------------------------moduleCodec.Archive.Tar.Read(read,FormatError(..))whereimportCodec.Archive.Tar.TypesimportData.Char(ord)importData.Int(Int64)importNumeric(readOct)importControl.Exception(Exception)importData.Typeable(Typeable)importqualifiedData.ByteString.LazyasBSimportqualifiedData.ByteString.Lazy.Char8asBS.Char8importData.ByteString.Lazy(ByteString)importPreludehiding(read)-- | Errors that can be encountered when parsing a Tar archive.dataFormatError=TruncatedArchive|ShortTrailer|BadTrailer|TrailingJunk|ChecksumIncorrect|NotTarFormat|UnrecognisedTarFormat|HeaderBadNumericEncodingderiving(Typeable)instanceShowFormatErrorwhereshowTruncatedArchive="truncated tar archive"showShortTrailer="short tar trailer"showBadTrailer="bad tar trailer"showTrailingJunk="tar file has trailing junk"showChecksumIncorrect="tar checksum error"showNotTarFormat="data is not in tar format"showUnrecognisedTarFormat="tar entry not in a recognised format"showHeaderBadNumericEncoding="tar header is malformed (bad numeric encoding)"instanceExceptionFormatError-- | Convert a data stream in the tar file format into an internal data-- structure. Decoding errors are reported by the 'Fail' constructor of the-- 'Entries' type.---- * The conversion is done lazily.--read::ByteString->EntriesFormatErrorread=unfoldEntriesgetEntrygetEntry::ByteString->EitherFormatError(Maybe(Entry,ByteString))getEntrybs|BS.lengthheader<512=LeftTruncatedArchive-- Tar files end with at least two blocks of all '0'. Checking this serves-- two purposes. It checks the format but also forces the tail of the data-- which is necessary to close the file if it came from a lazily read file.|BS.headbs==0=caseBS.splitAt1024bsof(end,trailing)|BS.lengthend/=1024->LeftShortTrailer|not(BS.all(==0)end)->LeftBadTrailer|not(BS.all(==0)trailing)->LeftTrailingJunk|otherwise->RightNothing|otherwise=partial$docase(chksum_,format_)of(Okchksum,_)|correctChecksumheaderchksum->return()(Ok_,Ok_)->ErrorChecksumIncorrect_->ErrorNotTarFormat-- These fields are partial, have to check themformat<-format_;mode<-mode_;uid<-uid_;gid<-gid_;size<-size_;mtime<-mtime_;devmajor<-devmajor_;devminor<-devminor_;letcontent=BS.takesize(BS.drop512bs)padding=(512-size)`mod`512bs'=BS.drop(512+size+padding)bsentry=Entry{entryTarPath=TarPathnameprefix,entryContent=casetypecodeof'\0'->NormalFilecontentsize'0'->NormalFilecontentsize'1'->HardLink(LinkTargetlinkname)'2'->SymbolicLink(LinkTargetlinkname)'3'->CharacterDevicedevmajordevminor'4'->BlockDevicedevmajordevminor'5'->Directory'6'->NamedPipe'7'->NormalFilecontentsize_->OtherEntryTypetypecodecontentsize,entryPermissions=mode,entryOwnership=Ownershipunamegnameuidgid,entryTime=mtime,entryFormat=format}return(Just(entry,bs'))whereheader=BS.take512bsname=getString0100headermode_=getOct1008headeruid_=getOct1088headergid_=getOct1168headersize_=getOct12412headermtime_=getOct13612headerchksum_=getOct1488headertypecode=getByte156headerlinkname=getString157100headermagic=getChars2578headeruname=getString26532headergname=getString29732headerdevmajor_=getOct3298headerdevminor_=getOct3378headerprefix=getString345155header-- trailing = getBytes 500 12 headerformat_=casemagicof"\0\0\0\0\0\0\0\0"->returnV7Format"ustar\NUL00"->returnUstarFormat"ustar \NUL"->returnGnuFormat_->ErrorUnrecognisedTarFormatcorrectChecksum::ByteString->Int->BoolcorrectChecksumheaderchecksum=checksum==checksum'where-- sum of all 512 bytes in the header block,-- treating each byte as an 8-bit unsigned valuechecksum'=BS.Char8.foldl'(\xy->x+ordy)0header'-- treating the 8 bytes of chksum as blank characters.header'=BS.concat[BS.take148header,BS.Char8.replicate8' ',BS.drop156header]-- * TAR format primitive inputgetOct::Integrala=>Int64->Int64->ByteString->PartialFormatErroragetOctofflen=parseOct.BS.Char8.unpack.BS.Char8.takeWhile(\c->c/='\NUL'&&c/=' ').BS.Char8.dropWhile(==' ').getBytesofflenwhereparseOct""=return0-- As a star extension, octal fields can hold a base-256 value if the high-- bit of the initial character is set. The initial character can be:-- 0x80 ==> trailing characters hold a positive base-256 value-- 0xFF ==> trailing characters hold a negative base-256 value---- In both cases, there won't be a trailing NUL/space.---- GNU tar seems to contain a half-implementation of code that deals with-- extra bits in the first character, but I don't think it works and the-- docs I can find on star seem to suggest that these will always be 0,-- which is what I will assume.parseOct('\128':xs)=return(readBytesxs)parseOct('\255':xs)=return(negate(readBytesxs))parseOcts=casereadOctsof[(x,[])]->returnx_->ErrorHeaderBadNumericEncodingreadBytes=go0wheregoacc[]=accgoacc(x:xs)=go(acc*256+fromIntegral(ordx))xsgetBytes::Int64->Int64->ByteString->ByteStringgetBytesofflen=BS.takelen.BS.dropoffgetByte::Int64->ByteString->ChargetByteoffbs=BS.Char8.indexbsoffgetChars::Int64->Int64->ByteString->StringgetCharsofflen=BS.Char8.unpack.getBytesofflengetString::Int64->Int64->ByteString->StringgetStringofflen=BS.Char8.unpack.BS.Char8.takeWhile(/='\0').getBytesofflendataPartialea=Errore|Okapartial::Partialea->Eithereapartial(Errormsg)=Leftmsgpartial(Okx)=RightxinstanceMonad(Partiale)wherereturn=OkErrorm>>=_=ErrormOkx>>=k=kxfail=error"fail @(Partial e)"