{-# LANGUAGE Rank2Types #-}-- Random and Binary IO with IterateeM-- A general-purpose TIFF library-- The library gives the user the TIFF dictionary, which the user-- can search for specific tags and obtain the values associated with -- the tags, including the pixel matrix.---- The overarching theme is incremental processing: initially,-- only the TIFF dictionary is read. The value associated with a tag-- is read only when that tag is looked up (unless the value was short-- and was packed in the TIFF dictionary entry). The pixel matrix-- (let alone the whole TIFF file) is not loaded in memory -- -- the pixel matrix is not even located before it is needed.-- The matrix is processed incrementally, by a user-supplied -- iteratee.---- The incremental processing is accomplished by iteratees and enumerators.-- The enumerators are indeed first-class, they are stored-- in the interned TIFF dictionary data structure. These enumerators-- represent the values associated with tags; the values will be read-- on demand, when the enumerator is applied to a user-given iteratee.---- The library extensively uses nested streams, tacitly converting the -- stream of raw bytes from the file into streams of integers, -- rationals and other user-friendly items. The pixel matrix is-- presented as a contiguous stream, regardless of its segmentation-- into strips and physical arrangement.-- The library exhibits random IO and binary parsing, reading-- of multi-byte numeric data in big- or little-endian formats.-- The library can be easily adopted for AIFF, RIFF and other-- IFF formats.---- We show a representative application of the library: reading a sample-- TIFF file, printing selected values from the TIFF dictionary,-- verifying the values of selected pixels and computing the histogram-- of pixel values. The pixel verification procedure stops reading the-- pixel matrix as soon as all specified pixel values are verified.-- The histogram accumulation does read the entire matrix, but-- incrementally. Neither pixel matrix processing procedure loads-- the whole matrix in memory. In fact, we never read and retain-- more than the IO-buffer-full of raw data.-- This TIFF library is to be contrasted with the corresponding Scheme-- code:-- http://okmij.org/ftp/Scheme/binary-io.html#tiff-- The main distinction is using iteratees for on-demand processing.moduleData.Iteratee.Codecs.TiffwhereimportData.IterateeimportqualifiedData.IterateeasIterimportqualifiedData.Iteratee.Base.StreamChunkasSCimportData.Iteratee.BinaryimportControl.MonadimportControl.Monad.TransimportData.Char(chr)importData.IntimportData.WordimportData.RatioimportData.MaybeimportqualifiedData.IntMapasIM-- ========================================================================-- Sample TIFF user code-- The following is sample code using the TIFF library (whose implementation-- is in the second part of this file).-- Our sample code prints interesting information from the TIFF-- dictionary (such as the dimensions, the resolution and the name-- of the image)-- The main user function. tiff_reader is the library function,-- which builds the TIFF dictionary.-- process_tiff is the user function, to extract useful data-- from the dictionary-- test_tiff :: IO (Maybe String)-- test_tiff = test_driver_random (tiff_reader >>= process_tiff) "filename.tiff"-- Sample TIFF processing functionprocess_tiff::MonadIOm=>Maybe(IM.IntMapTIFFDE)->IterateeG[]Word8m()process_tiffNothing=return()process_tiff(Justdict)=donote["dict size: ",show$IM.sizedict]-- Check tag values against the known values for the sample imagecheck_tagTG_IMAGEWIDTH(flipdict_read_intdict)129check_tagTG_IMAGELENGTH(flipdict_read_intdict)122check_tagTG_BITSPERSAMPLE(flipdict_read_intdict)8check_tagTG_IMAGEDESCRIPTION(flipdict_read_stringdict)"JPEG:gnu-head-sm.jpg 129x122"check_tagTG_COMPRESSION(flipdict_read_intdict)1check_tagTG_SAMPLESPERPIXEL(flipdict_read_intdict)1check_tagTG_STRIPBYTECOUNTS(flipdict_read_intdict)15738-- nrows*ncolscheck_tagTG_XRESOLUTION(flipdict_read_ratdict)(72%1)check_tagTG_YRESOLUTION(flipdict_read_ratdict)(72%1)(n,hist)<-compute_histdictnote["computed histogram over ",shown," values\n",showhist]--iterReportError >>= maybe (return ()) errornote["Verifying values of sample pixels"]verify_pixel_valsdict[(0,255),(17,248)]--err <- iterReportError--maybe (return ()) error err--return errwherecheck_tagtagactionv=dovc<-actiontagcasevcofJustv'|v'==v->note["Tag ",showtag," value ",showv]_->error$unwords["Tag",showtag,"unexpected:",showvc]-- process_tiff Nothing = return Nothing-- sample processing of the pixel matrix: computing the histogramcompute_hist::MonadIOm=>TIFFDict->IterateeG[]Word8m(Int,IM.IntMapInt)compute_histdict=Iter.joinI$pixel_matrix_enumdict$compute_hist'0IM.emptywhere--compute_hist' count = liftI . Cont . step countcompute_hist'counthist=IterateeG(stepcounthist)stepcounthist(Chunkch)|SC.nullch=return$Cont(compute_hist'counthist)Nothing|otherwise=return$Cont(compute_hist'(count+SC.lengthch)(foldraccumhistch))Nothingstepcounthists=return$Done(count,hist)saccume=IM.insertWith(+)(fromIntegrale)1-- Another sample processor of the pixel matrix: verifying values of-- some pixels-- This processor does not read the whole matrix; it stops as soon-- as everything is verified or the error is detectedverify_pixel_vals::MonadIOm=>TIFFDict->[(IM.Key,Word8)]->IterateeG[]Word8m()verify_pixel_valsdictpixels=Iter.joinI$pixel_matrix_enumdict$verify0(IM.fromListpixels)whereverify_m|IM.nullm=return()verifynm=IterateeG(stepnm)stepnm(Chunkxs)|SC.nullxs=return$Cont(verifynm)Nothing|otherwise=let(h,t)=(SC.headxs,SC.tailxs)incaseIM.updateLookupWithKey(\_k_e->Nothing)nmof(Justv,m')->ifv==hthenstep(succn)m'(Chunkt)elseleter=(unwords["Pixel #",shown,"expected:",showv,"found",showh])inreturn$Cont(throwErr.Err$er)(Just$Errer)(Nothing,m')->step(succn)m'(Chunkt)step_n_ms=return$Done()s-- ========================================================================-- TIFF library code-- A TIFF directory is a finite map associating a TIFF tag with-- a record TIFFDEtypeTIFFDict=IM.IntMapTIFFDEdataTIFFDE=TIFFDE{tiffde_count::Int,-- number of itemstiffde_enum::TIFFDE_ENUM-- enumerator to get values}dataTIFFDE_ENUM=TEN_CHAR(forallam.Monadm=>EnumeratorGMM[]Word8[]Charma)|TEN_BYTE(forallam.Monadm=>EnumeratorGMM[]Word8[]Word8ma)|TEN_INT(forallam.Monadm=>EnumeratorGMM[]Word8[]Intma)|TEN_RAT(forallam.Monadm=>EnumeratorGMM[]Word8[](RatioInt)ma)-- Standard TIFF data typesdataTIFF_TYPE=TT_NONE-- 0|TT_byte-- 1 8-bit unsigned integer|TT_ascii-- 2 8-bit bytes with last byte null|TT_short-- 3 16-bit unsigned integer|TT_long-- 4 32-bit unsigned integer|TT_rational-- 5 64-bit fractional (numer+denominator)-- The following was added in TIFF 6.0|TT_sbyte-- 6 8-bit signed (2s-complement) integer|TT_undefined-- 7 An 8-bit byte, "8-bit chunk"|TT_sshort-- 8 16-bit signed (2s-complement) integer|TT_slong-- 9 32-bit signed (2s-complement) integer|TT_srational-- 10 "signed rational", two SLONGs (num+denominator)|TT_float-- 11 "IEEE 32-bit float", single precision (4-byte)|TT_double-- 12 "IEEE 64-bit double", double precision (8-byte)deriving(Eq,Enum,Ord,Bounded,Show)-- Standard TIFF tagsdataTIFF_TAG=TG_otherInt-- other than below|TG_SUBFILETYPE-- subfile data descriptor|TG_OSUBFILETYPE-- +kind of data in subfile|TG_IMAGEWIDTH-- image width in pixels|TG_IMAGELENGTH-- image height in pixels|TG_BITSPERSAMPLE-- bits per channel (sample)|TG_COMPRESSION-- data compression technique|TG_PHOTOMETRIC-- photometric interpretation|TG_THRESHOLDING-- +thresholding used on data|TG_CELLWIDTH-- +dithering matrix width|TG_CELLLENGTH-- +dithering matrix height|TG_FILLORDER-- +data order within a byte|TG_DOCUMENTNAME-- name of doc. image is from|TG_IMAGEDESCRIPTION-- info about image|TG_MAKE-- scanner manufacturer name|TG_MODEL-- scanner model name/number|TG_STRIPOFFSETS-- offsets to data strips|TG_ORIENTATION-- +image orientation|TG_SAMPLESPERPIXEL-- samples per pixel|TG_ROWSPERSTRIP-- rows per strip of data|TG_STRIPBYTECOUNTS-- bytes counts for strips|TG_MINSAMPLEVALUE-- +minimum sample value|TG_MAXSAMPLEVALUE-- maximum sample value|TG_XRESOLUTION-- pixels/resolution in x|TG_YRESOLUTION-- pixels/resolution in y|TG_PLANARCONFIG-- storage organization|TG_PAGENAME-- page name image is from|TG_XPOSITION-- x page offset of image lhs|TG_YPOSITION-- y page offset of image lhs|TG_FREEOFFSETS-- +byte offset to free block|TG_FREEBYTECOUNTS-- +sizes of free blocks|TG_GRAYRESPONSEUNIT-- gray scale curve accuracy|TG_GRAYRESPONSECURVE-- gray scale response curve|TG_GROUP3OPTIONS-- 32 flag bits|TG_GROUP4OPTIONS-- 32 flag bits|TG_RESOLUTIONUNIT-- units of resolutions|TG_PAGENUMBER-- page numbers of multi-page|TG_COLORRESPONSEUNIT-- color scale curve accuracy|TG_COLORRESPONSECURVE-- RGB response curve|TG_SOFTWARE-- name & release|TG_DATETIME-- creation date and time|TG_ARTIST-- creator of image|TG_HOSTCOMPUTER-- machine where created|TG_PREDICTOR-- prediction scheme w/ LZW|TG_WHITEPOINT-- image white point|TG_PRIMARYCHROMATICITIES-- primary chromaticities|TG_COLORMAP-- RGB map for pallette image|TG_BADFAXLINES-- lines w/ wrong pixel count|TG_CLEANFAXDATA-- regenerated line info|TG_CONSECUTIVEBADFAXLINES-- max consecutive bad lines|TG_MATTEING-- alpha channel is presentderiving(Eq,Show)tag_map::Numt=>[(TIFF_TAG,t)]tag_map=[(TG_SUBFILETYPE,254),(TG_OSUBFILETYPE,255),(TG_IMAGEWIDTH,256),(TG_IMAGELENGTH,257),(TG_BITSPERSAMPLE,258),(TG_COMPRESSION,259),(TG_PHOTOMETRIC,262),(TG_THRESHOLDING,263),(TG_CELLWIDTH,264),(TG_CELLLENGTH,265),(TG_FILLORDER,266),(TG_DOCUMENTNAME,269),(TG_IMAGEDESCRIPTION,270),(TG_MAKE,271),(TG_MODEL,272),(TG_STRIPOFFSETS,273),(TG_ORIENTATION,274),(TG_SAMPLESPERPIXEL,277),(TG_ROWSPERSTRIP,278),(TG_STRIPBYTECOUNTS,279),(TG_MINSAMPLEVALUE,280),(TG_MAXSAMPLEVALUE,281),(TG_XRESOLUTION,282),(TG_YRESOLUTION,283),(TG_PLANARCONFIG,284),(TG_PAGENAME,285),(TG_XPOSITION,286),(TG_YPOSITION,287),(TG_FREEOFFSETS,288),(TG_FREEBYTECOUNTS,289),(TG_GRAYRESPONSEUNIT,290),(TG_GRAYRESPONSECURVE,291),(TG_GROUP3OPTIONS,292),(TG_GROUP4OPTIONS,293),(TG_RESOLUTIONUNIT,296),(TG_PAGENUMBER,297),(TG_COLORRESPONSEUNIT,300),(TG_COLORRESPONSECURVE,301),(TG_SOFTWARE,305),(TG_DATETIME,306),(TG_ARTIST,315),(TG_HOSTCOMPUTER,316),(TG_PREDICTOR,317),(TG_WHITEPOINT,318),(TG_PRIMARYCHROMATICITIES,319),(TG_COLORMAP,320),(TG_BADFAXLINES,326),(TG_CLEANFAXDATA,327),(TG_CONSECUTIVEBADFAXLINES,328),(TG_MATTEING,32995)]tag_map'::IM.IntMapTIFF_TAGtag_map'=IM.fromList$map(\(tag,v)->(v,tag))tag_maptag_to_int::TIFF_TAG->Inttag_to_int(TG_otherx)=xtag_to_intx=fromMaybe(error$"not found tag: "++showx)$lookupxtag_mapint_to_tag::Int->TIFF_TAGint_to_tagx=fromMaybe(TG_otherx)$IM.lookupxtag_map'-- The library function to read the TIFF dictionarytiff_reader::IterateeG[]Word8IO(MaybeTIFFDict)tiff_reader=doendian<-read_magiccheck_versioncaseendianofJuste->doendianRead4e>>=Iter.seek.fromIntegralload_dicteNothing->returnNothingwhere-- Read the magic and set the endiannessread_magic=doc1<-Iter.headc2<-Iter.headcase(c1,c2)of(0x4d,0x4d)->return$JustMSB(0x49,0x49)->return$JustLSB_->(throwErr.Err$"Bad TIFF magic word: "++show[c1,c2])>>returnNothing-- Check the version in the header. It is always ...tiff_version=42check_version=dov<-endianRead2MSBifv==tiff_versionthenreturn()elsethrowErr(Err$"Bad TIFF version: "++showv)-- A few conversion proceduresu32_to_float::Word32->Doubleu32_to_float_x=-- unsigned 32-bit int -> IEEE floaterror"u32->float is not yet implemented"u32_to_s32::Word32->Int32-- unsigned 32-bit int -> signed 32 bitu32_to_s32=fromIntegral-- u32_to_s32 0x7fffffff == 0x7fffffff-- u32_to_s32 0xffffffff == -1u16_to_s16::Word16->Int16-- unsigned 16-bit int -> signed 16 bitu16_to_s16=fromIntegral-- u16_to_s16 32767 == 32767-- u16_to_s16 32768 == -32768-- u16_to_s16 65535 == -1u8_to_s8::Word8->Int8-- unsigned 8-bit int -> signed 8 bitu8_to_s8=fromIntegral-- u8_to_s8 127 == 127-- u8_to_s8 128 == -128-- u8_to_s8 255 == -1note::(MonadIOm)=>[String]->IterateeG[]elm()note=liftIO.putStrLn.concat-- An internal function to load the dictionary. It assumes that the stream-- is positioned to read the dictionaryload_dict::MonadIOm=>Endian->IterateeG[]Word8m(MaybeTIFFDict)load_dicte=donentries<-endianRead2edict<-foldr(constread_entry)(return(JustIM.empty))[1..nentries]next_dict<-endianRead4ewhen(next_dict>0)$note["The TIFF file contains several images, ","only the first one will be considered"]returndictwhereread_entrydictM=dictM>>=maybe(returnNothing)(\dict->dotag<-endianRead2etyp'<-endianRead2etyp<-convert_type(fromIntegraltyp')count<-endianRead4e-- we read the val-offset later. We need to check the size and the type-- of the datum, because val-offset may contain the value itself,-- in its lower-numbered bytes, regardless of the big/little endian-- order!note["TIFFEntry: tag ",show.int_to_tag.fromIntegral$tag," type ",showtyp," count ",showcount]enum_m<-maybe(returnNothing)(\t->read_valuete(fromIntegralcount))typcaseenum_mofJustenum->return.Just$IM.insert(fromIntegraltag)(TIFFDE(fromIntegralcount)enum)dict_->return(Justdict))convert_type::(Monadm)=>Int->IterateeG[]elm(MaybeTIFF_TYPE)convert_typetyp|typ>0&&typ<=fromEnum(maxBound::TIFF_TYPE)=return.Just.toEnum$typconvert_typetyp=dothrowErr.Err$"Bad type of entry: "++showtypreturnNothingread_value::MonadIOm=>TIFF_TYPE->Endian->Int->IterateeG[]Word8m(MaybeTIFFDE_ENUM)read_valuetype'0=doendianRead4e'throwErr.Err$"Zero count in the entry of type: "++showtypreturnNothing-- Read an ascii string from the offset in the-- dictionary. The last byte of-- an ascii string is always zero, which is-- included in 'count' but we don't need to read itread_valueTT_asciie'count|count>4=do-- val-offset is offsetoffset<-endianRead4e'return.Just.TEN_CHAR$\iter_char->return$doIter.seek(fromIntegraloffset)letiter=convStream(checkErrIter.head>>=return.either(constNothing)(Just.(:[]).chr.fromIntegral))iter_charIter.joinI$Iter.joinI$Iter.takeR(predcount)iter-- Read the string of 0 to 3 characters long-- The zero terminator is included in count, but-- we don't need to read itread_valueTT_ascii_ecount=do-- count is within 1..4letlen=predcount-- string lengthletloopacc0=return.Just.reverse$accloopaccn=Iter.head>>=(\v->loop((chr.fromIntegral$v):acc)(predn))str<-loop[]lenIter.drop(4-len)casestrofJuststr'->return.Just.TEN_CHAR$immed_valuestr'Nothing->returnNothing-- Read the array of signed or unsigned bytesread_valuetype'count|count>4&&typ==TT_byte||typ==TT_sbyte=dooffset<-endianRead4e'return.Just.TEN_INT$\iter_int->return$doIter.seek(fromIntegraloffset)letiter=convStream(checkErrIter.head>>=return.either(constNothing)(Just.(:[]).conv_bytetyp))iter_intIter.joinI$Iter.joinI$Iter.takeRcountiter-- Read the array of 1 to 4 bytesread_valuetyp_ecount|typ==TT_byte||typ==TT_sbyte=doletloopacc0=return.Just.reverse$accloopaccn=Iter.head>>=(\v->loop(conv_bytetypv:acc)(predn))str<-(loop[]count)Iter.drop(4-count)casestrofJuststr'->return.Just.TEN_INT$immed_valuestr'Nothing->returnNothing-- Read the array of Word8read_valueTT_undefinede'count|count>4=dooffset<-endianRead4e'return.Just.TEN_BYTE$\iter->return$doIter.seek(fromIntegraloffset)Iter.joinI$Iter.takeRcountiter-- Read the array of Word8 of 1..4 elements,-- packed in the offset fieldread_valueTT_undefined_ecount=doletloopacc0=return.Just.reverse$accloopaccn=Iter.head>>=(\v->loop(v:acc)(predn))str<-loop[]countIter.drop(4-count)casestrofJuststr'->return.Just.TEN_BYTE$immed_valuestr'Nothing->returnNothing--return . Just . TEN_BYTE $ immed_value str-- Read the array of short integers-- of 1 element: the offset field contains the valueread_valuetype'1|typ==TT_short||typ==TT_sshort=doitem<-endianRead2e'Iter.drop2-- skip the paddingreturn.Just.TEN_INT$immed_value[conv_shorttypitem]-- of 2 elements: the offset field contains the valueread_valuetype'2|typ==TT_short||typ==TT_sshort=doi1<-endianRead2e'i2<-endianRead2e'return.Just.TEN_INT$immed_value[conv_shorttypi1,conv_shorttypi2]-- of n elementsread_valuetype'count|typ==TT_short||typ==TT_sshort=dooffset<-endianRead4e'return.Just.TEN_INT$\iter_int->return$doIter.seek(fromIntegraloffset)letiter=convStream(checkErr(endianRead2e')>>=return.either(constNothing)(Just.(:[]).conv_shorttyp))iter_intIter.joinI$Iter.joinI$Iter.takeR(2*count)iter-- Read the array of long integers-- of 1 element: the offset field contains the valueread_valuetype'1|typ==TT_long||typ==TT_slong=doitem<-endianRead4e'return.Just.TEN_INT$immed_value[conv_longtypitem]-- of n elementsread_valuetype'count|typ==TT_long||typ==TT_slong=dooffset<-endianRead4e'return.Just.TEN_INT$\iter_int->return$doIter.seek(fromIntegraloffset)letiter=convStream(checkErr(endianRead4e')>>=return.either(constNothing)(Just.(:[]).conv_longtyp))iter_intIter.joinI$Iter.joinI$Iter.takeR(4*count)iterread_valuetype'count=do-- stub_offset<-endianRead4e'note["unhandled type: ",showtyp," with count ",showcount]returnNothingimmed_value::(Monadm)=>[el]->EnumeratorGMM[]Word8[]elmaimmed_valueitemiter=--(Iter.enumPure1Chunk item >. enumEof) iter >>== Iter.joinI . returnreturn.joinI.return.joinIM$(enumPure1Chunkitem>.enumEof)iterconv_byte::TIFF_TYPE->Word8->Intconv_byteTT_byte=fromIntegralconv_byteTT_sbyte=fromIntegral.u8_to_s8conv_byte_=error"conv_byte called with non-byte type"conv_short::TIFF_TYPE->Word16->Intconv_shortTT_short=fromIntegralconv_shortTT_sshort=fromIntegral.u16_to_s16conv_short_=error"conv_short called with non-short type"conv_long::TIFF_TYPE->Word32->Intconv_longTT_long=fromIntegralconv_longTT_slong=fromIntegral.u32_to_s32conv_long_=error"conv_long called with non-long type"-- Reading the pixel matrix-- For simplicity, we assume no compression and 8-bit pixelspixel_matrix_enum::MonadIOm=>TIFFDict->EnumeratorN[]Word8[]Word8mapixel_matrix_enumdictiter=validate_dict>>=proceedwhere-- Make sure we can handle this particular TIFF imagevalidate_dict=dodict_assertTG_COMPRESSION1dict_assertTG_SAMPLESPERPIXEL1dict_assertTG_BITSPERSAMPLE8ncols<-liftM(fromMaybe0)$dict_read_intTG_IMAGEWIDTHdictnrows<-liftM(fromMaybe0)$dict_read_intTG_IMAGELENGTHdictstrip_offsets<-liftM(fromMaybe[0])$dict_read_intsTG_STRIPOFFSETSdictrps<-liftM(fromMaybenrows)(dict_read_intTG_ROWSPERSTRIPdict)ifncols>0&&nrows>0&&rps>0thenreturn$Just(ncols,nrows,rps,strip_offsets)elsereturnNothingdict_asserttagv=dovfound<-dict_read_inttagdictcasevfoundofJustv'|v'==v->return$Just()_->throwErr(Err(unwords["dict_assert: tag:",showtag,"expected:",showv,"found:",showvfound]))>>returnNothingproceedNothing=throwErr$Err"Can't handle this TIFF"proceed(Just(ncols,nrows,rows_per_strip,strip_offsets))=doletstrip_size=rows_per_strip*ncolsimage_size=nrows*ncolsnote["Processing the pixel matrix, ",showimage_size," bytes"]letloop_pos[]iter'=returniter'looppos(strip:strips)iter'=doIter.seek(fromIntegralstrip)letlen=minstrip_size(image_size-pos)iter''<-Iter.takeR(fromIntegrallen)iter'loop(pos+len)stripsiter''loop0strip_offsetsiter-- A few helpers for getting data from TIFF dictionarydict_read_int::Monadm=>TIFF_TAG->TIFFDict->IterateeG[]Word8m(MaybeInt)dict_read_inttagdict=doels<-dict_read_intstagdictcaseelsofJust(e:_)->return$Juste_->returnNothingdict_read_ints::Monadm=>TIFF_TAG->TIFFDict->IterateeG[]Word8m(Maybe[Int])dict_read_intstagdict=caseIM.lookup(tag_to_inttag)dictofJust(TIFFDE_(TEN_INTenum))->doe<-joinIM$enumstream2listreturn(Juste)_->returnNothingdict_read_rat::Monadm=>TIFF_TAG->TIFFDict->IterateeG[]Word8m(Maybe(RatioInt))dict_read_rattagdict=caseIM.lookup(tag_to_inttag)dictofJust(TIFFDE1(TEN_RATenum))->do[e]<-joinIM$enumstream2listreturn(Juste)_->returnNothingdict_read_string::Monadm=>TIFF_TAG->TIFFDict->IterateeG[]Word8m(MaybeString)dict_read_stringtagdict=caseIM.lookup(tag_to_inttag)dictofJust(TIFFDE_(TEN_CHARenum))->doe<-joinIM$enumstream2listreturn(Juste)_->returnNothing