{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}

-- |
-- Module      : Data.Text.Lazy.Search
-- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
--               duncan@haskell.org
-- Stability   : experimental
-- Portability : GHC
--
-- Fast substring search for lazy 'Text', based on work by Boyer,
-- Moore, Horspool, Sunday, and Lundh.  Adapted from the strict
-- implementation.

module Data.Text.Lazy.Search
    (
      indices
    ) where

import qualified Data.Text.Array as A
import Data.Int (Int64)
import Data.Word (Word16, Word64)
import qualified Data.Text.Internal as T
import Data.Text.Fusion.Internal (PairS(..))
import Data.Text.Lazy.Internal (Text(..), foldlChunks)
import Data.Bits ((.|.), (.&.))
import Data.Text.UnsafeShift (shiftL)

-- | /O(n+m)/ Find the offsets of all non-overlapping indices of
-- @needle@ within @haystack@.
--
-- This function is strict in @needle@, and lazy (as far as possible)
-- in the chunks of @haystack@.
--
-- In (unlikely) bad cases, this algorithm's complexity degrades
-- towards /O(n*m)/.
--
-- Offsets are counted in 'Word16' array elements from the start of
-- the haystack, not in characters.  An empty needle or an empty
-- haystack yields no results.
indices :: Text              -- ^ Substring to search for (@needle@)
        -> Text              -- ^ Text to search in (@haystack@)
        -> [Int64]
indices needle@(Chunk n ns) _haystack@(Chunk k ks)
    | nlen <= 0  = []
    -- A one-element needle needs no skip table; scan linearly.
    | nlen == 1  = indicesOne (nindex 0) 0 k ks
    | otherwise  = advance k ks 0 0
  where
    -- @advance x xs g i@ scans with @x@ as the current haystack chunk
    -- and @xs@ as the chunks after it.  @g@ is the offset of the
    -- current window from the start of the haystack; @i@ is the same
    -- position relative to the start of @x@.
    advance x@(T.Text _ _ l) xs = scan
     where
      scan !g !i
         -- The window now starts beyond the current chunk: step to the
         -- next chunk and rebase the chunk-relative offset.
         | i >= m = case xs of
                      Empty           -> []
                      Chunk y ys      -> advance y ys g (i-m)
         -- Not enough haystack left to hold a whole needle.
         | lackingHay (i + nlen) x xs  = []
         -- Last element matches and so does the rest: report the hit
         -- and resume after it, so that matches never overlap.
         | c == z && candidateMatch 0  = g : scan (g+nlen) (i+nlen)
         | otherwise                   = scan (g+delta) (i+delta)
       where
         m = fromIntegral l
         -- Haystack element aligned with the last needle element.
         c = hindex (i + nlast)
         delta | nextInPattern = nlen + 1
               | c == z        = skip + 1
               | otherwise     = 1
         -- True when the element just past the window is not recorded
         -- in the mask, i.e. it cannot occur in the needle, so the
         -- window may jump completely past it.
         nextInPattern         = mask .&. swizzle (hindex (i+nlen)) == 0
         -- Compare every element except the last one, which the caller
         -- has already checked against @z@.
         candidateMatch !j
             | j >= nlast               = True
             | hindex (i+j) /= nindex j = False
             | otherwise                = candidateMatch (j+1)
         hindex                         = index x xs
    nlen      = wordLength needle
    nlast     = nlen - 1
    nindex    = index n ns
    -- The final element of the needle (last element of its last chunk).
    z         = foldlChunks fin 0 needle
        where fin _ (T.Text farr foff flen) = A.unsafeIndex farr (foff+flen-1)
    (mask :: Word64) :*: skip = buildTable n ns 0 0 0 (nlen-2)
    -- Map an element to one of 64 bits (its low six bits select the bit).
    swizzle w = 1 `shiftL` (fromIntegral w .&. 0x3f)
    -- Walk the needle chunk by chunk, accumulating the bit mask of the
    -- elements seen and the skip distance derived from the last
    -- occurrence of @z@ before the final position.  @g@ is the position
    -- within the whole needle; @i@ is the position within this chunk.
    buildTable (T.Text xarr xoff xlen) xs = go
      where
        go !(g::Int64) !i !msk !skp
            | i >= xlast = case xs of
                             -- Final needle element: it only
                             -- contributes to the mask.
                             Empty      -> (msk .|. swizzle z) :*: skp
                             -- The element at @i@ was consumed at
                             -- needle position @g@, so the next chunk
                             -- starts at @g+1@.  Passing @g@ unchanged
                             -- would number every later chunk one too
                             -- low and inflate the skip distance for
                             -- multi-chunk needles.
                             Chunk y ys -> buildTable y ys (g+1) 0 msk' skp'
            | otherwise = go (g+1) (i+1) msk' skp'
            where c                = A.unsafeIndex xarr (xoff+i)
                  msk'             = msk .|. swizzle c
                  skp' | c == z    = nlen - g - 2
                       | otherwise = skp
                  xlast = xlen - 1
    -- Check whether an attempt to index into the haystack at the
    -- given offset would fail.  Chunk lengths are summed lazily and
    -- the walk stops as soon as the running total reaches @q@.
    lackingHay q = go 0
      where
        go p (T.Text _ _ l) ps = p' < q && case ps of
                                             Empty      -> True
                                             Chunk r rs -> go p' r rs
            where p' = p + fromIntegral l
indices _ _ = []

-- | Fast index into a partly unpacked 'Text'.
-- We take into account the possibility that the caller might try to
-- access one element past the end: when the offset is exactly one
-- past the final chunk, the result is 0 rather than an error.
index :: T.Text -> Text -> Int64 -> Word16
index (T.Text arr off len) rest !i
    | pos < len = A.unsafeIndex arr (off + pos)
    | otherwise =
        case rest of
          -- Not in this chunk: retry in the next one with the offset
          -- rebased to that chunk's start.
          Chunk next more -> index next more (i - fromIntegral len)
          Empty
              -- out of bounds, but legal
              | pos == len -> 0
              -- should never happen, due to lackingHay above
              | otherwise  -> emptyError "index"
  where
    pos = fromIntegral i

-- | A variant of 'indices' that scans linearly for a single 'Word16'.
--
-- The 'Int64' argument is the offset of the given chunk from the
-- start of the text being searched; reported positions are relative
-- to that start.
indicesOne :: Word16 -> Int64 -> T.Text -> Text -> [Int64]
indicesOne target = scanChunk
  where
    scanChunk !base (T.Text arr off len) rest = step 0
      where
        step pos
            | pos < len =
                if A.unsafeIndex arr (off + pos) == target
                  then base + fromIntegral pos : step (pos + 1)
                  else step (pos + 1)
            -- Chunk exhausted: carry on with the next one, if any.
            | otherwise =
                case rest of
                  Empty           -> []
                  Chunk next more ->
                      scanChunk (base + fromIntegral len) next more

-- | The number of 'Word16' values in a 'Text'.
wordLength :: Text -> Int64
wordLength = foldlChunks addChunk 0
  where
    addChunk total (T.Text _ _ len) = total + fromIntegral len

-- Raise an error naming the function of this module that failed.
emptyError :: String -> a
emptyError fun = error message
  where
    message = "Data.Text.Lazy.Search." ++ fun ++ ": empty input"