-- File created: 2008-10-10 13:29:26{-# LANGUAGE CPP #-}moduleSystem.FilePath.Glob.Base(Token(..),Pattern(..),CompOptions(..),MatchOptions(..),compDefault,compPosix,matchDefault,matchPosix,decompile,compile,compileWith,tryCompileWith,tokenize-- for tests,optimize,liftP,tokToLower)whereimportControl.Arrow(first)importControl.Monad.Error(ErrorT,runErrorT,throwError)importControl.Monad.Writer.Strict(Writer,runWriter,tell)importControl.Exception(assert)importData.Char(isDigit,isAlpha,toLower)importData.List(find,sortBy)importData.Maybe(fromMaybe)importData.Monoid(Monoid,mappend,mempty,mconcat)importSystem.FilePath(pathSeparator,extSeparator,isExtSeparator,isPathSeparator)importSystem.FilePath.Glob.Utils(dropLeadingZeroes,isLeft,fromLeft,increasingSeq,addToRange,overlap)#if __GLASGOW_HASKELL__importText.Read(readPrec,lexP,parens,prec,Lexeme(Ident))#endifdataToken-- primitives=Literal!Char|ExtSeparator-- .|PathSeparator-- /|NonPathSeparator-- ?|CharRange!Bool[EitherChar(Char,Char)]-- []|OpenRange(MaybeString)(MaybeString)-- <>|AnyNonPathSeparator-- *|AnyDirectory-- **/-- after optimization only|LongLiteral!IntStringderiving(Eq)-- Note: CharRanges aren't converted, because this is tricky in general.-- Consider for instance [@-[], which includes the range A-Z. This would need-- to become [@[a-z]: so essentially we'd need to either:---- 1) Have a list of ranges of uppercase Unicode. Check if our range-- overlaps with any of them and if it does, take the non-overlapping-- part and combine it with the toLower of the overlapping part.---- 2) Simply expand the entire range to a list and map toLower over it.---- In either case we'd need to re-optimize the CharRange—we can't assume that-- if the uppercase characters are consecutive, so are the lowercase.---- 1) might be feasible if someone bothered to get the latest data.---- 2) obviously isn't since you might have 'Right (minBound, maxBound)' in-- there somewhere.---- The current solution is to just check both the toUpper of the character and-- the toLower.tokToLower::Token->TokentokToLower(Literalc)=Literal(toLowerc)tokToLower(LongLiteralns)=LongLiteraln(maptoLowers)tokToLowertok=tok-- |An abstract data type representing a compiled pattern.---- Note that the 'Eq' instance cannot tell you whether two patterns behave in-- the same way; only whether they compile to the same 'Pattern'. For instance,-- @'compile' \"x\"@ and @'compile' \"[x]\"@ may or may not compare equal,-- though a @'match'@ will behave the exact same way no matter which 'Pattern'-- is used.newtypePattern=Pattern{unPattern::[Token]}deriving(Eq)liftP::([Token]->[Token])->Pattern->PatternliftPf(Patternpat)=Pattern(fpat)instanceShowTokenwhereshow(Literalc)|c`elem`"*?[<"||isExtSeparatorc=['[',c,']']|otherwise=assert(not$isPathSeparatorc)[c]showExtSeparator=[extSeparator]showPathSeparator=[pathSeparator]showNonPathSeparator="?"showAnyNonPathSeparator="*"showAnyDirectory="**/"show(LongLiteral_s)=concatMap(show.Literal)sshow(OpenRangeab)='<':fromMaybe""a++"-"++fromMaybe""b++">"-- We have to be careful here with ^ and ! lest [a!b] become [!ab]. So we-- just put them at the end.---- Also, [^x-] was sorted and should not become [^-x].show(CharRangebr)=letf=either(:[])(\(x,y)->[x,'-',y])(caret,exclamation,fs)=foldr(\c(ca,ex,ss)->casecofLeft'^'->("^",ex,ss)Left'!'->(ca,"!",ss)_->(ca,ex,(fc++).ss))("","",id)r(beg,rest)=lets'=fs[](x,y)=splitAt1s'inifnotb&&x=="-"then(y,x)else(s',"")inconcat["[",ifbthen""else"^",beg,caret,exclamation,rest,"]"]instanceShowPatternwhereshowsPrecdp=showParen(d>10)$showString"compile ".showsPrec(d+1)(decompilep)instanceReadPatternwhere#if __GLASGOW_HASKELL__readPrec=parens.prec10$doIdent"compile"<-lexPfmapcompilereadPrec#elsereadsPrecd=readParen(d>10)$\r->do("compile",string)<-lexr(xs,rest)<-readsPrec(d+1)string[(compilexs,rest)]#endifinstanceMonoidPatternwheremempty=Pattern[]mappend(Patterna)(Patternb)=optimize.Pattern$(a++b)mconcat=optimize.Pattern.concatMapunPattern-- |Options which can be passed to the 'tryCompileWith' or 'compileWith'-- functions: with these you can selectively toggle certain features at compile-- time.---- Note that some of these options depend on each other: classes can never-- occur if ranges aren't allowed, for instance.-- We could presumably put locale information in here, too.dataCompOptions=CompOptions{characterClasses::Bool-- ^Allow character classes, @[[:...:]]@.,characterRanges::Bool-- ^Allow character ranges, @[...]@.,numberRanges::Bool-- ^Allow open ranges, @\<...>@.,wildcards::Bool-- ^Allow wildcards, @*@ and @?@.,recursiveWildcards::Bool-- ^Allow recursive wildcards, @**/@.,pathSepInRanges::Bool-- ^Allow path separators in character ranges.---- If true, @a[/]b@ never matches anything (since character ranges can't-- match path separators); if false and 'errorRecovery' is enabled,-- @a[/]b@ matches itself, i.e. a file named @]b@ in the subdirectory-- @a[@.,errorRecovery::Bool-- ^If the input is invalid, recover by turning any invalid part into-- literals. For instance, with 'characterRanges' enabled, @[abc@ is an-- error by default (unclosed character range); with 'errorRecovery', the-- @[@ is turned into a literal match, as though 'characterRanges' were-- disabled.}deriving(Show,Read,Eq)-- |The default set of compilation options: closest to the behaviour of the-- @zsh@ shell, with 'errorRecovery' enabled.---- All options are enabled.compDefault::CompOptionscompDefault=CompOptions{characterClasses=True,characterRanges=True,numberRanges=True,wildcards=True,recursiveWildcards=True,pathSepInRanges=True,errorRecovery=True}-- |Options for POSIX-compliance, as described in @man 7 glob@.---- 'numberRanges', 'recursiveWildcards', and 'pathSepInRanges' are disabled.compPosix::CompOptionscompPosix=CompOptions{characterClasses=True,characterRanges=True,numberRanges=False,wildcards=True,recursiveWildcards=False,pathSepInRanges=False,errorRecovery=True}-- |Options which can be passed to the 'matchWith' or 'globDirWith' functions:-- with these you can selectively toggle certain features at matching time.dataMatchOptions=MatchOptions{matchDotsImplicitly::Bool-- ^Allow @*@, @?@, and @**/@ to match @.@ at the beginning of paths.,ignoreCase::Bool-- ^Case-independent matching.,ignoreDotSlash::Bool-- ^Treat @./@ as a no-op in both paths and patterns.---- (Of course e.g. @../@ means something different and will not be-- ignored.)}-- |The default set of execution options: closest to the behaviour of the @zsh@-- shell.---- Currently identical to 'matchPosix'.matchDefault::MatchOptionsmatchDefault=matchPosix-- |Options for POSIX-compliance, as described in @man 7 glob@.---- 'ignoreDotSlash' is enabled, the rest are disabled.matchPosix::MatchOptionsmatchPosix=MatchOptions{matchDotsImplicitly=False,ignoreCase=False,ignoreDotSlash=True}-- |Decompiles a 'Pattern' object into its textual representation: essentially-- the inverse of 'compile'.---- Note, however, that due to internal optimization, @decompile . compile@ is-- not the identity function. Instead, @compile . decompile@ is.---- Be careful with 'CompOptions': 'decompile' always produces a 'String' which-- can be passed to 'compile' to get back the same 'Pattern'. @compileWith-- options . decompile@ is /not/ the identity function unless @options@ is-- 'compDefault'.decompile::Pattern->Stringdecompile=concatMapshow.unPattern-------------------------------------------- COMPILATION-------------------------------------------- |Compiles a glob pattern from its textual representation into a 'Pattern'-- object.---- For the most part, a character matches itself. Recognized operators are as-- follows:---- [@?@] Matches any character except path separators.---- [@*@] Matches any number of characters except path separators,-- including the empty string.---- [@[..\]@] Matches any of the enclosed characters. Ranges of characters can-- be specified by separating the endpoints with a @\'-'@. @\'-'@ or-- @']'@ can be matched by including them as the first character(s)-- in the list. Never matches path separators: @[\/]@ matches-- nothing at all. Named character classes can also be matched:-- @[:x:]@ within @[]@ specifies the class named @x@, which matches-- certain predefined characters. See below for a full list.---- [@[^..\]@ or @[!..\]@] Like @[..]@, but matches any character /not/ listed.-- Note that @[^-x]@ is not the inverse of @[-x]@, but-- the range @[^-x]@.---- [@\<m-n>@] Matches any integer in the range m to n, inclusive. The range may-- be open-ended by leaving out either number: @\"\<->\"@, for-- instance, matches any integer.---- [@**/@] Matches any number of characters, including path separators,-- excluding the empty string.---- Supported character classes:---- [@[:alnum:\]@] Equivalent to @\"0-9A-Za-z\"@.---- [@[:alpha:\]@] Equivalent to @\"A-Za-z\"@.---- [@[:blank:\]@] Equivalent to @\"\\t \"@.---- [@[:cntrl:\]@] Equivalent to @\"\\0-\\x1f\\x7f\"@.---- [@[:digit:\]@] Equivalent to @\"0-9\"@.---- [@[:graph:\]@] Equivalent to @\"!-~\"@.---- [@[:lower:\]@] Equivalent to @\"a-z\"@.---- [@[:print:\]@] Equivalent to @\" -~\"@.---- [@[:punct:\]@] Equivalent to @\"!-\/:-\@[-`{-~\"@.---- [@[:space:\]@] Equivalent to @\"\\t-\\r \"@.---- [@[:upper:\]@] Equivalent to @\"A-Z\"@.---- [@[:xdigit:\]@] Equivalent to @\"0-9A-Fa-f\"@.---- Note that path separators (typically @\'/\'@) have to be matched explicitly-- or using the @**/@ pattern. In addition, extension separators (typically-- @\'.\'@) have to be matched explicitly at the beginning of the pattern or-- after any path separator.---- If a system supports multiple path separators, any one of them will match-- any of them. For instance, on Windows, @\'/\'@ will match itself as well as-- @\'\\\'@.---- Error recovery will be performed: erroneous operators will not be considered-- operators, but matched as literal strings. Such operators include:---- * An empty @[]@ or @[^]@ or @[!]@---- * A @[@ or @\<@ without a matching @]@ or @>@---- * A malformed @\<>@: e.g. nonnumeric characters or no hyphen---- So, e.g. @[]@ will match the string @\"[]\"@.compile::String->Patterncompile=compileWithcompDefault-- |Like 'compile', but recognizes operators according to the given-- 'CompOptions' instead of the defaults.---- If an error occurs and 'errorRecovery' is disabled, 'error' will be called.compileWith::CompOptions->String->PatterncompileWithopts=eithererrorid.tryCompileWithopts-- |A safe version of 'compileWith'.---- If an error occurs and 'errorRecovery' is disabled, the error message will-- be returned in a 'Left'.tryCompileWith::CompOptions->String->EitherStringPatterntryCompileWithopts=fmapoptimize.tokenizeoptstokenize::CompOptions->String->EitherStringPatterntokenizeopts=fmapPattern.sequence.gowhereerr_ccs|errorRecoveryopts=Right(Literalc):gocserrs__=[Lefts]go::String->[EitherStringToken]go[]=[]go('?':cs)|wcs=RightNonPathSeparator:gocsgo('*':cs)|wcs=casecsof'*':p:xs|rwcs&&isPathSeparatorp->RightAnyDirectory:goxs_->RightAnyNonPathSeparator:gocsgo('[':cs)|crs=let(range,rest)=charRangeoptscsincaserangeofLefts->errs'['csr->r:gorestgo('<':cs)|ors=let(range,rest)=break(=='>')csinifnullrestthenerr"compile :: unclosed <> in pattern"'<'cselsecaseopenRangerangeofLefts->errs'<'csr->r:go(tailrest)go(c:cs)|isPathSeparatorc=RightPathSeparator:gocs|isExtSeparatorc=RightExtSeparator:gocs|otherwise=Right(Literalc):gocswcs=wildcardsoptsrwcs=recursiveWildcardsoptscrs=characterRangesoptsors=numberRangesopts-- <a-b> where a > b can never match anything; this is not considered an erroropenRange::String->EitherStringTokenopenRange['-']=Right$OpenRangeNothingNothingopenRange('-':s)=casespanisDigitsof(b,"")->Right$OpenRangeNothing(openRangeNumb)_->Left$"compile :: bad <>, expected number, got "++sopenRanges=casespanisDigitsof(a,"-")->Right$OpenRange(openRangeNuma)Nothing(a,'-':s')->casespanisDigits'of(b,"")->Right$OpenRange(openRangeNuma)(openRangeNumb)_->Left$"compile :: bad <>, expected number, got "++s'_->Left$"compile :: bad <>, expected number followed by - in "++sopenRangeNum::String->MaybeStringopenRangeNum=Just.dropLeadingZeroestypeCharRange=[EitherChar(Char,Char)]charRange::CompOptions->String->(EitherStringToken,String)charRangeoptszs=casezsofy:ys|y`elem`"^!"->caseysof-- [!-#] is not the inverse of [-#], it is the range ! through-- #'-':']':xs->(Right(CharRangeFalse[Left'-']),xs)'-':_->first(fmap(CharRangeTrue))(startzs)xs->first(fmap(CharRangeFalse))(startxs)_->first(fmap(CharRangeTrue))(startzs)wherestart::String->(EitherStringCharRange,String)start(']':xs)=run$char']'xsstart('-':xs)=run$char'-'xsstartxs=run$goxsrun::ErrorTString(WriterCharRange)String->(EitherStringCharRange,String)runm=caserunWriter.runErrorT$mof(Lefterr,_)->(Lefterr,[])(Rightrest,cs)->(Rightcs,rest)go::String->ErrorTString(WriterCharRange)Stringgo('[':':':xs)|characterClassesopts=readClassxsgo(']':xs)=returnxsgo(c:xs)=ifnot(pathSepInRangesopts)&&isPathSeparatorcthenthrowError"compile :: path separator within []"elsecharcxsgo[]=throwError"compile :: unclosed [] in pattern"char::Char->String->ErrorTString(WriterCharRange)Stringcharc('-':x:xs)=ifx==']'thentell[Leftc,Left'-']>>returnxselsetell[Right(c,x)]>>goxscharcxs=tell[Leftc]>>goxsreadClass::String->ErrorTString(WriterCharRange)StringreadClassxs=let(name,end)=spanisAlphaxsincaseendof':':']':rest->charClassname>>gorest_->tell[Left'[',Left':']>>goxscharClass::String->ErrorTString(WriterCharRange)()charClassname=-- The POSIX classes---- TODO: this is ASCII-only, not sure how this should be extended-- Unicode, or with a locale as input, or something else?casenameof"alnum"->tell[digit,upper,lower]"alpha"->tell[upper,lower]"blank"->tellblanks"cntrl"->tell[Right('\0','\x1f'),Left'\x7f']"digit"->tell[digit]"graph"->tell[Right('!','~')]"lower"->tell[lower]"print"->tell[Right(' ','~')]"punct"->tellpunct"space"->tellspaces"upper"->tell[upper]"xdigit"->tell[digit,Right('A','F'),Right('a','f')]_->throwError("compile :: unknown character class '"++name++"'")digit=Right('0','9')upper=Right('A','Z')lower=Right('a','z')punct=mapRight[('!','/'),(':','@'),('[','`'),('{','~')]blanks=[Left'\t',Left' ']spaces=[Right('\t','\r'),Left' ']-------------------------------------------- OPTIMIZATION------------------------------------------optimize::Pattern->Patternoptimize=liftP(fin.go)wherefin[]=[]-- Literals to LongLiteral-- Has to be done here: we can't backtrack in go, but some cases might-- result in consecutive Literals being generated.-- E.g. "a[b]".fin(x:y:xs)|isLiteralx&&isLiteraly=let(ls,rest)=spanisLiteralxsinfin$LongLiteral(lengthls+2)(foldr(\(Literala)->(a:))[](x:y:ls)):rest-- concatenate LongLiterals-- Has to be done here because LongLiterals are generated above.---- So one could say that we have one pass (go) which flattens everything as-- much as it can and one pass (fin) which concatenates what it can.fin(LongLiterall1s1:LongLiterall2s2:xs)=fin$LongLiteral(l1+l2)(s1++s2):xsfin(LongLiteralls:Literalc:xs)=fin$LongLiteral(l+1)(s++[c]):xsfin(LongLiteral1s:xs)=Literal(heads):finxsfin(Literalc:LongLiteralls:xs)=fin$LongLiteral(l+1)(c:s):xsfin(x:xs)=x:finxsgo[]=[]go(x@(CharRange__):xs)=caseoptimizeCharRangexofx'@(CharRange__)->x':goxsx'->go(x':xs)-- <a-a> -> ago(OpenRange(Justa)(Justb):xs)|a==b=LongLiteral(lengtha)a:goxs-- <a-b> -> [a-b]-- a and b are guaranteed non-nullgo(OpenRange(Just[a])(Just[b]):xs)|b>a=go$CharRangeTrue[Right(a,b)]:xsgo(x:xs)=casefind($x)compressorsofJustc->let(compressed,ys)=spancxsinifnullcompressedthenx:goyselsego(x:ys)Nothing->x:goxscompressors=[isStar,isStarSlash,isAnyNumber]isLiteral(Literal_)=TrueisLiteral_=FalseisStarAnyNonPathSeparator=TrueisStar_=FalseisStarSlashAnyDirectory=TrueisStarSlash_=FalseisAnyNumber(OpenRangeNothingNothing)=TrueisAnyNumber_=FalseoptimizeCharRange::Token->TokenoptimizeCharRange(CharRangeb_rs)=finb_.go.sortCharRange$rswhere-- [/] is interesting, it actually matches nothing at all-- [.] can be Literalized though, just don't make it into an ExtSeparator so-- that it doesn't match a leading dotfinTrue[Leftc]|not(isPathSeparatorc)=LiteralcfinTrue[Rightr]|r==(minBound,maxBound)=NonPathSeparatorfinbx=CharRangebxgo[]=[]go(x@(Leftc):xs)=casexsof[]->[x]y@(Leftd):ys-- [aaaaa] -> [a]|c==d->go$Leftc:ys|d==succc->let(ls,rest)=spanisLeftxs-- start from y(catable,others)=increasingSeq(mapfromLeftls)range=(c,headcatable)in-- three (or more) Lefts make a Rightifnullcatable||null(tailcatable)thenx:y:goys-- [abcd] -> [a-d]elsego$Rightrange:mapLeftothers++rest|otherwise->x:goxsRightr:ys->caseaddToRangercof-- [da-c] -> [a-d]Justr'->go$Rightr':ysNothing->x:goxsgo(x@(Rightr):xs)=casexsof[]->[x]Leftc:ys->caseaddToRangercof-- [a-cd] -> [a-d]Justr'->go$Rightr':ysNothing->x:goxsRightr':ys->caseoverlaprr'of-- [a-cb-d] -> [a-d]Justo->go$Righto:ysNothing->x:goxsoptimizeCharRange_=error"Glob.optimizeCharRange :: internal error"sortCharRange::[EitherChar(Char,Char)]->[EitherChar(Char,Char)]sortCharRange=sortBycmpwherecmp(Lefta)(Leftb)=compareabcmp(Lefta)(Right(b,_))=compareabcmp(Right(a,_))(Leftb)=compareabcmp(Right(a,_))(Right(b,_))=compareab