{- |
Module : Data.FileStore.Utils
Copyright : Copyright (C) 2009 John MacFarlane, Gwern Branwen
License : BSD 3
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Utility functions for running external processes.
-}moduleData.FileStore.Utils(runShellCommand,mergeContents,hashsMatch,isInsideDir,escapeRegexSpecialChars,parseMatchLine,splitEmailAuthor,ensureFileExists,regSearchFiles,regsSearchFile,withSanityCheck,grepSearchRepo,withVerifyDir)whereimportCodec.Binary.UTF8.String(encodeString)importControl.Exception(throwIO)importControl.Monad(liftM,when,unless)importData.ByteString.Lazy.UTF8(toString)importData.Char(isSpace)importData.List(intersect,nub,isPrefixOf,isInfixOf)importData.List.Split(splitWhen)importData.Maybe(isJust)importSystem.Directory(canonicalizePath,doesFileExist,getTemporaryDirectory,removeFile,findExecutable,createDirectoryIfMissing,getDirectoryContents)importSystem.Exit(ExitCode(..))importSystem.FilePath((</>),takeDirectory)importSystem.IO(openTempFile,hClose)importSystem.Process(runInteractiveProcess,waitForProcess)importqualifiedData.ByteString.LazyasBimportData.FileStore.Types(SearchMatch(..),FileStoreError(IllegalResourceName,NotFound,UnknownError),SearchQuery(..))-- | Run shell command and return error status, standard output, and error-- output. Assumes UTF-8 locale. Note that this does not go through \/bin\/sh!runShellCommand::FilePath-- ^ Working directory->Maybe[(String,String)]-- ^ Environment->String-- ^ Command->[String]-- ^ Arguments->IO(ExitCode,B.ByteString,B.ByteString)runShellCommandwdenvcmdargs=do(hInp,hOut,hErr,ph)<-runInteractiveProcess(encodeStringcmd)(mapencodeStringargs)(Justwd)envhClosehInpoutput<-B.hGetContentshOuterrorOutput<-B.hGetContentshErrstatus<-waitForProcessphreturn(status,errorOutput,output)-- | Do a three way merge, using either git merge-file or RCS merge. Assumes-- that either @git@ or @merge@ is in the system path. Assumes UTF-8 locale.mergeContents::(String,B.ByteString)-- ^ (label, contents) of edited version->(String,B.ByteString)-- ^ (label, contents) of original revision->(String,B.ByteString)-- ^ (label, contents) of latest version->IO(Bool,String)-- ^ (were there conflicts?, merged contents)mergeContents(newLabel,newContents)(originalLabel,originalContents)(latestLabel,latestContents)=dotempPath<-catchgetTemporaryDirectory(\_->return".")(originalPath,hOriginal)<-openTempFiletempPath"orig"(latestPath,hLatest)<-openTempFiletempPath"latest"(newPath,hNew)<-openTempFiletempPath"new"B.hPutStrhOriginaloriginalContents>>hClosehOriginalB.hPutStrhLatestlatestContents>>hClosehLatestB.hPutStrhNewnewContents>>hClosehNewgitExists<-liftMisJust(findExecutable"git")(conflicts,mergedContents)<-ifgitExiststhendo(status,err,out)<-runShellCommandtempPathNothing"git"["merge-file","--stdout","-L",newLabel,"-L",originalLabel,"-L",latestLabel,newPath,originalPath,latestPath]casestatusofExitSuccess->return(False,out)ExitFailuren|n>=0->return(True,out)_->error$"merge failed: "++toStringerrelsedomergeExists<-liftMisJust(findExecutable"merge")ifmergeExiststhendo(status,err,out)<-runShellCommandtempPathNothing"merge"["-p","-q","-L",newLabel,"-L",originalLabel,"-L",latestLabel,newPath,originalPath,latestPath]casestatusofExitSuccess->return(False,out)ExitFailure1->return(True,out)_->error$"merge failed: "++toStringerrelseerror"mergeContents requires 'git' or 'merge', and neither was found in the path."removeFileoriginalPathremoveFilelatestPathremoveFilenewPathreturn(conflicts,toStringmergedContents)escapeRegexSpecialChars::String->StringescapeRegexSpecialChars=backslashEscape"?*+{}[]\\^$.()"wherebackslashEscapechars(x:xs)|x`elem`chars='\\':x:backslashEscapecharsxsbackslashEscapechars(x:xs)=x:backslashEscapecharsxsbackslashEscape_[]=[]-- | A number of VCS systems uniquely identify a particular revision or change via a-- cryptographic hash of some sort. These hashs can be very long, and so systems like-- Git and Darcs don't require the entire hash - a *unique prefix*. Thus a definition-- of hash equality is '==', certainly, but also simply whether either is a prefix of the-- other. If both are reasonably long, then the likelihood the shorter one is not a unique-- prefix of the longer (that is, clashes with another hash) is small.-- The burden of proof is on the caller to not pass a uselessly short short-hash like '1', however.hashsMatch::(Eqa)=>[a]->[a]->BoolhashsMatchr1r2=r1`isPrefixOf`r2||r2`isPrefixOf`r1-- | Inquire of a certain directory whether another file lies within its ambit.-- This is basically asking whether the file is 'above' the directory in the filesystems's-- directory tree. Useful for checking the legality of a filename.isInsideDir::FilePath->FilePath->IOBoolisInsideDirnamedir=dogitDirPathCanon<-canonicalizePathdirfilenameCanon<-canonicalizePathnamereturn(gitDirPathCanon`isPrefixOf`filenameCanon)-- | A parser function. This is intended for use on strings which are output by grep programs-- or programs which mimic the standard grep output - which uses colons as delimiters and has-- 3 fields: the filename, the line number, and then the matching line itself. Note that this -- is for use on only strings meeting that format - if it goes "file:match", this will throw-- a pattern-match exception.---- > parseMatchLine "foo:10:bar baz quux" ~> -- > SearchMatch {matchResourceName = "foo", matchLineNumber = 10, matchLine = "bar baz quux"}parseMatchLine::String->SearchMatchparseMatchLinestr=let(fn:n:res:_)=splitWhen(==':')strinSearchMatch{matchResourceName=fn,matchLineNumber=readn,matchLine=res}-- | Our policy is: if the input is clearly a "name \<e\@mail.com\>" input, then we return "(Just Address, Name)"-- If there is no '<' in the input, then it clearly can't be of that format, and so we just return "(Nothing, Name)"---- > splitEmailAuthor "foo bar baz@gmail.com" ~> (Nothing,"foo bar baz@gmail.com")-- > splitEmailAuthor "foo bar <baz@gmail.com>" ~> (Just "baz@gmail.com","foo bar")splitEmailAuthor::String->(MaybeString,String)splitEmailAuthorx=(mbEmail,trimname)where(name,rest)=break(=='<')xmbEmail=ifnullrestthenNothingelseJust$takeWhile(/='>')$drop1rest-- | Trim leading and trailing spacestrim::String->Stringtrim=reverse.dropWhileisSpace.reverse.dropWhileisSpace-- | Search multiple files with a single regexp.-- This calls out to grep, and so supports the regular expressions grep does.regSearchFiles::FilePath->[String]->String->IO[String]regSearchFilesrepofilesToCheckpattern=do(_,_,result)<-runShellCommandrepoNothing"grep"$["--line-number","-I","-l","-E","-e",pattern]++filesToCheckletresults=intersectfilesToCheck$lines$toStringresultreturnresults-- | Search a single file with multiple regexps.regsSearchFile::[String]->FilePath->[String]->String->IO[String]regsSearchFileosrepopatternsfile=dores<-mapM(runfile)patternsreturn$nub$concatreswhererunfp=do(_,_,r)<-runShellCommandrepoNothing"grep"(os++[p,f])return$lines$toStringr-- | If name doesn't exist in repo or is not a file, throw 'NotFound' exception.ensureFileExists::FilePath->FilePath->IO()ensureFileExistsreponame=doisFile<-doesFileExist(repo</>encodeStringname)unlessisFile$throwIONotFound-- | Check that the filename/location is within the given repo, and not inside-- any of the (relative) paths in @excludes@. Create the directory if needed.-- If everything checks out, then perform the specified action.withSanityCheck::FilePath->[FilePath]->FilePath->IOb->IObwithSanityCheckrepoexcludesnameaction=doletfilename=repo</>encodeStringnameinsideRepo<-filename`isInsideDir`repoinsideExcludes<-liftMor$mapM(filename`isInsideDir`)$map(repo</>)excludeswhen(insideExcludes||notinsideRepo)$throwIOIllegalResourceNamecreateDirectoryIfMissingTrue$takeDirectoryfilenameaction-- | Uses grep to search a file-based repository. Note that this calls out to grep; and so-- is generic over repos like git or darcs-based repos. (The git FileStore instance doesn't-- use this because git has builtin grep functionality.)-- Expected usage is to specialize this function with a particular backend's 'index'.grepSearchRepo::(FilePath->IO[String])->FilePath->SearchQuery->IO[SearchMatch]grepSearchRepoindexerrepoquery=doletopts=["-I","--line-number","--with-filename"]++["-i"|queryIgnoreCasequery]++(ifqueryWholeWordsquerythen["--word-regexp"]else["-E"])letregexps=mapescapeRegexSpecialChars$queryPatternsqueryfiles<-indexerrepoifqueryMatchAllquerythendofilesMatchingAllPatterns<-liftM(foldr1intersect)$mapM(regSearchFilesrepofiles)regexpsoutput<-mapM(regsSearchFileoptsreporegexps)filesMatchingAllPatternsreturn$mapparseMatchLine$concatoutputelsedo(_status,_errOutput,output)<-runShellCommandrepoNothing"grep"$opts++concatMap(\term->["-e",term])regexps++filesletresults=lines$toStringoutputreturn$mapparseMatchLineresults-- | we don't actually need the contents, just want to check that the directory exists and we have enough permissionswithVerifyDir::FilePath->IOa->IOawithVerifyDirda=catch(liftMhead(getDirectoryContents$encodeStringd)>>a)$\e->if"No such file or directory"`isInfixOf`showethenthrowIONotFoundelsethrowIO.UnknownError.show$e