{-# LANGUAGE BangPatterns, EmptyDataDecls, ScopedTypeVariables #-}

-- |
-- Module      : Data.Text.ICU.Regex.Pure
-- Copyright   : (c) 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- Regular expression support for Unicode, implemented as bindings to
-- the International Components for Unicode (ICU) libraries.
--
-- The functions in this module are pure and hence thread safe, but
-- may not be as fast or as flexible as those in the
-- "Data.Text.ICU.Regex" module.
--
-- The syntax and behaviour of ICU regular expressions are Perl-like.
-- For complete details, see the ICU User Guide entry at
-- <http://userguide.icu-project.org/strings/regexp>.

module Data.Text.ICU.Regex.Pure
    (
    -- * Types
      MatchOption(..)
    , ParseError(errError, errLine, errOffset)
    , Match
    , Regex
    , Regular
    -- * Functions
    -- ** Construction
    , regex
    , regex'
    -- ** Inspection
    , pattern
    -- ** Searching
    , find
    , findAll
    -- ** Match groups
    -- $group
    , groupCount
    , unfold
    , span
    , group
    , prefix
    , suffix
    ) where

import Control.Exception (catch)
import Data.String (IsString(..))
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Foreign as T
import Data.Text.ICU.Error.Internal (ParseError(..), handleError)
import qualified Data.Text.ICU.Regex as IO
import Data.Text.ICU.Regex.Internal hiding (Regex(..), regex)
import qualified Data.Text.ICU.Regex.Internal as Internal
import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
import Foreign.Marshal.Alloc (alloca)
import Foreign.Marshal.Array (advancePtr)
import Foreign.Storable (peek)
import Prelude hiding (catch, span)
import System.IO.Unsafe (unsafeInterleaveIO, unsafePerformIO)

-- | A compiled regular expression.
--
-- 'Regex' values are usually constructed using the 'regex' or
-- 'regex'' functions.  This type is also an instance of 'IsString',
-- so if you have the @OverloadedStrings@ language extension enabled,
-- you can construct a 'Regex' by simply writing the pattern in
-- quotes (though this does not allow you to specify any
-- 'MatchOption's).
newtype Regex = Regex {
      reRe :: Internal.Regex
    }

instance Show Regex where
    show re = "Regex " ++ show (pattern re)

instance IsString Regex where
    fromString = regex [] . T.pack

-- | A match for a regular expression.
data Match = Match {
      matchRe :: Internal.Regex
    -- ^ The regex object this match was produced with.
    , _matchPrev :: T.I16
    -- ^ Offset at which the search for this match started; 'span'
    -- uses it as the end of the previous match.
    }

instance Show Match where
    show m = "Match " ++ show (unfold group m)

-- | A typeclass for functions common to both 'Match' and 'Regex'
-- types.
class Regular r where
    -- | The underlying regex object.
    regRe :: r -> Internal.Regex

    -- | The foreign pointer to the ICU regular expression.
    regFp :: r -> ForeignPtr URegularExpression
    regFp = Internal.reRe . regRe
    {-# INLINE regFp #-}

instance Regular Match where
    regRe = matchRe

instance Regular Regex where
    regRe = reRe

-- | Compile a regular expression with the given options.  This
-- function throws a 'ParseError' if the pattern is invalid, so it is
-- best for use when the pattern is statically known.
regex :: [MatchOption] -> Text -> Regex
regex opts pat = Regex . unsafePerformIO $ IO.regex opts pat

-- | Compile a regular expression with the given options.  This is
-- safest to use when the pattern is constructed at run time: an
-- invalid pattern is reported as a 'Left' value instead of being
-- thrown as an exception.
regex' :: [MatchOption] -> Text -> Either ParseError Regex
regex' opts pat = unsafePerformIO $
  ((Right . Regex) `fmap` Internal.regex opts pat) `catch`
  \(err :: ParseError) -> return (Left err)

-- | Return the source form of the pattern used to construct this
-- regular expression or match.
pattern :: Regular r => r -> Text
pattern r = unsafePerformIO . withForeignPtr (regFp r) $ \rePtr ->
  alloca $ \lenPtr -> do
    textPtr <- handleError $ uregex_pattern rePtr lenPtr
    (T.fromPtr textPtr . fromIntegral) =<< peek lenPtr

-- | Find the first match for the regular expression in the given text.
--
-- Returns 'Nothing' if the pattern does not match anywhere in the
-- text.
find :: Regex -> Text -> Maybe Match
find re0 haystack = unsafePerformIO .
  -- 'matching' works on a private clone, so the shared 'Regex' is
  -- never mutated by the search.
  matching re0 haystack $ \re -> do
    found <- IO.findNext re
    return $! if found then Just (Match re 0) else Nothing

-- | Lazily find all matches for the regular expression in the given
-- text.
--
-- Each 'Match' in the result carries its own clone of the regex,
-- together with the offset at which the search for it began.
findAll :: Regex -> Text -> [Match]
findAll re0 haystack = unsafePerformIO . unsafeInterleaveIO $ go 0
  where
    -- NOTE(review): only the outermost action is interleaved, so
    -- forcing the list appears to run every search at once.  A pattern
    -- that matches the empty string may also make 'go' resume from the
    -- same offset -- confirm against the semantics of 'IO.find'.
    go !n = matching re0 haystack $ \re -> do
      found <- IO.find re n
      if found
        then do
          n' <- IO.end_ re 0
          (Match re n:) `fmap` go n'
        else return []

-- | Run an action against a fresh clone of the regex, with the given
-- text set as its search subject.
matching :: Regex -> Text -> (IO.Regex -> IO a) -> IO a
matching (Regex re0) haystack act = do
  re <- IO.clone re0
  IO.setText re haystack
  act re

-- $group
--
-- Capturing groups are numbered starting from zero.  Group zero is
-- always the entire matching text.  Groups greater than zero contain
-- the text matching each capturing group in a regular expression.

-- | Return the number of capturing groups in this regular
-- expression or match's pattern.
groupCount :: Regular r => r -> Int
groupCount = unsafePerformIO . IO.groupCount . regRe

-- | A combinator for returning a list of all capturing groups on a
-- 'Match'.
--
-- The accessor is applied to group numbers 0, 1, 2, ... until it
-- returns 'Nothing'.
unfold :: (Int -> Match -> Maybe Text) -> Match -> [Text]
unfold f m = go 0
  where go !n = case f n m of
                  Nothing -> []
                  Just z  -> z : go (n+1)

-- | Return the /n/th capturing group in a match, or 'Nothing' if /n/
-- is out of bounds.
group :: Int -> Match -> Maybe Text
group n m = grouping n m $ \re -> do
  start <- fromIntegral `fmap` IO.start_ re n
  end <- fromIntegral `fmap` IO.end_ re n
  (fp,_) <- IO.getText re
  withForeignPtr fp $ \ptr ->
    T.fromPtr (ptr `advancePtr` fromIntegral start) (end - start)

-- | Return the prefix of the /n/th capturing group in a match (the
-- text from the start of the string to the start of the match), or
-- 'Nothing' if /n/ is out of bounds.
prefix :: Int -> Match -> Maybe Text
prefix n m = grouping n m $ \re -> do
  start <- fromIntegral `fmap` IO.start_ re n
  (fp,_) <- IO.getText re
  withForeignPtr fp (`T.fromPtr` start)

-- | Return the span of text between the end of the previous match and
-- the beginning of the current match.
span :: Match -> Text
span (Match re p) = unsafePerformIO $ do
  start <- IO.start_ re 0
  (fp,_) <- IO.getText re
  withForeignPtr fp $ \ptr ->
    T.fromPtr (ptr `advancePtr` fromIntegral p) (start - p)

-- | Return the suffix of the /n/th capturing group in a match (the
-- text from the end of the match to the end of the string), or
-- 'Nothing' if /n/ is out of bounds.
suffix :: Int -> Match -> Maybe Text
suffix n m = grouping n m $ \re -> do
  end <- fromIntegral `fmap` IO.end_ re n
  (fp,len) <- IO.getText re
  withForeignPtr fp $ \ptr ->
    T.fromPtr (ptr `advancePtr` fromIntegral end) (len - end)

-- | Run an action against the regex of a match, provided that /n/ is
-- a valid group number.
--
-- Valid group numbers run from zero (the entire match) up to and
-- including the number of capturing groups in the pattern.  Any other
-- value yields 'Nothing' without running the action, so an
-- out-of-range index is never handed to the underlying library.
grouping :: Int -> Match -> (Internal.Regex -> IO a) -> Maybe a
grouping n (Match m _) act = unsafePerformIO $ do
  count <- IO.groupCount m
  if n < 0 || n > count
    then return Nothing
    else Just `fmap` act m