{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts #-}
{-# OPTIONS_GHC -Wall -fwarn-tabs -fno-warn-name-shadowing #-}
----------------------------------------------------------------
--                                                  ~ 2012.03.25
-- |
-- Module      :  Control.Unification
-- Copyright   :  Copyright (c) 2007--2012 wren ng thornton
-- License     :  BSD
-- Maintainer  :  wren@community.haskell.org
-- Stability   :  experimental
-- Portability :  semi-portable (MPTCs, FlexibleContexts)
--
-- This module provides first-order structural unification over
-- general structure types. It also provides the standard suite of
-- functions accompanying unification (applying bindings, getting
-- free variables, etc.).
--
-- The implementation makes use of numerous optimization techniques.
-- First, we use path compression everywhere (for weighted path
-- compression see "Control.Unification.Ranked"). Second, we replace
-- the occurs-check with visited-sets. Third, we use a technique
-- for aggressive opportunistic observable sharing; that is, we
-- track as much sharing as possible in the bindings (without
-- introducing new variables), so that we can compare bound variables
-- directly and therefore eliminate redundant unifications.
----------------------------------------------------------------
module Control.Unification
    (
    -- * Data types, classes, etc
    -- ** Unification terms
      UTerm(..)
    , freeze
    , unfreeze
    -- ** Errors
    , UnificationFailure(..)
    -- ** Basic type classes
    , Unifiable(..)
    , Variable(..)
    , BindingMonad(..)

    -- * Operations on one term
    , getFreeVars
    , applyBindings
    , freshen
    -- freezeM     -- apply bindings and freeze in one traversal
    -- unskolemize -- convert Skolemized variables to free variables
    -- skolemize   -- convert free variables to Skolemized variables
    -- getSkolems  -- compute the skolem variables in a term; helpful?

    -- * Operations on two terms
    -- ** Symbolic names
    , (===)
    , (=~=)
    , (=:=)
    , (<:=)
    -- ** Textual names
    , equals
    , equiv
    , unify
    , unifyOccurs
    , subsumes

    -- * Operations on many terms
    , getFreeVarsAll
    , applyBindingsAll
    , freshenAll
    -- subsumesAll -- to ensure that there's a single
    -- coherent substitution allowing the schema to subsume all the terms in some collection.

    -- * Helper functions
    -- | Client code should not need to use these functions, but
    -- they are exposed just in case they are needed.
    , fullprune
    , semiprune
    , occursIn
    -- TODO: add a post-hoc occurs check in order to have a version of unify which is fast, yet is also guaranteed to fail when it ought to (rather than deferring the failure until later, as the current unify does).
    ) where

import Prelude
    hiding (mapM, mapM_, sequence, foldr, foldr1, foldl, foldl1, all, and, or)

import qualified Data.IntMap as IM
import qualified Data.IntSet as IS
import Data.Foldable
import Data.Traversable
import Control.Monad.Identity (Identity(..))
import Control.Applicative
import Control.Monad       (MonadPlus(..))
import Control.Monad.Trans (MonadTrans(..))
import Control.Monad.Error (MonadError(..))
import Control.Monad.State (MonadState(..), StateT, evalStateT, execStateT)
import Control.Monad.MaybeK
import Control.Monad.State.UnificationExtras
import Control.Unification.Types
----------------------------------------------------------------
----------------------------------------------------------------

-- N.B., this assumes there are no directly-cyclic chains!
--
-- | Follow a chain of variable bindings to its end and rebind
-- every variable visited along the way directly to that end. The
-- end is either the first non-variable term reached or, when the
-- chain is unbound, the final free variable; it is also the value
-- returned. A term that is not a variable is returned unchanged.
--
-- N.B., this is almost never the function you want. Cf., 'semiprune'.
fullprune :: (BindingMonad t v m) => UTerm t v -> m (UTerm t v)
fullprune term =
    case term of
        UTerm _ -> return term
        UVar  v ->
            lookupVar v >>= \binding ->
                case binding of
                    -- Unbound: the variable itself is the end of the chain.
                    Nothing   -> return term
                    Just next -> do
                        end <- fullprune next
                        -- Path compression: point @v@ straight at the end.
                        v `bindVar` end
                        return end


-- N.B., this assumes there are no directly-cyclic chains!
--
-- | Canonicalize a chain of variables so they all point directly
-- to the last variable in the chain, regardless of whether it is
-- bound or not.
-- This allows detecting many cases where multiple
-- variables point to the same term, thereby allowing us to avoid
-- re-unifying the term they point to.
semiprune :: (BindingMonad t v m) => UTerm t v -> m (UTerm t v)
semiprune term =
    case term of
        UTerm _ -> return term
        UVar  v -> chase term v
    where
    -- We pass the @UVar@ term for each variable alongside the variable
    -- itself in order to add just a little more sharing (the existing
    -- constructor is returned rather than a rebuilt one).
    chase here var = do
        binding <- lookupVar var
        case binding of
            Just next@(UVar var') -> do
                end <- chase next var'
                -- Path compression: point @var@ at the last variable.
                var `bindVar` end
                return end
            -- Either unbound, or bound to a non-variable term: in both
            -- cases @var@ is the last variable of the chain.
            _ -> return here


-- | Determine if a variable appears free somewhere inside a term.
-- Since occurs checks only make sense when we're about to bind the
-- variable to the term, we do not bother checking for the possibility
-- of the variable occurring bound in the term.
--
-- The traversal stops descending into further subterms as soon as
-- the variable has been found. Subterms are canonicalized with
-- 'fullprune' as they are visited, so bindings of the variables that
-- were visited get path-compressed as a side effect.
occursIn :: (BindingMonad t v m) => v -> UTerm t v -> m Bool
{-# INLINE occursIn #-}
occursIn v0 t0 = do
    t0 <- fullprune t0
    case t0 of
        UTerm t ->
            -- Short-circuiting left fold: once the answer is known to
            -- be True the remaining children are not traversed.
            foldlM
                (\found t' ->
                    if found
                        then return True
                        else v0 `occursIn` t')
                False
                t
        UVar  v -> return $! v0 == v


-- TODO: use IM.insertWith or the like to do this in one pass
--
-- | Update the visited-set with a declaration that a variable has
-- been seen with a given binding, or throw 'OccursIn' if the
-- variable has already been seen. The error carries the variable
-- together with the binding that was recorded for it earlier.
seenAs
    ::  ( BindingMonad t v m
        , MonadTrans e
        , MonadError (UnificationFailure t v) (e m)
        )
    => v -- ^ the variable being visited
    -> t (UTerm t v) -- ^ the structure the variable is bound to
    -> StateT (IM.IntMap (t (UTerm t v))) (e m) () -- ^ updates the visited-set
{-# INLINE seenAs #-}
seenAs v0 t0 = do
    seenVars <- get
    let key = getVarID v0
    case IM.lookup key seenVars of
        Nothing   -> put $! IM.insert key t0 seenVars
        Just prev -> lift . throwError $ OccursIn v0 (UTerm prev)

----------------------------------------------------------------
----------------------------------------------------------------

-- TODO: these assume pure variables, hence the spine cloning; but
-- we may want to make variants for impure variables with explicit
-- rollback on backtracking.

-- TODO: See if MTL still has that overhead over doing things manually.

-- TODO: Figure out how to abstract the left-catamorphism from these.


-- | Walk a term and determine
-- which variables are still free. N.B.,
-- this function does not detect cyclic terms (i.e., throw errors),
-- but it will return the correct answer for them in finite time.
getFreeVars :: (BindingMonad t v m) => UTerm t v -> m [v]
getFreeVars t = getFreeVarsAll (Identity t)


-- TODO: Should we return the IntMap instead?
--
-- | Same as 'getFreeVars', but works on several terms simultaneously.
-- This is more efficient than getting the free variables for each
-- of the terms separately because we can make use of sharing across
-- the whole collection. That is, each time we move to the next
-- term, we still remember the bound variables we've already looked
-- at (and therefore do not need to traverse, since we've already
-- seen whatever free variables there are down there); whereas we
-- would forget between each call to @getFreeVars@.
--
-- /Since: 0.7.0/
getFreeVarsAll
    :: (BindingMonad t v m, Foldable s)
    => s (UTerm t v) -> m [v]
getFreeVarsAll terms =
    IM.elems <$> evalStateT (collectAll terms) IS.empty
    where
    -- The state is the set of IDs of variables already visited; the
    -- result is a map from variable ID to free variable.
    --
    -- TODO: is that the most efficient direction/associativity?
    collectAll =
        foldrM (\term acc -> IM.union acc <$> collect term) IM.empty

    collect term = do
        pruned <- lift $ semiprune term
        case pruned of
            UTerm sub -> fold <$> mapM collect sub
            -- TODO: benchmark using the following instead:
            -- > foldMapM f = foldlM (\a b -> mappend a <$> f b) mempty
            UVar var -> do
                visited <- get
                let key = getVarID var
                if key `IS.member` visited
                    then return IM.empty -- no (more) free vars down here
                    else do
                        put $! IS.insert key visited
                        binding <- lift $ lookupVar var
                        case binding of
                            Nothing    -> return $ IM.singleton key var
                            Just bound -> collect bound


-- | Apply the current bindings from the monad so that any remaining
-- variables in the result must, therefore, be free. N.B., this
-- expensively clones term structure and should only be performed
-- when a pure term is needed, or when 'OccursIn' exceptions must
-- be forced.
-- This function /does/ preserve sharing, however that
-- sharing is no longer observed by the monad.
--
-- If any cyclic bindings are detected, then an 'OccursIn' exception
-- will be thrown.
applyBindings
    ::  ( BindingMonad t v m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure t v) (e m)
        )
    => UTerm t v       -- ^ the term to apply bindings to
    -> e m (UTerm t v) -- ^ the term with bound variables replaced
applyBindings t = runIdentity <$> applyBindingsAll (Identity t)


-- | Same as 'applyBindings', but works on several terms simultaneously.
-- This function preserves sharing across the entire collection of
-- terms, whereas applying the bindings to each term separately
-- would only preserve sharing within each term.
--
-- /Since: 0.7.0/
applyBindingsAll
    ::  ( BindingMonad t v m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure t v) (e m)
        , Traversable s
        )
    => s (UTerm t v)       -- ^ the terms to apply bindings to
    -> e m (s (UTerm t v)) -- ^ the terms with bound variables replaced
applyBindingsAll ts0 = evalStateT (mapM resolve ts0) IM.empty
    where
    -- The state maps a variable ID to @Left binding@ while that
    -- binding is still being expanded, and to @Right result@ once
    -- its expansion is finished. Meeting a @Left@ entry again means
    -- the variable occurs inside its own binding.
    resolve term = do
        pruned <- lift . lift $ semiprune term
        case pruned of
            UTerm sub -> UTerm <$> mapM resolve sub
            UVar  var -> do
                let key = getVarID var
                cached <- IM.lookup key <$> get
                case cached of
                    Just (Right done) ->
                        return done
                    Just (Left pending) ->
                        lift . throwError $ OccursIn var pending
                    Nothing -> do
                        binding <- lift . lift $ lookupVar var
                        case binding of
                            -- A free variable stays as it is.
                            Nothing    -> return pruned
                            Just bound -> do
                                modify' (IM.insert key (Left bound))
                                result <- resolve bound
                                modify' (IM.insert key (Right result))
                                return result


-- | Freshen all variables in a term, both bound and free. This
-- ensures that the observability of sharing is maintained, while
-- freshening the free variables. N.B., this expensively clones
-- term structure and should only be performed when necessary.
--
-- If any cyclic bindings are detected, then an 'OccursIn' exception
-- will be thrown.
freshen
    ::  ( BindingMonad t v m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure t v) (e m)
        )
    => UTerm t v       -- ^ the term to freshen
    -> e m (UTerm t v) -- ^ the freshened copy
freshen t = runIdentity <$> freshenAll (Identity t)


-- | Same as 'freshen', but works on several terms simultaneously.
-- This is different from freshening each term separately, because
-- @freshenAll@ preserves the relationship between the terms.
-- For
-- instance, the result of
--
-- > mapM freshen [UVar 1, UVar 1]
--
-- would be @[UVar 2, UVar 3]@ or something alpha-equivalent, whereas
-- the result of
--
-- > freshenAll [UVar 1, UVar 1]
--
-- would be @[UVar 2, UVar 2]@ or something alpha-equivalent.
--
-- /Since: 0.7.0/
freshenAll
    ::  ( BindingMonad t v m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure t v) (e m)
        , Traversable s
        )
    => s (UTerm t v)       -- ^ the terms to freshen
    -> e m (s (UTerm t v)) -- ^ the freshened copies
freshenAll ts0 = evalStateT (mapM rename ts0) IM.empty
    where
    -- The state maps an old variable ID to @Left binding@ while that
    -- binding is still being freshened, and to @Right fresh@ once the
    -- replacement variable is known. Meeting a @Left@ entry again
    -- means the variable occurs inside its own binding.
    rename term = do
        pruned <- lift . lift $ semiprune term
        case pruned of
            UTerm sub -> UTerm <$> mapM rename sub
            UVar  var -> do
                let key = getVarID var
                known <- get
                case IM.lookup key known of
                    Just (Right fresh) ->
                        return fresh
                    Just (Left pending) ->
                        lift . throwError $ OccursIn var pending
                    Nothing -> do
                        binding <- lift . lift $ lookupVar var
                        case binding of
                            Nothing -> do
                                -- Free variable: replace by a new free one.
                                fresh <- lift . lift $ UVar <$> freeVar
                                put $! IM.insert key (Right fresh) known
                                return fresh
                            Just bound -> do
                                put $! IM.insert key (Left bound) known
                                bound' <- rename bound
                                -- Bound variable: replace by a new variable
                                -- bound to the freshened binding.
                                fresh <- lift . lift $ UVar <$> newVar bound'
                                modify' $ IM.insert key (Right fresh)
                                return fresh

----------------------------------------------------------------
----------------------------------------------------------------
-- BUG: have to give the signatures for Haddock :(

-- | Infix synonym for 'equals'.
(===)
    :: (BindingMonad t v m)
    => UTerm t v  -- ^ left term
    -> UTerm t v  -- ^ right term
    -> m Bool     -- ^ result of 'equals'
(===) = equals
{-# INLINE (===) #-}
infix 4 ===, `equals`


-- | Infix synonym for 'equiv'.
(=~=)
    :: (BindingMonad t v m)
    => UTerm t v                 -- ^ left term
    -> UTerm t v                 -- ^ right term
    -> m (Maybe (IM.IntMap Int)) -- ^ result of 'equiv'
(=~=) = equiv
{-# INLINE (=~=) #-}
infix 4 =~=, `equiv`


-- | Infix synonym for 'unify'.
(=:=)
    ::  ( BindingMonad t v m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure t v) (e m)
        )
    => UTerm t v       -- ^ left term
    -> UTerm t v       -- ^ right term
    -> e m (UTerm t v) -- ^ result of 'unify'
(=:=) = unify
{-# INLINE (=:=) #-}
infix 4 =:=, `unify`


-- | Infix synonym for 'subsumes'.
(<:=)
    ::  ( BindingMonad t v m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure t v) (e m)
        )
    => UTerm t v -- ^ left term
    -> UTerm t v -- ^ right term
    -> e m Bool  -- ^ result of 'subsumes'
(<:=) = subsumes
{-# INLINE (<:=) #-}
infix 4 <:=, `subsumes`

----------------------------------------------------------------

{- BUG:
If we don't use anything special, then there's a 2x overhead for
calling 'equals' (and probably the rest of them too). If we add a
SPECIALIZE pragma, or if we try to use MaybeT instead of MaybeKT
then that jumps up to 4x overhead. However, if we add an INLINE
pragma then it gets faster than the same implementation in the
benchmark file. I've no idea what's going on here...
-}

-- TODO: should we offer a variant which gives the reason for failure?
--
-- | Determine if two terms are structurally equal. This is essentially
-- equivalent to @('==')@ except that it does not require applying
-- bindings before comparing, so it is more efficient. N.B., this
-- function does not consider alpha-variance, and thus variables
-- with different names are considered unequal. Cf., 'equiv'.
--
-- Bound variables are compared through their bindings: two distinct
-- bound variables are equal when the terms they are bound to are
-- equal, and a bound variable is equal to an explicit term when its
-- binding is equal to that term. A free variable is only equal to
-- itself.
equals
    :: (BindingMonad t v m)
    => UTerm t v  -- ^ left term
    -> UTerm t v  -- ^ right term
    -> m Bool     -- ^ whether the two terms are structurally equal
equals tl0 tr0 = do
    mb <- runMaybeKT (loop tl0 tr0)
    case mb of
        Nothing -> return False
        Just () -> return True
    where
    -- Success is @return ()@; failure is @mzero@.
    loop tl0 tr0 = do
        tl0 <- lift $ semiprune tl0
        tr0 <- lift $ semiprune tr0
        case (tl0, tr0) of
            (UVar vl, UVar vr)
                | vl == vr  -> return () -- success
                | otherwise -> do
                    mtl <- lift $ lookupVar vl
                    mtr <- lift $ lookupVar vr
                    case (mtl, mtr) of
                        (Nothing,         Nothing        ) -> mzero
                        (Nothing,         Just _         ) -> mzero
                        (Just _,          Nothing        ) -> mzero
                        (Just (UTerm tl), Just (UTerm tr)) -> match tl tr
                        -- After 'semiprune' a variable is never bound
                        -- to another variable.
                        _ -> error _impossible_equals

            -- A bound variable stands for the term it is bound to, so
            -- compare that term against the explicit term. Failing
            -- unconditionally here would make the answer depend on
            -- whether bindings had been applied beforehand.
            (UVar vl, UTerm tr) -> do
                mtl <- lift $ lookupVar vl
                case mtl of
                    Nothing         -> mzero
                    Just (UTerm tl) -> match tl tr
                    _ -> error _impossible_equals

            (UTerm tl, UVar vr) -> do
                mtr <- lift $ lookupVar vr
                case mtr of
                    Nothing         -> mzero
                    Just (UTerm tr) -> match tl tr
                    _ -> error _impossible_equals

            (UTerm tl, UTerm tr) -> match tl tr

    match tl tr =
        case zipMatch tl tr of
            Nothing  -> mzero
            Just tlr -> mapM_ loop_ tlr

    loop_ (Left  _)       = return () -- success
    loop_ (Right (tl,tr)) = loop tl tr

_impossible_equals :: String
{-# NOINLINE _impossible_equals #-}
_impossible_equals = "equals: the impossible happened"


-- TODO: is that the most helpful return type?
--
-- | Determine if two terms are structurally equivalent; that is,
-- structurally equal modulo renaming of free variables.
-- Returns a
-- mapping from variable IDs of the left term to variable IDs of
-- the right term, indicating the renaming used.
--
-- N.B., the renaming is only checked to be a function of the left
-- variable: a left variable seen twice must map to the same right
-- variable both times, but two distinct left variables are allowed
-- to map to the same right variable.
equiv
    :: (BindingMonad t v m)
    => UTerm t v                 -- ^ left term
    -> UTerm t v                 -- ^ right term
    -> m (Maybe (IM.IntMap Int)) -- ^ the renaming, or 'Nothing' on failure
equiv tl0 tr0 = runMaybeKT (execStateT (walk tl0 tr0) IM.empty)
    where
    -- The state is the renaming built so far; failure is @lift mzero@.
    walk lhs0 rhs0 = do
        lhs <- lift . lift $ fullprune lhs0
        rhs <- lift . lift $ fullprune rhs0
        case (lhs, rhs) of
            (UVar vl, UVar vr) -> do
                let il = getVarID vl
                let ir = getVarID vr
                renaming <- get
                case IM.lookup il renaming of
                    Nothing -> put $! IM.insert il ir renaming
                    Just known
                        | known == ir -> return () -- success; no changes
                        | otherwise   -> lift mzero

            (UVar  _, UTerm _) -> lift mzero
            (UTerm _, UVar  _) -> lift mzero

            (UTerm tl, UTerm tr) ->
                case zipMatch tl tr of
                    Nothing    -> lift mzero
                    Just pairs -> mapM_ walkChild pairs

    walkChild (Left  _)        = return () -- success; no changes
    walkChild (Right (tl, tr)) = walk tl tr


----------------------------------------------------------------
-- Not quite unify2 from the benchmarks, since we do AOOS.
--
-- | A variant of 'unify' which uses 'occursIn' instead of visited-sets.
-- This should only be used when eager throwing of 'OccursIn' errors
-- is absolutely essential (or for testing the correctness of
-- @unify@). Performing the occurs-check is expensive.
-- Not only is
-- it slow, it's asymptotically slow since it can cause the same
-- subterm to be traversed multiple times.
unifyOccurs
    ::  ( BindingMonad t v m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure t v) (e m)
        )
    => UTerm t v       -- ^ left term
    -> UTerm t v       -- ^ right term
    -> e m (UTerm t v) -- ^ the unified term
unifyOccurs = loop
    where
    -- Bind a variable without any occurs check.
    {-# INLINE (=:) #-}
    v =: t = lift $ v `bindVar` t

    -- Bind a variable, but throw 'OccursIn' instead if the variable
    -- occurs in the term it is about to be bound to.
    {-# INLINE acyclicBindVar #-}
    acyclicBindVar v t = do
        cyclic <- lift $ v `occursIn` t
        if cyclic
            then throwError $ OccursIn v t
            else v =: t

    -- TODO: cf todos in 'unify'
    loop lhs0 rhs0 = do
        lhs <- lift $ semiprune lhs0
        rhs <- lift $ semiprune rhs0
        case (lhs, rhs) of
            (UVar vl, UVar vr)
                | vl == vr  -> return rhs
                | otherwise -> do
                    mtl <- lift $ lookupVar vl
                    mtr <- lift $ lookupVar vr
                    case (mtl, mtr) of
                        (Nothing, Nothing) -> do
                            vl =: rhs
                            return rhs
                        (Nothing, Just _) -> do
                            vl `acyclicBindVar` rhs
                            return rhs
                        (Just _, Nothing) -> do
                            vr `acyclicBindVar` lhs
                            return lhs
                        (Just (UTerm tl), Just (UTerm tr)) -> do
                            t <- match tl tr
                            -- Share the unified term through @vr@ and
                            -- make @vl@ an alias of @vr@.
                            vr =: t
                            vl =: rhs
                            return rhs
                        _ -> error _impossible_unifyOccurs

            (UVar vl, UTerm tr) -> do
                mtl <- lift $ lookupVar vl
                case mtl of
                    Nothing -> do
                        vl `acyclicBindVar` rhs
                        return lhs
                    Just (UTerm tl) -> do
                        t <- match tl tr
                        vl =: t
                        return lhs
                    _ -> error _impossible_unifyOccurs

            (UTerm tl, UVar vr) -> do
                mtr <- lift $ lookupVar vr
                case mtr of
                    Nothing -> do
                        vr `acyclicBindVar` lhs
                        return rhs
                    Just (UTerm tr) -> do
                        t <- match tl tr
                        vr =: t
                        return rhs
                    _ -> error _impossible_unifyOccurs

            (UTerm tl, UTerm tr) -> match tl tr

    match tl tr =
        case zipMatch tl tr of
            Nothing    -> throwError $ TermMismatch tl tr
            Just pairs -> UTerm <$> mapM loopChild pairs

    loopChild (Left  t)        = return t
    loopChild (Right (tl, tr)) = loop tl tr

_impossible_unifyOccurs :: String
{-# NOINLINE _impossible_unifyOccurs #-}
_impossible_unifyOccurs = "unifyOccurs: the impossible happened"


----------------------------------------------------------------
-- TODO: verify correctness, especially for the visited-set stuff.
-- TODO: return Maybe(UTerm t v) in the loop so we can avoid updating bindings trivially
-- TODO: figure out why unifyOccurs is so much faster on pure ground terms!! The only difference there is in lifting over StateT...
--
-- | Unify two terms, or throw an error with an explanation of why
-- unification failed.
-- Since bindings are stored in the monad, the
-- two input terms and the output term are all equivalent if
-- unification succeeds. However, the returned value makes use of
-- aggressive opportunistic observable sharing, so it will be more
-- efficient to use it in future calculations than either argument.
unify
    ::  ( BindingMonad t v m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure t v) (e m)
        )
    => UTerm t v       -- ^ left term
    -> UTerm t v       -- ^ right term
    -> e m (UTerm t v) -- ^ the unified term
unify tl0 tr0 = evalStateT (loop tl0 tr0) IM.empty
    where
    -- Bind a variable in the underlying binding monad.
    {-# INLINE (=:) #-}
    v =: t = lift . lift $ v `bindVar` t

    -- The state is the visited-set maintained by 'seenAs'; it is
    -- scoped with 'localState' around each descent into a binding.
    --
    -- TODO: would it be beneficial to manually fuse @x <- lift m; y <- lift n@ to @(x,y) <- lift (m;n)@ everywhere we can?
    loop lhs0 rhs0 = do
        lhs <- lift . lift $ semiprune lhs0
        rhs <- lift . lift $ semiprune rhs0
        case (lhs, rhs) of
            (UVar vl, UVar vr)
                | vl == vr  -> return rhs
                | otherwise -> do
                    mtl <- lift . lift $ lookupVar vl
                    mtr <- lift . lift $ lookupVar vr
                    case (mtl, mtr) of
                        -- Left variable is free: alias it to the right one,
                        -- whether or not the right one is bound.
                        (Nothing, _) -> do
                            vl =: rhs
                            return rhs
                        (Just _, Nothing) -> do
                            vr =: lhs
                            return lhs
                        (Just (UTerm tl), Just (UTerm tr)) -> do
                            t <- localState $ do
                                vl `seenAs` tl
                                vr `seenAs` tr
                                match tl tr
                            -- Share the unified term through @vr@ and
                            -- make @vl@ an alias of @vr@.
                            vr =: t
                            vl =: rhs
                            return rhs
                        _ -> error _impossible_unify

            (UVar vl, UTerm tr) -> do
                mtl <- lift . lift $ lookupVar vl
                t <- case mtl of
                    Nothing         -> return rhs
                    Just (UTerm tl) -> localState $ do
                        vl `seenAs` tl
                        match tl tr
                    _ -> error _impossible_unify
                vl =: t
                return lhs

            (UTerm tl, UVar vr) -> do
                mtr <- lift . lift $ lookupVar vr
                t <- case mtr of
                    Nothing         -> return lhs
                    Just (UTerm tr) -> localState $ do
                        vr `seenAs` tr
                        match tl tr
                    _ -> error _impossible_unify
                vr =: t
                return rhs

            (UTerm tl, UTerm tr) -> match tl tr

    match tl tr =
        case zipMatch tl tr of
            Nothing    -> lift . throwError $ TermMismatch tl tr
            Just pairs -> UTerm <$> mapM loopChild pairs

    loopChild (Left  t)        = return t
    loopChild (Right (tl, tr)) = loop tl tr

_impossible_unify :: String
{-# NOINLINE _impossible_unify #-}
_impossible_unify = "unify: the impossible happened"


----------------------------------------------------------------
-- TODO: can we find an efficient way to return the bindings directly instead of altering the monadic bindings?
-- Maybe another StateT IntMap taking getVarID to the variable and its pseudo-bound term?
--
-- TODO: verify correctness
-- TODO: redo with some codensity
-- TODO: there should be some way to catch OccursIn errors and repair the bindings...

-- | Determine whether the left term subsumes the right term. That
-- is, whereas @(tl =:= tr)@ will compute the most general substitution
-- @s@ such that @(s tl === s tr)@, @(tl <:= tr)@ computes the most
-- general substitution @s@ such that @(s tl === tr)@. This means
-- that @tl@ is less defined than and consistent with @tr@.
--
-- Bound variables on either side are treated as the terms they are
-- bound to. In particular, an explicit left term is compared against
-- the binding of a bound right variable; only a /free/ right variable
-- makes an explicit left term fail immediately.
--
-- /N.B./, this function updates the monadic bindings just like
-- 'unify' does. However, while the use cases for unification often
-- want to keep the bindings around, the use cases for subsumption
-- usually do not. Thus, you'll probably want to use a binding monad
-- which supports backtracking in order to undo the changes.
-- Unfortunately, leaving the monadic bindings unaltered and returning
-- the necessary substitution directly imposes a performance penalty
-- or else requires specifying too much about the implementation
-- of variables.
subsumes
    ::  ( BindingMonad t v m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure t v) (e m)
        )
    => UTerm t v -- ^ left term (the more general candidate)
    -> UTerm t v -- ^ right term
    -> e m Bool  -- ^ whether the left term subsumes the right term
subsumes tl0 tr0 = evalStateT (loop tl0 tr0) IM.empty
    where
    -- Bind a variable in the underlying binding monad and succeed.
    {-# INLINE (=:) #-}
    v =: t = lift . lift $ do
        v `bindVar` t
        return True

    -- TODO: cf todos in 'unify'
    loop tl0 tr0 = do
        tl0 <- lift . lift $ semiprune tl0
        tr0 <- lift . lift $ semiprune tr0
        case (tl0, tr0) of
            (UVar vl, UVar vr)
                | vl == vr  -> return True
                | otherwise -> do
                    mtl <- lift . lift $ lookupVar vl
                    mtr <- lift . lift $ lookupVar vr
                    case (mtl, mtr) of
                        (Nothing, Nothing) -> vl =: tr0
                        (Nothing, Just _ ) -> vl =: tr0
                        -- Left is bound but right is free: the left
                        -- side is more defined than the right side.
                        (Just _ , Nothing) -> return False
                        (Just (UTerm tl), Just (UTerm tr)) ->
                            localState $ do
                                vl `seenAs` tl
                                vr `seenAs` tr
                                match tl tr
                        _ -> error _impossible_subsumes

            (UVar vl,  UTerm tr) -> do
                mtl <- lift . lift $ lookupVar vl
                case mtl of
                    Nothing         -> vl =: tr0
                    Just (UTerm tl) -> localState $ do
                        vl `seenAs` tl
                        match tl tr
                    _ -> error _impossible_subsumes

            -- An explicit left term can never subsume a free right
            -- variable, but a bound right variable stands for its
            -- binding and must be compared structurally, mirroring the
            -- @(UVar, UTerm)@ case above.
            (UTerm tl, UVar vr) -> do
                mtr <- lift . lift $ lookupVar vr
                case mtr of
                    Nothing         -> return False
                    Just (UTerm tr) -> localState $ do
                        vr `seenAs` tr
                        match tl tr
                    _ -> error _impossible_subsumes

            (UTerm tl, UTerm tr) -> match tl tr

    match tl tr =
        case zipMatch tl tr of
            Nothing  -> return False
            Just tlr -> and <$> mapM loop_ tlr -- TODO: use foldlM?

    loop_ (Left  _)       = return True
    loop_ (Right (tl,tr)) = loop tl tr

_impossible_subsumes :: String
{-# NOINLINE _impossible_subsumes #-}
_impossible_subsumes = "subsumes: the impossible happened"

----------------------------------------------------------------
----------------------------------------------------------- fin.