// ArrayDiff.h
#ifndef ARRAY_DIFF_HPP
#define ARRAY_DIFF_HPP
/*
class ArrayDiff - compute a shortest edit script (SES) given two arrays,
detect moved items and modified items and track the
movement of a particular item.
Version 1.0 (20111223)
Modifications from Michael B. Allen's code are Copyright (c) 2011 Mike Buckham
(replace _at_ and _dot_ accordingly).
==========================================================================
Source based on code from libmba-0.9.1:
http://www.ioplex.com/~miallen/libmba/dl/src/diff.c
with the following copyright notice:
==========================================================================
diff - compute a shortest edit script (SES) given two sequences
Copyright (c) 2004 Michael B. Allen
The MIT License
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
==========================================================================
[From Michael B. Allen's diff.c]
This algorithm is basically Myers' solution to SES/LCS with
the Hirschberg linear space refinement as described in the
following publication:
E. Myers, ``An O(ND) Difference Algorithm and Its Variations,''
Algorithmica 1, 2 (1986), 251-266.
http://research.janelia.org/myers/Papers/diff.pdf
This is the same algorithm used by GNU diff(1).
==========================================================================
Modifications from Michael B. Allen's code which are designed by
Mike Buckham are the following:
* Packaged into a class and changed callback usage
* Detection of moved items
* Detection of modified items (the same item, but it has changed in some way)
* Specific item index tracking
The above copyright notice now applies to these modifications also.
*/
// Not all systems have <vector> available, so pick whichever of these include files you require.
//#include <vector>
// Array_TPtr.h emulates the std::vector API's which are used in class ArrayDiff.
#include "Array_TPtr.h"
/**************** Global Constants ******************************************/
// ARRAYDIFF_DEBUG is defined whenever NDEBUG is not defined. It gates the debug-only
// declarations in this header (debug_getDiffEdit(), arraydiff_DumpSes(), arraydiff_printf()).
#ifndef NDEBUG
#define ARRAYDIFF_DEBUG
#endif
/**************** Global Types **********************************************/
// U32 is the unsigned type used for all array indices, lengths and counts in the public API.
// NOTE(review): the name suggests exactly 32 bits, but 'unsigned int' has a platform-dependent
// width - confirm this is acceptable on all target platforms.
typedef unsigned int U32;
/*
Terminology
===========
* Working Array - this is the array which is in use by a system and is to be compared to a Target
Array and manipulated by the processing of an Edit Script until it matches the
Target array.
* Target Array - this is the array which defines the required ordering and content of the Working
Array after processing of an Edit Script.
* Edit Script - this is a list of Operations which are applied one by one on a Working Array in
order to make it match a Target Array.
* Operation - an individual function to be performed on a Working Array which will perform
insertions, deletions and movements of blocks of items within the Working Array.
* Client - this is the system which owns the Working and Target Arrays. The Client is the caller
of functions within the ArrayDiff class, provides the comparison callback functions
and processes the Operations within the Edit Script in order to make the Working Array
match the Target Array.
The ArrayDiff class is used by Clients to generate the Edit Script in order to manipulate the
Working Array to make it match the Target Array. Items in the Working and Target Arrays need
not be of the same type, but there does need to be some commonality in order to perform comparisons.
Single Stage Matching
=====================
In this mode, the 'pModifiedFn' parameter passed into initialise() is NULL. If the 'pCompareFn'
callback passed into initialise() returns true then there is a full match between the Working
and Target Arrays for the specified items and the MATCH Operation is returned to the Client.
In this mode, none of the *MODIFIED Operations will be returned to the Client.
Two Stage Matching
==================
In this mode, the 'pModifiedFn' parameter passed into initialise() is defined. Items are only
passed to this callback if they have already passed the first stage i.e. the 'pCompareFn'
callback has returned true. If the 'pModifiedFn' callback returns false then a full match is
achieved and the MATCH Operation is returned to the Client, otherwise a partial match is
achieved and the MODIFY Operation is returned to the Client.
Overview
========
After constructing and initialising the ArrayDiff class and performing the comparison
function, each Operation is retrieved from the ArrayDiff class using getEditOp(),
which passes an object of the type SesData_t in order to maintain the context and return
the Operation parameters. All Operations are performed on the Working Array and are ordered
sequentially from the start of the array towards its end such that SesData_t::workingIndex
always increments. The SesData_t::moveIndex is another index into the Working Array for
MOVE Operations, specifying the index to move from or to. SesData_t::targetIndex is an
index into the Target Array. The Operations are described below.
Either before or after compare() is called, but before getEditOp(), trackItemIndex() can
be used to track a particular index during the Operations performed on the Working Array.
For example, a highlighted item in a list can remain highlighted after the Operations are
performed without having to search for it again.
Operation: MATCH
================
The Working and Target Arrays fully match for this block of items. The 'pCompareFn' callback
has returned true and either the 'pModifiedFn' callback was not defined, or it returned false.
No action need be taken. This Operation is only passed back to the Client if 'reportMatchOps'
is true in the call to getEditOp(). It is not normally required by the Client.
workingIndex: index into the Working Array
moveIndex: unused
targetIndex: index into the Target Array
pseudo: WorkingArray[workingIndex..] == TargetArray[targetIndex..]
Operation: MODIFY
=================
The Working and Target Arrays partially match for this block of items. The 'pCompareFn' callback
has returned true and the 'pModifiedFn' callback has returned true. This operation is normally
used to keep an existing item in the Working Array, but modify some small part of it or even
all of it as required.
workingIndex: index into the Working Array
moveIndex: unused
targetIndex: index into the Target Array
pseudo: WorkingArray[workingIndex..] <== TargetArray[targetIndex..]
Operation: DELETE
=================
This Operation is used to delete a block of items from the Working Array because they were not
found in the Target Array. There are two interpretations of this Operation depending on the
value of 'checkMoved' passed into initialise(). If 'checkMoved' is false then the block of
items was simply not found at that point in the Target Array. If 'checkMoved' is true then
the block of items was not found anywhere in the Target Array.
workingIndex: index into the Working Array
moveIndex: unused
targetIndex: index into the Target Array
pseudo: delete WorkingArray[workingIndex..]
Operation: INSERT
=================
This Operation is used to insert a block of items into the Working Array because they were not
previously there but they were found in the Target Array. There are two interpretations of this
Operation depending on the value of 'checkMoved' passed into initialise(). If 'checkMoved' is
false then the block of items were new at this point in the Target Array. If 'checkMoved'
is true then the block of items are new to the whole Target Array.
workingIndex: index into the Working Array
moveIndex: unused
targetIndex: index into the Target Array
pseudo: insert WorkingArray[workingIndex..] from TargetArray[targetIndex..]
Operation: MOVE_FORWARD
=======================
Only returned to the Client if 'checkMoved' is true in the call to initialise(). This Operation
is used to move a block of items from 'workingIndex' forward to 'moveIndex' within the Working
Array using one of the following sequences:
* Cut the block of 'opLength' items from the Working Array at 'workingIndex' (shifting the
remaining items back to close up the gap)
* Insert that block of items into the Working Array at index 'moveIndex' (shifting the remaining
items forward to make the space)
Alternatively:
* Copy the block of items from the Working Array at 'workingIndex', block size 'opLength' into
a temporary store
* Shift the block of items in the Working Array from ('workingIndex'+'opLength') back to
'workingIndex', block size ('moveIndex'-'workingIndex'). Be careful of possible overlaps in the
start and end positions of this block.
* Copy the block of items from the temporary store back into the Working Array at index 'moveIndex'
For example:
0123456789
Working Array: abcdefghij
MOVE_FORWARD, workingIndex=2, opLength=3, moveIndex=6
0123456789
Working Array: abfghicdej ("cde" moved forward in the array by 4 positions)
workingIndex: index into the Working Array to move from
moveIndex: index into the Working Array to move to
targetIndex: index into the Target Array
pseudo: move from WorkingArray[workingIndex..] to WorkingArray[moveIndex..]
Operation: MOVE_FORWARD_MODIFIED
================================
This Operation is the same as MOVE_FORWARD except that the 'pModifiedFn' callback was defined and
returned true for this block of moved items.
Operation: MOVE_BACK
====================
Only returned to the Client if 'checkMoved' is true in the call to initialise(). This Operation
is used to move a block of items from 'moveIndex' backward to 'workingIndex' within the Working
Array using one of the following sequences:
* Cut the block of 'opLength' items from the Working Array at 'moveIndex' (shifting the remaining
items back to close up the gap)
* Insert that block of items into the Working Array at 'workingIndex' (shifting the remaining items
forward to make the space)
Alternatively:
* Copy the block of items from the Working Array at 'moveIndex', block size 'opLength' into
a temporary store
* Shift the block of items in the Working Array from 'workingIndex' forward to
('workingIndex'+'opLength'), block size ('moveIndex'-'workingIndex'). Be careful of possible overlaps
in the start and end positions of this block.
* Copy the block of items from the temporary store back into the Working Array at index 'workingIndex'
For example:
0123456789
Working Array: abcdefghij
MOVE_BACK, workingIndex=2, opLength=3, moveIndex=6
0123456789
Working Array: abghicdefj ("ghi" moved backward in the array by 4 positions)
workingIndex: index into the Working Array to move to
moveIndex: index into the Working Array to move from
targetIndex: index into the Target Array
pseudo: move from WorkingArray[moveIndex..] to WorkingArray[workingIndex..]
Operation: MOVE_BACK_MODIFIED
=============================
This Operation is the same as MOVE_BACK except that the 'pModifiedFn' callback was defined and
returned true for this block of moved items.
Operations: SKIP, WAS_INSERTED, INVALID
=======================================
None of these Operations are returned to the Client. They are only used internally. After construction
of an ArrayDiff::SesData_t object, the 'op' parameter will be INVALID, but this should never be
interpreted by the application.
Usage Example
=============
#include <vector>
// TargetRecord_t is a Target Array item
struct TargetRecord_t
{
TargetRecord_t(U32 _id, time_t _setTime) { id = _id; dummyB = 0; setTime = _setTime; }
U32 id;
U32 dummyB;
time_t setTime;
};
// WorkingRecord_t is a Working Array item
struct WorkingRecord_t
{
WorkingRecord_t(const TargetRecord_t& rData) { id = rData.id; dummyA = 0; lastUpdatedTime = rData.setTime; }
U32 id;
U32 dummyA;
time_t lastUpdatedTime;
};
// Declare the comparison arrays
static std::vector<TargetRecord_t*> gTargetArray;
static std::vector<WorkingRecord_t*> gWorkingArray;
// Comparison callback function
static bool compareArrayCB(U32 workingIndex, U32 targetIndex, void* pContextData)
{
return (gWorkingArray[workingIndex]->id == gTargetArray[targetIndex]->id);
}
// Modified callback function
static bool modifiedArrayCB(U32 workingIndex, U32 targetIndex, void* pContextData)
{
return (gWorkingArray[workingIndex]->lastUpdatedTime != gTargetArray[targetIndex]->setTime);
}
static void runComparison(int& rHighlightedIndex)
{
void* pContextData = NULL;
// Construct an ArrayDiff object
ArrayDiff diff;
// Initialise the comparison parameters
diff.initialise(compareArrayCB,modifiedArrayCB,pContextData,true);
// Perform the comparison, defining the entire Working and Target Arrays
int diffDistance = diff.compare(0,gWorkingArray.size(),0,gTargetArray.size());
// If there are any Operations to perform on the Working Array
if ((diffDistance > 0) || (diff.getNumModified() > 0))
{
diff.trackItemIndex(&rHighlightedIndex);
// While there are Operations to perform
ArrayDiff::SesData_t sesData;
while (diff.getEditOp(sesData))
{
U32 index;
switch (sesData.op)
{
case ArrayDiff::DiffOp_MODIFY :
for (index = 0; index < sesData.opLength; index++)
{
gWorkingArray[sesData.workingIndex+index]->lastUpdatedTime = gTargetArray[sesData.targetIndex+index]->setTime;
}
break;
case ArrayDiff::DiffOp_DELETE :
for (index = 0; index < sesData.opLength; index++)
{
delete gWorkingArray[sesData.workingIndex];
gWorkingArray.erase(gWorkingArray.begin()+sesData.workingIndex);
}
break;
case ArrayDiff::DiffOp_INSERT :
for (index = 0; index < sesData.opLength; index++)
{
gWorkingArray.insert(gWorkingArray.begin()+sesData.workingIndex+index,new WorkingRecord_t(*gTargetArray[sesData.targetIndex+index]));
}
break;
case ArrayDiff::DiffOp_MOVE_FORWARD_MODIFIED :
for (index = 0; index < sesData.opLength; index++)
{
gWorkingArray[sesData.workingIndex+index]->lastUpdatedTime = gTargetArray[sesData.targetIndex+index]->setTime;
}
// Fall through to next case
case ArrayDiff::DiffOp_MOVE_FORWARD :
for (index = 0; index < sesData.opLength; index++)
{
WorkingRecord_t* pTemp = gWorkingArray[sesData.workingIndex];
gWorkingArray.erase(gWorkingArray.begin()+sesData.workingIndex);
gWorkingArray.insert(gWorkingArray.begin()+sesData.moveIndex+sesData.opLength-1,pTemp);
}
break;
case ArrayDiff::DiffOp_MOVE_BACK_MODIFIED :
for (index = 0; index < sesData.opLength; index++)
{
gWorkingArray[sesData.moveIndex+index]->lastUpdatedTime = gTargetArray[sesData.targetIndex+index]->setTime;
}
// Fall through to next case
case ArrayDiff::DiffOp_MOVE_BACK :
for (index = 0; index < sesData.opLength; index++)
{
WorkingRecord_t* pTemp = gWorkingArray[sesData.moveIndex+index];
gWorkingArray.erase(gWorkingArray.begin()+sesData.moveIndex+index);
gWorkingArray.insert(gWorkingArray.begin()+sesData.workingIndex+index,pTemp);
}
break;
default :
// No other Operations should be reported
assert(false);
break;
}
}
// The Working and Target Arrays should now be matching
}
}
*/
/* class ArrayDiff generates an Edit Script of Operations which, when applied in order by the
Client, make a Working Array match a Target Array. The class never touches the arrays itself:
items are only ever referred to by index and compared through Client-supplied callbacks.
Typical call order: initialise(), compare(), optionally trackItemIndex(), then getEditOp()
repeatedly until it returns false.
*/
class ArrayDiff
{
public :
/* DiffOps_t Operations are used to identify what must be done to the Working Array to make it
match the Target Array.
*/
enum DiffOps_t
{ // Pass to Client?
// ===============
DiffOp_MATCH = 0, // Optional
DiffOp_MODIFY, // Yes if 'pModifiedFn' is defined
DiffOp_DELETE, // Yes
DiffOp_INSERT, // Yes
DiffOp_MOVE_FORWARD, // Yes if 'checkMoved' is true
DiffOp_MOVE_FORWARD_MODIFIED, // Yes if 'checkMoved' is true and 'pModifiedFn' is defined
DiffOp_MOVE_BACK, // Yes if 'checkMoved' is true
DiffOp_MOVE_BACK_MODIFIED, // Yes if 'checkMoved' is true and 'pModifiedFn' is defined
DiffOp_SKIP, // No. Only used internally if 'checkMoved' is true
DiffOp_WAS_INSERTED, // No. Only used internally if 'checkMoved' is true
DiffOp_INVALID // No. Only used internally
};
/* The SesData_t structure is owned by the Client to maintain the context of the comparison
and return parameters to operate on the Working Array. Use successive calls to getEditOp()
to retrieve each Operation until getEditOp() returns false.
*/
struct SesData_t
{
DiffOps_t op; // The Operation to perform
U32 opLength; // The number of array items in this Operation block
U32 workingIndex; // Index into the Working Array
U32 moveIndex; // Only required for MOVE* Operations - index into the Working Array to move from or to
U32 targetIndex; // Index into the Target Array
U32 nextWorkingIndex; // Internal usage only
U32 sesIndex; // Internal usage only
// A newly constructed object is in the reset() state, i.e. op == DiffOp_INVALID
SesData_t()
{
reset();
}
// Return every field to its initial state: op becomes DiffOp_INVALID and all lengths and
// indices (including the internal ones) become 0.
void reset()
{
op = DiffOp_INVALID;
opLength = 0;
workingIndex = 0;
moveIndex = 0;
targetIndex = 0;
nextWorkingIndex = 0;
sesIndex = 0;
}
};
// CallbackFn_t defines the prototype for the comparison and modified callback functions.
// 'workingIndex' and 'targetIndex' identify the pair of items to examine and 'pContextData'
// is the opaque pointer which the Client passed into initialise().
typedef bool (*CallbackFn_t)(U32 workingIndex, U32 targetIndex, void* pContextData);
ArrayDiff();
~ArrayDiff();
// initialise() must be called before compare()
// pCompareFn - first stage callback; returns true if the two items match
// pModifiedFn - optional second stage callback (may be NULL for Single Stage Matching);
// returns true if the matched items differ in some way
// pContextData - opaque Client pointer handed back to both callbacks
// checkMoved - true to enable detection of moved blocks (MOVE* Operations)
void initialise(CallbackFn_t pCompareFn, CallbackFn_t pModifiedFn, void* pContextData, bool checkMoved);
// Only valid if checkMoved is true in initialise().
// Registers a Working Array index to be tracked across the Operations; may be called before or
// after compare(), but before getEditOp().
// NOTE(review): presumably *pTrackItemIndex is updated in place as Operations are retrieved and
// so must remain valid until getEditOp() returns false - confirm against the implementation.
void trackItemIndex(int* pTrackItemIndex);
// Returns the number of operations required to manipulate the Working Array to match the Target Array.
// The start index and length pairs select the region of each array to be compared.
int compare(U32 workingStartIndex, U32 workingLength, U32 targetStartIndex, U32 targetLength);
// Return the total number of Operations to be performed
// (this is the size of the internal script container, narrowed to U32)
U32 getSesSize() const { return mSes.size(); }
// Return the total number of modified items
U32 getNumModified() const { return mNumModified; }
// Return the next Operation; if false is returned then there are no more Operations.
// 'rSesData' carries the iteration context between calls and receives the Operation parameters.
// MATCH Operations are only reported when 'reportMatchOps' is true (the default is false).
bool getEditOp(SesData_t& rSesData, bool reportMatchOps = false);
#ifdef ARRAYDIFF_DEBUG
// For debug only, return an internal script operation
// (only declared when ARRAYDIFF_DEBUG is defined)
bool debug_getDiffEdit(U32 diffIndex, DiffOps_t& rOp, int& rIndex, int& rTargetIndex, int& rLength) const;
#endif
private :
// DiffEdit_t is one entry of the internal edit script held in mSes
struct DiffEdit_t
{
DiffOps_t op;
int index; // Index into Target for MATCH, MODIFY, INSERT or index into Working for DELETE, MOVE_FORWARD, MOVE_BACK
int targetIndex; // Always index into Target
int length;
U32 id; // Used in moved block analysis
};
// Container types: the Array_TPtr.h emulation is used when that header has defined ARRAY_TPTR_H,
// otherwise std::vector is used.
// NOTE(review): the template arguments of all four typedefs below appear to be missing (these
// lines will not compile as written) - restore the element types from the original source.
#ifdef ARRAY_TPTR_H
typedef TArray ArrayOfInt_t;
typedef TPtrArray ArrayOfDiffEdit_t;
#else
typedef std::vector ArrayOfInt_t;
typedef std::vector ArrayOfDiffEdit_t;
#endif
// MiddleSnake_t is the result structure filled in by findMiddleSnake().
// NOTE(review): presumably (x,y) and (u,v) are the start and end points of the middle snake as
// in Myers' algorithm - confirm against the implementation.
struct MiddleSnake_t
{
int x;
int y;
int u;
int v;
};
// Original functions from diff.c (with modifications)
void setValue(int k, int r, int val);
int getValue(int k, int r);
int findMiddleSnake(int workingIndex, int workingLength, int targetIndex, int targetLength, MiddleSnake_t* pMiddleSnake);
void sesAppend(DiffOps_t op, int off, int len, int targetIndex, bool checkMatchModified = true);
int createSes(int workingIndex, int workingLength, int targetIndex, int targetLength);
// New functions for moved block analysis and modified block detection
int comparePartBlock(int workingIndex, int workingLength, int workingBlockOffset, int searchOffset, int searchLen, int& rSearchBlockOffset, bool searchIsWorking);
void sesSet(ArrayOfDiffEdit_t::iterator& rSesIt, int editOffset, DiffOps_t op, int off, int len, ArrayOfDiffEdit_t::iterator& rOtherSesIt, U32 newEditId, int overrideTargetIndex);
void checkMovedModified(ArrayOfDiffEdit_t::iterator movedSesIt, int workingIndex, int searchOffset, bool searchIsWorking, ArrayOfDiffEdit_t::iterator& rSesIt, ArrayOfDiffEdit_t::iterator& rSearchIt);
bool isModified(int workingIndex, int searchOffset, bool searchIsWorking);
// Copy and assignment functions not implemented
// (declared private so that Client code cannot copy an ArrayDiff object)
ArrayDiff(const ArrayDiff& rOther);
ArrayDiff& operator=(const ArrayDiff& rOther);
// NOTE(review): the roles given below are inferred from the member names and the public API;
// confirm against the implementation.
CallbackFn_t mpCompareFn; // Presumably 'pCompareFn' from initialise()
CallbackFn_t mpModifiedFn; // Presumably 'pModifiedFn' from initialise(); may be NULL
void* mpContextData; // Presumably 'pContextData' from initialise()
ArrayOfInt_t mBuf; // Presumably the working buffer accessed via setValue()/getValue()
ArrayOfDiffEdit_t mSes; // The internal edit script; its size is reported by getSesSize()
int mDmax;
U32 mNumModified; // Reported by getNumModified()
U32 mLastCompareWorkingIndex;
bool mCheckMoved; // Presumably 'checkMoved' from initialise()
int* mpTrackItemIndex; // Presumably 'pTrackItemIndex' from trackItemIndex()
void* mpTrackItem;
};
/**************** Global Variables ******************************************/
/**************** Function Prototypes ***************************************/
// Debug-only free functions; these are declared only when ARRAYDIFF_DEBUG is defined.
#ifdef ARRAYDIFF_DEBUG
// NOTE(review): presumably prints the internal edit script of 'rDiff', labelled with
// 'pDescription' and the 'diffDistance' returned by compare() - confirm against the implementation.
void arraydiff_DumpSes(const ArrayDiff& rDiff, const char* pDescription, int diffDistance);
// NOTE(review): the header name on the following #include appears to be missing (the line will
// not compile as written) - restore it from the original source.
#include
// printf-style output function taking a format string followed by variadic arguments.
// NOTE(review): no definition is visible here; presumably supplied by the Client or platform.
int arraydiff_printf(const char* pFormat, ...);
#endif
/**************** End of File ***********************************************/
#endif // ifndef ARRAY_DIFF_HPP