The following C# program calculates the longest common subsequence (note the singular) of 2 strings. For example, for the strings "computer" and "houseboat" this algorithm returns a value of 3, specifically the string "out".

This version only works for lists but can be generalized for all sequences.

(defun lcs-list (list-1 list-2 &key (test #'eql))
  "Find the longest common subsequence of LIST-1 and LIST-2 using TEST.

TEST is called with one element of LIST-1 and one element of LIST-2 and
must return true when the two elements are to be considered equal.
Returns a fresh list; when several subsequences share the maximal length,
the one found by skipping an element of LIST-1 is preferred on ties.

The mismatch branch recurses twice without caching, so this is only
suitable for short lists."
  (cond
    ((null list-1) nil)
    ((null list-2) nil)
    ;; Matching heads always belong to some longest common subsequence.
    ((funcall test (first list-1) (first list-2))
     (cons (first list-1)
           (lcs-list (rest list-1) (rest list-2) :test test)))
    ;; Otherwise drop one head from either list and keep the longer result.
    (t (let ((lcs-1 (lcs-list list-1 (rest list-2) :test test))
             (lcs-2 (lcs-list (rest list-1) list-2 :test test)))
         (if (> (length lcs-1) (length lcs-2))
             lcs-1
             lcs-2)))))

(defun diff (list1 list2 &key (test #'eql))
  "Find the differences between LIST1 and LIST2 using TEST.

Returns a list in which elements common to both inputs appear as
themselves, runs of elements present only in LIST1 appear as
(:removed . elements), and runs present only in LIST2 appear as
(:added . elements)."
  (let ((lcs (lcs-list list1 list2 :test test))
        result)
    (dolist (c lcs)
      ;; Locate the next common element in what is left of each list;
      ;; everything in front of it is a removal (LIST1) or an addition (LIST2).
      (let* ((sync-list1 (position c list1 :test test))
             (sync-list2 (position c list2 :test test))
             (removed (subseq list1 0 sync-list1))
             (added (subseq list2 0 sync-list2)))
        ;; Continue after the synchronisation point in both lists.
        (setf list1 (subseq list1 (1+ sync-list1)))
        (setf list2 (subseq list2 (1+ sync-list2)))
        (when removed
          (push (cons :removed removed) result))
        (when added
          (push (cons :added added) result))
        (push c result)))
    ;; Whatever remains after the last common element is a trailing change.
    (when list1
      (push (cons :removed list1) result))
    (when list2
      (push (cons :added list2) result))
    (nreverse result)))

def LCS(X, Y):
    """Build the longest-common-subsequence length table for X and Y.

    X and Y are indexable sequences whose elements are compared with ``==``.

    Returns a list of ``len(X) + 1`` rows, each of ``len(Y) + 1`` integers,
    where ``C[i][j]`` is the LCS length of ``X[:i]`` and ``Y[:j]``. The
    length for the full inputs is therefore ``C[len(X)][len(Y)]``.
    """
    m = len(X)
    n = len(Y)
    # An (m+1) times (n+1) matrix; row 0 and column 0 stay 0 (empty prefix).
    C = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if X[i - 1] == Y[j - 1]:
                # Matching elements extend the LCS of both shorter prefixes.
                C[i][j] = C[i - 1][j - 1] + 1
            else:
                # Otherwise carry over the better of dropping one element.
                C[i][j] = max(C[i][j - 1], C[i - 1][j])
    return C

# Example input: the old revision of a document, one list entry per line.
X = [
    "This part of the document has stayed",
    "the same from version to version.",
    "",
    "This paragraph contains text that is",
    "outdated - it will be deprecated '''and'''",
    "deleted '''in''' the near future.",
    "",
    "It is important to spell check this",
    "dokument. On the other hand, a misspelled",
    "word isn't the end of the world.",
]

# Example input: the new revision of the same document.
Y = [
    "This is an important notice! It should",
    "therefore be located at the beginning of",
    "this document!",
    "",
    "This part of the document has stayed",
    "the same from version to version.",
    "",
    "It is important to spell check this",
    "document. On the other hand, a misspelled",
    "word isn't the end of the world. This",
    "paragraph contains important new",
    "additions to this document.",
]

# Build the LCS table, then hand it to printDiff together with both
# revisions and their lengths.
C = LCS(X, Y)
printDiff(C, X, Y, len(X), len(Y))

It prints the following:

+ This is an important notice! It should
+ therefore be located at the beginning of
+ this document!
+
This part of the document has stayed
the same from version to version.
-
- This paragraph contains text that is
- outdated - it will be deprecated and
- deleted in the near future.
It is important to spell check this
- dokument. On the other hand, a misspelled
- word isn't the end of the world.
+ document. On the other hand, a misspelled
+ word isn't the end of the world. This
+ paragraph contains important new
+ additions to this document.

The following VB.NET program calculates the longest common subsequence (note the singular) of 2 strings. For example, for the strings "computer" and "houseboat" this algorithm returns a value of 3, specifically the string "out".

The Java implementation uses two classes. The first is an abstract class that implements the algorithm. The second is a concrete class that implements it for a string. The same abstract class can be reused so that, instead of comparing characters in a string, it compares lines in a file, blocks of code, nodes in an XML document, or whatever you choose.

import static java.lang.Math.*;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * A class to compute the longest common subsequence of two sequences.
 * Algorithms from Wikipedia:
 * http://en.wikipedia.org/wiki/Longest_common_subsequence_problem
 *
 * <p>The two sequences, called X and Y, are supplied by a subclass through
 * {@link #lengthOfX()}, {@link #lengthOfY()}, {@link #valueOfX(int)} and
 * {@link #valueOfY(int)}. The length table is computed once, on first use,
 * and cached, so the sequences must not change afterwards.
 *
 * @author jhess
 *
 * @param <VALUE> the element type of both sequences
 */
public abstract class LongestCommonSubsequence<VALUE> {
    /** c[i][j] is the LCS length of the first i elements of X and the first j of Y. */
    private int[][] c;
    private ArrayList<DiffEntry<VALUE>> diff;
    private ArrayList<VALUE> backtrack;

    /**
     * A constructor for classes inheriting this one, allowing them to
     * do some initialization before the values of X and Y are read through
     * the abstract accessors.
     */
    protected LongestCommonSubsequence() {
    }

    /** @return the number of elements in sequence Y */
    protected abstract int lengthOfY();

    /** @return the number of elements in sequence X */
    protected abstract int lengthOfX();

    /**
     * @param index zero-based position in X
     * @return the element of X at {@code index}
     */
    protected abstract VALUE valueOfX(int index);

    /**
     * @param index zero-based position in Y
     * @return the element of Y at {@code index}
     */
    protected abstract VALUE valueOfY(int index);

    /**
     * Decides whether an element of X matches an element of Y.
     * Two nulls match; a null never matches a non-null value.
     *
     * @param x1 an element of X, may be null
     * @param y1 an element of Y, may be null
     * @return true when the two elements are considered equal
     */
    protected boolean equals(VALUE x1, VALUE y1) {
        // Null-safe on both sides: the previous form dereferenced x1 when
        // only x1 was null.
        return (x1 == null) ? (y1 == null) : x1.equals(y1);
    }

    /** Compares X and Y using the one-based indices of the table. */
    private boolean isXYEqual(int i, int j) {
        return equals(valueOfXInternal(i), valueOfYInternal(j));
    }

    /** Maps the one-based table row {@code i} to the zero-based X accessor. */
    private VALUE valueOfXInternal(int i) {
        return valueOfX(i - 1);
    }

    /** Maps the one-based table column {@code j} to the zero-based Y accessor. */
    private VALUE valueOfYInternal(int j) {
        return valueOfY(j - 1);
    }

    /**
     * Fills the LCS length table. Does nothing if it was already computed.
     */
    public void calculateLcs() {
        if (c != null) {
            return;
        }
        // Row 0 and column 0 stay zero: they stand for an empty prefix.
        c = new int[lengthOfX() + 1][];
        for (int i = 0; i < c.length; i++) {
            c[i] = new int[lengthOfY() + 1];
        }

        for (int i = 1; i < c.length; i++) {
            for (int j = 1; j < c[i].length; j++) {
                if (isXYEqual(i, j)) {
                    c[i][j] = c[i - 1][j - 1] + 1;
                } else {
                    c[i][j] = max(c[i][j - 1], c[i - 1][j]);
                }
            }
        }
    }

    /** @return the length of the longest common subsequence of X and Y */
    public int getLcsLength() {
        calculateLcs();

        return c[lengthOfX()][lengthOfY()];
    }

    /**
     * @return the number of insertions plus deletions needed to turn X into Y,
     *         i.e. every element that is not part of the LCS
     */
    public int getMinEditDistance() {
        calculateLcs();
        return lengthOfX() + lengthOfY() - 2 * abs(getLcsLength());
    }

    /**
     * @return one longest common subsequence of X and Y, in order; the list
     *         is computed on the first call and the same instance is returned
     *         afterwards
     */
    public List<VALUE> backtrack() {
        calculateLcs();
        if (this.backtrack == null) {
            this.backtrack = new ArrayList<VALUE>();
            backtrack(lengthOfX(), lengthOfY());
        }
        return this.backtrack;
    }

    /**
     * Appends an LCS of the first {@code i} elements of X and the first
     * {@code j} elements of Y to the internal result list. This is the
     * recursive step behind {@link #backtrack()}, which creates that list;
     * call the no-argument overload rather than this one.
     *
     * @param i number of leading X elements to consider
     * @param j number of leading Y elements to consider
     */
    public void backtrack(int i, int j) {
        calculateLcs();

        if (i == 0 || j == 0) {
            return;
        } else if (isXYEqual(i, j)) {
            // Recurse first so elements are appended in forward order.
            backtrack(i - 1, j - 1);
            backtrack.add(valueOfXInternal(i));
        } else {
            if (c[i][j - 1] > c[i - 1][j]) {
                backtrack(i, j - 1);
            } else {
                backtrack(i - 1, j);
            }
        }
    }

    /**
     * @return the edit script from X to Y as a list of unchanged, added and
     *         removed elements, in sequence order; computed on the first call
     *         and cached
     */
    public List<DiffEntry<VALUE>> diff() {
        calculateLcs();
        if (this.diff == null) {
            this.diff = new ArrayList<DiffEntry<VALUE>>();
            diff(lengthOfX(), lengthOfY());
        }
        return this.diff;
    }

    /**
     * Walks the table from cell (i, j) back to (0, 0), recording one entry
     * per step, then reverses the list into forward order.
     */
    private void diff(int i, int j) {
        calculateLcs();

        while (!(i == 0 && j == 0)) {
            if (i > 0 && j > 0 && isXYEqual(i, j)) {
                this.diff.add(new DiffEntry<VALUE>(DiffType.NONE,
                        valueOfXInternal(i)));
                i--;
                j--;
            } else {
                if (j > 0 && (i == 0 || c[i][j - 1] >= c[i - 1][j])) {
                    this.diff.add(new DiffEntry<VALUE>(DiffType.ADD,
                            valueOfYInternal(j)));
                    j--;
                } else if (i > 0 && (j == 0 || c[i][j - 1] < c[i - 1][j])) {
                    this.diff.add(new DiffEntry<VALUE>(DiffType.REMOVE,
                            valueOfXInternal(i)));
                    i--;
                }
            }
        }

        // Entries were collected from the end of the sequences backwards.
        Collections.reverse(this.diff);
    }

    /**
     * @return a textual dump of the length table, with the elements of Y as
     *         the heading line and the elements of X leading each row
     */
    @Override
    public String toString() {
        calculateLcs();

        StringBuilder buf = new StringBuilder();
        buf.append(" ");
        for (int j = 1; j <= lengthOfY(); j++) {
            buf.append(valueOfYInternal(j));
        }
        buf.append("\n");
        buf.append(" ");
        for (int j = 0; j < c[0].length; j++) {
            buf.append(Integer.toString(c[0][j]));
        }
        buf.append("\n");
        for (int i = 1; i < c.length; i++) {
            buf.append(valueOfXInternal(i));
            for (int j = 0; j < c[i].length; j++) {
                buf.append(Integer.toString(c[i][j]));
            }
            buf.append("\n");
        }
        return buf.toString();
    }

    /** The kind of change a {@link DiffEntry} represents. */
    public enum DiffType {
        ADD("+", "add"), REMOVE("-", "remove"), NONE(" ", "none");

        private String val;
        private String name;

        DiffType(String val, String name) {
            this.val = val;
            this.name = name;
        }

        /** @return the one-character marker, same as {@link #getVal()} */
        @Override
        public String toString() {
            return val;
        }

        public String getName() {
            return name;
        }

        public String getVal() {
            return val;
        }
    }

    /**
     * One element of an edit script: a value together with whether it was
     * added, removed or left unchanged.
     */
    public static class DiffEntry<VALUE> {
        private DiffType type;
        private VALUE value;

        public DiffEntry(DiffType type, VALUE value) {
            super();
            this.type = type;
            this.value = value;
        }

        public DiffType getType() {
            return type;
        }

        public void setType(DiffType type) {
            this.type = type;
        }

        public VALUE getValue() {
            return value;
        }

        public void setValue(VALUE value) {
            this.value = value;
        }

        /** @return the type marker followed by the value's string form */
        @Override
        public String toString() {
            // String.valueOf keeps a null value from throwing here.
            return type.toString() + String.valueOf(value);
        }
    }
}

#include <algorithm>
#include <string>
#include <vector>
#include <stdio.h>
#include <string.h>

// See http://www-igm.univ-mlv.fr/~lecroq/seqcomp/node4.html.

/// Longest-common-subsequence search over two arrays of any type that
/// supports operator== and can be stored in a std::vector.
class LCS
{
    /// Table of LCS lengths: cell (i, j) holds the LCS length of the first
    /// i elements of X and the first j elements of Y, for 0 <= i <= m and
    /// 0 <= j <= n.
    class LCSTable
    {
        size_t m_;
        size_t n_;
        // Owned by a vector so copying or destroying the table is safe;
        // every cell starts at 0.
        std::vector<size_t> data_;

    public:
        LCSTable(size_t m, size_t n)
            : m_(m), n_(n), data_((m + 1) * (n + 1), 0)
        {
        }

        void setAt(size_t i, size_t j, size_t value)
        {
            data_[i + j * (m_ + 1)] = value;
        }

        size_t getAt(size_t i, size_t j) const
        {
            return data_[i + j * (m_ + 1)];
        }

        /// Fills the table for X (m elements) and Y (n elements).
        template <typename T>
        void build(const T* X, const T* Y)
        {
            // Row 0 and column 0 describe an empty prefix and are already 0
            // from construction.
            for (size_t i = 0; i < m_; ++i)
            {
                for (size_t j = 0; j < n_; ++j)
                {
                    if (X[i] == Y[j])
                        setAt(i + 1, j + 1, getAt(i, j) + 1);
                    else
                        setAt(i + 1, j + 1, std::max(getAt(i + 1, j), getAt(i, j + 1)));
                }
            }
        }
    };

    /// Reads one LCS of the first i elements of X and the first j elements
    /// of Y out of a built table. `result` is overwritten.
    template <typename T>
    static void backtrackOne(const LCSTable& table, const T* X, const T* Y,
                             size_t i, size_t j, std::vector<T>& result)
    {
        result.clear();
        // Walk from (i, j) towards the origin. Iterating instead of
        // recursing keeps stack use constant for long inputs.
        while (i != 0 && j != 0)
        {
            if (X[i - 1] == Y[j - 1])
            {
                result.push_back(X[i - 1]);
                --i;
                --j;
            }
            else if (table.getAt(i, j - 1) > table.getAt(i - 1, j))
            {
                --j;
            }
            else
            {
                --i;
            }
        }
        // Matches were collected back to front.
        std::reverse(result.begin(), result.end());
    }

public:
    /// Finds one longest common subsequence of X and Y.
    ///
    /// @param X      first sequence, m elements
    /// @param m      number of elements in X
    /// @param Y      second sequence, n elements
    /// @param n      number of elements in Y
    /// @param result receives the subsequence; previous contents are discarded
    template <typename T>
    static void findOne(const T* X, size_t m, const T* Y, size_t n,
                        std::vector<T>& result)
    {
        LCSTable table(m, n);
        table.build(X, Y);
        backtrackOne(table, X, Y, m, n, result);
    }
};

C←LCSLength rarg;X;Y;i;j;shape;lastalike;words;B;jb
 ⍝ Computes the length of the Longest common sequence
 ⍝ rarg: two-element vector holding the sequences X and Y
 ⍝ C:    numeric matrix of LCS lengths, filled row by row
 X Y←rarg

 ⍝ Optimize by working with numbers instead of strings
 words←X∪Y
 X←words⍳X
 Y←words⍳Y

 ⍝ Optimize and reduce sets by removing similar trailing objects.
 ⍝ NOTE(review): the trailing items dropped here are not counted in C;
 ⍝ confirm callers account for that.
 shape←-(⍴X)⌊(⍴Y)
 lastalike←-+/∧\⌽(shape↑X)=(shape↑Y)
 X←lastalike↓X
 Y←lastalike↓Y

 ⍝ C is a numeric matrix where the height is the shape of X
 ⍝ and the width is the shape of Y (one extra row and column each)
 C←(1+⊃,/⍴¨X Y)⍴0

 ⍝ Fill C with LCSlengths
 j←1+⍳⍴Y
 jb←j-1
 :For i :In 1+⍳⍴X
     ⍝ B marks the positions of Y equal to the current item of X
     B←X[i-1]=Y
     ⍝ Matches extend the diagonal neighbour by one
     C[i;B/j]←1+C[i-1;B/jb]
     ⍝ Non-matches take the running maximum of left and upper neighbours
     C[i;(~B)/j]←⌈\(~B)/C[i;jb]⌈C[i-1;j]
 :EndFor

R←MakeDiff rarg;do;⎕IO;C;X;Y;i;j;t1;t2
 ⍝ Get the diff between two vectors given the LCS in rarg1
 ⍝ rarg: three-element vector: the LCS matrix C, then the vectors X and Y
 ⍝ \ret [;1] 1=Added item, 0=Deleted item
 ⍝      [;2] index in rarg2
 ⍝      [;3] index in rarg3
 ⍝      [;4] The item
 ⍝ Rows are appended while walking from the ends of X and Y towards the start.
 ⎕IO←0
 C X Y←rarg
 ⍝ Number of elements in each vector. This is the starting
 ⍝ point for the calculation below
 i j←⊃,/⍴¨,¨X Y
 ⍝ Add an empty item before each item... Needed
 ⍝ so that index i (or j) addresses the i-th (j-th) original item
 X Y←' ',¨X Y
 ⍝ Test 1: true when the step at (i;j) is an addition from Y
 t1←{C i j←⍵ ⋄ j>0:{i=0:1 ⋄ C[i;j-1]≥C[i-1;j]:1 ⋄ 0}0 ⋄ 0}
 ⍝ Test 2: true when the step at (i;j) is a removal from X
 t2←{C i j←⍵ ⋄ i>0:{j=0:1 ⋄ C[i;j-1]<C[i-1;j]:1 ⋄ 0}0 ⋄ 0}
 ⍝ Walk the path and trace additions or removals of items recursively
 do←{
     C X Y i j sofar←⍵
     ⍝ Equal items: nothing to record, step diagonally
     (i>0)∧(j>0)∧(X[i]≡Y[j]):∇ C X Y(i-1)(j-1)sofar
     t1 C i j:∇ C X Y(i)(j-1)(sofar⍪1 i j(j⊃Y))
     t2 C i j:∇ C X Y(i-1)(j)(sofar⍪0 i j(i⊃X))
     sofar
 }
 ⍝ Shoot
 R←do C X Y i j(0 4⍴0)