// Implements System.IDisposable.Dispose: the public entry point for deterministic cleanup.
// Do not make this method virtual.
// A derived class should not be able to override this method.
public void Dispose ()
{
// Run the cleanup path with disposing == true (called from user code).
this.Dispose(true);
// The cleanup has already been performed by the call above, so take this object
// off the finalization queue to prevent finalization code for this object from
// executing a second time.
System.GC.SuppressFinalize(this);
}

// Dispose (bool disposing) executes in two distinct scenarios.
// If disposing equals true, the method has been called directly or indirectly by a user's code. Managed and unmanaged resources can be disposed.
// If disposing equals false, the method has been called by the runtime from inside the finalizer and you should not reference other objects.
// Only unmanaged resources can be disposed.
private void Dispose (bool disposing)
{
if (!this._Disposed)
{
// If disposing equals true, dispose all managed and unmanaged resources.
if (disposing)
{
// Dispose managed resources here.
this.Close();
}

// The main array of data points that will hold the image data
private Clustering.Library.DataPoint [] _DataPointCollection;
// Simple int array to hold the cluster index for each data point
// Length will be the same as _DataPointCollection
private int [] _ClusterIndexCollection;
// An array of ArrayList objects. The array will be of length CLUSTER_COUNT and
// will hold one ArrayList for each cluster. Each ArrayList will store only the
// data point index from _DataPointCollection (not the data point itself). This
// approach was chosen by the author to save memory and to make use of the
// BinarySearch feature of ArrayList objects.
// This array is not strictly necessary; it is kept because, per the author,
// this approach increases performance dramatically.
private System.Collections.ArrayList [] _ClusterIndexCollectionCollection;
// Simple data point array to hold the mean of each cluster
// Length will be the same as CLUSTER_COUNT
private Clustering.Library.DataPoint [] _MeanCollection;
// Simple data point array to hold the old (previous) mean of each cluster
// Length will be the same as CLUSTER_COUNT
private Clustering.Library.DataPoint [] _MeanOldCollection;

// Avoid redundant reassigning to same cluster
if (this._DataPointCollection[i].ClusterIndex != clusterIndex)
{
// The data point was already assigned to another cluster
// Due to reassignment, the From and To cluster Mean values
// will be affected so we need to recalculate new Mean values
// for both
displacement++;
clusterIndexTemp = this._DataPointCollection[i].ClusterIndex;
this.AssignDataPointToCluster(clusterIndex, i);
this.ComputeClusterMean(clusterIndex);
this.ComputeClusterMean(clusterIndexTemp);
}
}

// These variables are exclusive to the EuclideanDistance function and should not be accessed elsewhere
// Declared at class level for performance.
private double _EuclideanDistanceResult = 0.0D;
// Passing the parameters by reference is good to avoid unnecessary copying since we are using structs
private double EuclideanDistance (ref Clustering.Library.DataPoint dataPoint1, ref Clustering.Library.DataPoint dataPoint2)
{
this._EuclideanDistanceResult = 0.0D;

// Implements System.IDisposable.Dispose: the public entry point for deterministic cleanup.
// Do not make this method virtual.
// A derived class should not be able to override this method.
public void Dispose ()
{
// Run the cleanup path with disposing == true (called from user code).
this.Dispose(true);
// The cleanup has already been performed by the call above, so take this object
// off the finalization queue to prevent finalization code for this object from
// executing a second time.
System.GC.SuppressFinalize(this);
}

// Dispose (bool disposing) executes in two distinct scenarios.
// If disposing equals true, the method has been called directly or indirectly by a user's code. Managed and unmanaged resources can be disposed.
// If disposing equals false, the method has been called by the runtime from inside the finalizer and you should not reference other objects.
// Only unmanaged resources can be disposed.
private void Dispose (bool disposing)
{
if (!this._Disposed)
{
// If disposing equals true, dispose all managed and unmanaged resources.
if (disposing)
{
// Dispose managed resources here.
this.Initialize();
}

I don't understand this code very well, but here are some things I noticed.

I think the biggest thing is that computing the cluster means on every pixel is very inefficient. Since you walk through 2 whole clusters on every pixel move, each iteration of the while(true) takes O(n) * O(2n/CLUSTER_COUNT). This is O(n^2) if n>>CLUSTER_COUNT.
Do you have to recompute the cluster mean every time? Or could you be lazy and recompute only after moving each pixel once, outside the for loop? You wouldn't expect the mean to change that much in one pass, especially after the first few passes.
Even better, though, I think you can compute the means incrementally. If you remember the total for each cluster in an array (say float[] ClusterMeanTotals[CLUSTER_COUNT]), then you can just update the mean on each pixel. Update the totals in AssignDataPointToCluster by subtracting the pixel's value from the old cluster's total and adding it to the new cluster's total. Then you could always get the mean with one division instead of iterating over a whole array!

In AssignDataPointToCluster, I think you could use hashtables instead of ArrayLists. The lookup is even faster - O(1) instead of O(lg(n)).

In EuclideanDistance, do you really need to do the sqrt? Since you're just comparing distances, you can compare the square of the Euclidean distance, right? And x*x should be faster than Pow(x,2.0).

Is there a better way to initialize than randomly? Since most images have their colors grouped together, dividing the image into CLUSTER_COUNT blocks might give you a lot better clusters to start with. It'd be a little more difficult to get regular 2-d blocks but since you only do it once per image, it might be well worth it. Also, I don't think you need to assign all the points in initialization. Unassigned points will just get assigned in the first time through the Compute() loop, right?

How good do your results have to be? Can you quit early - say when all the displacements are below a threshold instead of when the clustering is _perfect_?

If you're doing this on a lot of images, it looks to me like you'll spend a lot of time in Open(). Creating and manually filling all those arrays has got to be expensive. You probably don't need to store the _RGB,_R,_G,_B, arrays at all since you only use them for computing HSV values, right? And you can eliminate the _H,_S, and _V arrays and just access _HSV like:
float[] HSV = image.HSV;
float h = HSV[(i*3)];
float s = HSV[(i*3)+1];
float v =HSV[(i*3)+2];
That'll save you a bunch of memory too, which will make everything faster anyway.

Finally, I think you're overestimating how much you save by using huge flat arrays instead of more structured data. Creating a "cluster" class and making more use of your DataPoint class would make the code a lot simpler and easier to diagnose problems. I'd use a class instead of a struct for DataPoint. You're copying all that data whenever you do an assignment, and you usually don't need to. And then you can just pass around DataPoints instead of indices - the references are just pointers, so they're no heavier than ints. I wrote some stuff for fun - it's below. Up to you to test it to see if I'm right that it works and is faster!

Your first suggestion is a killer. Instead of iterating through 2 entire clusters, remembering the totals will probably reduce computing time drastically. I did suspect the AssignDataPointToCluster() method as a major culprit.

Regarding HashTables vs ArrayLists, I'm not too familiar with the internal workings of HashTables but I imagine they operate on some sort of Binary Tree storage concept. If that is the case, the BinarySearch feature of ArrayLists should be comparable in performance. I'll try HashTables anyways.

Regarding EuclideanDistance, you're absolutely right again. Since I don't need the actual values apart from comparison, the Sqrt can be skipped. And, of course, should have thought of Pow vs * as well.

Regarding initialization, I'm already working on splitting the image into square tiles since the decrease in performance is exponential on image size.

As far as the terminating condition is concerned, the clusters do have to be perfect. That is one of the restrictions I have to live by.

Regarding the Image.Open method, the performance is very fast (1 second for a 1000x1000 image). Removing the R, G, B, RGB, H, S, V arrays will definitely save a lot of memory and reduce paging overhead.

Regarding huge flat arrays vs structured data, I took that approach because sometimes, flat C styled code allows room for 'dirty' optimizations, normally not allowed by structured code. Also, I am of the impression that heap access would be faster than stack (structs vs classes). One thing I was not sure about was if an array of structures stays on the heap or the stack.

Thanks for the code sample. I'll make the above optimizations in my code and test performance against your approach.

Poor audio quality is one of the top reasons people don’t use video conferencing. Get the crispest, clearest audio powered by Dolby Voice in every meeting. Highfive and Dolby Voice deliver the best video conferencing and audio experience for every meeting and every room.

Hashtables are faster than binary trees for insert and lookup. Internally, they're usually implemented as a growable array of "buckets". The hashtable computes a "hashcode" for the element you're inserting, and uses that as an index into the array. It puts the element in the bucket (the bucket is usually a short linked list so it can hold multiple entries). Then when you want to retrieve an element, it computes the hashcode again, and it only has to look through one bucket. If there are on average N buckets for N entries, the buckets are small, so it does only a few comparisons instead of log(n). Most hashtables (including the .NET one) support inserting a key _and_ a value. Hashcodes are computed based on the key, so you can look up the data even if you only know its key (typically a name or ID). This is why they're sometimes called "dictionaries". http://en.wikipedia.org/wiki/Hashtable

An array of structures (or any other data) is allocated on the stack or the heap depending on how you allocate it, not on the structure itself. In .NET almost everything is allocated on the heap anyway. Essentially every time you call "new", you get heap memory, whether it's a struct or a class. _Locally_ declared simple types go on the stack, as do locally declared structs. In .NET, I believe arrays are always on the heap. Traditionally stack variables are accessed faster than heap variables, but these days it's almost the same. The important difference is that _creating_ heap objects is more expensive.

Question - what's the application of this? Are you sorting images by color for some sort of art project?

// Implements System.IDisposable.Dispose: the public entry point for deterministic cleanup.
// Do not make this method virtual.
// A derived class should not be able to override this method.
public void Dispose ()
{
// Run the cleanup path with disposing == true (called from user code).
this.Dispose(true);
// The cleanup has already been performed by the call above, so take this object
// off the finalization queue to prevent finalization code for this object from
// executing a second time.
System.GC.SuppressFinalize(this);
}

// Dispose (bool disposing) executes in two distinct scenarios.
// If disposing equals true, the method has been called directly or indirectly by a user's code. Managed and unmanaged resources can be disposed.
// If disposing equals false, the method has been called by the runtime from inside the finalizer and you should not reference other objects.
// Only unmanaged resources can be disposed.
private void Dispose (bool disposing)
{
if (!this._Disposed)
{
// If disposing equals true, dispose all managed and unmanaged resources.
if (disposing)
{
// Dispose managed resources here.
this.Close();
}

// The main array of data points that will hold the image data
private Clustering.Library.DataPoint [] _DataPointCollection;
// Simple int array to hold the cluster index for each data point
// Length will be the same as _DataPointCollection
private int [] _ClusterIndexCollection;
// An array of ArrayList objects. The array will be of length CLUSTER_COUNT and
// will hold one ArrayList for each cluster. Each ArrayList will store only the
// data point index from _DataPointCollection (not the data point itself). This
// approach was chosen by the author to save memory and to make use of the
// BinarySearch feature of ArrayList objects.
// This array is not strictly necessary; it is kept because, per the author,
// this approach increases performance dramatically.
private System.Collections.ArrayList [] _ClusterIndexCollectionCollection;
// Simple data point array to hold the mean of each cluster
// Length will be the same as CLUSTER_COUNT
private Clustering.Library.DataPoint [] _MeanCollection;
// Simple data point array to hold the running sum of each cluster
// Length will be the same as CLUSTER_COUNT
private Clustering.Library.DataPoint [] _MeanSum;
// Simple data point array to hold the old (previous) mean of each cluster
// Length will be the same as CLUSTER_COUNT
private Clustering.Library.DataPoint [] _MeanOldCollection;
// Simple data point array to hold the old (previous) sum of each cluster
// Length will be the same as CLUSTER_COUNT
private Clustering.Library.DataPoint [] _MeanOldSum;

// Avoid redundant reassigning to same cluster
if (this._DataPointCollection[i].ClusterIndex != clusterIndex)
{
// The data point was already assigned to another cluster
// Due to reassignment, the From and To cluster Mean values
// will be affected so we need to recalculate new Mean values
// for both
displacement++;
clusterIndexTemp = this._DataPointCollection[i].ClusterIndex;
this.AssignDataPointToCluster(clusterIndex, i);
//this.ComputeClusterMean(clusterIndex);
//this.ComputeClusterMean(clusterIndexTemp);
}
}

// These variables are exclusive to the EuclideanDistance function and should not be accessed elsewhere
// Declared at class level for performance.
private double _EuclideanDistanceResult = 0.0D;
// Passing the parameters by reference is good to avoid unnecessary copying since we are using structs
private double EuclideanDistance (ref Clustering.Library.DataPoint dataPoint1, ref Clustering.Library.DataPoint dataPoint2)
{
this._EuclideanDistanceResult = 0.0D;

// Implements System.IDisposable.Dispose: the public entry point for deterministic cleanup.
// Do not make this method virtual.
// A derived class should not be able to override this method.
public void Dispose ()
{
// Run the cleanup path with disposing == true (called from user code).
this.Dispose(true);
// The cleanup has already been performed by the call above, so take this object
// off the finalization queue to prevent finalization code for this object from
// executing a second time.
System.GC.SuppressFinalize(this);
}

// Dispose (bool disposing) executes in two distinct scenarios.
// If disposing equals true, the method has been called directly or indirectly by a user's code. Managed and unmanaged resources can be disposed.
// If disposing equals false, the method has been called by the runtime from inside the finalizer and you should not reference other objects.
// Only unmanaged resources can be disposed.
private void Dispose (bool disposing)
{
if (!this._Disposed)
{
// If disposing equals true, dispose all managed and unmanaged resources.
if (disposing)
{
// Dispose managed resources here.
this.Initialize();
}

Suggested Solutions

Introduction
Although it is an old technology, serial ports are still being used by many hardware manufacturers.
If you develop applications in C#, Microsoft .NET framework has SerialPort class to communicate with the serial ports. I needed to…

Summary:
Persistence is the capability of an application to store the state of objects and recover it when necessary. This article compares the two common types of serialization in aspects of data access, readability, and runtime cost. A ready-to…

When you create an app prototype with Adobe XD, you can insert system screens -- sharing or Control Center, for example -- with just a few clicks. This video shows you how. You can take the full course on Experts Exchange at http://bit.ly/XDcourse.