package matrix;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
/**
* An instance of this class represents a matrix of numbers
* as double values.
* @author syb0rg
*/
public class Matrix
{
private final double[][] matrix;
private final int rows;
private final int columns;
/**
* Creates a matrix with the indicated numbers of rows and columns.
* @param rows the number of rows in the matrix
* @param columns the number of columns in the matrix
*/
public Matrix(int rows, int columns)
{
matrix = new double[rows][columns];
this.rows = rows;
this.columns = columns;
}
/**
* Getter for the number of rows in this matrix.
* @return the number of rows in this matrix
*/
public int getRows()
{
return rows;
}
/**
* Getter for the number of columns in this matrix.
* @return the number of columns in this matrix
*/
public int getColumns()
{
return columns;
}
/**
* Gets the element at the indicated row and column in this matrix.
* @param row the row position for the element.
* It must be the case that 0 &le; row &lt; getRows().
* @param column the column position for the element.
* It must be the case that 0 &le; column &lt; getColumns().
* @return the element at the indicated row and column
* @throws MatrixException if row or column is out of bounds
*/
public double get(int row, int column) throws MatrixException
{
if (validIndicies(row, column))
{
return matrix[row][column];
}
else return 0.0;
}
/**
* Sets the element at the indicated row and column in this matrix.
* @param row the row position for the element.
* It must be the case that 0 &le; row &lt; getRows().
* @param column the column position for the element.
* It must be the case that 0 &le; column &lt; getColumns().
* @param element the value to set in the matrix
* @throws MatrixException if row or column is out of bounds
*/
public void set(int row, int column, double element) throws MatrixException
{
if (validIndicies(row, column))
{
matrix[row][column] = element;
}
}
/**
* Tests for equality of this matrix with another.
* Matrices are equal if they have the same dimensions and all
* elements are equal by ==.
* Note that since the parameter type for the other matrix is <b>Object</b>,
* its reference must be cast to <b>Matrix</b>.
* The parameter's type is <b>Object</b> so that this method overrides the
* <b>equals</b> method in the <b>Object</b> superclass.
* @param other the other matrix to be tested for equality with this matrix
* @return <b>true</b> if the other matrix is equal to this matrix, <b>false</b> otherwise
*/
@Override
public boolean equals(Object other)
{
return Arrays.deepEquals(matrix, ((Matrix)other).getMatrix());
}
/**
* Adds this matrix to another.
* This matrix and the other matrix must have the same dimensions.
* @param other the other matrix to add
* @return a new matrix that is the sum of this matrix and other
* @throws MatrixException if this matrix and the other matrix do not
* have the same dimensions
*/
public Matrix add(Matrix other) throws MatrixException
{
if (this.getRows() != other.getRows() || this.getColumns() != other.getColumns())
{
throw new MatrixException("added matrices do not have same dimensions");
}
Matrix newMatrix = new Matrix(this.getRows(), this.getColumns());
for (int i = 0; i < rows; ++i)
{
for (int j = 0; j < columns; ++j)
{
newMatrix.set(i, j, other.get(i, j) + this.get(i, j));
}
}
return newMatrix;
}
/**
* Multiples this matrix with another.
* The number of columns in this matrix must match the number of rows in the other.
* @param other the other matrix to multiply
* @return a new matrix that is the product of this matrix and other
* @throws MatrixException if the number of columns in this matrix does not match
* the number of rows in the other
*/
public Matrix multiply(final Matrix other)
{
if (this.getColumns() != other.getRows())
{
throw new MatrixException("multiplied matrices are not compatible");
}
Matrix newMatrix = new Matrix(this.getRows(), other.getColumns());
for (int i = 0; i < this.getRows(); i++)
{
for (int j = 0; j < other.getColumns(); j++)
{
for (int k = 0; k < this.getColumns(); k++)
{
newMatrix.set(i, j, newMatrix.get(i, j) + this.get(i, k) * other.get(k, j));
}
}
}
return newMatrix;
}
/**
* Creates a matrix from a data string.
* Note that this method is written without knowing the representation
* details of the matrix.
* Only the constructor and public API method <b>set</b> are used.
* @param string A string containing blank-separated matrix data
* which must parse as double values, or a NumberFormatException will be thrown.
* Each row must be terminated by end-of-line character '\n'.
* @param rows The number of rows in the matrix. If the number of
* rows in the data string is not the same, a MatrixException will be thrown.
* @param columns The number of columns in the matrix. If the number of
* columns in the data string is not the same, a MatrixException will be thrown.
* @return the created matrix.
*/
public static Matrix toMatrix(String string, int rows, int columns) throws IOException {
Matrix m = new Matrix(rows, columns);
BufferedReader reader = new BufferedReader(new StringReader(string));
String rowString = reader.readLine();
int row = 0;
while ( rowString != null ) {
String[] values = rowString.trim().split("\\s+");
for (int column = 0; column < values.length; column++) {
m.set(row, column, Double.parseDouble(values[column]));
}
row++;
rowString = reader.readLine();
}
return m;
}
/**
* Creates a visual representation of this matrix as a string.
* The opposite of <b>toMatrix</b>, this method can be used for debugging.
* Note that this method is written without knowing the representation
* details of the matrix.
* Only the public API methods <b>getRows</b>, <b>getColumns</b>, and
* <b>get</b> are used.
* @return the string representation of this matrix.
*/
public String toString() {
StringBuilder builder = new StringBuilder();
int row = 0;
while ( row < getRows() ) {
int column = 0;
while ( column < getColumns() ) {
builder.append(get(row, column++));
builder.append(" ");
}
builder.append("\n");
row++;
}
return builder.toString();
}
// *****************************************************************
// Your private fields and methods follow here
private boolean validIndicies(int row, int column) throws MatrixException
{
if (row < 0 || row >= rows)
{
throw new MatrixException("row index out of bounds");
}
else if (column < 0 || column >= columns)
{
throw new MatrixException("column index out of bounds");
}
else return true;
}
public double[][] getMatrix()
{
return matrix;
}
}

It would be good to note that the code must also pass this test code, which I am not allowed to modify (therefore please do not review this):

\$\begingroup\$Not sure if that is a problem but at the end of Matrix class there is section which is meant to have private fields and methods but it contains *public* double[][] getMatrix().\$\endgroup\$
– PshemoFeb 4 '16 at 21:37

2

\$\begingroup\$the getMatrix method is very dangerous because it let the user change the shape of the matrix. Consider for example : new Matrix(2,2).getMatrix()[0] = new double[0];\$\endgroup\$
– njzk2Feb 5 '16 at 3:53

1

\$\begingroup\$The unit test has a bug, you can pass the testBounds by just NOT throwing a exception, just pass this to the instructor/teacher.\$\endgroup\$
– FerrybigFeb 5 '16 at 12:02

\$\begingroup\$@Ferrybig Have you tested out this exploit yourself?\$\endgroup\$
– syb0rgFeb 5 '16 at 15:06

It is generating a number of pairs of random matrices of compatible dimensions, computes their product and times the operation. After a warm-up phase for the JVM, the timing results are printed to standard error output. I'm also printing a random element of the result matrix to standard output to avoid the JIT compiler optimizing the entire product computation away.

You'll notice that I have added a static method randomMatrix to the Matrixclass.

Use a contiguous data layout (major)

You are currently storing the matrix data in a 2-dimensional array. This means that you're hopping through your address space as you traverse the matrix elements. It also means that you'll have a double indirect memory access for accessing a matrix element.

It would be better to store all the data in a 1-dimensional array and remap the 2-dimensional indices into that array. This way of storing a matrix is more natural than it might seem at first sight. Look at how you would write down a matrix on paper.

If you were to fill in all the elements, you can either do it row-by-row or column-by-column. If you count how many elements you have already filled in, this will give you the index of the current element in the 1-dimensional array. Another way to think of this is to cut the paper into stripes such that you'll get either row or columns vectors, then glue all those stripes together to a long 1-dimensional stripe. Storing the matrix data as row vectors is known as row major order; storing it as column vectors as column major order. There is no real reason why one layout would be better than the other but once you've settled on one, it is important to write the matrix traversals with this layout in mind (see later). I will use row major order in the following because I find it more natural and it comes closer to how 2-dimensional arrays work in Java. That means that the matrix from above will have the following memory layout.

to map a 2-dimensional index into a 1-dimensional index in the data array.

(Forgive me for having renamed your columns attribute to cols. I couldn't get my fingers used to type the “umn” any more.)

Avoid unnecessary argument checking (minor)

Checking arguments in public functions is good but for your internal use, it adds mostly overhead. In Java, you'll get well-defined behavior for accessing an array out-of-bounds anyway so any additional check doesn't increase security but only produces more descriptive error messages. This is fine for client-facing methods but unneeded for internal use.

Avoid dynamic dispatch by using final or private methods (minor)

Since the accessor functions we'll use internally are now private and private methods are always statically bound, we also save a dynamic call indirection. Since there is no good reason to override get or set (and I'm not a fan of inheritance in general), we can make these final too which might enable the JIT optimizer to devirtualize calls to them, too.

The code is calling getRows and getColumns over and over again even though they will never change during the method. For one, you could access the private attributes rows and columns directly. This would certainly save you the function call but if you're unlucky, the JIT compiler might not see that the attributes cannot change and reload them every time. If instead you cache the values once in local variables, you're telling the JIT compiler that they cannot change and it may optimize accordingly.

Not only will it help the optimizer, I also find the code easier to read.

Note another optimization applied in the above code. There is no need to update product in each iteration. We can accumulate the result in a local variable dot (named such because it is a dot product) and only store it after the innermost loop. However, read on for an even better optimization.

Optimize traversals for linear memory acccess (major)

The hardware really loves iterating over memory in linear order because this enables most efficient caching and cache memory is much faster than main memory on modern hardware. We have already somewhat enabled this by storing our data in a contiguous array but now we have to optimize how we traverse it.

First look at addition because it is really simple. There are two (obvious) ways to iterate the matrix: row-by-row or column-by-column. Since we have decided to store the data in row major order, we really should iterate row-by-row because this will give us linear memory access. (If you find this hard to imagine, sit down with a sheet of paper and do a matrix operation by hand. See how different memory locations are accessed.)

Note how there isn't even a nested loop any more. (At this point, we might want to go back and apply the same trick to randomMatrix.)

For the product, the solution is a little less obvious but incredibly important.

Look at the computation of the dot product in the innermost loop of the method shown above again. We are traversing the lhs matrix row-by-row, which is good. However, the rhs matrix is traversed column-by-column which is not good. The product matrix is also traversed row-by-row but since we don't access it in the innermost loop, this is not so important. Fortunately, we can simply swap the innermost and middle loop to get the desired traversal.

Be sure to exercise this algorithm with pencil and paper to realize that the memory access in the innermost loop is now indeed linear. This gives tremendous performance gains.

Note that the implementation relies on the fact that arrays are default-initialized to all-zeros in Java.

Another minor optimization baked into the above method is that I'm accessing product.data directly rather than through getUnchecked followed by setUnchecked to save one index computation.

Measurements

I have compared the effects of using contiguous storage and changing the multiplication algorithm to have a linear memory access pattern. The following graph shows the results of the benchmark shown at the very beginning for three stages of optimization.

“before” — your original code

“data layout” — optimized for contiguous storage

“memory access” — additionally optimized for linear memory access

The abscissa of the graph shows the problem size. For computing the product of an n×l and an l×m matrix, the problem size is defined (by me) as
$$
\text{problem size} = \sqrt[3]{nlm}
\enspace.
$$

The ordinate shows the performance in mega-FLOPs per second. A FLOP (for floating-point operation) is defined as a floating-point multiplication and addition as it occurs when computing a dot product. (Some people use different definitions.) For given matrix sizes, the number of FLOPs is given by
$$
\text{FLOPs} = nlm
\enspace.
$$
If computing the matrix product takes time t, then the algorithm has achieved a FLOP rate of
$$
\text{rate} = \frac{\text{FLOPs}}{t} = \frac{nlm}{t}
\enspace.
$$
Therefore, in this graph, higher values means better.

It is easy to see that while the contiguous storage helped somewhat, what really rocked the performance is optimizing the memory access pattern. Simply interchanging two lines of code made the algorithm four times faster! You can also see that the optimum is far from reached. In particular, it is embarrassing that the algorithm doesn't become faster for larger problems as one should expect. I must say that I am not an expert in JVM tuning and the benchmark itself might not be 100 % meaningful.

Style

I don't have much to add considering your coding style. Your code is already generally well-written. Just a few minor remarks.

Design for extension

I'm a big fan of the “abstract, final or empty” rule. Since your Matrix is not designed for extension, declare all methods final. Alternatively, declare the entire class as final.

Don't specify unchecked exceptions in throws specifications

Since your MatrixException derives from RuntimeException (as it should), there is no need to declare it in the throws specification. Mentioning it in the JavaDocString is still fine.

\$\begingroup\$Good point about the layout. One comment: using a benchmarking library such as JMH would probably make your test more robust - 5 warmups seems low - the JIT typically compiles a method after 10,000 calls (although it may happen sooner if the method does a lot of computation). Without monitoring when the compilation happens it is hard to say what you are measuring.\$\endgroup\$
– assyliasFeb 5 '16 at 10:18

3

\$\begingroup\$@assylias Using some kind of framework would probably be a good idea. As I've said, I'm not an expert in JVM benchmarking and I'm not familiar with any framework to aid in doing this even though I've heard of them. I did monitor the performance gain during warmup (by printing out all timings) and it seemed they stabilized after three or four runs so I thought 5 should be good. I do believe that there is still a lot of noise in the gathered data but it shows the overall trend I wanted to discuss.\$\endgroup\$
– 5gon12ederFeb 5 '16 at 17:58

boolean validIndices looks suspicious: it never returns false, so why bother to return anything at all? Notice that it also forces you to make a dummy return 0.0 in get. I recommend to make it void and rewrite get as

{
validateIndices(row, column);
return matrix[row][column];
}

I don't see why class methodsadd and multiply should access data via public interface. They already validated their inputs which guarantees that all their accesses are valid, and get validation is wasteful. I would rather bypass get completely.

For performance increase you'd need something like Strassen algorithm. I think it is way beyound the scope of the exercise.

I don't think that the amount of the code could be significantly reduced.

Style

In most java coding styles, { are placed at the end of a line, not at the begining of a new line.

while ( row < getRows() ) { should be while (row < getRows()) {

It is surprising to see a while loop here, and the position of the column++ makes it easily missed. You'd make things easier for your readers by using a for loop instead. You could also access the underlying double[][] directly and use Java8 streams to avoid the loops altogether.

Indicies are Indices, probably a typo

if validIndicies returns a boolean, it should be called areIndicesValid

Major issue

getMatrix returns the underlying double[][]. It makes it possible to modify it, and therefore change the shape of the matrix, and break it:

Input validation

Your getter and setter verify the input, but the constructor does not. It is possible to call new Matrix(-1, -1);

You verify that the matrix have the correct shape, but not that the input is not null. new Matrix(1,1).multiply(null); crashes.

You can add @NotNull or @Nullable to your methods and parameters to indicate if the parameters and result can or are never null. Analyse tools will then tell you when there is a good chance that your code does not respect those cases. This can alleviate some of the necessary input sanitation.

Nota: you do not absolutely have to check for every possible issue with the input. Having a NullPointerException raised when you pass a null object is often the expected behavior. I just add this here as you have defined your own exceptions instead of letting the array bound check happen and the ArrayIndexOutOfBoundException be thrown.

\$\begingroup\$The parenthesis are purely a personal choice, what matters is if I'm consistent. Other than that, you've made some pretty good points!\$\endgroup\$
– syb0rgFeb 5 '16 at 4:52

1

\$\begingroup\$@syb0rg more or less (not entirely personal as it affects the other people working with you). There are official recommendations (cf stackoverflow.com/questions/1334204/… ). You are of course free to customize your formatting rules, but since you'd want everyone on a project to use the same rules, it is usually easier to use those. Plus these are integrated in the formatting tool of most modern IDEs.\$\endgroup\$
– njzk2Feb 5 '16 at 14:49

Besides @vnp's suggestions, you should have specific exceptions, instead of storing that information in the reason. Some examples:

MatrixSizeMismatchException when adding and multiplying matrices with different sizes. The specific detail (e.g. both dimensions must be the same for addition, or rows1 == columns2 for multiplication) can be left on the reason, since whoever uses the API will quickly observe the operation that was being made via the stack trace.

MatrixIndicesOutOfBoundsException as the name states, when getting or setting elements outside the bounds of the matrix.

The names suggested are a bit verbose, but transmit the idea well. Name at your own discretion.

Misc Suggestions:

A nice addition would be having in-place operations, that is, methods that mutate the current matrix.

toMatrix might be more meaningful as fromString, since it's a static method (Matrix.toMatrix() is redundant, but Matrix.fromString() describes exactly what it does too).

Similarly, you could add a fromArray(double[][] data, bool columnMajor) (note the boolean there, might aswell be a enum MatrixMode {COLUMN_MAJOR, ROW_MAJOR}, or be ommited if you document the expected format)