View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math3.linear;
19  
20  import java.io.Serializable;
21  import java.util.Arrays;
22  
23  import org.apache.commons.math3.exception.DimensionMismatchException;
24  import org.apache.commons.math3.exception.NoDataException;
25  import org.apache.commons.math3.exception.NotStrictlyPositiveException;
26  import org.apache.commons.math3.exception.NullArgumentException;
27  import org.apache.commons.math3.exception.NumberIsTooSmallException;
28  import org.apache.commons.math3.exception.OutOfRangeException;
29  import org.apache.commons.math3.exception.util.LocalizedFormats;
30  import org.apache.commons.math3.util.FastMath;
31  import org.apache.commons.math3.util.MathUtils;
32  
33  /**
34   * Cache-friendly implementation of RealMatrix using a flat arrays to store
35   * square blocks of the matrix.
36   * <p>
37   * This implementation is specially designed to be cache-friendly. Square blocks are
38   * stored as small arrays and allow efficient traversal of data both in row major direction
39   * and columns major direction, one block at a time. This greatly increases performances
40   * for algorithms that use crossed directions loops like multiplication or transposition.
41   * </p>
42   * <p>
43   * The size of square blocks is a static parameter. It may be tuned according to the cache
44   * size of the target computer processor. As a rule of thumbs, it should be the largest
45   * value that allows three blocks to be simultaneously cached (this is necessary for example
46   * for matrix multiplication). The default value is to use 52x52 blocks which is well suited
47   * for processors with 64k L1 cache (one block holds 2704 values or 21632 bytes). This value
48   * could be lowered to 36x36 for processors with 32k L1 cache.
49   * </p>
50   * <p>
51   * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
52   * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
53   * blocks are flattened in row major order in single dimension arrays which are therefore
54   * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
55   * organized in row major order.
56   * </p>
57   * <p>
58   * As an example, for a block size of 52x52, a 100x60 matrix would be stored in 4 blocks.
59   * Block 0 would be a double[2704] array holding the upper left 52x52 square, block 1 would be
60   * a double[416] array holding the upper right 52x8 rectangle, block 2 would be a double[2496]
61   * array holding the lower left 48x52 rectangle and block 3 would be a double[384] array
62   * holding the lower right 48x8 rectangle.
63   * </p>
64   * <p>
65   * The layout complexity overhead versus simple mapping of matrices to java
66   * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
67   * to up to 3-fold improvements for matrices of moderate to large size.
68   * </p>
69   * @version $Id: BlockRealMatrix.java 1416643 2012-12-03 19:37:14Z tn $
70   * @since 2.0
71   */
72  public class BlockRealMatrix extends AbstractRealMatrix implements Serializable {
73      /** Block size. */
74      public static final int BLOCK_SIZE = 52;
75      /** Serializable version identifier */
76      private static final long serialVersionUID = 4991895511313664478L;
77      /** Blocks of matrix entries. */
78      private final double blocks[][];
79      /** Number of rows of the matrix. */
80      private final int rows;
81      /** Number of columns of the matrix. */
82      private final int columns;
83      /** Number of block rows of the matrix. */
84      private final int blockRows;
85      /** Number of block columns of the matrix. */
86      private final int blockColumns;
87  
88      /**
89       * Create a new matrix with the supplied row and column dimensions.
90       *
91       * @param rows  the number of rows in the new matrix
92       * @param columns  the number of columns in the new matrix
93       * @throws NotStrictlyPositiveException if row or column dimension is not
94       * positive.
95       */
96      public BlockRealMatrix(final int rows, final int columns)
97          throws NotStrictlyPositiveException {
98          super(rows, columns);
99          this.rows = rows;
100         this.columns = columns;
101 
102         // number of blocks
103         blockRows = (rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
104         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
105 
106         // allocate storage blocks, taking care of smaller ones at right and bottom
107         blocks = createBlocksLayout(rows, columns);
108     }
109 
110     /**
111      * Create a new dense matrix copying entries from raw layout data.
112      * <p>The input array <em>must</em> already be in raw layout.</p>
113      * <p>Calling this constructor is equivalent to call:
114      * <pre>matrix = new BlockRealMatrix(rawData.length, rawData[0].length,
115      *                                   toBlocksLayout(rawData), false);</pre>
116      * </p>
117      *
118      * @param rawData data for new matrix, in raw layout
119      * @throws DimensionMismatchException if the shape of {@code blockData} is
120      * inconsistent with block layout.
121      * @throws NotStrictlyPositiveException if row or column dimension is not
122      * positive.
123      * @see #BlockRealMatrix(int, int, double[][], boolean)
124      */
125     public BlockRealMatrix(final double[][] rawData)
126         throws DimensionMismatchException, NotStrictlyPositiveException {
127         this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
128     }
129 
130     /**
131      * Create a new dense matrix copying entries from block layout data.
132      * <p>The input array <em>must</em> already be in blocks layout.</p>
133      *
134      * @param rows Number of rows in the new matrix.
135      * @param columns Number of columns in the new matrix.
136      * @param blockData data for new matrix
137      * @param copyArray Whether the input array will be copied or referenced.
138      * @throws DimensionMismatchException if the shape of {@code blockData} is
139      * inconsistent with block layout.
140      * @throws NotStrictlyPositiveException if row or column dimension is not
141      * positive.
142      * @see #createBlocksLayout(int, int)
143      * @see #toBlocksLayout(double[][])
144      * @see #BlockRealMatrix(double[][])
145      */
146     public BlockRealMatrix(final int rows, final int columns,
147                            final double[][] blockData, final boolean copyArray)
148         throws DimensionMismatchException, NotStrictlyPositiveException {
149         super(rows, columns);
150         this.rows = rows;
151         this.columns = columns;
152 
153         // number of blocks
154         blockRows = (rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
155         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
156 
157         if (copyArray) {
158             // allocate storage blocks, taking care of smaller ones at right and bottom
159             blocks = new double[blockRows * blockColumns][];
160         } else {
161             // reference existing array
162             blocks = blockData;
163         }
164 
165         int index = 0;
166         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
167             final int iHeight = blockHeight(iBlock);
168             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
169                 if (blockData[index].length != iHeight * blockWidth(jBlock)) {
170                     throw new DimensionMismatchException(blockData[index].length,
171                                                          iHeight * blockWidth(jBlock));
172                 }
173                 if (copyArray) {
174                     blocks[index] = blockData[index].clone();
175                 }
176             }
177         }
178     }
179 
180     /**
181      * Convert a data array from raw layout to blocks layout.
182      * <p>
183      * Raw layout is the straightforward layout where element at row i and
184      * column j is in array element <code>rawData[i][j]</code>. Blocks layout
185      * is the layout used in {@link BlockRealMatrix} instances, where the matrix
186      * is split in square blocks (except at right and bottom side where blocks may
187      * be rectangular to fit matrix size) and each block is stored in a flattened
188      * one-dimensional array.
189      * </p>
190      * <p>
191      * This method creates an array in blocks layout from an input array in raw layout.
192      * It can be used to provide the array argument of the {@link
193      * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
194      * </p>
195      * @param rawData Data array in raw layout.
196      * @return a new data array containing the same entries but in blocks layout.
197      * @throws DimensionMismatchException if {@code rawData} is not rectangular.
198      * @see #createBlocksLayout(int, int)
199      * @see #BlockRealMatrix(int, int, double[][], boolean)
200      */
201     public static double[][] toBlocksLayout(final double[][] rawData)
202         throws DimensionMismatchException {
203         final int rows = rawData.length;
204         final int columns = rawData[0].length;
205         final int blockRows = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
206         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
207 
208         // safety checks
209         for (int i = 0; i < rawData.length; ++i) {
210             final int length = rawData[i].length;
211             if (length != columns) {
212                 throw new DimensionMismatchException(columns, length);
213             }
214         }
215 
216         // convert array
217         final double[][] blocks = new double[blockRows * blockColumns][];
218         int blockIndex = 0;
219         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
220             final int pStart = iBlock * BLOCK_SIZE;
221             final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
222             final int iHeight = pEnd - pStart;
223             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
224                 final int qStart = jBlock * BLOCK_SIZE;
225                 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
226                 final int jWidth = qEnd - qStart;
227 
228                 // allocate new block
229                 final double[] block = new double[iHeight * jWidth];
230                 blocks[blockIndex] = block;
231 
232                 // copy data
233                 int index = 0;
234                 for (int p = pStart; p < pEnd; ++p) {
235                     System.arraycopy(rawData[p], qStart, block, index, jWidth);
236                     index += jWidth;
237                 }
238                 ++blockIndex;
239             }
240         }
241 
242         return blocks;
243     }
244 
245     /**
246      * Create a data array in blocks layout.
247      * <p>
248      * This method can be used to create the array argument of the {@link
249      * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
250      * </p>
251      * @param rows Number of rows in the new matrix.
252      * @param columns Number of columns in the new matrix.
253      * @return a new data array in blocks layout.
254      * @see #toBlocksLayout(double[][])
255      * @see #BlockRealMatrix(int, int, double[][], boolean)
256      */
257     public static double[][] createBlocksLayout(final int rows, final int columns) {
258         final int blockRows = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
259         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
260 
261         final double[][] blocks = new double[blockRows * blockColumns][];
262         int blockIndex = 0;
263         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
264             final int pStart = iBlock * BLOCK_SIZE;
265             final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
266             final int iHeight = pEnd - pStart;
267             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
268                 final int qStart = jBlock * BLOCK_SIZE;
269                 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
270                 final int jWidth = qEnd - qStart;
271                 blocks[blockIndex] = new double[iHeight * jWidth];
272                 ++blockIndex;
273             }
274         }
275 
276         return blocks;
277     }
278 
279     /** {@inheritDoc} */
280     @Override
281     public BlockRealMatrix createMatrix(final int rowDimension,
282                                         final int columnDimension)
283         throws NotStrictlyPositiveException {
284         return new BlockRealMatrix(rowDimension, columnDimension);
285     }
286 
287     /** {@inheritDoc} */
288     @Override
289     public BlockRealMatrix copy() {
290         // create an empty matrix
291         BlockRealMatrix copied = new BlockRealMatrix(rows, columns);
292 
293         // copy the blocks
294         for (int i = 0; i < blocks.length; ++i) {
295             System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
296         }
297 
298         return copied;
299     }
300 
301     /** {@inheritDoc} */
302     @Override
303     public BlockRealMatrix add(final RealMatrix m)
304         throws MatrixDimensionMismatchException {
305         try {
306             return add((BlockRealMatrix) m);
307         } catch (ClassCastException cce) {
308             // safety check
309             MatrixUtils.checkAdditionCompatible(this, m);
310 
311             final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
312 
313             // perform addition block-wise, to ensure good cache behavior
314             int blockIndex = 0;
315             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
316                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
317 
318                     // perform addition on the current block
319                     final double[] outBlock = out.blocks[blockIndex];
320                     final double[] tBlock   = blocks[blockIndex];
321                     final int pStart = iBlock * BLOCK_SIZE;
322                     final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
323                     final int qStart = jBlock * BLOCK_SIZE;
324                     final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
325                     int k = 0;
326                     for (int p = pStart; p < pEnd; ++p) {
327                         for (int q = qStart; q < qEnd; ++q) {
328                             outBlock[k] = tBlock[k] + m.getEntry(p, q);
329                             ++k;
330                         }
331                     }
332                     // go to next block
333                     ++blockIndex;
334                 }
335             }
336 
337             return out;
338         }
339     }
340 
341     /**
342      * Compute the sum of this matrix and {@code m}.
343      *
344      * @param m Matrix to be added.
345      * @return {@code this} + m.
346      * @throws MatrixDimensionMismatchException if {@code m} is not the same
347      * size as this matrix.
348      */
349     public BlockRealMatrix add(final BlockRealMatrix m)
350         throws MatrixDimensionMismatchException {
351         // safety check
352         MatrixUtils.checkAdditionCompatible(this, m);
353 
354         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
355 
356         // perform addition block-wise, to ensure good cache behavior
357         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
358             final double[] outBlock = out.blocks[blockIndex];
359             final double[] tBlock = blocks[blockIndex];
360             final double[] mBlock = m.blocks[blockIndex];
361             for (int k = 0; k < outBlock.length; ++k) {
362                 outBlock[k] = tBlock[k] + mBlock[k];
363             }
364         }
365 
366         return out;
367     }
368 
369     /** {@inheritDoc} */
370     @Override
371     public BlockRealMatrix subtract(final RealMatrix m)
372         throws MatrixDimensionMismatchException {
373         try {
374             return subtract((BlockRealMatrix) m);
375         } catch (ClassCastException cce) {
376             // safety check
377             MatrixUtils.checkSubtractionCompatible(this, m);
378 
379             final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
380 
381             // perform subtraction block-wise, to ensure good cache behavior
382             int blockIndex = 0;
383             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
384                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
385 
386                     // perform subtraction on the current block
387                     final double[] outBlock = out.blocks[blockIndex];
388                     final double[] tBlock = blocks[blockIndex];
389                     final int pStart = iBlock * BLOCK_SIZE;
390                     final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
391                     final int qStart = jBlock * BLOCK_SIZE;
392                     final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
393                     int k = 0;
394                     for (int p = pStart; p < pEnd; ++p) {
395                         for (int q = qStart; q < qEnd; ++q) {
396                             outBlock[k] = tBlock[k] - m.getEntry(p, q);
397                             ++k;
398                         }
399                     }
400                     // go to next block
401                     ++blockIndex;
402                 }
403             }
404 
405             return out;
406         }
407     }
408 
409     /**
410      * Subtract {@code m} from this matrix.
411      *
412      * @param m Matrix to be subtracted.
413      * @return {@code this} - m.
414      * @throws MatrixDimensionMismatchException if {@code m} is not the
415      * same size as this matrix.
416      */
417     public BlockRealMatrix subtract(final BlockRealMatrix m)
418         throws MatrixDimensionMismatchException {
419         // safety check
420         MatrixUtils.checkSubtractionCompatible(this, m);
421 
422         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
423 
424         // perform subtraction block-wise, to ensure good cache behavior
425         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
426             final double[] outBlock = out.blocks[blockIndex];
427             final double[] tBlock = blocks[blockIndex];
428             final double[] mBlock = m.blocks[blockIndex];
429             for (int k = 0; k < outBlock.length; ++k) {
430                 outBlock[k] = tBlock[k] - mBlock[k];
431             }
432         }
433 
434         return out;
435     }
436 
437     /** {@inheritDoc} */
438     @Override
439     public BlockRealMatrix scalarAdd(final double d) {
440 
441         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
442 
443         // perform subtraction block-wise, to ensure good cache behavior
444         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
445             final double[] outBlock = out.blocks[blockIndex];
446             final double[] tBlock = blocks[blockIndex];
447             for (int k = 0; k < outBlock.length; ++k) {
448                 outBlock[k] = tBlock[k] + d;
449             }
450         }
451 
452         return out;
453     }
454 
455     /** {@inheritDoc} */
456     @Override
457     public RealMatrix scalarMultiply(final double d) {
458         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
459 
460         // perform subtraction block-wise, to ensure good cache behavior
461         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
462             final double[] outBlock = out.blocks[blockIndex];
463             final double[] tBlock = blocks[blockIndex];
464             for (int k = 0; k < outBlock.length; ++k) {
465                 outBlock[k] = tBlock[k] * d;
466             }
467         }
468 
469         return out;
470     }
471 
472     /** {@inheritDoc} */
473     @Override
474     public BlockRealMatrix multiply(final RealMatrix m)
475         throws DimensionMismatchException {
476         try {
477             return multiply((BlockRealMatrix) m);
478         } catch (ClassCastException cce) {
479             // safety check
480             MatrixUtils.checkMultiplicationCompatible(this, m);
481 
482             final BlockRealMatrix out = new BlockRealMatrix(rows, m.getColumnDimension());
483 
484             // perform multiplication block-wise, to ensure good cache behavior
485             int blockIndex = 0;
486             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
487                 final int pStart = iBlock * BLOCK_SIZE;
488                 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
489 
490                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
491                     final int qStart = jBlock * BLOCK_SIZE;
492                     final int qEnd = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
493 
494                     // select current block
495                     final double[] outBlock = out.blocks[blockIndex];
496 
497                     // perform multiplication on current block
498                     for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
499                         final int kWidth = blockWidth(kBlock);
500                         final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
501                         final int rStart = kBlock * BLOCK_SIZE;
502                         int k = 0;
503                         for (int p = pStart; p < pEnd; ++p) {
504                             final int lStart = (p - pStart) * kWidth;
505                             final int lEnd = lStart + kWidth;
506                             for (int q = qStart; q < qEnd; ++q) {
507                                 double sum = 0;
508                                 int r = rStart;
509                                 for (int l = lStart; l < lEnd; ++l) {
510                                     sum += tBlock[l] * m.getEntry(r, q);
511                                     ++r;
512                                 }
513                                 outBlock[k] += sum;
514                                 ++k;
515                             }
516                         }
517                     }
518                     // go to next block
519                     ++blockIndex;
520                 }
521             }
522 
523             return out;
524         }
525     }
526 
527     /**
528      * Returns the result of postmultiplying this by {@code m}.
529      *
530      * @param m Matrix to postmultiply by.
531      * @return {@code this} * m.
532      * @throws DimensionMismatchException if the matrices are not compatible.
533      */
534     public BlockRealMatrix multiply(BlockRealMatrix m)
535         throws DimensionMismatchException {
536         // safety check
537         MatrixUtils.checkMultiplicationCompatible(this, m);
538 
539         final BlockRealMatrix out = new BlockRealMatrix(rows, m.columns);
540 
541         // perform multiplication block-wise, to ensure good cache behavior
542         int blockIndex = 0;
543         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
544 
545             final int pStart = iBlock * BLOCK_SIZE;
546             final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
547 
548             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
549                 final int jWidth = out.blockWidth(jBlock);
550                 final int jWidth2 = jWidth  + jWidth;
551                 final int jWidth3 = jWidth2 + jWidth;
552                 final int jWidth4 = jWidth3 + jWidth;
553 
554                 // select current block
555                 final double[] outBlock = out.blocks[blockIndex];
556 
557                 // perform multiplication on current block
558                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
559                     final int kWidth = blockWidth(kBlock);
560                     final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
561                     final double[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
562                     int k = 0;
563                     for (int p = pStart; p < pEnd; ++p) {
564                         final int lStart = (p - pStart) * kWidth;
565                         final int lEnd = lStart + kWidth;
566                         for (int nStart = 0; nStart < jWidth; ++nStart) {
567                             double sum = 0;
568                             int l = lStart;
569                             int n = nStart;
570                             while (l < lEnd - 3) {
571                                 sum += tBlock[l] * mBlock[n] +
572                                        tBlock[l + 1] * mBlock[n + jWidth] +
573                                        tBlock[l + 2] * mBlock[n + jWidth2] +
574                                        tBlock[l + 3] * mBlock[n + jWidth3];
575                                 l += 4;
576                                 n += jWidth4;
577                             }
578                             while (l < lEnd) {
579                                 sum += tBlock[l++] * mBlock[n];
580                                 n += jWidth;
581                             }
582                             outBlock[k] += sum;
583                             ++k;
584                         }
585                     }
586                 }
587                 // go to next block
588                 ++blockIndex;
589             }
590         }
591 
592         return out;
593     }
594 
595     /** {@inheritDoc} */
596     @Override
597     public double[][] getData() {
598         final double[][] data = new double[getRowDimension()][getColumnDimension()];
599         final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
600 
601         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
602             final int pStart = iBlock * BLOCK_SIZE;
603             final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
604             int regularPos = 0;
605             int lastPos = 0;
606             for (int p = pStart; p < pEnd; ++p) {
607                 final double[] dataP = data[p];
608                 int blockIndex = iBlock * blockColumns;
609                 int dataPos = 0;
610                 for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
611                     System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
612                     dataPos += BLOCK_SIZE;
613                 }
614                 System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
615                 regularPos += BLOCK_SIZE;
616                 lastPos    += lastColumns;
617             }
618         }
619 
620         return data;
621     }
622 
623     /** {@inheritDoc} */
624     @Override
625     public double getNorm() {
626         final double[] colSums = new double[BLOCK_SIZE];
627         double maxColSum = 0;
628         for (int jBlock = 0; jBlock < blockColumns; jBlock++) {
629             final int jWidth = blockWidth(jBlock);
630             Arrays.fill(colSums, 0, jWidth, 0.0);
631             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
632                 final int iHeight = blockHeight(iBlock);
633                 final double[] block = blocks[iBlock * blockColumns + jBlock];
634                 for (int j = 0; j < jWidth; ++j) {
635                     double sum = 0;
636                     for (int i = 0; i < iHeight; ++i) {
637                         sum += FastMath.abs(block[i * jWidth + j]);
638                     }
639                     colSums[j] += sum;
640                 }
641             }
642             for (int j = 0; j < jWidth; ++j) {
643                 maxColSum = FastMath.max(maxColSum, colSums[j]);
644             }
645         }
646         return maxColSum;
647     }
648 
649     /** {@inheritDoc} */
650     @Override
651     public double getFrobeniusNorm() {
652         double sum2 = 0;
653         for (int blockIndex = 0; blockIndex < blocks.length; ++blockIndex) {
654             for (final double entry : blocks[blockIndex]) {
655                 sum2 += entry * entry;
656             }
657         }
658         return FastMath.sqrt(sum2);
659     }
660 
661     /** {@inheritDoc} */
662     @Override
663     public BlockRealMatrix getSubMatrix(final int startRow, final int endRow,
664                                         final int startColumn,
665                                         final int endColumn)
666         throws OutOfRangeException, NumberIsTooSmallException {
667         // safety checks
668         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
669 
670         // create the output matrix
671         final BlockRealMatrix out =
672             new BlockRealMatrix(endRow - startRow + 1, endColumn - startColumn + 1);
673 
674         // compute blocks shifts
675         final int blockStartRow = startRow / BLOCK_SIZE;
676         final int rowsShift = startRow % BLOCK_SIZE;
677         final int blockStartColumn = startColumn / BLOCK_SIZE;
678         final int columnsShift = startColumn % BLOCK_SIZE;
679 
680         // perform extraction block-wise, to ensure good cache behavior
681         int pBlock = blockStartRow;
682         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
683             final int iHeight = out.blockHeight(iBlock);
684             int qBlock = blockStartColumn;
685             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
686                 final int jWidth = out.blockWidth(jBlock);
687 
688                 // handle one block of the output matrix
689                 final int outIndex = iBlock * out.blockColumns + jBlock;
690                 final double[] outBlock = out.blocks[outIndex];
691                 final int index = pBlock * blockColumns + qBlock;
692                 final int width = blockWidth(qBlock);
693 
694                 final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
695                 final int widthExcess = jWidth + columnsShift - BLOCK_SIZE;
696                 if (heightExcess > 0) {
697                     // the submatrix block spans on two blocks rows from the original matrix
698                     if (widthExcess > 0) {
699                         // the submatrix block spans on two blocks columns from the original matrix
700                         final int width2 = blockWidth(qBlock + 1);
701                         copyBlockPart(blocks[index], width,
702                                       rowsShift, BLOCK_SIZE,
703                                       columnsShift, BLOCK_SIZE,
704                                       outBlock, jWidth, 0, 0);
705                         copyBlockPart(blocks[index + 1], width2,
706                                       rowsShift, BLOCK_SIZE,
707                                       0, widthExcess,
708                                       outBlock, jWidth, 0, jWidth - widthExcess);
709                         copyBlockPart(blocks[index + blockColumns], width,
710                                       0, heightExcess,
711                                       columnsShift, BLOCK_SIZE,
712                                       outBlock, jWidth, iHeight - heightExcess, 0);
713                         copyBlockPart(blocks[index + blockColumns + 1], width2,
714                                       0, heightExcess,
715                                       0, widthExcess,
716                                       outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
717                     } else {
718                         // the submatrix block spans on one block column from the original matrix
719                         copyBlockPart(blocks[index], width,
720                                       rowsShift, BLOCK_SIZE,
721                                       columnsShift, jWidth + columnsShift,
722                                       outBlock, jWidth, 0, 0);
723                         copyBlockPart(blocks[index + blockColumns], width,
724                                       0, heightExcess,
725                                       columnsShift, jWidth + columnsShift,
726                                       outBlock, jWidth, iHeight - heightExcess, 0);
727                     }
728                 } else {
729                     // the submatrix block spans on one block row from the original matrix
730                     if (widthExcess > 0) {
731                         // the submatrix block spans on two blocks columns from the original matrix
732                         final int width2 = blockWidth(qBlock + 1);
733                         copyBlockPart(blocks[index], width,
734                                       rowsShift, iHeight + rowsShift,
735                                       columnsShift, BLOCK_SIZE,
736                                       outBlock, jWidth, 0, 0);
737                         copyBlockPart(blocks[index + 1], width2,
738                                       rowsShift, iHeight + rowsShift,
739                                       0, widthExcess,
740                                       outBlock, jWidth, 0, jWidth - widthExcess);
741                     } else {
742                         // the submatrix block spans on one block column from the original matrix
743                         copyBlockPart(blocks[index], width,
744                                       rowsShift, iHeight + rowsShift,
745                                       columnsShift, jWidth + columnsShift,
746                                       outBlock, jWidth, 0, 0);
747                     }
748                }
749                 ++qBlock;
750             }
751             ++pBlock;
752         }
753 
754         return out;
755     }
756 
757     /**
758      * Copy a part of a block into another one
759      * <p>This method can be called only when the specified part fits in both
760      * blocks, no verification is done here.</p>
761      * @param srcBlock source block
762      * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
763      * @param srcStartRow start row in the source block
764      * @param srcEndRow end row (exclusive) in the source block
765      * @param srcStartColumn start column in the source block
766      * @param srcEndColumn end column (exclusive) in the source block
767      * @param dstBlock destination block
768      * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
769      * @param dstStartRow start row in the destination block
770      * @param dstStartColumn start column in the destination block
771      */
772     private void copyBlockPart(final double[] srcBlock, final int srcWidth,
773                                final int srcStartRow, final int srcEndRow,
774                                final int srcStartColumn, final int srcEndColumn,
775                                final double[] dstBlock, final int dstWidth,
776                                final int dstStartRow, final int dstStartColumn) {
777         final int length = srcEndColumn - srcStartColumn;
778         int srcPos = srcStartRow * srcWidth + srcStartColumn;
779         int dstPos = dstStartRow * dstWidth + dstStartColumn;
780         for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
781             System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
782             srcPos += srcWidth;
783             dstPos += dstWidth;
784         }
785     }
786 
787     /** {@inheritDoc} */
788     @Override
789     public void setSubMatrix(final double[][] subMatrix, final int row,
790                              final int column)
791         throws OutOfRangeException, NoDataException, NullArgumentException,
792         DimensionMismatchException {
793         // safety checks
794         MathUtils.checkNotNull(subMatrix);
795         final int refLength = subMatrix[0].length;
796         if (refLength == 0) {
797             throw new NoDataException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
798         }
799         final int endRow = row + subMatrix.length - 1;
800         final int endColumn = column + refLength - 1;
801         MatrixUtils.checkSubMatrixIndex(this, row, endRow, column, endColumn);
802         for (final double[] subRow : subMatrix) {
803             if (subRow.length != refLength) {
804                 throw new DimensionMismatchException(refLength, subRow.length);
805             }
806         }
807 
808         // compute blocks bounds
809         final int blockStartRow = row / BLOCK_SIZE;
810         final int blockEndRow = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
811         final int blockStartColumn = column / BLOCK_SIZE;
812         final int blockEndColumn = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
813 
814         // perform copy block-wise, to ensure good cache behavior
815         for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
816             final int iHeight = blockHeight(iBlock);
817             final int firstRow = iBlock * BLOCK_SIZE;
818             final int iStart = FastMath.max(row,    firstRow);
819             final int iEnd = FastMath.min(endRow + 1, firstRow + iHeight);
820 
821             for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
822                 final int jWidth = blockWidth(jBlock);
823                 final int firstColumn = jBlock * BLOCK_SIZE;
824                 final int jStart = FastMath.max(column,    firstColumn);
825                 final int jEnd = FastMath.min(endColumn + 1, firstColumn + jWidth);
826                 final int jLength = jEnd - jStart;
827 
828                 // handle one block, row by row
829                 final double[] block = blocks[iBlock * blockColumns + jBlock];
830                 for (int i = iStart; i < iEnd; ++i) {
831                     System.arraycopy(subMatrix[i - row], jStart - column,
832                                      block, (i - firstRow) * jWidth + (jStart - firstColumn),
833                                      jLength);
834                 }
835 
836             }
837         }
838     }
839 
840     /** {@inheritDoc} */
841     @Override
842     public BlockRealMatrix getRowMatrix(final int row)
843         throws OutOfRangeException {
844         MatrixUtils.checkRowIndex(this, row);
845         final BlockRealMatrix out = new BlockRealMatrix(1, columns);
846 
847         // perform copy block-wise, to ensure good cache behavior
848         final int iBlock = row / BLOCK_SIZE;
849         final int iRow = row - iBlock * BLOCK_SIZE;
850         int outBlockIndex = 0;
851         int outIndex = 0;
852         double[] outBlock = out.blocks[outBlockIndex];
853         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
854             final int jWidth = blockWidth(jBlock);
855             final double[] block = blocks[iBlock * blockColumns + jBlock];
856             final int available = outBlock.length - outIndex;
857             if (jWidth > available) {
858                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
859                 outBlock = out.blocks[++outBlockIndex];
860                 System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
861                 outIndex = jWidth - available;
862             } else {
863                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
864                 outIndex += jWidth;
865             }
866         }
867 
868         return out;
869     }
870 
871     /** {@inheritDoc} */
872     @Override
873     public void setRowMatrix(final int row, final RealMatrix matrix)
874         throws OutOfRangeException, MatrixDimensionMismatchException {
875         try {
876             setRowMatrix(row, (BlockRealMatrix) matrix);
877         } catch (ClassCastException cce) {
878             super.setRowMatrix(row, matrix);
879         }
880     }
881 
882     /**
883      * Sets the entries in row number <code>row</code>
884      * as a row matrix.  Row indices start at 0.
885      *
886      * @param row the row to be set
887      * @param matrix row matrix (must have one row and the same number of columns
888      * as the instance)
889      * @throws OutOfRangeException if the specified row index is invalid.
890      * @throws MatrixDimensionMismatchException if the matrix dimensions do
891      * not match one instance row.
892      */
893     public void setRowMatrix(final int row, final BlockRealMatrix matrix)
894         throws OutOfRangeException, MatrixDimensionMismatchException {
895         MatrixUtils.checkRowIndex(this, row);
896         final int nCols = getColumnDimension();
897         if ((matrix.getRowDimension() != 1) ||
898             (matrix.getColumnDimension() != nCols)) {
899             throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
900                                                        matrix.getColumnDimension(),
901                                                        1, nCols);
902         }
903 
904         // perform copy block-wise, to ensure good cache behavior
905         final int iBlock = row / BLOCK_SIZE;
906         final int iRow = row - iBlock * BLOCK_SIZE;
907         int mBlockIndex = 0;
908         int mIndex = 0;
909         double[] mBlock = matrix.blocks[mBlockIndex];
910         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
911             final int jWidth = blockWidth(jBlock);
912             final double[] block = blocks[iBlock * blockColumns + jBlock];
913             final int available  = mBlock.length - mIndex;
914             if (jWidth > available) {
915                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
916                 mBlock = matrix.blocks[++mBlockIndex];
917                 System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
918                 mIndex = jWidth - available;
919             } else {
920                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
921                 mIndex += jWidth;
922            }
923         }
924     }
925 
926     /** {@inheritDoc} */
927     @Override
928     public BlockRealMatrix getColumnMatrix(final int column)
929         throws OutOfRangeException {
930         MatrixUtils.checkColumnIndex(this, column);
931         final BlockRealMatrix out = new BlockRealMatrix(rows, 1);
932 
933         // perform copy block-wise, to ensure good cache behavior
934         final int jBlock = column / BLOCK_SIZE;
935         final int jColumn = column - jBlock * BLOCK_SIZE;
936         final int jWidth = blockWidth(jBlock);
937         int outBlockIndex = 0;
938         int outIndex = 0;
939         double[] outBlock = out.blocks[outBlockIndex];
940         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
941             final int iHeight = blockHeight(iBlock);
942             final double[] block = blocks[iBlock * blockColumns + jBlock];
943             for (int i = 0; i < iHeight; ++i) {
944                 if (outIndex >= outBlock.length) {
945                     outBlock = out.blocks[++outBlockIndex];
946                     outIndex = 0;
947                 }
948                 outBlock[outIndex++] = block[i * jWidth + jColumn];
949             }
950         }
951 
952         return out;
953     }
954 
955     /** {@inheritDoc} */
956     @Override
957     public void setColumnMatrix(final int column, final RealMatrix matrix)
958         throws OutOfRangeException, MatrixDimensionMismatchException {
959         try {
960             setColumnMatrix(column, (BlockRealMatrix) matrix);
961         } catch (ClassCastException cce) {
962             super.setColumnMatrix(column, matrix);
963         }
964     }
965 
966     /**
967      * Sets the entries in column number <code>column</code>
968      * as a column matrix.  Column indices start at 0.
969      *
970      * @param column the column to be set
971      * @param matrix column matrix (must have one column and the same number of rows
972      * as the instance)
973      * @throws OutOfRangeException if the specified column index is invalid.
974      * @throws MatrixDimensionMismatchException if the matrix dimensions do
975      * not match one instance column.
976      */
977     void setColumnMatrix(final int column, final BlockRealMatrix matrix)
978         throws OutOfRangeException, MatrixDimensionMismatchException {
979         MatrixUtils.checkColumnIndex(this, column);
980         final int nRows = getRowDimension();
981         if ((matrix.getRowDimension() != nRows) ||
982             (matrix.getColumnDimension() != 1)) {
983             throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
984                                                        matrix.getColumnDimension(),
985                                                        nRows, 1);
986         }
987 
988         // perform copy block-wise, to ensure good cache behavior
989         final int jBlock = column / BLOCK_SIZE;
990         final int jColumn = column - jBlock * BLOCK_SIZE;
991         final int jWidth = blockWidth(jBlock);
992         int mBlockIndex = 0;
993         int mIndex = 0;
994         double[] mBlock = matrix.blocks[mBlockIndex];
995         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
996             final int iHeight = blockHeight(iBlock);
997             final double[] block = blocks[iBlock * blockColumns + jBlock];
998             for (int i = 0; i < iHeight; ++i) {
999                 if (mIndex >= mBlock.length) {
1000                     mBlock = matrix.blocks[++mBlockIndex];
1001                     mIndex = 0;
1002                 }
1003                 block[i * jWidth + jColumn] = mBlock[mIndex++];
1004             }
1005         }
1006     }
1007 
1008     /** {@inheritDoc} */
1009     @Override
1010     public RealVector getRowVector(final int row)
1011         throws OutOfRangeException {
1012         MatrixUtils.checkRowIndex(this, row);
1013         final double[] outData = new double[columns];
1014 
1015         // perform copy block-wise, to ensure good cache behavior
1016         final int iBlock = row / BLOCK_SIZE;
1017         final int iRow = row - iBlock * BLOCK_SIZE;
1018         int outIndex = 0;
1019         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1020             final int jWidth = blockWidth(jBlock);
1021             final double[] block = blocks[iBlock * blockColumns + jBlock];
1022             System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1023             outIndex += jWidth;
1024         }
1025 
1026         return new ArrayRealVector(outData, false);
1027     }
1028 
1029     /** {@inheritDoc} */
1030     @Override
1031     public void setRowVector(final int row, final RealVector vector)
1032         throws OutOfRangeException, MatrixDimensionMismatchException {
1033         try {
1034             setRow(row, ((ArrayRealVector) vector).getDataRef());
1035         } catch (ClassCastException cce) {
1036             super.setRowVector(row, vector);
1037         }
1038     }
1039 
1040     /** {@inheritDoc} */
1041     @Override
1042     public RealVector getColumnVector(final int column)
1043         throws OutOfRangeException {
1044         MatrixUtils.checkColumnIndex(this, column);
1045         final double[] outData = new double[rows];
1046 
1047         // perform copy block-wise, to ensure good cache behavior
1048         final int jBlock = column / BLOCK_SIZE;
1049         final int jColumn = column - jBlock * BLOCK_SIZE;
1050         final int jWidth = blockWidth(jBlock);
1051         int outIndex = 0;
1052         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1053             final int iHeight = blockHeight(iBlock);
1054             final double[] block = blocks[iBlock * blockColumns + jBlock];
1055             for (int i = 0; i < iHeight; ++i) {
1056                 outData[outIndex++] = block[i * jWidth + jColumn];
1057             }
1058         }
1059 
1060         return new ArrayRealVector(outData, false);
1061     }
1062 
1063     /** {@inheritDoc} */
1064     @Override
1065     public void setColumnVector(final int column, final RealVector vector)
1066         throws OutOfRangeException, MatrixDimensionMismatchException {
1067         try {
1068             setColumn(column, ((ArrayRealVector) vector).getDataRef());
1069         } catch (ClassCastException cce) {
1070             super.setColumnVector(column, vector);
1071         }
1072     }
1073 
1074     /** {@inheritDoc} */
1075     @Override
1076     public double[] getRow(final int row) throws OutOfRangeException {
1077         MatrixUtils.checkRowIndex(this, row);
1078         final double[] out = new double[columns];
1079 
1080         // perform copy block-wise, to ensure good cache behavior
1081         final int iBlock = row / BLOCK_SIZE;
1082         final int iRow = row - iBlock * BLOCK_SIZE;
1083         int outIndex = 0;
1084         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1085             final int jWidth     = blockWidth(jBlock);
1086             final double[] block = blocks[iBlock * blockColumns + jBlock];
1087             System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1088             outIndex += jWidth;
1089         }
1090 
1091         return out;
1092     }
1093 
1094     /** {@inheritDoc} */
1095     @Override
1096     public void setRow(final int row, final double[] array)
1097         throws OutOfRangeException, MatrixDimensionMismatchException {
1098         MatrixUtils.checkRowIndex(this, row);
1099         final int nCols = getColumnDimension();
1100         if (array.length != nCols) {
1101             throw new MatrixDimensionMismatchException(1, array.length, 1, nCols);
1102         }
1103 
1104         // perform copy block-wise, to ensure good cache behavior
1105         final int iBlock = row / BLOCK_SIZE;
1106         final int iRow = row - iBlock * BLOCK_SIZE;
1107         int outIndex = 0;
1108         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1109             final int jWidth     = blockWidth(jBlock);
1110             final double[] block = blocks[iBlock * blockColumns + jBlock];
1111             System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1112             outIndex += jWidth;
1113         }
1114     }
1115 
1116     /** {@inheritDoc} */
1117     @Override
1118     public double[] getColumn(final int column) throws OutOfRangeException {
1119         MatrixUtils.checkColumnIndex(this, column);
1120         final double[] out = new double[rows];
1121 
1122         // perform copy block-wise, to ensure good cache behavior
1123         final int jBlock  = column / BLOCK_SIZE;
1124         final int jColumn = column - jBlock * BLOCK_SIZE;
1125         final int jWidth  = blockWidth(jBlock);
1126         int outIndex = 0;
1127         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1128             final int iHeight = blockHeight(iBlock);
1129             final double[] block = blocks[iBlock * blockColumns + jBlock];
1130             for (int i = 0; i < iHeight; ++i) {
1131                 out[outIndex++] = block[i * jWidth + jColumn];
1132             }
1133         }
1134 
1135         return out;
1136     }
1137 
1138     /** {@inheritDoc} */
1139     @Override
1140     public void setColumn(final int column, final double[] array)
1141         throws OutOfRangeException, MatrixDimensionMismatchException {
1142         MatrixUtils.checkColumnIndex(this, column);
1143         final int nRows = getRowDimension();
1144         if (array.length != nRows) {
1145             throw new MatrixDimensionMismatchException(array.length, 1, nRows, 1);
1146         }
1147 
1148         // perform copy block-wise, to ensure good cache behavior
1149         final int jBlock  = column / BLOCK_SIZE;
1150         final int jColumn = column - jBlock * BLOCK_SIZE;
1151         final int jWidth = blockWidth(jBlock);
1152         int outIndex = 0;
1153         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1154             final int iHeight = blockHeight(iBlock);
1155             final double[] block = blocks[iBlock * blockColumns + jBlock];
1156             for (int i = 0; i < iHeight; ++i) {
1157                 block[i * jWidth + jColumn] = array[outIndex++];
1158             }
1159         }
1160     }
1161 
1162     /** {@inheritDoc} */
1163     @Override
1164     public double getEntry(final int row, final int column)
1165         throws OutOfRangeException {
1166         MatrixUtils.checkMatrixIndex(this, row, column);
1167         final int iBlock = row / BLOCK_SIZE;
1168         final int jBlock = column / BLOCK_SIZE;
1169         final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1170             (column - jBlock * BLOCK_SIZE);
1171         return blocks[iBlock * blockColumns + jBlock][k];
1172     }
1173 
1174     /** {@inheritDoc} */
1175     @Override
1176     public void setEntry(final int row, final int column, final double value)
1177         throws OutOfRangeException {
1178         MatrixUtils.checkMatrixIndex(this, row, column);
1179         final int iBlock = row / BLOCK_SIZE;
1180         final int jBlock = column / BLOCK_SIZE;
1181         final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1182             (column - jBlock * BLOCK_SIZE);
1183         blocks[iBlock * blockColumns + jBlock][k] = value;
1184     }
1185 
1186     /** {@inheritDoc} */
1187     @Override
1188     public void addToEntry(final int row, final int column,
1189                            final double increment)
1190         throws OutOfRangeException {
1191         MatrixUtils.checkMatrixIndex(this, row, column);
1192         final int iBlock = row    / BLOCK_SIZE;
1193         final int jBlock = column / BLOCK_SIZE;
1194         final int k = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1195             (column - jBlock * BLOCK_SIZE);
1196         blocks[iBlock * blockColumns + jBlock][k] += increment;
1197     }
1198 
1199     /** {@inheritDoc} */
1200     @Override
1201     public void multiplyEntry(final int row, final int column,
1202                               final double factor)
1203         throws OutOfRangeException {
1204         MatrixUtils.checkMatrixIndex(this, row, column);
1205         final int iBlock = row / BLOCK_SIZE;
1206         final int jBlock = column / BLOCK_SIZE;
1207         final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1208             (column - jBlock * BLOCK_SIZE);
1209         blocks[iBlock * blockColumns + jBlock][k] *= factor;
1210     }
1211 
1212     /** {@inheritDoc} */
1213     @Override
1214     public BlockRealMatrix transpose() {
1215         final int nRows = getRowDimension();
1216         final int nCols = getColumnDimension();
1217         final BlockRealMatrix out = new BlockRealMatrix(nCols, nRows);
1218 
1219         // perform transpose block-wise, to ensure good cache behavior
1220         int blockIndex = 0;
1221         for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1222             for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1223                 // transpose current block
1224                 final double[] outBlock = out.blocks[blockIndex];
1225                 final double[] tBlock = blocks[jBlock * blockColumns + iBlock];
1226                 final int pStart = iBlock * BLOCK_SIZE;
1227                 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, columns);
1228                 final int qStart = jBlock * BLOCK_SIZE;
1229                 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, rows);
1230                 int k = 0;
1231                 for (int p = pStart; p < pEnd; ++p) {
1232                     final int lInc = pEnd - pStart;
1233                     int l = p - pStart;
1234                     for (int q = qStart; q < qEnd; ++q) {
1235                         outBlock[k] = tBlock[l];
1236                         ++k;
1237                         l+= lInc;
1238                     }
1239                 }
1240                 // go to next block
1241                 ++blockIndex;
1242             }
1243         }
1244 
1245         return out;
1246     }
1247 
1248     /** {@inheritDoc} */
1249     @Override
1250     public int getRowDimension() {
1251         return rows;
1252     }
1253 
1254     /** {@inheritDoc} */
1255     @Override
1256     public int getColumnDimension() {
1257         return columns;
1258     }
1259 
1260     /** {@inheritDoc} */
1261     @Override
1262     public double[] operate(final double[] v)
1263         throws DimensionMismatchException {
1264         if (v.length != columns) {
1265             throw new DimensionMismatchException(v.length, columns);
1266         }
1267         final double[] out = new double[rows];
1268 
1269         // perform multiplication block-wise, to ensure good cache behavior
1270         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1271             final int pStart = iBlock * BLOCK_SIZE;
1272             final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1273             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1274                 final double[] block  = blocks[iBlock * blockColumns + jBlock];
1275                 final int qStart = jBlock * BLOCK_SIZE;
1276                 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1277                 int k = 0;
1278                 for (int p = pStart; p < pEnd; ++p) {
1279                     double sum = 0;
1280                     int q = qStart;
1281                     while (q < qEnd - 3) {
1282                         sum += block[k]     * v[q]     +
1283                                block[k + 1] * v[q + 1] +
1284                                block[k + 2] * v[q + 2] +
1285                                block[k + 3] * v[q + 3];
1286                         k += 4;
1287                         q += 4;
1288                     }
1289                     while (q < qEnd) {
1290                         sum += block[k++] * v[q++];
1291                     }
1292                     out[p] += sum;
1293                 }
1294             }
1295         }
1296 
1297         return out;
1298     }
1299 
1300     /** {@inheritDoc} */
1301     @Override
1302     public double[] preMultiply(final double[] v)
1303         throws DimensionMismatchException {
1304         if (v.length != rows) {
1305             throw new DimensionMismatchException(v.length, rows);
1306         }
1307         final double[] out = new double[columns];
1308 
1309         // perform multiplication block-wise, to ensure good cache behavior
1310         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1311             final int jWidth  = blockWidth(jBlock);
1312             final int jWidth2 = jWidth  + jWidth;
1313             final int jWidth3 = jWidth2 + jWidth;
1314             final int jWidth4 = jWidth3 + jWidth;
1315             final int qStart = jBlock * BLOCK_SIZE;
1316             final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1317             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1318                 final double[] block  = blocks[iBlock * blockColumns + jBlock];
1319                 final int pStart = iBlock * BLOCK_SIZE;
1320                 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1321                 for (int q = qStart; q < qEnd; ++q) {
1322                     int k = q - qStart;
1323                     double sum = 0;
1324                     int p = pStart;
1325                     while (p < pEnd - 3) {
1326                         sum += block[k]           * v[p]     +
1327                                block[k + jWidth]  * v[p + 1] +
1328                                block[k + jWidth2] * v[p + 2] +
1329                                block[k + jWidth3] * v[p + 3];
1330                         k += jWidth4;
1331                         p += 4;
1332                     }
1333                     while (p < pEnd) {
1334                         sum += block[k] * v[p++];
1335                         k += jWidth;
1336                     }
1337                     out[q] += sum;
1338                 }
1339             }
1340         }
1341 
1342         return out;
1343     }
1344 
1345     /** {@inheritDoc} */
1346     @Override
1347     public double walkInRowOrder(final RealMatrixChangingVisitor visitor) {
1348         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1349         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1350             final int pStart = iBlock * BLOCK_SIZE;
1351             final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1352             for (int p = pStart; p < pEnd; ++p) {
1353                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1354                     final int jWidth = blockWidth(jBlock);
1355                     final int qStart = jBlock * BLOCK_SIZE;
1356                     final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1357                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1358                     int k = (p - pStart) * jWidth;
1359                     for (int q = qStart; q < qEnd; ++q) {
1360                         block[k] = visitor.visit(p, q, block[k]);
1361                         ++k;
1362                     }
1363                 }
1364              }
1365         }
1366         return visitor.end();
1367     }
1368 
1369     /** {@inheritDoc} */
1370     @Override
1371     public double walkInRowOrder(final RealMatrixPreservingVisitor visitor) {
1372         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1373         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1374             final int pStart = iBlock * BLOCK_SIZE;
1375             final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1376             for (int p = pStart; p < pEnd; ++p) {
1377                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1378                     final int jWidth = blockWidth(jBlock);
1379                     final int qStart = jBlock * BLOCK_SIZE;
1380                     final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1381                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1382                     int k = (p - pStart) * jWidth;
1383                     for (int q = qStart; q < qEnd; ++q) {
1384                         visitor.visit(p, q, block[k]);
1385                         ++k;
1386                     }
1387                 }
1388              }
1389         }
1390         return visitor.end();
1391     }
1392 
1393     /** {@inheritDoc} */
1394     @Override
1395     public double walkInRowOrder(final RealMatrixChangingVisitor visitor,
1396                                  final int startRow, final int endRow,
1397                                  final int startColumn, final int endColumn)
1398         throws OutOfRangeException, NumberIsTooSmallException {
1399         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1400         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1401         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1402             final int p0 = iBlock * BLOCK_SIZE;
1403             final int pStart = FastMath.max(startRow, p0);
1404             final int pEnd = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1405             for (int p = pStart; p < pEnd; ++p) {
1406                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1407                     final int jWidth = blockWidth(jBlock);
1408                     final int q0 = jBlock * BLOCK_SIZE;
1409                     final int qStart = FastMath.max(startColumn, q0);
1410                     final int qEnd = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1411                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1412                     int k = (p - p0) * jWidth + qStart - q0;
1413                     for (int q = qStart; q < qEnd; ++q) {
1414                         block[k] = visitor.visit(p, q, block[k]);
1415                         ++k;
1416                     }
1417                 }
1418              }
1419         }
1420         return visitor.end();
1421     }
1422 
1423     /** {@inheritDoc} */
1424     @Override
1425     public double walkInRowOrder(final RealMatrixPreservingVisitor visitor,
1426                                  final int startRow, final int endRow,
1427                                  final int startColumn, final int endColumn)
1428         throws OutOfRangeException, NumberIsTooSmallException {
1429         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1430         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1431         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1432             final int p0 = iBlock * BLOCK_SIZE;
1433             final int pStart = FastMath.max(startRow, p0);
1434             final int pEnd = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1435             for (int p = pStart; p < pEnd; ++p) {
1436                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1437                     final int jWidth = blockWidth(jBlock);
1438                     final int q0 = jBlock * BLOCK_SIZE;
1439                     final int qStart = FastMath.max(startColumn, q0);
1440                     final int qEnd = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1441                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1442                     int k = (p - p0) * jWidth + qStart - q0;
1443                     for (int q = qStart; q < qEnd; ++q) {
1444                         visitor.visit(p, q, block[k]);
1445                         ++k;
1446                     }
1447                 }
1448              }
1449         }
1450         return visitor.end();
1451     }
1452 
1453     /** {@inheritDoc} */
1454     @Override
1455     public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor) {
1456         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1457         int blockIndex = 0;
1458         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1459             final int pStart = iBlock * BLOCK_SIZE;
1460             final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1461             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1462                 final int qStart = jBlock * BLOCK_SIZE;
1463                 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1464                 final double[] block = blocks[blockIndex];
1465                 int k = 0;
1466                 for (int p = pStart; p < pEnd; ++p) {
1467                     for (int q = qStart; q < qEnd; ++q) {
1468                         block[k] = visitor.visit(p, q, block[k]);
1469                         ++k;
1470                     }
1471                 }
1472                 ++blockIndex;
1473             }
1474         }
1475         return visitor.end();
1476     }
1477 
1478     /** {@inheritDoc} */
1479     @Override
1480     public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor) {
1481         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1482         int blockIndex = 0;
1483         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1484             final int pStart = iBlock * BLOCK_SIZE;
1485             final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1486             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1487                 final int qStart = jBlock * BLOCK_SIZE;
1488                 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1489                 final double[] block = blocks[blockIndex];
1490                 int k = 0;
1491                 for (int p = pStart; p < pEnd; ++p) {
1492                     for (int q = qStart; q < qEnd; ++q) {
1493                         visitor.visit(p, q, block[k]);
1494                         ++k;
1495                     }
1496                 }
1497                 ++blockIndex;
1498             }
1499         }
1500         return visitor.end();
1501     }
1502 
1503     /** {@inheritDoc} */
1504     @Override
1505     public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor,
1506                                        final int startRow, final int endRow,
1507                                        final int startColumn,
1508                                        final int endColumn)
1509         throws OutOfRangeException, NumberIsTooSmallException {
1510         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1511         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1512         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1513             final int p0 = iBlock * BLOCK_SIZE;
1514             final int pStart = FastMath.max(startRow, p0);
1515             final int pEnd = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1516             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1517                 final int jWidth = blockWidth(jBlock);
1518                 final int q0 = jBlock * BLOCK_SIZE;
1519                 final int qStart = FastMath.max(startColumn, q0);
1520                 final int qEnd = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1521                 final double[] block = blocks[iBlock * blockColumns + jBlock];
1522                 for (int p = pStart; p < pEnd; ++p) {
1523                     int k = (p - p0) * jWidth + qStart - q0;
1524                     for (int q = qStart; q < qEnd; ++q) {
1525                         block[k] = visitor.visit(p, q, block[k]);
1526                         ++k;
1527                     }
1528                 }
1529             }
1530         }
1531         return visitor.end();
1532     }
1533 
1534     /** {@inheritDoc} */
1535     @Override
1536     public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor,
1537                                        final int startRow, final int endRow,
1538                                        final int startColumn,
1539                                        final int endColumn)
1540         throws OutOfRangeException, NumberIsTooSmallException {
1541         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1542         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1543         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1544             final int p0 = iBlock * BLOCK_SIZE;
1545             final int pStart = FastMath.max(startRow, p0);
1546             final int pEnd = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1547             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1548                 final int jWidth = blockWidth(jBlock);
1549                 final int q0 = jBlock * BLOCK_SIZE;
1550                 final int qStart = FastMath.max(startColumn, q0);
1551                 final int qEnd = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1552                 final double[] block = blocks[iBlock * blockColumns + jBlock];
1553                 for (int p = pStart; p < pEnd; ++p) {
1554                     int k = (p - p0) * jWidth + qStart - q0;
1555                     for (int q = qStart; q < qEnd; ++q) {
1556                         visitor.visit(p, q, block[k]);
1557                         ++k;
1558                     }
1559                 }
1560             }
1561         }
1562         return visitor.end();
1563     }
1564 
1565     /**
1566      * Get the height of a block.
1567      * @param blockRow row index (in block sense) of the block
1568      * @return height (number of rows) of the block
1569      */
1570     private int blockHeight(final int blockRow) {
1571         return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1572     }
1573 
1574     /**
1575      * Get the width of a block.
1576      * @param blockColumn column index (in block sense) of the block
1577      * @return width (number of columns) of the block
1578      */
1579     private int blockWidth(final int blockColumn) {
1580         return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1581     }
1582 }