View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math.linear;
19  
20  import java.io.Serializable;
21  import java.util.Arrays;
22  
23  import org.apache.commons.math.MathRuntimeException;
24  
25  /**
26   * Cache-friendly implementation of RealMatrix using a flat arrays to store
27   * square blocks of the matrix.
28   * <p>
29   * This implementation is specially designed to be cache-friendly. Square blocks are
30   * stored as small arrays and allow efficient traversal of data both in row major direction
31   * and columns major direction, one block at a time. This greatly increases performances
32   * for algorithms that use crossed directions loops like multiplication or transposition.
33   * </p>
34   * <p>
35   * The size of square blocks is a static parameter. It may be tuned according to the cache
36   * size of the target computer processor. As a rule of thumbs, it should be the largest
37   * value that allows three blocks to be simultaneously cached (this is necessary for example
38   * for matrix multiplication). The default value is to use 52x52 blocks which is well suited
39   * for processors with 64k L1 cache (one block holds 2704 values or 21632 bytes). This value
40   * could be lowered to 36x36 for processors with 32k L1 cache.
41   * </p>
42   * <p>
43   * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
44   * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
45   * blocks are flattened in row major order in single dimension arrays which are therefore
46   * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
47   * organized in row major order.
48   * </p>
49   * <p>
50   * As an example, for a block size of 52x52, a 100x60 matrix would be stored in 4 blocks.
51   * Block 0 would be a double[2704] array holding the upper left 52x52 square, block 1 would be
52   * a double[416] array holding the upper right 52x8 rectangle, block 2 would be a double[2496]
53   * array holding the lower left 48x52 rectangle and block 3 would be a double[384] array
54   * holding the lower right 48x8 rectangle.
55   * </p>
56   * <p>
57   * The layout complexity overhead versus simple mapping of matrices to java
58   * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
59   * to up to 3-fold improvements for matrices of moderate to large size.
60   * </p>
61   * @version $Revision: 825919 $ $Date: 2009-10-16 10:51:55 -0400 (Fri, 16 Oct 2009) $
62   * @since 2.0
63   */
64  public class BlockRealMatrix extends AbstractRealMatrix implements Serializable {
65  
66      /** Block size. */
67      public static final int BLOCK_SIZE = 52;
68  
69      /** Serializable version identifier */
70      private static final long serialVersionUID = 4991895511313664478L;
71  
72      /** Blocks of matrix entries. */
73      private final double blocks[][];
74  
75      /** Number of rows of the matrix. */
76      private final int rows;
77  
78      /** Number of columns of the matrix. */
79      private final int columns;
80  
81      /** Number of block rows of the matrix. */
82      private final int blockRows;
83  
84      /** Number of block columns of the matrix. */
85      private final int blockColumns;
86  
87      /**
88       * Create a new matrix with the supplied row and column dimensions.
89       *
90       * @param rows  the number of rows in the new matrix
91       * @param columns  the number of columns in the new matrix
92       * @throws IllegalArgumentException if row or column dimension is not
93       *  positive
94       */
95      public BlockRealMatrix(final int rows, final int columns)
96          throws IllegalArgumentException {
97  
98          super(rows, columns);
99          this.rows    = rows;
100         this.columns = columns;
101 
102         // number of blocks
103         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
104         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
105 
106         // allocate storage blocks, taking care of smaller ones at right and bottom
107         blocks = createBlocksLayout(rows, columns);
108 
109     }
110 
111     /**
112      * Create a new dense matrix copying entries from raw layout data.
113      * <p>The input array <em>must</em> already be in raw layout.</p>
114      * <p>Calling this constructor is equivalent to call:
115      * <pre>matrix = new BlockRealMatrix(rawData.length, rawData[0].length,
116      *                                   toBlocksLayout(rawData), false);</pre>
117      * </p>
118      * @param rawData data for new matrix, in raw layout
119      *
120      * @exception IllegalArgumentException if <code>blockData</code> shape is
121      * inconsistent with block layout
122      * @see #BlockRealMatrix(int, int, double[][], boolean)
123      */
124     public BlockRealMatrix(final double[][] rawData)
125         throws IllegalArgumentException {
126         this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
127     }
128 
129     /**
130      * Create a new dense matrix copying entries from block layout data.
131      * <p>The input array <em>must</em> already be in blocks layout.</p>
132      * @param rows  the number of rows in the new matrix
133      * @param columns  the number of columns in the new matrix
134      * @param blockData data for new matrix
135      * @param copyArray if true, the input array will be copied, otherwise
136      * it will be referenced
137      *
138      * @exception IllegalArgumentException if <code>blockData</code> shape is
139      * inconsistent with block layout
140      * @see #createBlocksLayout(int, int)
141      * @see #toBlocksLayout(double[][])
142      * @see #BlockRealMatrix(double[][])
143      */
144     public BlockRealMatrix(final int rows, final int columns,
145                            final double[][] blockData, final boolean copyArray)
146         throws IllegalArgumentException {
147 
148         super(rows, columns);
149         this.rows    = rows;
150         this.columns = columns;
151 
152         // number of blocks
153         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
154         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
155 
156         if (copyArray) {
157             // allocate storage blocks, taking care of smaller ones at right and bottom
158             blocks = new double[blockRows * blockColumns][];
159         } else {
160             // reference existing array
161             blocks = blockData;
162         }
163 
164         int index = 0;
165         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
166             final int iHeight = blockHeight(iBlock);
167             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
168                 if (blockData[index].length != iHeight * blockWidth(jBlock)) {
169                     throw MathRuntimeException.createIllegalArgumentException(
170                             "wrong array shape (block length = {0}, expected {1})",
171                             blockData[index].length, iHeight * blockWidth(jBlock));
172                 }
173                 if (copyArray) {
174                     blocks[index] = blockData[index].clone();
175                 }
176             }
177         }
178 
179     }
180 
181     /**
182      * Convert a data array from raw layout to blocks layout.
183      * <p>
184      * Raw layout is the straightforward layout where element at row i and
185      * column j is in array element <code>rawData[i][j]</code>. Blocks layout
186      * is the layout used in {@link BlockRealMatrix} instances, where the matrix
187      * is split in square blocks (except at right and bottom side where blocks may
188      * be rectangular to fit matrix size) and each block is stored in a flattened
189      * one-dimensional array.
190      * </p>
191      * <p>
192      * This method creates an array in blocks layout from an input array in raw layout.
193      * It can be used to provide the array argument of the {@link
194      * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
195      * </p>
196      * @param rawData data array in raw layout
197      * @return a new data array containing the same entries but in blocks layout
198      * @exception IllegalArgumentException if <code>rawData</code> is not rectangular
199      *  (not all rows have the same length)
200      * @see #createBlocksLayout(int, int)
201      * @see #BlockRealMatrix(int, int, double[][], boolean)
202      */
203     public static double[][] toBlocksLayout(final double[][] rawData)
204         throws IllegalArgumentException {
205 
206         final int rows         = rawData.length;
207         final int columns      = rawData[0].length;
208         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
209         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
210 
211         // safety checks
212         for (int i = 0; i < rawData.length; ++i) {
213             final int length = rawData[i].length;
214             if (length != columns) {
215                 throw MathRuntimeException.createIllegalArgumentException(
216                         "some rows have length {0} while others have length {1}",
217                         columns, length);
218             }
219         }
220 
221         // convert array
222         final double[][] blocks = new double[blockRows * blockColumns][];
223         int blockIndex = 0;
224         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
225             final int pStart  = iBlock * BLOCK_SIZE;
226             final int pEnd    = Math.min(pStart + BLOCK_SIZE, rows);
227             final int iHeight = pEnd - pStart;
228             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
229                 final int qStart = jBlock * BLOCK_SIZE;
230                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
231                 final int jWidth = qEnd - qStart;
232 
233                 // allocate new block
234                 final double[] block = new double[iHeight * jWidth];
235                 blocks[blockIndex] = block;
236 
237                 // copy data
238                 int index = 0;
239                 for (int p = pStart; p < pEnd; ++p) {
240                     System.arraycopy(rawData[p], qStart, block, index, jWidth);
241                     index += jWidth;
242                 }
243 
244                 ++blockIndex;
245 
246             }
247         }
248 
249         return blocks;
250 
251     }
252 
253     /**
254      * Create a data array in blocks layout.
255      * <p>
256      * This method can be used to create the array argument of the {@link
257      * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
258      * </p>
259      * @param rows  the number of rows in the new matrix
260      * @param columns  the number of columns in the new matrix
261      * @return a new data array in blocks layout
262      * @see #toBlocksLayout(double[][])
263      * @see #BlockRealMatrix(int, int, double[][], boolean)
264      */
265     public static double[][] createBlocksLayout(final int rows, final int columns) {
266 
267         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
268         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
269 
270         final double[][] blocks = new double[blockRows * blockColumns][];
271         int blockIndex = 0;
272         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
273             final int pStart  = iBlock * BLOCK_SIZE;
274             final int pEnd    = Math.min(pStart + BLOCK_SIZE, rows);
275             final int iHeight = pEnd - pStart;
276             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
277                 final int qStart = jBlock * BLOCK_SIZE;
278                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
279                 final int jWidth = qEnd - qStart;
280                 blocks[blockIndex] = new double[iHeight * jWidth];
281                 ++blockIndex;
282             }
283         }
284 
285         return blocks;
286 
287     }
288 
289     /** {@inheritDoc} */
290     @Override
291     public BlockRealMatrix createMatrix(final int rowDimension, final int columnDimension)
292         throws IllegalArgumentException {
293         return new BlockRealMatrix(rowDimension, columnDimension);
294     }
295 
296     /** {@inheritDoc} */
297     @Override
298     public BlockRealMatrix copy() {
299 
300         // create an empty matrix
301         BlockRealMatrix copied = new BlockRealMatrix(rows, columns);
302 
303         // copy the blocks
304         for (int i = 0; i < blocks.length; ++i) {
305             System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
306         }
307 
308         return copied;
309 
310     }
311 
312     /** {@inheritDoc} */
313     @Override
314     public BlockRealMatrix add(final RealMatrix m)
315         throws IllegalArgumentException {
316         try {
317             return add((BlockRealMatrix) m);
318         } catch (ClassCastException cce) {
319 
320             // safety check
321             MatrixUtils.checkAdditionCompatible(this, m);
322 
323             final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
324 
325             // perform addition block-wise, to ensure good cache behavior
326             int blockIndex = 0;
327             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
328                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
329 
330                     // perform addition on the current block
331                     final double[] outBlock = out.blocks[blockIndex];
332                     final double[] tBlock   = blocks[blockIndex];
333                     final int      pStart   = iBlock * BLOCK_SIZE;
334                     final int      pEnd     = Math.min(pStart + BLOCK_SIZE, rows);
335                     final int      qStart   = jBlock * BLOCK_SIZE;
336                     final int      qEnd     = Math.min(qStart + BLOCK_SIZE, columns);
337                     int k = 0;
338                     for (int p = pStart; p < pEnd; ++p) {
339                         for (int q = qStart; q < qEnd; ++q) {
340                             outBlock[k] = tBlock[k] + m.getEntry(p, q);
341                             ++k;
342                         }
343                     }
344 
345                     // go to next block
346                     ++blockIndex;
347 
348                 }
349             }
350 
351             return out;
352 
353         }
354     }
355 
356     /**
357      * Compute the sum of this and <code>m</code>.
358      *
359      * @param m    matrix to be added
360      * @return     this + m
361      * @throws  IllegalArgumentException if m is not the same size as this
362      */
363     public BlockRealMatrix add(final BlockRealMatrix m)
364         throws IllegalArgumentException {
365 
366         // safety check
367         MatrixUtils.checkAdditionCompatible(this, m);
368 
369         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
370 
371         // perform addition block-wise, to ensure good cache behavior
372         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
373             final double[] outBlock = out.blocks[blockIndex];
374             final double[] tBlock   = blocks[blockIndex];
375             final double[] mBlock   = m.blocks[blockIndex];
376             for (int k = 0; k < outBlock.length; ++k) {
377                 outBlock[k] = tBlock[k] + mBlock[k];
378             }
379         }
380 
381         return out;
382 
383     }
384 
385     /** {@inheritDoc} */
386     @Override
387     public BlockRealMatrix subtract(final RealMatrix m)
388         throws IllegalArgumentException {
389         try {
390             return subtract((BlockRealMatrix) m);
391         } catch (ClassCastException cce) {
392 
393             // safety check
394             MatrixUtils.checkSubtractionCompatible(this, m);
395 
396             final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
397 
398             // perform subtraction block-wise, to ensure good cache behavior
399             int blockIndex = 0;
400             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
401                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
402 
403                     // perform subtraction on the current block
404                     final double[] outBlock = out.blocks[blockIndex];
405                     final double[] tBlock   = blocks[blockIndex];
406                     final int      pStart   = iBlock * BLOCK_SIZE;
407                     final int      pEnd     = Math.min(pStart + BLOCK_SIZE, rows);
408                     final int      qStart   = jBlock * BLOCK_SIZE;
409                     final int      qEnd     = Math.min(qStart + BLOCK_SIZE, columns);
410                     int k = 0;
411                     for (int p = pStart; p < pEnd; ++p) {
412                         for (int q = qStart; q < qEnd; ++q) {
413                             outBlock[k] = tBlock[k] - m.getEntry(p, q);
414                             ++k;
415                         }
416                     }
417 
418                     // go to next block
419                     ++blockIndex;
420 
421                 }
422             }
423 
424             return out;
425 
426         }
427     }
428 
429     /**
430      * Compute this minus <code>m</code>.
431      *
432      * @param m    matrix to be subtracted
433      * @return     this - m
434      * @throws  IllegalArgumentException if m is not the same size as this
435      */
436     public BlockRealMatrix subtract(final BlockRealMatrix m)
437         throws IllegalArgumentException {
438 
439         // safety check
440         MatrixUtils.checkSubtractionCompatible(this, m);
441 
442         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
443 
444         // perform subtraction block-wise, to ensure good cache behavior
445         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
446             final double[] outBlock = out.blocks[blockIndex];
447             final double[] tBlock   = blocks[blockIndex];
448             final double[] mBlock   = m.blocks[blockIndex];
449             for (int k = 0; k < outBlock.length; ++k) {
450                 outBlock[k] = tBlock[k] - mBlock[k];
451             }
452         }
453 
454         return out;
455 
456     }
457 
458     /** {@inheritDoc} */
459     @Override
460     public BlockRealMatrix scalarAdd(final double d)
461         throws IllegalArgumentException {
462 
463         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
464 
465         // perform subtraction block-wise, to ensure good cache behavior
466         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
467             final double[] outBlock = out.blocks[blockIndex];
468             final double[] tBlock   = blocks[blockIndex];
469             for (int k = 0; k < outBlock.length; ++k) {
470                 outBlock[k] = tBlock[k] + d;
471             }
472         }
473 
474         return out;
475 
476     }
477 
478     /** {@inheritDoc} */
479     @Override
480     public RealMatrix scalarMultiply(final double d)
481         throws IllegalArgumentException {
482 
483         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
484 
485         // perform subtraction block-wise, to ensure good cache behavior
486         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
487             final double[] outBlock = out.blocks[blockIndex];
488             final double[] tBlock   = blocks[blockIndex];
489             for (int k = 0; k < outBlock.length; ++k) {
490                 outBlock[k] = tBlock[k] * d;
491             }
492         }
493 
494         return out;
495 
496     }
497 
498     /** {@inheritDoc} */
499     @Override
500     public BlockRealMatrix multiply(final RealMatrix m)
501         throws IllegalArgumentException {
502         try {
503             return multiply((BlockRealMatrix) m);
504         } catch (ClassCastException cce) {
505 
506             // safety check
507             MatrixUtils.checkMultiplicationCompatible(this, m);
508 
509             final BlockRealMatrix out = new BlockRealMatrix(rows, m.getColumnDimension());
510 
511             // perform multiplication block-wise, to ensure good cache behavior
512             int blockIndex = 0;
513             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
514 
515                 final int pStart = iBlock * BLOCK_SIZE;
516                 final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
517 
518                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
519 
520                     final int qStart = jBlock * BLOCK_SIZE;
521                     final int qEnd   = Math.min(qStart + BLOCK_SIZE, m.getColumnDimension());
522 
523                     // select current block
524                     final double[] outBlock = out.blocks[blockIndex];
525 
526                     // perform multiplication on current block
527                     for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
528                         final int kWidth      = blockWidth(kBlock);
529                         final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
530                         final int rStart      = kBlock * BLOCK_SIZE;
531                         int k = 0;
532                         for (int p = pStart; p < pEnd; ++p) {
533                             final int lStart = (p - pStart) * kWidth;
534                             final int lEnd   = lStart + kWidth;
535                             for (int q = qStart; q < qEnd; ++q) {
536                                 double sum = 0;
537                                 int r = rStart;
538                                 for (int l = lStart; l < lEnd; ++l) {
539                                     sum += tBlock[l] * m.getEntry(r, q);
540                                     ++r;
541                                 }
542                                 outBlock[k] += sum;
543                                 ++k;
544                             }
545                         }
546                     }
547 
548                     // go to next block
549                     ++blockIndex;
550 
551                 }
552             }
553 
554             return out;
555 
556         }
557     }
558 
559     /**
560      * Returns the result of postmultiplying this by m.
561      *
562      * @param m    matrix to postmultiply by
563      * @return     this * m
564      * @throws     IllegalArgumentException
565      *             if columnDimension(this) != rowDimension(m)
566      */
567     public BlockRealMatrix multiply(BlockRealMatrix m) throws IllegalArgumentException {
568 
569         // safety check
570         MatrixUtils.checkMultiplicationCompatible(this, m);
571 
572         final BlockRealMatrix out = new BlockRealMatrix(rows, m.columns);
573 
574         // perform multiplication block-wise, to ensure good cache behavior
575         int blockIndex = 0;
576         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
577 
578             final int pStart = iBlock * BLOCK_SIZE;
579             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
580 
581             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
582                 final int jWidth = out.blockWidth(jBlock);
583                 final int jWidth2 = jWidth  + jWidth;
584                 final int jWidth3 = jWidth2 + jWidth;
585                 final int jWidth4 = jWidth3 + jWidth;
586 
587                 // select current block
588                 final double[] outBlock = out.blocks[blockIndex];
589 
590                 // perform multiplication on current block
591                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
592                     final int kWidth = blockWidth(kBlock);
593                     final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
594                     final double[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
595                     int k = 0;
596                     for (int p = pStart; p < pEnd; ++p) {
597                         final int lStart = (p - pStart) * kWidth;
598                         final int lEnd   = lStart + kWidth;
599                         for (int nStart = 0; nStart < jWidth; ++nStart) {
600                             double sum = 0;
601                             int l = lStart;
602                             int n = nStart;
603                             while (l < lEnd - 3) {
604                                 sum += tBlock[l] * mBlock[n] +
605                                        tBlock[l + 1] * mBlock[n + jWidth] +
606                                        tBlock[l + 2] * mBlock[n + jWidth2] +
607                                        tBlock[l + 3] * mBlock[n + jWidth3];
608                                 l += 4;
609                                 n += jWidth4;
610                             }
611                             while (l < lEnd) {
612                                 sum += tBlock[l++] * mBlock[n];
613                                 n += jWidth;
614                             }
615                             outBlock[k] += sum;
616                             ++k;
617                         }
618                     }
619                 }
620 
621                 // go to next block
622                 ++blockIndex;
623 
624             }
625         }
626 
627         return out;
628 
629     }
630 
631     /** {@inheritDoc} */
632     @Override
633     public double[][] getData() {
634 
635         final double[][] data = new double[getRowDimension()][getColumnDimension()];
636         final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
637 
638         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
639             final int pStart = iBlock * BLOCK_SIZE;
640             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
641             int regularPos   = 0;
642             int lastPos      = 0;
643             for (int p = pStart; p < pEnd; ++p) {
644                 final double[] dataP = data[p];
645                 int blockIndex = iBlock * blockColumns;
646                 int dataPos    = 0;
647                 for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
648                     System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
649                     dataPos += BLOCK_SIZE;
650                 }
651                 System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
652                 regularPos += BLOCK_SIZE;
653                 lastPos    += lastColumns;
654             }
655         }
656 
657         return data;
658 
659     }
660 
661     /** {@inheritDoc} */
662     @Override
663     public double getNorm() {
664         final double[] colSums = new double[BLOCK_SIZE];
665         double maxColSum = 0;
666         for (int jBlock = 0; jBlock < blockColumns; jBlock++) {
667             final int jWidth = blockWidth(jBlock);
668             Arrays.fill(colSums, 0, jWidth, 0.0);
669             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
670                 final int iHeight = blockHeight(iBlock);
671                 final double[] block = blocks[iBlock * blockColumns + jBlock];
672                 for (int j = 0; j < jWidth; ++j) {
673                     double sum = 0;
674                     for (int i = 0; i < iHeight; ++i) {
675                         sum += Math.abs(block[i * jWidth + j]);
676                     }
677                     colSums[j] += sum;
678                 }
679             }
680             for (int j = 0; j < jWidth; ++j) {
681                 maxColSum = Math.max(maxColSum, colSums[j]);
682             }
683         }
684         return maxColSum;
685     }
686 
687     /** {@inheritDoc} */
688     @Override
689     public double getFrobeniusNorm() {
690         double sum2 = 0;
691         for (int blockIndex = 0; blockIndex < blocks.length; ++blockIndex) {
692             for (final double entry : blocks[blockIndex]) {
693                 sum2 += entry * entry;
694             }
695         }
696         return Math.sqrt(sum2);
697     }
698 
699     /** {@inheritDoc} */
700     @Override
701     public BlockRealMatrix getSubMatrix(final int startRow, final int endRow,
702                                    final int startColumn, final int endColumn)
703         throws MatrixIndexException {
704 
705         // safety checks
706         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
707 
708         // create the output matrix
709         final BlockRealMatrix out =
710             new BlockRealMatrix(endRow - startRow + 1, endColumn - startColumn + 1);
711 
712         // compute blocks shifts
713         final int blockStartRow    = startRow    / BLOCK_SIZE;
714         final int rowsShift        = startRow    % BLOCK_SIZE;
715         final int blockStartColumn = startColumn / BLOCK_SIZE;
716         final int columnsShift     = startColumn % BLOCK_SIZE;
717 
718         // perform extraction block-wise, to ensure good cache behavior
719         int pBlock = blockStartRow;
720         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
721             final int iHeight = out.blockHeight(iBlock);
722             int qBlock = blockStartColumn;
723             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
724                 final int jWidth = out.blockWidth(jBlock);
725 
726                 // handle one block of the output matrix
727                 final int      outIndex = iBlock * out.blockColumns + jBlock;
728                 final double[] outBlock = out.blocks[outIndex];
729                 final int      index    = pBlock * blockColumns + qBlock;
730                 final int      width    = blockWidth(qBlock);
731 
732                 final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
733                 final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
734                 if (heightExcess > 0) {
735                     // the submatrix block spans on two blocks rows from the original matrix
736                     if (widthExcess > 0) {
737                         // the submatrix block spans on two blocks columns from the original matrix
738                         final int width2 = blockWidth(qBlock + 1);
739                         copyBlockPart(blocks[index], width,
740                                       rowsShift, BLOCK_SIZE,
741                                       columnsShift, BLOCK_SIZE,
742                                       outBlock, jWidth, 0, 0);
743                         copyBlockPart(blocks[index + 1], width2,
744                                       rowsShift, BLOCK_SIZE,
745                                       0, widthExcess,
746                                       outBlock, jWidth, 0, jWidth - widthExcess);
747                         copyBlockPart(blocks[index + blockColumns], width,
748                                       0, heightExcess,
749                                       columnsShift, BLOCK_SIZE,
750                                       outBlock, jWidth, iHeight - heightExcess, 0);
751                         copyBlockPart(blocks[index + blockColumns + 1], width2,
752                                       0, heightExcess,
753                                       0, widthExcess,
754                                       outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
755                     } else {
756                         // the submatrix block spans on one block column from the original matrix
757                         copyBlockPart(blocks[index], width,
758                                       rowsShift, BLOCK_SIZE,
759                                       columnsShift, jWidth + columnsShift,
760                                       outBlock, jWidth, 0, 0);
761                         copyBlockPart(blocks[index + blockColumns], width,
762                                       0, heightExcess,
763                                       columnsShift, jWidth + columnsShift,
764                                       outBlock, jWidth, iHeight - heightExcess, 0);
765                     }
766                 } else {
767                     // the submatrix block spans on one block row from the original matrix
768                     if (widthExcess > 0) {
769                         // the submatrix block spans on two blocks columns from the original matrix
770                         final int width2 = blockWidth(qBlock + 1);
771                         copyBlockPart(blocks[index], width,
772                                       rowsShift, iHeight + rowsShift,
773                                       columnsShift, BLOCK_SIZE,
774                                       outBlock, jWidth, 0, 0);
775                         copyBlockPart(blocks[index + 1], width2,
776                                       rowsShift, iHeight + rowsShift,
777                                       0, widthExcess,
778                                       outBlock, jWidth, 0, jWidth - widthExcess);
779                     } else {
780                         // the submatrix block spans on one block column from the original matrix
781                         copyBlockPart(blocks[index], width,
782                                       rowsShift, iHeight + rowsShift,
783                                       columnsShift, jWidth + columnsShift,
784                                       outBlock, jWidth, 0, 0);
785                     }
786                }
787 
788                 ++qBlock;
789 
790             }
791 
792             ++pBlock;
793 
794         }
795 
796         return out;
797 
798     }
799 
800     /**
801      * Copy a part of a block into another one
802      * <p>This method can be called only when the specified part fits in both
803      * blocks, no verification is done here.</p>
804      * @param srcBlock source block
805      * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
806      * @param srcStartRow start row in the source block
807      * @param srcEndRow end row (exclusive) in the source block
808      * @param srcStartColumn start column in the source block
809      * @param srcEndColumn end column (exclusive) in the source block
810      * @param dstBlock destination block
811      * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
812      * @param dstStartRow start row in the destination block
813      * @param dstStartColumn start column in the destination block
814      */
815     private void copyBlockPart(final double[] srcBlock, final int srcWidth,
816                                final int srcStartRow, final int srcEndRow,
817                                final int srcStartColumn, final int srcEndColumn,
818                                final double[] dstBlock, final int dstWidth,
819                                final int dstStartRow, final int dstStartColumn) {
820         final int length = srcEndColumn - srcStartColumn;
821         int srcPos = srcStartRow * srcWidth + srcStartColumn;
822         int dstPos = dstStartRow * dstWidth + dstStartColumn;
823         for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
824             System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
825             srcPos += srcWidth;
826             dstPos += dstWidth;
827         }
828     }
829 
830     /** {@inheritDoc} */
831     @Override
832     public void setSubMatrix(final double[][] subMatrix, final int row, final int column)
833         throws MatrixIndexException {
834 
835         // safety checks
836         final int refLength = subMatrix[0].length;
837         if (refLength < 1) {
838             throw MathRuntimeException.createIllegalArgumentException("matrix must have at least one column");
839         }
840         final int endRow    = row + subMatrix.length - 1;
841         final int endColumn = column + refLength - 1;
842         MatrixUtils.checkSubMatrixIndex(this, row, endRow, column, endColumn);
843         for (final double[] subRow : subMatrix) {
844             if (subRow.length != refLength) {
845                 throw MathRuntimeException.createIllegalArgumentException(
846                         "some rows have length {0} while others have length {1}",
847                         refLength, subRow.length);
848             }
849         }
850 
851         // compute blocks bounds
852         final int blockStartRow    = row / BLOCK_SIZE;
853         final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
854         final int blockStartColumn = column / BLOCK_SIZE;
855         final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
856 
857         // perform copy block-wise, to ensure good cache behavior
858         for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
859             final int iHeight  = blockHeight(iBlock);
860             final int firstRow = iBlock * BLOCK_SIZE;
861             final int iStart   = Math.max(row,    firstRow);
862             final int iEnd     = Math.min(endRow + 1, firstRow + iHeight);
863 
864             for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
865                 final int jWidth      = blockWidth(jBlock);
866                 final int firstColumn = jBlock * BLOCK_SIZE;
867                 final int jStart      = Math.max(column,    firstColumn);
868                 final int jEnd        = Math.min(endColumn + 1, firstColumn + jWidth);
869                 final int jLength     = jEnd - jStart;
870 
871                 // handle one block, row by row
872                 final double[] block = blocks[iBlock * blockColumns + jBlock];
873                 for (int i = iStart; i < iEnd; ++i) {
874                     System.arraycopy(subMatrix[i - row], jStart - column,
875                                      block, (i - firstRow) * jWidth + (jStart - firstColumn),
876                                      jLength);
877                 }
878 
879             }
880         }
881     }
882 
883     /** {@inheritDoc} */
884     @Override
885     public BlockRealMatrix getRowMatrix(final int row)
886         throws MatrixIndexException {
887 
888         MatrixUtils.checkRowIndex(this, row);
889         final BlockRealMatrix out = new BlockRealMatrix(1, columns);
890 
891         // perform copy block-wise, to ensure good cache behavior
892         final int iBlock  = row / BLOCK_SIZE;
893         final int iRow    = row - iBlock * BLOCK_SIZE;
894         int outBlockIndex = 0;
895         int outIndex      = 0;
896         double[] outBlock = out.blocks[outBlockIndex];
897         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
898             final int jWidth     = blockWidth(jBlock);
899             final double[] block = blocks[iBlock * blockColumns + jBlock];
900             final int available  = outBlock.length - outIndex;
901             if (jWidth > available) {
902                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
903                 outBlock = out.blocks[++outBlockIndex];
904                 System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
905                 outIndex = jWidth - available;
906             } else {
907                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
908                 outIndex += jWidth;
909             }
910         }
911 
912         return out;
913 
914     }
915 
916     /** {@inheritDoc} */
917     @Override
918     public void setRowMatrix(final int row, final RealMatrix matrix)
919         throws MatrixIndexException, InvalidMatrixException {
920         try {
921             setRowMatrix(row, (BlockRealMatrix) matrix);
922         } catch (ClassCastException cce) {
923             super.setRowMatrix(row, matrix);
924         }
925     }
926 
927     /**
928      * Sets the entries in row number <code>row</code>
929      * as a row matrix.  Row indices start at 0.
930      *
931      * @param row the row to be set
932      * @param matrix row matrix (must have one row and the same number of columns
933      * as the instance)
934      * @throws MatrixIndexException if the specified row index is invalid
935      * @throws InvalidMatrixException if the matrix dimensions do not match one
936      * instance row
937      */
938     public void setRowMatrix(final int row, final BlockRealMatrix matrix)
939         throws MatrixIndexException, InvalidMatrixException {
940 
941         MatrixUtils.checkRowIndex(this, row);
942         final int nCols = getColumnDimension();
943         if ((matrix.getRowDimension() != 1) ||
944             (matrix.getColumnDimension() != nCols)) {
945             throw new InvalidMatrixException(
946                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
947                     matrix.getRowDimension(), matrix.getColumnDimension(),
948                     1, nCols);
949         }
950 
951         // perform copy block-wise, to ensure good cache behavior
952         final int iBlock = row / BLOCK_SIZE;
953         final int iRow   = row - iBlock * BLOCK_SIZE;
954         int mBlockIndex  = 0;
955         int mIndex       = 0;
956         double[] mBlock  = matrix.blocks[mBlockIndex];
957         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
958             final int jWidth     = blockWidth(jBlock);
959             final double[] block = blocks[iBlock * blockColumns + jBlock];
960             final int available  = mBlock.length - mIndex;
961             if (jWidth > available) {
962                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
963                 mBlock = matrix.blocks[++mBlockIndex];
964                 System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
965                 mIndex = jWidth - available;
966             } else {
967                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
968                 mIndex += jWidth;
969            }
970         }
971 
972     }
973 
974     /** {@inheritDoc} */
975     @Override
976     public BlockRealMatrix getColumnMatrix(final int column)
977         throws MatrixIndexException {
978 
979         MatrixUtils.checkColumnIndex(this, column);
980         final BlockRealMatrix out = new BlockRealMatrix(rows, 1);
981 
982         // perform copy block-wise, to ensure good cache behavior
983         final int jBlock  = column / BLOCK_SIZE;
984         final int jColumn = column - jBlock * BLOCK_SIZE;
985         final int jWidth  = blockWidth(jBlock);
986         int outBlockIndex = 0;
987         int outIndex      = 0;
988         double[] outBlock = out.blocks[outBlockIndex];
989         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
990             final int iHeight = blockHeight(iBlock);
991             final double[] block = blocks[iBlock * blockColumns + jBlock];
992             for (int i = 0; i < iHeight; ++i) {
993                 if (outIndex >= outBlock.length) {
994                     outBlock = out.blocks[++outBlockIndex];
995                     outIndex = 0;
996                 }
997                 outBlock[outIndex++] = block[i * jWidth + jColumn];
998             }
999         }
1000 
1001         return out;
1002 
1003     }
1004 
1005     /** {@inheritDoc} */
1006     @Override
1007     public void setColumnMatrix(final int column, final RealMatrix matrix)
1008         throws MatrixIndexException, InvalidMatrixException {
1009         try {
1010             setColumnMatrix(column, (BlockRealMatrix) matrix);
1011         } catch (ClassCastException cce) {
1012             super.setColumnMatrix(column, matrix);
1013         }
1014     }
1015 
1016     /**
1017      * Sets the entries in column number <code>column</code>
1018      * as a column matrix.  Column indices start at 0.
1019      *
1020      * @param column the column to be set
1021      * @param matrix column matrix (must have one column and the same number of rows
1022      * as the instance)
1023      * @throws MatrixIndexException if the specified column index is invalid
1024      * @throws InvalidMatrixException if the matrix dimensions do not match one
1025      * instance column
1026      */
1027     void setColumnMatrix(final int column, final BlockRealMatrix matrix)
1028         throws MatrixIndexException, InvalidMatrixException {
1029 
1030         MatrixUtils.checkColumnIndex(this, column);
1031         final int nRows = getRowDimension();
1032         if ((matrix.getRowDimension() != nRows) ||
1033             (matrix.getColumnDimension() != 1)) {
1034             throw new InvalidMatrixException(
1035                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1036                     matrix.getRowDimension(), matrix.getColumnDimension(),
1037                     nRows, 1);
1038         }
1039 
1040         // perform copy block-wise, to ensure good cache behavior
1041         final int jBlock  = column / BLOCK_SIZE;
1042         final int jColumn = column - jBlock * BLOCK_SIZE;
1043         final int jWidth  = blockWidth(jBlock);
1044         int mBlockIndex = 0;
1045         int mIndex      = 0;
1046         double[] mBlock = matrix.blocks[mBlockIndex];
1047         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1048             final int iHeight = blockHeight(iBlock);
1049             final double[] block = blocks[iBlock * blockColumns + jBlock];
1050             for (int i = 0; i < iHeight; ++i) {
1051                 if (mIndex >= mBlock.length) {
1052                     mBlock = matrix.blocks[++mBlockIndex];
1053                     mIndex = 0;
1054                 }
1055                 block[i * jWidth + jColumn] = mBlock[mIndex++];
1056             }
1057         }
1058 
1059     }
1060 
1061     /** {@inheritDoc} */
1062     @Override
1063     public RealVector getRowVector(final int row)
1064         throws MatrixIndexException {
1065 
1066         MatrixUtils.checkRowIndex(this, row);
1067         final double[] outData = new double[columns];
1068 
1069         // perform copy block-wise, to ensure good cache behavior
1070         final int iBlock  = row / BLOCK_SIZE;
1071         final int iRow    = row - iBlock * BLOCK_SIZE;
1072         int outIndex      = 0;
1073         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1074             final int jWidth     = blockWidth(jBlock);
1075             final double[] block = blocks[iBlock * blockColumns + jBlock];
1076             System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1077             outIndex += jWidth;
1078         }
1079 
1080         return new ArrayRealVector(outData, false);
1081 
1082     }
1083 
1084     /** {@inheritDoc} */
1085     @Override
1086     public void setRowVector(final int row, final RealVector vector)
1087         throws MatrixIndexException, InvalidMatrixException {
1088         try {
1089             setRow(row, ((ArrayRealVector) vector).getDataRef());
1090         } catch (ClassCastException cce) {
1091             super.setRowVector(row, vector);
1092         }
1093     }
1094 
1095     /** {@inheritDoc} */
1096     @Override
1097     public RealVector getColumnVector(final int column)
1098         throws MatrixIndexException {
1099 
1100         MatrixUtils.checkColumnIndex(this, column);
1101         final double[] outData = new double[rows];
1102 
1103         // perform copy block-wise, to ensure good cache behavior
1104         final int jBlock  = column / BLOCK_SIZE;
1105         final int jColumn = column - jBlock * BLOCK_SIZE;
1106         final int jWidth  = blockWidth(jBlock);
1107         int outIndex      = 0;
1108         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1109             final int iHeight = blockHeight(iBlock);
1110             final double[] block = blocks[iBlock * blockColumns + jBlock];
1111             for (int i = 0; i < iHeight; ++i) {
1112                 outData[outIndex++] = block[i * jWidth + jColumn];
1113             }
1114         }
1115 
1116         return new ArrayRealVector(outData, false);
1117 
1118     }
1119 
1120     /** {@inheritDoc} */
1121     @Override
1122     public void setColumnVector(final int column, final RealVector vector)
1123         throws MatrixIndexException, InvalidMatrixException {
1124         try {
1125             setColumn(column, ((ArrayRealVector) vector).getDataRef());
1126         } catch (ClassCastException cce) {
1127             super.setColumnVector(column, vector);
1128         }
1129     }
1130 
1131     /** {@inheritDoc} */
1132     @Override
1133     public double[] getRow(final int row)
1134         throws MatrixIndexException {
1135 
1136         MatrixUtils.checkRowIndex(this, row);
1137         final double[] out = new double[columns];
1138 
1139         // perform copy block-wise, to ensure good cache behavior
1140         final int iBlock  = row / BLOCK_SIZE;
1141         final int iRow    = row - iBlock * BLOCK_SIZE;
1142         int outIndex      = 0;
1143         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1144             final int jWidth     = blockWidth(jBlock);
1145             final double[] block = blocks[iBlock * blockColumns + jBlock];
1146             System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1147             outIndex += jWidth;
1148         }
1149 
1150         return out;
1151 
1152     }
1153 
1154     /** {@inheritDoc} */
1155     @Override
1156     public void setRow(final int row, final double[] array)
1157         throws MatrixIndexException, InvalidMatrixException {
1158 
1159         MatrixUtils.checkRowIndex(this, row);
1160         final int nCols = getColumnDimension();
1161         if (array.length != nCols) {
1162             throw new InvalidMatrixException(
1163                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1164                     1, array.length, 1, nCols);
1165         }
1166 
1167         // perform copy block-wise, to ensure good cache behavior
1168         final int iBlock  = row / BLOCK_SIZE;
1169         final int iRow    = row - iBlock * BLOCK_SIZE;
1170         int outIndex      = 0;
1171         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1172             final int jWidth     = blockWidth(jBlock);
1173             final double[] block = blocks[iBlock * blockColumns + jBlock];
1174             System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1175             outIndex += jWidth;
1176         }
1177 
1178     }
1179 
1180     /** {@inheritDoc} */
1181     @Override
1182     public double[] getColumn(final int column)
1183         throws MatrixIndexException {
1184 
1185         MatrixUtils.checkColumnIndex(this, column);
1186         final double[] out = new double[rows];
1187 
1188         // perform copy block-wise, to ensure good cache behavior
1189         final int jBlock  = column / BLOCK_SIZE;
1190         final int jColumn = column - jBlock * BLOCK_SIZE;
1191         final int jWidth  = blockWidth(jBlock);
1192         int outIndex      = 0;
1193         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1194             final int iHeight = blockHeight(iBlock);
1195             final double[] block = blocks[iBlock * blockColumns + jBlock];
1196             for (int i = 0; i < iHeight; ++i) {
1197                 out[outIndex++] = block[i * jWidth + jColumn];
1198             }
1199         }
1200 
1201         return out;
1202 
1203     }
1204 
1205     /** {@inheritDoc} */
1206     @Override
1207     public void setColumn(final int column, final double[] array)
1208         throws MatrixIndexException, InvalidMatrixException {
1209 
1210         MatrixUtils.checkColumnIndex(this, column);
1211         final int nRows = getRowDimension();
1212         if (array.length != nRows) {
1213             throw new InvalidMatrixException(
1214                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1215                     array.length, 1, nRows, 1);
1216         }
1217 
1218         // perform copy block-wise, to ensure good cache behavior
1219         final int jBlock  = column / BLOCK_SIZE;
1220         final int jColumn = column - jBlock * BLOCK_SIZE;
1221         final int jWidth  = blockWidth(jBlock);
1222         int outIndex      = 0;
1223         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1224             final int iHeight = blockHeight(iBlock);
1225             final double[] block = blocks[iBlock * blockColumns + jBlock];
1226             for (int i = 0; i < iHeight; ++i) {
1227                 block[i * jWidth + jColumn] = array[outIndex++];
1228             }
1229         }
1230 
1231     }
1232 
1233     /** {@inheritDoc} */
1234     @Override
1235     public double getEntry(final int row, final int column)
1236         throws MatrixIndexException {
1237         try {
1238             final int iBlock = row    / BLOCK_SIZE;
1239             final int jBlock = column / BLOCK_SIZE;
1240             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1241                                (column - jBlock * BLOCK_SIZE);
1242             return blocks[iBlock * blockColumns + jBlock][k];
1243         } catch (ArrayIndexOutOfBoundsException e) {
1244             throw new MatrixIndexException(
1245                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1246                     row, column, getRowDimension(), getColumnDimension());
1247         }
1248     }
1249 
1250     /** {@inheritDoc} */
1251     @Override
1252     public void setEntry(final int row, final int column, final double value)
1253         throws MatrixIndexException {
1254         try {
1255             final int iBlock = row    / BLOCK_SIZE;
1256             final int jBlock = column / BLOCK_SIZE;
1257             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1258                                (column - jBlock * BLOCK_SIZE);
1259             blocks[iBlock * blockColumns + jBlock][k] = value;
1260         } catch (ArrayIndexOutOfBoundsException e) {
1261             throw new MatrixIndexException(
1262                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1263                     row, column, getRowDimension(), getColumnDimension());
1264         }
1265     }
1266 
1267     /** {@inheritDoc} */
1268     @Override
1269     public void addToEntry(final int row, final int column, final double increment)
1270         throws MatrixIndexException {
1271         try {
1272             final int iBlock = row    / BLOCK_SIZE;
1273             final int jBlock = column / BLOCK_SIZE;
1274             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1275                                (column - jBlock * BLOCK_SIZE);
1276             blocks[iBlock * blockColumns + jBlock][k] += increment;
1277         } catch (ArrayIndexOutOfBoundsException e) {
1278             throw new MatrixIndexException(
1279                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1280                     row, column, getRowDimension(), getColumnDimension());
1281         }
1282     }
1283 
1284     /** {@inheritDoc} */
1285     @Override
1286     public void multiplyEntry(final int row, final int column, final double factor)
1287         throws MatrixIndexException {
1288         try {
1289             final int iBlock = row    / BLOCK_SIZE;
1290             final int jBlock = column / BLOCK_SIZE;
1291             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1292                                (column - jBlock * BLOCK_SIZE);
1293             blocks[iBlock * blockColumns + jBlock][k] *= factor;
1294         } catch (ArrayIndexOutOfBoundsException e) {
1295             throw new MatrixIndexException(
1296                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1297                     row, column, getRowDimension(), getColumnDimension());
1298         }
1299     }
1300 
1301     /** {@inheritDoc} */
1302     @Override
1303     public BlockRealMatrix transpose() {
1304 
1305         final int nRows = getRowDimension();
1306         final int nCols = getColumnDimension();
1307         final BlockRealMatrix out = new BlockRealMatrix(nCols, nRows);
1308 
1309         // perform transpose block-wise, to ensure good cache behavior
1310         int blockIndex = 0;
1311         for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1312             for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1313 
1314                 // transpose current block
1315                 final double[] outBlock = out.blocks[blockIndex];
1316                 final double[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1317                 final int      pStart   = iBlock * BLOCK_SIZE;
1318                 final int      pEnd     = Math.min(pStart + BLOCK_SIZE, columns);
1319                 final int      qStart   = jBlock * BLOCK_SIZE;
1320                 final int      qEnd     = Math.min(qStart + BLOCK_SIZE, rows);
1321                 int k = 0;
1322                 for (int p = pStart; p < pEnd; ++p) {
1323                     final int lInc = pEnd - pStart;
1324                     int l = p - pStart;
1325                     for (int q = qStart; q < qEnd; ++q) {
1326                         outBlock[k] = tBlock[l];
1327                         ++k;
1328                         l+= lInc;
1329                     }
1330                 }
1331 
1332                 // go to next block
1333                 ++blockIndex;
1334 
1335             }
1336         }
1337 
1338         return out;
1339 
1340     }
1341 
1342     /** {@inheritDoc} */
1343     @Override
1344     public int getRowDimension() {
1345         return rows;
1346     }
1347 
1348     /** {@inheritDoc} */
1349     @Override
1350     public int getColumnDimension() {
1351         return columns;
1352     }
1353 
1354     /** {@inheritDoc} */
1355     @Override
1356     public double[] operate(final double[] v)
1357         throws IllegalArgumentException {
1358 
1359         if (v.length != columns) {
1360             throw MathRuntimeException.createIllegalArgumentException(
1361                     "vector length mismatch: got {0} but expected {1}",
1362                     v.length, columns);
1363         }
1364         final double[] out = new double[rows];
1365 
1366         // perform multiplication block-wise, to ensure good cache behavior
1367         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1368             final int pStart = iBlock * BLOCK_SIZE;
1369             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1370             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1371                 final double[] block  = blocks[iBlock * blockColumns + jBlock];
1372                 final int      qStart = jBlock * BLOCK_SIZE;
1373                 final int      qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1374                 int k = 0;
1375                 for (int p = pStart; p < pEnd; ++p) {
1376                     double sum = 0;
1377                     int q = qStart;
1378                     while (q < qEnd - 3) {
1379                         sum += block[k]     * v[q]     +
1380                                block[k + 1] * v[q + 1] +
1381                                block[k + 2] * v[q + 2] +
1382                                block[k + 3] * v[q + 3];
1383                         k += 4;
1384                         q += 4;
1385                     }
1386                     while (q < qEnd) {
1387                         sum += block[k++] * v[q++];
1388                     }
1389                     out[p] += sum;
1390                 }
1391             }
1392         }
1393 
1394         return out;
1395 
1396     }
1397 
1398     /** {@inheritDoc} */
1399     @Override
1400     public double[] preMultiply(final double[] v)
1401         throws IllegalArgumentException {
1402 
1403         if (v.length != rows) {
1404             throw MathRuntimeException.createIllegalArgumentException(
1405                     "vector length mismatch: got {0} but expected {1}",
1406                     v.length, rows);
1407         }
1408         final double[] out = new double[columns];
1409 
1410         // perform multiplication block-wise, to ensure good cache behavior
1411         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1412             final int jWidth  = blockWidth(jBlock);
1413             final int jWidth2 = jWidth  + jWidth;
1414             final int jWidth3 = jWidth2 + jWidth;
1415             final int jWidth4 = jWidth3 + jWidth;
1416             final int qStart = jBlock * BLOCK_SIZE;
1417             final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1418             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1419                 final double[] block  = blocks[iBlock * blockColumns + jBlock];
1420                 final int      pStart = iBlock * BLOCK_SIZE;
1421                 final int      pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1422                 for (int q = qStart; q < qEnd; ++q) {
1423                     int k = q - qStart;
1424                     double sum = 0;
1425                     int p = pStart;
1426                     while (p < pEnd - 3) {
1427                         sum += block[k]           * v[p]     +
1428                                block[k + jWidth]  * v[p + 1] +
1429                                block[k + jWidth2] * v[p + 2] +
1430                                block[k + jWidth3] * v[p + 3];
1431                         k += jWidth4;
1432                         p += 4;
1433                     }
1434                     while (p < pEnd) {
1435                         sum += block[k] * v[p++];
1436                         k += jWidth;
1437                     }
1438                     out[q] += sum;
1439                 }
1440             }
1441         }
1442 
1443         return out;
1444 
1445     }
1446 
1447     /** {@inheritDoc} */
1448     @Override
1449     public double walkInRowOrder(final RealMatrixChangingVisitor visitor)
1450         throws MatrixVisitorException {
1451         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1452         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1453             final int pStart = iBlock * BLOCK_SIZE;
1454             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1455             for (int p = pStart; p < pEnd; ++p) {
1456                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1457                     final int jWidth = blockWidth(jBlock);
1458                     final int qStart = jBlock * BLOCK_SIZE;
1459                     final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1460                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1461                     int k = (p - pStart) * jWidth;
1462                     for (int q = qStart; q < qEnd; ++q) {
1463                         block[k] = visitor.visit(p, q, block[k]);
1464                         ++k;
1465                     }
1466                 }
1467              }
1468         }
1469         return visitor.end();
1470     }
1471 
1472     /** {@inheritDoc} */
1473     @Override
1474     public double walkInRowOrder(final RealMatrixPreservingVisitor visitor)
1475         throws MatrixVisitorException {
1476         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1477         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1478             final int pStart = iBlock * BLOCK_SIZE;
1479             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1480             for (int p = pStart; p < pEnd; ++p) {
1481                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1482                     final int jWidth = blockWidth(jBlock);
1483                     final int qStart = jBlock * BLOCK_SIZE;
1484                     final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1485                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1486                     int k = (p - pStart) * jWidth;
1487                     for (int q = qStart; q < qEnd; ++q) {
1488                         visitor.visit(p, q, block[k]);
1489                         ++k;
1490                     }
1491                 }
1492              }
1493         }
1494         return visitor.end();
1495     }
1496 
1497     /** {@inheritDoc} */
1498     @Override
1499     public double walkInRowOrder(final RealMatrixChangingVisitor visitor,
1500                                  final int startRow, final int endRow,
1501                                  final int startColumn, final int endColumn)
1502         throws MatrixIndexException, MatrixVisitorException {
1503         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1504         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1505         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1506             final int p0     = iBlock * BLOCK_SIZE;
1507             final int pStart = Math.max(startRow, p0);
1508             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1509             for (int p = pStart; p < pEnd; ++p) {
1510                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1511                     final int jWidth = blockWidth(jBlock);
1512                     final int q0     = jBlock * BLOCK_SIZE;
1513                     final int qStart = Math.max(startColumn, q0);
1514                     final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1515                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1516                     int k = (p - p0) * jWidth + qStart - q0;
1517                     for (int q = qStart; q < qEnd; ++q) {
1518                         block[k] = visitor.visit(p, q, block[k]);
1519                         ++k;
1520                     }
1521                 }
1522              }
1523         }
1524         return visitor.end();
1525     }
1526 
1527     /** {@inheritDoc} */
1528     @Override
1529     public double walkInRowOrder(final RealMatrixPreservingVisitor visitor,
1530                                  final int startRow, final int endRow,
1531                                  final int startColumn, final int endColumn)
1532         throws MatrixIndexException, MatrixVisitorException {
1533         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1534         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1535         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1536             final int p0     = iBlock * BLOCK_SIZE;
1537             final int pStart = Math.max(startRow, p0);
1538             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1539             for (int p = pStart; p < pEnd; ++p) {
1540                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1541                     final int jWidth = blockWidth(jBlock);
1542                     final int q0     = jBlock * BLOCK_SIZE;
1543                     final int qStart = Math.max(startColumn, q0);
1544                     final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1545                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1546                     int k = (p - p0) * jWidth + qStart - q0;
1547                     for (int q = qStart; q < qEnd; ++q) {
1548                         visitor.visit(p, q, block[k]);
1549                         ++k;
1550                     }
1551                 }
1552              }
1553         }
1554         return visitor.end();
1555     }
1556 
1557     /** {@inheritDoc} */
1558     @Override
1559     public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor)
1560         throws MatrixVisitorException {
1561         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1562         int blockIndex = 0;
1563         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1564             final int pStart = iBlock * BLOCK_SIZE;
1565             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1566             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1567                 final int qStart = jBlock * BLOCK_SIZE;
1568                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1569                 final double[] block = blocks[blockIndex];
1570                 int k = 0;
1571                 for (int p = pStart; p < pEnd; ++p) {
1572                     for (int q = qStart; q < qEnd; ++q) {
1573                         block[k] = visitor.visit(p, q, block[k]);
1574                         ++k;
1575                     }
1576                 }
1577                 ++blockIndex;
1578             }
1579         }
1580         return visitor.end();
1581     }
1582 
1583     /** {@inheritDoc} */
1584     @Override
1585     public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor)
1586         throws MatrixVisitorException {
1587         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1588         int blockIndex = 0;
1589         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1590             final int pStart = iBlock * BLOCK_SIZE;
1591             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1592             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1593                 final int qStart = jBlock * BLOCK_SIZE;
1594                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1595                 final double[] block = blocks[blockIndex];
1596                 int k = 0;
1597                 for (int p = pStart; p < pEnd; ++p) {
1598                     for (int q = qStart; q < qEnd; ++q) {
1599                         visitor.visit(p, q, block[k]);
1600                         ++k;
1601                     }
1602                 }
1603                 ++blockIndex;
1604             }
1605         }
1606         return visitor.end();
1607     }
1608 
1609     /** {@inheritDoc} */
1610     @Override
1611     public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor,
1612                                        final int startRow, final int endRow,
1613                                        final int startColumn, final int endColumn)
1614         throws MatrixIndexException, MatrixVisitorException {
1615         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1616         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1617         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1618             final int p0     = iBlock * BLOCK_SIZE;
1619             final int pStart = Math.max(startRow, p0);
1620             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1621             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1622                 final int jWidth = blockWidth(jBlock);
1623                 final int q0     = jBlock * BLOCK_SIZE;
1624                 final int qStart = Math.max(startColumn, q0);
1625                 final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1626                 final double[] block = blocks[iBlock * blockColumns + jBlock];
1627                 for (int p = pStart; p < pEnd; ++p) {
1628                     int k = (p - p0) * jWidth + qStart - q0;
1629                     for (int q = qStart; q < qEnd; ++q) {
1630                         block[k] = visitor.visit(p, q, block[k]);
1631                         ++k;
1632                     }
1633                 }
1634             }
1635         }
1636         return visitor.end();
1637     }
1638 
1639     /** {@inheritDoc} */
1640     @Override
1641     public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor,
1642                                        final int startRow, final int endRow,
1643                                        final int startColumn, final int endColumn)
1644         throws MatrixIndexException, MatrixVisitorException {
1645         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1646         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1647         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1648             final int p0     = iBlock * BLOCK_SIZE;
1649             final int pStart = Math.max(startRow, p0);
1650             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1651             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1652                 final int jWidth = blockWidth(jBlock);
1653                 final int q0     = jBlock * BLOCK_SIZE;
1654                 final int qStart = Math.max(startColumn, q0);
1655                 final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1656                 final double[] block = blocks[iBlock * blockColumns + jBlock];
1657                 for (int p = pStart; p < pEnd; ++p) {
1658                     int k = (p - p0) * jWidth + qStart - q0;
1659                     for (int q = qStart; q < qEnd; ++q) {
1660                         visitor.visit(p, q, block[k]);
1661                         ++k;
1662                     }
1663                 }
1664             }
1665         }
1666         return visitor.end();
1667     }
1668 
1669     /**
1670      * Get the height of a block.
1671      * @param blockRow row index (in block sense) of the block
1672      * @return height (number of rows) of the block
1673      */
1674     private int blockHeight(final int blockRow) {
1675         return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1676     }
1677 
1678     /**
1679      * Get the width of a block.
1680      * @param blockColumn column index (in block sense) of the block
1681      * @return width (number of columns) of the block
1682      */
1683     private int blockWidth(final int blockColumn) {
1684         return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1685     }
1686 
1687 }