View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math3.linear;
19  
20  import java.io.Serializable;
21  
22  import org.apache.commons.math3.Field;
23  import org.apache.commons.math3.FieldElement;
24  import org.apache.commons.math3.exception.NoDataException;
25  import org.apache.commons.math3.exception.DimensionMismatchException;
26  import org.apache.commons.math3.exception.NotStrictlyPositiveException;
27  import org.apache.commons.math3.exception.NullArgumentException;
28  import org.apache.commons.math3.exception.NumberIsTooSmallException;
29  import org.apache.commons.math3.exception.OutOfRangeException;
30  import org.apache.commons.math3.exception.util.LocalizedFormats;
31  import org.apache.commons.math3.util.FastMath;
32  import org.apache.commons.math3.util.MathArrays;
33  import org.apache.commons.math3.util.MathUtils;
34  
35  /**
36   * Cache-friendly implementation of FieldMatrix using a flat arrays to store
37   * square blocks of the matrix.
38   * <p>
39   * This implementation is specially designed to be cache-friendly. Square blocks are
40   * stored as small arrays and allow efficient traversal of data both in row major direction
41   * and columns major direction, one block at a time. This greatly increases performances
42   * for algorithms that use crossed directions loops like multiplication or transposition.
43   * </p>
44   * <p>
45   * The size of square blocks is a static parameter. It may be tuned according to the cache
46   * size of the target computer processor. As a rule of thumbs, it should be the largest
47   * value that allows three blocks to be simultaneously cached (this is necessary for example
48   * for matrix multiplication). The default value is to use 36x36 blocks.
49   * </p>
50   * <p>
51   * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
52   * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
53   * blocks are flattened in row major order in single dimension arrays which are therefore
54   * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
55   * organized in row major order.
56   * </p>
57   * <p>
58   * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
59   * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
60   * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
61   * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
62   * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
63   * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
64   * rectangle.
65   * </p>
66   * <p>
67   * The layout complexity overhead versus simple mapping of matrices to java
68   * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
69   * to up to 3-fold improvements for matrices of moderate to large size.
70   * </p>
71   * @param <T> the type of the field elements
72   * @since 2.0
73   */
74  public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
75      /** Block size. */
76      public static final int BLOCK_SIZE = 36;
77      /** Serializable version identifier. */
78      private static final long serialVersionUID = -4602336630143123183L;
79      /** Blocks of matrix entries. */
80      private final T blocks[][];
81      /** Number of rows of the matrix. */
82      private final int rows;
83      /** Number of columns of the matrix. */
84      private final int columns;
85      /** Number of block rows of the matrix. */
86      private final int blockRows;
87      /** Number of block columns of the matrix. */
88      private final int blockColumns;
89  
90      /**
91       * Create a new matrix with the supplied row and column dimensions.
92       *
93       * @param field Field to which the elements belong.
94       * @param rows Number of rows in the new matrix.
95       * @param columns Number of columns in the new matrix.
96       * @throws NotStrictlyPositiveException if row or column dimension is not
97       * positive.
98       */
99      public BlockFieldMatrix(final Field<T> field, final int rows,
100                             final int columns)
101         throws NotStrictlyPositiveException {
102         super(field, rows, columns);
103         this.rows    = rows;
104         this.columns = columns;
105 
106         // number of blocks
107         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
108         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
109 
110         // allocate storage blocks, taking care of smaller ones at right and bottom
111         blocks = createBlocksLayout(field, rows, columns);
112     }
113 
114     /**
115      * Create a new dense matrix copying entries from raw layout data.
116      * <p>The input array <em>must</em> already be in raw layout.</p>
117      * <p>Calling this constructor is equivalent to call:
118      * <pre>matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
119      *                                   toBlocksLayout(rawData), false);</pre>
120      * </p>
121      *
122      * @param rawData Data for the new matrix, in raw layout.
123      * @throws DimensionMismatchException if the {@code blockData} shape is
124      * inconsistent with block layout.
125      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
126      */
127     public BlockFieldMatrix(final T[][] rawData)
128         throws DimensionMismatchException {
129         this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
130     }
131 
132     /**
133      * Create a new dense matrix copying entries from block layout data.
134      * <p>The input array <em>must</em> already be in blocks layout.</p>
135      * @param rows  the number of rows in the new matrix
136      * @param columns  the number of columns in the new matrix
137      * @param blockData data for new matrix
138      * @param copyArray if true, the input array will be copied, otherwise
139      * it will be referenced
140      *
141      * @throws DimensionMismatchException if the {@code blockData} shape is
142      * inconsistent with block layout.
143      * @throws NotStrictlyPositiveException if row or column dimension is not
144      * positive.
145      * @see #createBlocksLayout(Field, int, int)
146      * @see #toBlocksLayout(FieldElement[][])
147      * @see #BlockFieldMatrix(FieldElement[][])
148      */
149     public BlockFieldMatrix(final int rows, final int columns,
150                             final T[][] blockData, final boolean copyArray)
151         throws DimensionMismatchException, NotStrictlyPositiveException {
152         super(extractField(blockData), rows, columns);
153         this.rows    = rows;
154         this.columns = columns;
155 
156         // number of blocks
157         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
158         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
159 
160         if (copyArray) {
161             // allocate storage blocks, taking care of smaller ones at right and bottom
162             blocks = MathArrays.buildArray(getField(), blockRows * blockColumns, -1);
163         } else {
164             // reference existing array
165             blocks = blockData;
166         }
167 
168         int index = 0;
169         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
170             final int iHeight = blockHeight(iBlock);
171             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
172                 if (blockData[index].length != iHeight * blockWidth(jBlock)) {
173                     throw new DimensionMismatchException(blockData[index].length,
174                                                          iHeight * blockWidth(jBlock));
175                 }
176                 if (copyArray) {
177                     blocks[index] = blockData[index].clone();
178                 }
179             }
180         }
181     }
182 
183     /**
184      * Convert a data array from raw layout to blocks layout.
185      * <p>
186      * Raw layout is the straightforward layout where element at row i and
187      * column j is in array element <code>rawData[i][j]</code>. Blocks layout
188      * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
189      * is split in square blocks (except at right and bottom side where blocks may
190      * be rectangular to fit matrix size) and each block is stored in a flattened
191      * one-dimensional array.
192      * </p>
193      * <p>
194      * This method creates an array in blocks layout from an input array in raw layout.
195      * It can be used to provide the array argument of the {@link
196      * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
197      * constructor.
198      * </p>
199      * @param <T> Type of the field elements.
200      * @param rawData Data array in raw layout.
201      * @return a new data array containing the same entries but in blocks layout
202      * @throws DimensionMismatchException if {@code rawData} is not rectangular
203      *  (not all rows have the same length).
204      * @see #createBlocksLayout(Field, int, int)
205      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
206      */
207     public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
208         throws DimensionMismatchException {
209 
210         final int rows         = rawData.length;
211         final int columns      = rawData[0].length;
212         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
213         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
214 
215         // safety checks
216         for (int i = 0; i < rawData.length; ++i) {
217             final int length = rawData[i].length;
218             if (length != columns) {
219                 throw new DimensionMismatchException(columns, length);
220             }
221         }
222 
223         // convert array
224         final Field<T> field = extractField(rawData);
225         final T[][] blocks = MathArrays.buildArray(field, blockRows * blockColumns, -1);
226         int blockIndex = 0;
227         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
228             final int pStart  = iBlock * BLOCK_SIZE;
229             final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
230             final int iHeight = pEnd - pStart;
231             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
232                 final int qStart = jBlock * BLOCK_SIZE;
233                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
234                 final int jWidth = qEnd - qStart;
235 
236                 // allocate new block
237                 final T[] block = MathArrays.buildArray(field, iHeight * jWidth);
238                 blocks[blockIndex] = block;
239 
240                 // copy data
241                 int index = 0;
242                 for (int p = pStart; p < pEnd; ++p) {
243                     System.arraycopy(rawData[p], qStart, block, index, jWidth);
244                     index += jWidth;
245                 }
246 
247                 ++blockIndex;
248             }
249         }
250 
251         return blocks;
252     }
253 
254     /**
255      * Create a data array in blocks layout.
256      * <p>
257      * This method can be used to create the array argument of the {@link
258      * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
259      * constructor.
260      * </p>
261      * @param <T> Type of the field elements.
262      * @param field Field to which the elements belong.
263      * @param rows Number of rows in the new matrix.
264      * @param columns Number of columns in the new matrix.
265      * @return a new data array in blocks layout.
266      * @see #toBlocksLayout(FieldElement[][])
267      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
268      */
269     public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
270                                                                        final int rows, final int columns) {
271         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
272         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
273 
274         final T[][] blocks = MathArrays.buildArray(field, blockRows * blockColumns, -1);
275         int blockIndex = 0;
276         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
277             final int pStart  = iBlock * BLOCK_SIZE;
278             final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
279             final int iHeight = pEnd - pStart;
280             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
281                 final int qStart = jBlock * BLOCK_SIZE;
282                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
283                 final int jWidth = qEnd - qStart;
284                 blocks[blockIndex] = MathArrays.buildArray(field, iHeight * jWidth);
285                 ++blockIndex;
286             }
287         }
288 
289         return blocks;
290     }
291 
292     /** {@inheritDoc} */
293     @Override
294     public FieldMatrix<T> createMatrix(final int rowDimension,
295                                        final int columnDimension)
296         throws NotStrictlyPositiveException {
297         return new BlockFieldMatrix<T>(getField(), rowDimension,
298                                        columnDimension);
299     }
300 
301     /** {@inheritDoc} */
302     @Override
303     public FieldMatrix<T> copy() {
304 
305         // create an empty matrix
306         BlockFieldMatrix<T> copied = new BlockFieldMatrix<T>(getField(), rows, columns);
307 
308         // copy the blocks
309         for (int i = 0; i < blocks.length; ++i) {
310             System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
311         }
312 
313         return copied;
314     }
315 
316     /** {@inheritDoc} */
317     @Override
318     public FieldMatrix<T> add(final FieldMatrix<T> m)
319         throws MatrixDimensionMismatchException {
320         try {
321             return add((BlockFieldMatrix<T>) m);
322         } catch (ClassCastException cce) {
323 
324             // safety check
325             checkAdditionCompatible(m);
326 
327             final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
328 
329             // perform addition block-wise, to ensure good cache behavior
330             int blockIndex = 0;
331             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
332                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
333 
334                     // perform addition on the current block
335                     final T[] outBlock = out.blocks[blockIndex];
336                     final T[] tBlock   = blocks[blockIndex];
337                     final int      pStart   = iBlock * BLOCK_SIZE;
338                     final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
339                     final int      qStart   = jBlock * BLOCK_SIZE;
340                     final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
341                     int k = 0;
342                     for (int p = pStart; p < pEnd; ++p) {
343                         for (int q = qStart; q < qEnd; ++q) {
344                             outBlock[k] = tBlock[k].add(m.getEntry(p, q));
345                             ++k;
346                         }
347                     }
348 
349                     // go to next block
350                     ++blockIndex;
351 
352                 }
353             }
354 
355             return out;
356         }
357     }
358 
359     /**
360      * Compute the sum of {@code this} and {@code m}.
361      *
362      * @param m matrix to be added
363      * @return {@code this + m}
364      * @throws MatrixDimensionMismatchException if {@code m} is not the same
365      * size as {@code this}
366      */
367     public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
368         throws MatrixDimensionMismatchException {
369 
370         // safety check
371         checkAdditionCompatible(m);
372 
373         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
374 
375         // perform addition block-wise, to ensure good cache behavior
376         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
377             final T[] outBlock = out.blocks[blockIndex];
378             final T[] tBlock   = blocks[blockIndex];
379             final T[] mBlock   = m.blocks[blockIndex];
380             for (int k = 0; k < outBlock.length; ++k) {
381                 outBlock[k] = tBlock[k].add(mBlock[k]);
382             }
383         }
384 
385         return out;
386     }
387 
388     /** {@inheritDoc} */
389     @Override
390     public FieldMatrix<T> subtract(final FieldMatrix<T> m)
391         throws MatrixDimensionMismatchException {
392         try {
393             return subtract((BlockFieldMatrix<T>) m);
394         } catch (ClassCastException cce) {
395 
396             // safety check
397             checkSubtractionCompatible(m);
398 
399             final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
400 
401             // perform subtraction block-wise, to ensure good cache behavior
402             int blockIndex = 0;
403             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
404                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
405 
406                     // perform subtraction on the current block
407                     final T[] outBlock = out.blocks[blockIndex];
408                     final T[] tBlock   = blocks[blockIndex];
409                     final int      pStart   = iBlock * BLOCK_SIZE;
410                     final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
411                     final int      qStart   = jBlock * BLOCK_SIZE;
412                     final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
413                     int k = 0;
414                     for (int p = pStart; p < pEnd; ++p) {
415                         for (int q = qStart; q < qEnd; ++q) {
416                             outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
417                             ++k;
418                         }
419                     }
420 
421                     // go to next block
422                     ++blockIndex;
423 
424                 }
425             }
426 
427             return out;
428         }
429     }
430 
431     /**
432      * Compute {@code this - m}.
433      *
434      * @param m matrix to be subtracted
435      * @return {@code this - m}
436      * @throws MatrixDimensionMismatchException if {@code m} is not the same
437      * size as {@code this}
438      */
439     public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m) throws MatrixDimensionMismatchException {
440         // safety check
441         checkSubtractionCompatible(m);
442 
443         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
444 
445         // perform subtraction block-wise, to ensure good cache behavior
446         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
447             final T[] outBlock = out.blocks[blockIndex];
448             final T[] tBlock   = blocks[blockIndex];
449             final T[] mBlock   = m.blocks[blockIndex];
450             for (int k = 0; k < outBlock.length; ++k) {
451                 outBlock[k] = tBlock[k].subtract(mBlock[k]);
452             }
453         }
454 
455         return out;
456     }
457 
458     /** {@inheritDoc} */
459     @Override
460     public FieldMatrix<T> scalarAdd(final T d) {
461         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
462 
463         // perform subtraction block-wise, to ensure good cache behavior
464         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
465             final T[] outBlock = out.blocks[blockIndex];
466             final T[] tBlock   = blocks[blockIndex];
467             for (int k = 0; k < outBlock.length; ++k) {
468                 outBlock[k] = tBlock[k].add(d);
469             }
470         }
471 
472         return out;
473     }
474 
475     /** {@inheritDoc} */
476     @Override
477     public FieldMatrix<T> scalarMultiply(final T d) {
478 
479         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
480 
481         // perform subtraction block-wise, to ensure good cache behavior
482         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
483             final T[] outBlock = out.blocks[blockIndex];
484             final T[] tBlock   = blocks[blockIndex];
485             for (int k = 0; k < outBlock.length; ++k) {
486                 outBlock[k] = tBlock[k].multiply(d);
487             }
488         }
489 
490         return out;
491     }
492 
493     /** {@inheritDoc} */
494     @Override
495     public FieldMatrix<T> multiply(final FieldMatrix<T> m)
496         throws DimensionMismatchException {
497         try {
498             return multiply((BlockFieldMatrix<T>) m);
499         } catch (ClassCastException cce) {
500 
501             // safety check
502             checkMultiplicationCompatible(m);
503 
504             final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.getColumnDimension());
505             final T zero = getField().getZero();
506 
507             // perform multiplication block-wise, to ensure good cache behavior
508             int blockIndex = 0;
509             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
510 
511                 final int pStart = iBlock * BLOCK_SIZE;
512                 final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
513 
514                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
515 
516                     final int qStart = jBlock * BLOCK_SIZE;
517                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
518 
519                     // select current block
520                     final T[] outBlock = out.blocks[blockIndex];
521 
522                     // perform multiplication on current block
523                     for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
524                         final int kWidth      = blockWidth(kBlock);
525                         final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
526                         final int rStart      = kBlock * BLOCK_SIZE;
527                         int k = 0;
528                         for (int p = pStart; p < pEnd; ++p) {
529                             final int lStart = (p - pStart) * kWidth;
530                             final int lEnd   = lStart + kWidth;
531                             for (int q = qStart; q < qEnd; ++q) {
532                                 T sum = zero;
533                                 int r = rStart;
534                                 for (int l = lStart; l < lEnd; ++l) {
535                                     sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
536                                     ++r;
537                                 }
538                                 outBlock[k] = outBlock[k].add(sum);
539                                 ++k;
540                             }
541                         }
542                     }
543 
544                     // go to next block
545                     ++blockIndex;
546 
547                 }
548             }
549 
550             return out;
551         }
552     }
553 
554     /**
555      * Returns the result of postmultiplying {@code this} by {@code m}.
556      *
557      * @param m matrix to postmultiply by
558      * @return {@code this * m}
559      * @throws DimensionMismatchException if the matrices are not compatible.
560      */
561     public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m)
562         throws DimensionMismatchException {
563 
564         // safety check
565         checkMultiplicationCompatible(m);
566 
567         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.columns);
568         final T zero = getField().getZero();
569 
570         // perform multiplication block-wise, to ensure good cache behavior
571         int blockIndex = 0;
572         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
573 
574             final int pStart = iBlock * BLOCK_SIZE;
575             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
576 
577             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
578                 final int jWidth = out.blockWidth(jBlock);
579                 final int jWidth2 = jWidth  + jWidth;
580                 final int jWidth3 = jWidth2 + jWidth;
581                 final int jWidth4 = jWidth3 + jWidth;
582 
583                 // select current block
584                 final T[] outBlock = out.blocks[blockIndex];
585 
586                 // perform multiplication on current block
587                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
588                     final int kWidth = blockWidth(kBlock);
589                     final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
590                     final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
591                     int k = 0;
592                     for (int p = pStart; p < pEnd; ++p) {
593                         final int lStart = (p - pStart) * kWidth;
594                         final int lEnd   = lStart + kWidth;
595                         for (int nStart = 0; nStart < jWidth; ++nStart) {
596                             T sum = zero;
597                             int l = lStart;
598                             int n = nStart;
599                             while (l < lEnd - 3) {
600                                 sum = sum.
601                                       add(tBlock[l].multiply(mBlock[n])).
602                                       add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
603                                       add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
604                                       add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
605                                 l += 4;
606                                 n += jWidth4;
607                             }
608                             while (l < lEnd) {
609                                 sum = sum.add(tBlock[l++].multiply(mBlock[n]));
610                                 n += jWidth;
611                             }
612                             outBlock[k] = outBlock[k].add(sum);
613                             ++k;
614                         }
615                     }
616                 }
617 
618                 // go to next block
619                 ++blockIndex;
620             }
621         }
622 
623         return out;
624     }
625 
626     /** {@inheritDoc} */
627     @Override
628     public T[][] getData() {
629 
630         final T[][] data = MathArrays.buildArray(getField(), getRowDimension(), getColumnDimension());
631         final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
632 
633         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
634             final int pStart = iBlock * BLOCK_SIZE;
635             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
636             int regularPos   = 0;
637             int lastPos      = 0;
638             for (int p = pStart; p < pEnd; ++p) {
639                 final T[] dataP = data[p];
640                 int blockIndex = iBlock * blockColumns;
641                 int dataPos    = 0;
642                 for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
643                     System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
644                     dataPos += BLOCK_SIZE;
645                 }
646                 System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
647                 regularPos += BLOCK_SIZE;
648                 lastPos    += lastColumns;
649             }
650         }
651 
652         return data;
653     }
654 
655     /** {@inheritDoc} */
656     @Override
657     public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
658                                        final int startColumn,
659                                        final int endColumn)
660         throws OutOfRangeException, NumberIsTooSmallException {
661         // safety checks
662         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
663 
664         // create the output matrix
665         final BlockFieldMatrix<T> out =
666             new BlockFieldMatrix<T>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
667 
668         // compute blocks shifts
669         final int blockStartRow    = startRow    / BLOCK_SIZE;
670         final int rowsShift        = startRow    % BLOCK_SIZE;
671         final int blockStartColumn = startColumn / BLOCK_SIZE;
672         final int columnsShift     = startColumn % BLOCK_SIZE;
673 
674         // perform extraction block-wise, to ensure good cache behavior
675         int pBlock = blockStartRow;
676         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
677             final int iHeight = out.blockHeight(iBlock);
678             int qBlock = blockStartColumn;
679             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
680                 final int jWidth = out.blockWidth(jBlock);
681 
682                 // handle one block of the output matrix
683                 final int      outIndex = iBlock * out.blockColumns + jBlock;
684                 final T[] outBlock = out.blocks[outIndex];
685                 final int      index    = pBlock * blockColumns + qBlock;
686                 final int      width    = blockWidth(qBlock);
687 
688                 final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
689                 final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
690                 if (heightExcess > 0) {
691                     // the submatrix block spans on two blocks rows from the original matrix
692                     if (widthExcess > 0) {
693                         // the submatrix block spans on two blocks columns from the original matrix
694                         final int width2 = blockWidth(qBlock + 1);
695                         copyBlockPart(blocks[index], width,
696                                       rowsShift, BLOCK_SIZE,
697                                       columnsShift, BLOCK_SIZE,
698                                       outBlock, jWidth, 0, 0);
699                         copyBlockPart(blocks[index + 1], width2,
700                                       rowsShift, BLOCK_SIZE,
701                                       0, widthExcess,
702                                       outBlock, jWidth, 0, jWidth - widthExcess);
703                         copyBlockPart(blocks[index + blockColumns], width,
704                                       0, heightExcess,
705                                       columnsShift, BLOCK_SIZE,
706                                       outBlock, jWidth, iHeight - heightExcess, 0);
707                         copyBlockPart(blocks[index + blockColumns + 1], width2,
708                                       0, heightExcess,
709                                       0, widthExcess,
710                                       outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
711                     } else {
712                         // the submatrix block spans on one block column from the original matrix
713                         copyBlockPart(blocks[index], width,
714                                       rowsShift, BLOCK_SIZE,
715                                       columnsShift, jWidth + columnsShift,
716                                       outBlock, jWidth, 0, 0);
717                         copyBlockPart(blocks[index + blockColumns], width,
718                                       0, heightExcess,
719                                       columnsShift, jWidth + columnsShift,
720                                       outBlock, jWidth, iHeight - heightExcess, 0);
721                     }
722                 } else {
723                     // the submatrix block spans on one block row from the original matrix
724                     if (widthExcess > 0) {
725                         // the submatrix block spans on two blocks columns from the original matrix
726                         final int width2 = blockWidth(qBlock + 1);
727                         copyBlockPart(blocks[index], width,
728                                       rowsShift, iHeight + rowsShift,
729                                       columnsShift, BLOCK_SIZE,
730                                       outBlock, jWidth, 0, 0);
731                         copyBlockPart(blocks[index + 1], width2,
732                                       rowsShift, iHeight + rowsShift,
733                                       0, widthExcess,
734                                       outBlock, jWidth, 0, jWidth - widthExcess);
735                     } else {
736                         // the submatrix block spans on one block column from the original matrix
737                         copyBlockPart(blocks[index], width,
738                                       rowsShift, iHeight + rowsShift,
739                                       columnsShift, jWidth + columnsShift,
740                                       outBlock, jWidth, 0, 0);
741                     }
742                }
743                 ++qBlock;
744             }
745             ++pBlock;
746         }
747 
748         return out;
749     }
750 
751     /**
752      * Copy a part of a block into another one
753      * <p>This method can be called only when the specified part fits in both
754      * blocks, no verification is done here.</p>
755      * @param srcBlock source block
756      * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
757      * @param srcStartRow start row in the source block
758      * @param srcEndRow end row (exclusive) in the source block
759      * @param srcStartColumn start column in the source block
760      * @param srcEndColumn end column (exclusive) in the source block
761      * @param dstBlock destination block
762      * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
763      * @param dstStartRow start row in the destination block
764      * @param dstStartColumn start column in the destination block
765      */
766     private void copyBlockPart(final T[] srcBlock, final int srcWidth,
767                                final int srcStartRow, final int srcEndRow,
768                                final int srcStartColumn, final int srcEndColumn,
769                                final T[] dstBlock, final int dstWidth,
770                                final int dstStartRow, final int dstStartColumn) {
771         final int length = srcEndColumn - srcStartColumn;
772         int srcPos = srcStartRow * srcWidth + srcStartColumn;
773         int dstPos = dstStartRow * dstWidth + dstStartColumn;
774         for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
775             System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
776             srcPos += srcWidth;
777             dstPos += dstWidth;
778         }
779     }
780 
781     /** {@inheritDoc} */
782     @Override
783     public void setSubMatrix(final T[][] subMatrix, final int row,
784                              final int column)
785         throws DimensionMismatchException, OutOfRangeException,
786         NoDataException, NullArgumentException {
787         // safety checks
788         MathUtils.checkNotNull(subMatrix);
789         final int refLength = subMatrix[0].length;
790         if (refLength == 0) {
791             throw new NoDataException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
792         }
793         final int endRow    = row + subMatrix.length - 1;
794         final int endColumn = column + refLength - 1;
795         checkSubMatrixIndex(row, endRow, column, endColumn);
796         for (final T[] subRow : subMatrix) {
797             if (subRow.length != refLength) {
798                 throw new DimensionMismatchException(refLength, subRow.length);
799             }
800         }
801 
802         // compute blocks bounds
803         final int blockStartRow    = row / BLOCK_SIZE;
804         final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
805         final int blockStartColumn = column / BLOCK_SIZE;
806         final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
807 
808         // perform copy block-wise, to ensure good cache behavior
809         for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
810             final int iHeight  = blockHeight(iBlock);
811             final int firstRow = iBlock * BLOCK_SIZE;
812             final int iStart   = FastMath.max(row,    firstRow);
813             final int iEnd     = FastMath.min(endRow + 1, firstRow + iHeight);
814 
815             for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
816                 final int jWidth      = blockWidth(jBlock);
817                 final int firstColumn = jBlock * BLOCK_SIZE;
818                 final int jStart      = FastMath.max(column,    firstColumn);
819                 final int jEnd        = FastMath.min(endColumn + 1, firstColumn + jWidth);
820                 final int jLength     = jEnd - jStart;
821 
822                 // handle one block, row by row
823                 final T[] block = blocks[iBlock * blockColumns + jBlock];
824                 for (int i = iStart; i < iEnd; ++i) {
825                     System.arraycopy(subMatrix[i - row], jStart - column,
826                                      block, (i - firstRow) * jWidth + (jStart - firstColumn),
827                                      jLength);
828                 }
829 
830             }
831         }
832     }
833 
834     /** {@inheritDoc} */
835     @Override
836     public FieldMatrix<T> getRowMatrix(final int row)
837         throws OutOfRangeException {
838         checkRowIndex(row);
839         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), 1, columns);
840 
841         // perform copy block-wise, to ensure good cache behavior
842         final int iBlock  = row / BLOCK_SIZE;
843         final int iRow    = row - iBlock * BLOCK_SIZE;
844         int outBlockIndex = 0;
845         int outIndex      = 0;
846         T[] outBlock = out.blocks[outBlockIndex];
847         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
848             final int jWidth     = blockWidth(jBlock);
849             final T[] block = blocks[iBlock * blockColumns + jBlock];
850             final int available  = outBlock.length - outIndex;
851             if (jWidth > available) {
852                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
853                 outBlock = out.blocks[++outBlockIndex];
854                 System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
855                 outIndex = jWidth - available;
856             } else {
857                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
858                 outIndex += jWidth;
859             }
860         }
861 
862         return out;
863     }
864 
865     /** {@inheritDoc} */
866     @Override
867     public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
868         throws MatrixDimensionMismatchException, OutOfRangeException {
869         try {
870             setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
871         } catch (ClassCastException cce) {
872             super.setRowMatrix(row, matrix);
873         }
874     }
875 
876     /**
877      * Sets the entries in row number <code>row</code>
878      * as a row matrix.  Row indices start at 0.
879      *
880      * @param row the row to be set
881      * @param matrix row matrix (must have one row and the same number of columns
882      * as the instance)
883      * @throws MatrixDimensionMismatchException if the matrix dimensions do
884      * not match one instance row.
885      * @throws OutOfRangeException if the specified row index is invalid.
886      */
887     public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
888         throws MatrixDimensionMismatchException, OutOfRangeException {
889         checkRowIndex(row);
890         final int nCols = getColumnDimension();
891         if ((matrix.getRowDimension() != 1) ||
892             (matrix.getColumnDimension() != nCols)) {
893             throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
894                                                        matrix.getColumnDimension(),
895                                                        1, nCols);
896         }
897 
898         // perform copy block-wise, to ensure good cache behavior
899         final int iBlock = row / BLOCK_SIZE;
900         final int iRow   = row - iBlock * BLOCK_SIZE;
901         int mBlockIndex  = 0;
902         int mIndex       = 0;
903         T[] mBlock  = matrix.blocks[mBlockIndex];
904         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
905             final int jWidth     = blockWidth(jBlock);
906             final T[] block = blocks[iBlock * blockColumns + jBlock];
907             final int available  = mBlock.length - mIndex;
908             if (jWidth > available) {
909                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
910                 mBlock = matrix.blocks[++mBlockIndex];
911                 System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
912                 mIndex = jWidth - available;
913             } else {
914                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
915                 mIndex += jWidth;
916            }
917         }
918     }
919 
920     /** {@inheritDoc} */
921     @Override
922     public FieldMatrix<T> getColumnMatrix(final int column)
923         throws OutOfRangeException {
924         checkColumnIndex(column);
925         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, 1);
926 
927         // perform copy block-wise, to ensure good cache behavior
928         final int jBlock  = column / BLOCK_SIZE;
929         final int jColumn = column - jBlock * BLOCK_SIZE;
930         final int jWidth  = blockWidth(jBlock);
931         int outBlockIndex = 0;
932         int outIndex      = 0;
933         T[] outBlock = out.blocks[outBlockIndex];
934         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
935             final int iHeight = blockHeight(iBlock);
936             final T[] block = blocks[iBlock * blockColumns + jBlock];
937             for (int i = 0; i < iHeight; ++i) {
938                 if (outIndex >= outBlock.length) {
939                     outBlock = out.blocks[++outBlockIndex];
940                     outIndex = 0;
941                 }
942                 outBlock[outIndex++] = block[i * jWidth + jColumn];
943             }
944         }
945 
946         return out;
947     }
948 
949     /** {@inheritDoc} */
950     @Override
951     public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
952         throws MatrixDimensionMismatchException, OutOfRangeException {
953         try {
954             setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
955         } catch (ClassCastException cce) {
956             super.setColumnMatrix(column, matrix);
957         }
958     }
959 
960     /**
961      * Sets the entries in column number {@code column}
962      * as a column matrix.  Column indices start at 0.
963      *
964      * @param column Column to be set.
965      * @param matrix Column matrix (must have one column and the same number of rows
966      * as the instance).
967      * @throws MatrixDimensionMismatchException if the matrix dimensions do
968      * not match one instance column.
969      * @throws OutOfRangeException if the specified column index is invalid.
970      */
971     void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
972         throws MatrixDimensionMismatchException, OutOfRangeException {
973         checkColumnIndex(column);
974         final int nRows = getRowDimension();
975         if ((matrix.getRowDimension() != nRows) ||
976             (matrix.getColumnDimension() != 1)) {
977             throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
978                                                        matrix.getColumnDimension(),
979                                                        nRows, 1);
980         }
981 
982         // perform copy block-wise, to ensure good cache behavior
983         final int jBlock  = column / BLOCK_SIZE;
984         final int jColumn = column - jBlock * BLOCK_SIZE;
985         final int jWidth  = blockWidth(jBlock);
986         int mBlockIndex = 0;
987         int mIndex      = 0;
988         T[] mBlock = matrix.blocks[mBlockIndex];
989         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
990             final int iHeight = blockHeight(iBlock);
991             final T[] block = blocks[iBlock * blockColumns + jBlock];
992             for (int i = 0; i < iHeight; ++i) {
993                 if (mIndex >= mBlock.length) {
994                     mBlock = matrix.blocks[++mBlockIndex];
995                     mIndex = 0;
996                 }
997                 block[i * jWidth + jColumn] = mBlock[mIndex++];
998             }
999         }
1000     }
1001 
1002     /** {@inheritDoc} */
1003     @Override
1004     public FieldVector<T> getRowVector(final int row)
1005         throws OutOfRangeException {
1006         checkRowIndex(row);
1007         final T[] outData = MathArrays.buildArray(getField(), columns);
1008 
1009         // perform copy block-wise, to ensure good cache behavior
1010         final int iBlock  = row / BLOCK_SIZE;
1011         final int iRow    = row - iBlock * BLOCK_SIZE;
1012         int outIndex      = 0;
1013         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1014             final int jWidth     = blockWidth(jBlock);
1015             final T[] block = blocks[iBlock * blockColumns + jBlock];
1016             System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1017             outIndex += jWidth;
1018         }
1019 
1020         return new ArrayFieldVector<T>(getField(), outData, false);
1021     }
1022 
1023     /** {@inheritDoc} */
1024     @Override
1025     public void setRowVector(final int row, final FieldVector<T> vector)
1026         throws MatrixDimensionMismatchException, OutOfRangeException {
1027         try {
1028             setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1029         } catch (ClassCastException cce) {
1030             super.setRowVector(row, vector);
1031         }
1032     }
1033 
1034     /** {@inheritDoc} */
1035     @Override
1036     public FieldVector<T> getColumnVector(final int column)
1037         throws OutOfRangeException {
1038         checkColumnIndex(column);
1039         final T[] outData = MathArrays.buildArray(getField(), rows);
1040 
1041         // perform copy block-wise, to ensure good cache behavior
1042         final int jBlock  = column / BLOCK_SIZE;
1043         final int jColumn = column - jBlock * BLOCK_SIZE;
1044         final int jWidth  = blockWidth(jBlock);
1045         int outIndex      = 0;
1046         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1047             final int iHeight = blockHeight(iBlock);
1048             final T[] block = blocks[iBlock * blockColumns + jBlock];
1049             for (int i = 0; i < iHeight; ++i) {
1050                 outData[outIndex++] = block[i * jWidth + jColumn];
1051             }
1052         }
1053 
1054         return new ArrayFieldVector<T>(getField(), outData, false);
1055     }
1056 
1057     /** {@inheritDoc} */
1058     @Override
1059     public void setColumnVector(final int column, final FieldVector<T> vector)
1060         throws OutOfRangeException, MatrixDimensionMismatchException {
1061         try {
1062             setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1063         } catch (ClassCastException cce) {
1064             super.setColumnVector(column, vector);
1065         }
1066     }
1067 
1068     /** {@inheritDoc} */
1069     @Override
1070     public T[] getRow(final int row) throws OutOfRangeException {
1071         checkRowIndex(row);
1072         final T[] out = MathArrays.buildArray(getField(), columns);
1073 
1074         // perform copy block-wise, to ensure good cache behavior
1075         final int iBlock  = row / BLOCK_SIZE;
1076         final int iRow    = row - iBlock * BLOCK_SIZE;
1077         int outIndex      = 0;
1078         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1079             final int jWidth     = blockWidth(jBlock);
1080             final T[] block = blocks[iBlock * blockColumns + jBlock];
1081             System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1082             outIndex += jWidth;
1083         }
1084 
1085         return out;
1086     }
1087 
1088     /** {@inheritDoc} */
1089     @Override
1090     public void setRow(final int row, final T[] array)
1091         throws OutOfRangeException, MatrixDimensionMismatchException {
1092         checkRowIndex(row);
1093         final int nCols = getColumnDimension();
1094         if (array.length != nCols) {
1095             throw new MatrixDimensionMismatchException(1, array.length, 1, nCols);
1096         }
1097 
1098         // perform copy block-wise, to ensure good cache behavior
1099         final int iBlock  = row / BLOCK_SIZE;
1100         final int iRow    = row - iBlock * BLOCK_SIZE;
1101         int outIndex      = 0;
1102         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1103             final int jWidth     = blockWidth(jBlock);
1104             final T[] block = blocks[iBlock * blockColumns + jBlock];
1105             System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1106             outIndex += jWidth;
1107         }
1108     }
1109 
1110     /** {@inheritDoc} */
1111     @Override
1112     public T[] getColumn(final int column) throws OutOfRangeException {
1113         checkColumnIndex(column);
1114         final T[] out = MathArrays.buildArray(getField(), rows);
1115 
1116         // perform copy block-wise, to ensure good cache behavior
1117         final int jBlock  = column / BLOCK_SIZE;
1118         final int jColumn = column - jBlock * BLOCK_SIZE;
1119         final int jWidth  = blockWidth(jBlock);
1120         int outIndex      = 0;
1121         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1122             final int iHeight = blockHeight(iBlock);
1123             final T[] block = blocks[iBlock * blockColumns + jBlock];
1124             for (int i = 0; i < iHeight; ++i) {
1125                 out[outIndex++] = block[i * jWidth + jColumn];
1126             }
1127         }
1128 
1129         return out;
1130     }
1131 
1132     /** {@inheritDoc} */
1133     @Override
1134     public void setColumn(final int column, final T[] array)
1135         throws MatrixDimensionMismatchException, OutOfRangeException {
1136         checkColumnIndex(column);
1137         final int nRows = getRowDimension();
1138         if (array.length != nRows) {
1139             throw new MatrixDimensionMismatchException(array.length, 1, nRows, 1);
1140         }
1141 
1142         // perform copy block-wise, to ensure good cache behavior
1143         final int jBlock  = column / BLOCK_SIZE;
1144         final int jColumn = column - jBlock * BLOCK_SIZE;
1145         final int jWidth  = blockWidth(jBlock);
1146         int outIndex      = 0;
1147         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1148             final int iHeight = blockHeight(iBlock);
1149             final T[] block = blocks[iBlock * blockColumns + jBlock];
1150             for (int i = 0; i < iHeight; ++i) {
1151                 block[i * jWidth + jColumn] = array[outIndex++];
1152             }
1153         }
1154     }
1155 
1156     /** {@inheritDoc} */
1157     @Override
1158     public T getEntry(final int row, final int column)
1159         throws OutOfRangeException {
1160         checkRowIndex(row);
1161         checkColumnIndex(column);
1162 
1163         final int iBlock = row    / BLOCK_SIZE;
1164         final int jBlock = column / BLOCK_SIZE;
1165         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1166             (column - jBlock * BLOCK_SIZE);
1167 
1168         return blocks[iBlock * blockColumns + jBlock][k];
1169     }
1170 
1171     /** {@inheritDoc} */
1172     @Override
1173     public void setEntry(final int row, final int column, final T value)
1174         throws OutOfRangeException {
1175         checkRowIndex(row);
1176         checkColumnIndex(column);
1177 
1178         final int iBlock = row    / BLOCK_SIZE;
1179         final int jBlock = column / BLOCK_SIZE;
1180         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1181             (column - jBlock * BLOCK_SIZE);
1182 
1183         blocks[iBlock * blockColumns + jBlock][k] = value;
1184     }
1185 
1186     /** {@inheritDoc} */
1187     @Override
1188     public void addToEntry(final int row, final int column, final T increment)
1189         throws OutOfRangeException {
1190         checkRowIndex(row);
1191         checkColumnIndex(column);
1192 
1193         final int iBlock = row    / BLOCK_SIZE;
1194         final int jBlock = column / BLOCK_SIZE;
1195         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1196             (column - jBlock * BLOCK_SIZE);
1197         final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1198 
1199         blockIJ[k] = blockIJ[k].add(increment);
1200     }
1201 
1202     /** {@inheritDoc} */
1203     @Override
1204     public void multiplyEntry(final int row, final int column, final T factor)
1205         throws OutOfRangeException {
1206         checkRowIndex(row);
1207         checkColumnIndex(column);
1208 
1209         final int iBlock = row    / BLOCK_SIZE;
1210         final int jBlock = column / BLOCK_SIZE;
1211         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1212             (column - jBlock * BLOCK_SIZE);
1213         final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1214 
1215         blockIJ[k] = blockIJ[k].multiply(factor);
1216     }
1217 
1218     /** {@inheritDoc} */
1219     @Override
1220     public FieldMatrix<T> transpose() {
1221         final int nRows = getRowDimension();
1222         final int nCols = getColumnDimension();
1223         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), nCols, nRows);
1224 
1225         // perform transpose block-wise, to ensure good cache behavior
1226         int blockIndex = 0;
1227         for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1228             for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1229 
1230                 // transpose current block
1231                 final T[] outBlock = out.blocks[blockIndex];
1232                 final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1233                 final int      pStart   = iBlock * BLOCK_SIZE;
1234                 final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, columns);
1235                 final int      qStart   = jBlock * BLOCK_SIZE;
1236                 final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, rows);
1237                 int k = 0;
1238                 for (int p = pStart; p < pEnd; ++p) {
1239                     final int lInc = pEnd - pStart;
1240                     int l = p - pStart;
1241                     for (int q = qStart; q < qEnd; ++q) {
1242                         outBlock[k] = tBlock[l];
1243                         ++k;
1244                         l+= lInc;
1245                     }
1246                 }
1247 
1248                 // go to next block
1249                 ++blockIndex;
1250 
1251             }
1252         }
1253 
1254         return out;
1255     }
1256 
1257     /** {@inheritDoc} */
1258     @Override
1259     public int getRowDimension() {
1260         return rows;
1261     }
1262 
1263     /** {@inheritDoc} */
1264     @Override
1265     public int getColumnDimension() {
1266         return columns;
1267     }
1268 
1269     /** {@inheritDoc} */
1270     @Override
1271     public T[] operate(final T[] v) throws DimensionMismatchException {
1272         if (v.length != columns) {
1273             throw new DimensionMismatchException(v.length, columns);
1274         }
1275         final T[] out = MathArrays.buildArray(getField(), rows);
1276         final T zero = getField().getZero();
1277 
1278         // perform multiplication block-wise, to ensure good cache behavior
1279         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1280             final int pStart = iBlock * BLOCK_SIZE;
1281             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1282             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1283                 final T[] block  = blocks[iBlock * blockColumns + jBlock];
1284                 final int      qStart = jBlock * BLOCK_SIZE;
1285                 final int      qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1286                 int k = 0;
1287                 for (int p = pStart; p < pEnd; ++p) {
1288                     T sum = zero;
1289                     int q = qStart;
1290                     while (q < qEnd - 3) {
1291                         sum = sum.
1292                               add(block[k].multiply(v[q])).
1293                               add(block[k + 1].multiply(v[q + 1])).
1294                               add(block[k + 2].multiply(v[q + 2])).
1295                               add(block[k + 3].multiply(v[q + 3]));
1296                         k += 4;
1297                         q += 4;
1298                     }
1299                     while (q < qEnd) {
1300                         sum = sum.add(block[k++].multiply(v[q++]));
1301                     }
1302                     out[p] = out[p].add(sum);
1303                 }
1304             }
1305         }
1306 
1307         return out;
1308     }
1309 
1310     /** {@inheritDoc} */
1311     @Override
1312     public T[] preMultiply(final T[] v) throws DimensionMismatchException {
1313 
1314         if (v.length != rows) {
1315             throw new DimensionMismatchException(v.length, rows);
1316         }
1317         final T[] out = MathArrays.buildArray(getField(), columns);
1318         final T zero = getField().getZero();
1319 
1320         // perform multiplication block-wise, to ensure good cache behavior
1321         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1322             final int jWidth  = blockWidth(jBlock);
1323             final int jWidth2 = jWidth  + jWidth;
1324             final int jWidth3 = jWidth2 + jWidth;
1325             final int jWidth4 = jWidth3 + jWidth;
1326             final int qStart = jBlock * BLOCK_SIZE;
1327             final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1328             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1329                 final T[] block  = blocks[iBlock * blockColumns + jBlock];
1330                 final int      pStart = iBlock * BLOCK_SIZE;
1331                 final int      pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1332                 for (int q = qStart; q < qEnd; ++q) {
1333                     int k = q - qStart;
1334                     T sum = zero;
1335                     int p = pStart;
1336                     while (p < pEnd - 3) {
1337                         sum = sum.
1338                               add(block[k].multiply(v[p])).
1339                               add(block[k + jWidth].multiply(v[p + 1])).
1340                               add(block[k + jWidth2].multiply(v[p + 2])).
1341                               add(block[k + jWidth3].multiply(v[p + 3]));
1342                         k += jWidth4;
1343                         p += 4;
1344                     }
1345                     while (p < pEnd) {
1346                         sum = sum.add(block[k].multiply(v[p++]));
1347                         k += jWidth;
1348                     }
1349                     out[q] = out[q].add(sum);
1350                 }
1351             }
1352         }
1353 
1354         return out;
1355     }
1356 
1357     /** {@inheritDoc} */
1358     @Override
1359     public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor) {
1360         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1361         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1362             final int pStart = iBlock * BLOCK_SIZE;
1363             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1364             for (int p = pStart; p < pEnd; ++p) {
1365                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1366                     final int jWidth = blockWidth(jBlock);
1367                     final int qStart = jBlock * BLOCK_SIZE;
1368                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1369                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1370                     int k = (p - pStart) * jWidth;
1371                     for (int q = qStart; q < qEnd; ++q) {
1372                         block[k] = visitor.visit(p, q, block[k]);
1373                         ++k;
1374                     }
1375                 }
1376              }
1377         }
1378         return visitor.end();
1379     }
1380 
1381     /** {@inheritDoc} */
1382     @Override
1383     public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1384         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1385         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1386             final int pStart = iBlock * BLOCK_SIZE;
1387             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1388             for (int p = pStart; p < pEnd; ++p) {
1389                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1390                     final int jWidth = blockWidth(jBlock);
1391                     final int qStart = jBlock * BLOCK_SIZE;
1392                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1393                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1394                     int k = (p - pStart) * jWidth;
1395                     for (int q = qStart; q < qEnd; ++q) {
1396                         visitor.visit(p, q, block[k]);
1397                         ++k;
1398                     }
1399                 }
1400              }
1401         }
1402         return visitor.end();
1403     }
1404 
1405     /** {@inheritDoc} */
1406     @Override
1407     public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1408                             final int startRow, final int endRow,
1409                             final int startColumn, final int endColumn)
1410         throws OutOfRangeException, NumberIsTooSmallException {
1411         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1412         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1413         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1414             final int p0     = iBlock * BLOCK_SIZE;
1415             final int pStart = FastMath.max(startRow, p0);
1416             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1417             for (int p = pStart; p < pEnd; ++p) {
1418                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1419                     final int jWidth = blockWidth(jBlock);
1420                     final int q0     = jBlock * BLOCK_SIZE;
1421                     final int qStart = FastMath.max(startColumn, q0);
1422                     final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1423                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1424                     int k = (p - p0) * jWidth + qStart - q0;
1425                     for (int q = qStart; q < qEnd; ++q) {
1426                         block[k] = visitor.visit(p, q, block[k]);
1427                         ++k;
1428                     }
1429                 }
1430              }
1431         }
1432         return visitor.end();
1433     }
1434 
1435     /** {@inheritDoc} */
1436     @Override
1437     public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1438                             final int startRow, final int endRow,
1439                             final int startColumn, final int endColumn)
1440         throws OutOfRangeException, NumberIsTooSmallException {
1441         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1442         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1443         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1444             final int p0     = iBlock * BLOCK_SIZE;
1445             final int pStart = FastMath.max(startRow, p0);
1446             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1447             for (int p = pStart; p < pEnd; ++p) {
1448                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1449                     final int jWidth = blockWidth(jBlock);
1450                     final int q0     = jBlock * BLOCK_SIZE;
1451                     final int qStart = FastMath.max(startColumn, q0);
1452                     final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1453                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1454                     int k = (p - p0) * jWidth + qStart - q0;
1455                     for (int q = qStart; q < qEnd; ++q) {
1456                         visitor.visit(p, q, block[k]);
1457                         ++k;
1458                     }
1459                 }
1460              }
1461         }
1462         return visitor.end();
1463     }
1464 
1465     /** {@inheritDoc} */
1466     @Override
1467     public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor) {
1468         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1469         int blockIndex = 0;
1470         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1471             final int pStart = iBlock * BLOCK_SIZE;
1472             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1473             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1474                 final int qStart = jBlock * BLOCK_SIZE;
1475                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1476                 final T[] block = blocks[blockIndex];
1477                 int k = 0;
1478                 for (int p = pStart; p < pEnd; ++p) {
1479                     for (int q = qStart; q < qEnd; ++q) {
1480                         block[k] = visitor.visit(p, q, block[k]);
1481                         ++k;
1482                     }
1483                 }
1484                 ++blockIndex;
1485             }
1486         }
1487         return visitor.end();
1488     }
1489 
1490     /** {@inheritDoc} */
1491     @Override
1492     public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1493         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1494         int blockIndex = 0;
1495         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1496             final int pStart = iBlock * BLOCK_SIZE;
1497             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1498             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1499                 final int qStart = jBlock * BLOCK_SIZE;
1500                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1501                 final T[] block = blocks[blockIndex];
1502                 int k = 0;
1503                 for (int p = pStart; p < pEnd; ++p) {
1504                     for (int q = qStart; q < qEnd; ++q) {
1505                         visitor.visit(p, q, block[k]);
1506                         ++k;
1507                     }
1508                 }
1509                 ++blockIndex;
1510             }
1511         }
1512         return visitor.end();
1513     }
1514 
1515     /** {@inheritDoc} */
1516     @Override
1517     public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1518                                   final int startRow, final int endRow,
1519                                   final int startColumn, final int endColumn)
1520         throws OutOfRangeException, NumberIsTooSmallException {
1521         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1522         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1523         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1524             final int p0     = iBlock * BLOCK_SIZE;
1525             final int pStart = FastMath.max(startRow, p0);
1526             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1527             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1528                 final int jWidth = blockWidth(jBlock);
1529                 final int q0     = jBlock * BLOCK_SIZE;
1530                 final int qStart = FastMath.max(startColumn, q0);
1531                 final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1532                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1533                 for (int p = pStart; p < pEnd; ++p) {
1534                     int k = (p - p0) * jWidth + qStart - q0;
1535                     for (int q = qStart; q < qEnd; ++q) {
1536                         block[k] = visitor.visit(p, q, block[k]);
1537                         ++k;
1538                     }
1539                 }
1540             }
1541         }
1542         return visitor.end();
1543     }
1544 
1545     /** {@inheritDoc} */
1546     @Override
1547     public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1548                                   final int startRow, final int endRow,
1549                                   final int startColumn, final int endColumn)
1550         throws OutOfRangeException, NumberIsTooSmallException {
1551         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1552         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1553         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1554             final int p0     = iBlock * BLOCK_SIZE;
1555             final int pStart = FastMath.max(startRow, p0);
1556             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1557             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1558                 final int jWidth = blockWidth(jBlock);
1559                 final int q0     = jBlock * BLOCK_SIZE;
1560                 final int qStart = FastMath.max(startColumn, q0);
1561                 final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1562                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1563                 for (int p = pStart; p < pEnd; ++p) {
1564                     int k = (p - p0) * jWidth + qStart - q0;
1565                     for (int q = qStart; q < qEnd; ++q) {
1566                         visitor.visit(p, q, block[k]);
1567                         ++k;
1568                     }
1569                 }
1570             }
1571         }
1572         return visitor.end();
1573     }
1574 
1575     /**
1576      * Get the height of a block.
1577      * @param blockRow row index (in block sense) of the block
1578      * @return height (number of rows) of the block
1579      */
1580     private int blockHeight(final int blockRow) {
1581         return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1582     }
1583 
1584     /**
1585      * Get the width of a block.
1586      * @param blockColumn column index (in block sense) of the block
1587      * @return width (number of columns) of the block
1588      */
1589     private int blockWidth(final int blockColumn) {
1590         return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1591     }
1592 }