View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math3.linear;
19  
20  import java.io.Serializable;
21  
22  import org.apache.commons.math3.Field;
23  import org.apache.commons.math3.FieldElement;
24  import org.apache.commons.math3.exception.NoDataException;
25  import org.apache.commons.math3.exception.DimensionMismatchException;
26  import org.apache.commons.math3.exception.NotStrictlyPositiveException;
27  import org.apache.commons.math3.exception.NullArgumentException;
28  import org.apache.commons.math3.exception.NumberIsTooSmallException;
29  import org.apache.commons.math3.exception.OutOfRangeException;
30  import org.apache.commons.math3.exception.util.LocalizedFormats;
31  import org.apache.commons.math3.util.FastMath;
32  import org.apache.commons.math3.util.MathArrays;
33  import org.apache.commons.math3.util.MathUtils;
34  
35  /**
36   * Cache-friendly implementation of FieldMatrix using a flat arrays to store
37   * square blocks of the matrix.
38   * <p>
39   * This implementation is specially designed to be cache-friendly. Square blocks are
40   * stored as small arrays and allow efficient traversal of data both in row major direction
41   * and columns major direction, one block at a time. This greatly increases performances
42   * for algorithms that use crossed directions loops like multiplication or transposition.
43   * </p>
44   * <p>
45   * The size of square blocks is a static parameter. It may be tuned according to the cache
46   * size of the target computer processor. As a rule of thumbs, it should be the largest
47   * value that allows three blocks to be simultaneously cached (this is necessary for example
48   * for matrix multiplication). The default value is to use 36x36 blocks.
49   * </p>
50   * <p>
51   * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
52   * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
53   * blocks are flattened in row major order in single dimension arrays which are therefore
54   * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
55   * organized in row major order.
56   * </p>
57   * <p>
58   * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
59   * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
60   * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
61   * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
62   * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
63   * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
64   * rectangle.
65   * </p>
66   * <p>
67   * The layout complexity overhead versus simple mapping of matrices to java
68   * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
69   * to up to 3-fold improvements for matrices of moderate to large size.
70   * </p>
71   * @param <T> the type of the field elements
72   * @version $Id: BlockFieldMatrix.java 1449528 2013-02-24 19:06:20Z luc $
73   * @since 2.0
74   */
75  public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
76      /** Block size. */
77      public static final int BLOCK_SIZE = 36;
78      /** Serializable version identifier. */
79      private static final long serialVersionUID = -4602336630143123183L;
80      /** Blocks of matrix entries. */
81      private final T blocks[][];
82      /** Number of rows of the matrix. */
83      private final int rows;
84      /** Number of columns of the matrix. */
85      private final int columns;
86      /** Number of block rows of the matrix. */
87      private final int blockRows;
88      /** Number of block columns of the matrix. */
89      private final int blockColumns;
90  
91      /**
92       * Create a new matrix with the supplied row and column dimensions.
93       *
94       * @param field Field to which the elements belong.
95       * @param rows Number of rows in the new matrix.
96       * @param columns Number of columns in the new matrix.
97       * @throws NotStrictlyPositiveException if row or column dimension is not
98       * positive.
99       */
100     public BlockFieldMatrix(final Field<T> field, final int rows,
101                             final int columns)
102         throws NotStrictlyPositiveException {
103         super(field, rows, columns);
104         this.rows    = rows;
105         this.columns = columns;
106 
107         // number of blocks
108         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
109         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
110 
111         // allocate storage blocks, taking care of smaller ones at right and bottom
112         blocks = createBlocksLayout(field, rows, columns);
113     }
114 
115     /**
116      * Create a new dense matrix copying entries from raw layout data.
117      * <p>The input array <em>must</em> already be in raw layout.</p>
118      * <p>Calling this constructor is equivalent to call:
119      * <pre>matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
120      *                                   toBlocksLayout(rawData), false);</pre>
121      * </p>
122      *
123      * @param rawData Data for the new matrix, in raw layout.
124      * @throws DimensionMismatchException if the {@code blockData} shape is
125      * inconsistent with block layout.
126      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
127      */
128     public BlockFieldMatrix(final T[][] rawData)
129         throws DimensionMismatchException {
130         this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
131     }
132 
133     /**
134      * Create a new dense matrix copying entries from block layout data.
135      * <p>The input array <em>must</em> already be in blocks layout.</p>
136      * @param rows  the number of rows in the new matrix
137      * @param columns  the number of columns in the new matrix
138      * @param blockData data for new matrix
139      * @param copyArray if true, the input array will be copied, otherwise
140      * it will be referenced
141      *
142      * @throws DimensionMismatchException if the {@code blockData} shape is
143      * inconsistent with block layout.
144      * @throws NotStrictlyPositiveException if row or column dimension is not
145      * positive.
146      * @see #createBlocksLayout(Field, int, int)
147      * @see #toBlocksLayout(FieldElement[][])
148      * @see #BlockFieldMatrix(FieldElement[][])
149      */
150     public BlockFieldMatrix(final int rows, final int columns,
151                             final T[][] blockData, final boolean copyArray)
152         throws DimensionMismatchException, NotStrictlyPositiveException {
153         super(extractField(blockData), rows, columns);
154         this.rows    = rows;
155         this.columns = columns;
156 
157         // number of blocks
158         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
159         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
160 
161         if (copyArray) {
162             // allocate storage blocks, taking care of smaller ones at right and bottom
163             blocks = MathArrays.buildArray(getField(), blockRows * blockColumns, -1);
164         } else {
165             // reference existing array
166             blocks = blockData;
167         }
168 
169         int index = 0;
170         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
171             final int iHeight = blockHeight(iBlock);
172             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
173                 if (blockData[index].length != iHeight * blockWidth(jBlock)) {
174                     throw new DimensionMismatchException(blockData[index].length,
175                                                          iHeight * blockWidth(jBlock));
176                 }
177                 if (copyArray) {
178                     blocks[index] = blockData[index].clone();
179                 }
180             }
181         }
182     }
183 
184     /**
185      * Convert a data array from raw layout to blocks layout.
186      * <p>
187      * Raw layout is the straightforward layout where element at row i and
188      * column j is in array element <code>rawData[i][j]</code>. Blocks layout
189      * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
190      * is split in square blocks (except at right and bottom side where blocks may
191      * be rectangular to fit matrix size) and each block is stored in a flattened
192      * one-dimensional array.
193      * </p>
194      * <p>
195      * This method creates an array in blocks layout from an input array in raw layout.
196      * It can be used to provide the array argument of the {@link
197      * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
198      * constructor.
199      * </p>
200      * @param <T> Type of the field elements.
201      * @param rawData Data array in raw layout.
202      * @return a new data array containing the same entries but in blocks layout
203      * @throws DimensionMismatchException if {@code rawData} is not rectangular
204      *  (not all rows have the same length).
205      * @see #createBlocksLayout(Field, int, int)
206      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
207      */
208     public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
209         throws DimensionMismatchException {
210 
211         final int rows         = rawData.length;
212         final int columns      = rawData[0].length;
213         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
214         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
215 
216         // safety checks
217         for (int i = 0; i < rawData.length; ++i) {
218             final int length = rawData[i].length;
219             if (length != columns) {
220                 throw new DimensionMismatchException(columns, length);
221             }
222         }
223 
224         // convert array
225         final Field<T> field = extractField(rawData);
226         final T[][] blocks = MathArrays.buildArray(field, blockRows * blockColumns, -1);
227         int blockIndex = 0;
228         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
229             final int pStart  = iBlock * BLOCK_SIZE;
230             final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
231             final int iHeight = pEnd - pStart;
232             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
233                 final int qStart = jBlock * BLOCK_SIZE;
234                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
235                 final int jWidth = qEnd - qStart;
236 
237                 // allocate new block
238                 final T[] block = MathArrays.buildArray(field, iHeight * jWidth);
239                 blocks[blockIndex] = block;
240 
241                 // copy data
242                 int index = 0;
243                 for (int p = pStart; p < pEnd; ++p) {
244                     System.arraycopy(rawData[p], qStart, block, index, jWidth);
245                     index += jWidth;
246                 }
247 
248                 ++blockIndex;
249             }
250         }
251 
252         return blocks;
253     }
254 
255     /**
256      * Create a data array in blocks layout.
257      * <p>
258      * This method can be used to create the array argument of the {@link
259      * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
260      * constructor.
261      * </p>
262      * @param <T> Type of the field elements.
263      * @param field Field to which the elements belong.
264      * @param rows Number of rows in the new matrix.
265      * @param columns Number of columns in the new matrix.
266      * @return a new data array in blocks layout.
267      * @see #toBlocksLayout(FieldElement[][])
268      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
269      */
270     public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
271                                                                        final int rows, final int columns) {
272         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
273         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
274 
275         final T[][] blocks = MathArrays.buildArray(field, blockRows * blockColumns, -1);
276         int blockIndex = 0;
277         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
278             final int pStart  = iBlock * BLOCK_SIZE;
279             final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
280             final int iHeight = pEnd - pStart;
281             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
282                 final int qStart = jBlock * BLOCK_SIZE;
283                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
284                 final int jWidth = qEnd - qStart;
285                 blocks[blockIndex] = MathArrays.buildArray(field, iHeight * jWidth);
286                 ++blockIndex;
287             }
288         }
289 
290         return blocks;
291     }
292 
293     /** {@inheritDoc} */
294     @Override
295     public FieldMatrix<T> createMatrix(final int rowDimension,
296                                        final int columnDimension)
297         throws NotStrictlyPositiveException {
298         return new BlockFieldMatrix<T>(getField(), rowDimension,
299                                        columnDimension);
300     }
301 
302     /** {@inheritDoc} */
303     @Override
304     public FieldMatrix<T> copy() {
305 
306         // create an empty matrix
307         BlockFieldMatrix<T> copied = new BlockFieldMatrix<T>(getField(), rows, columns);
308 
309         // copy the blocks
310         for (int i = 0; i < blocks.length; ++i) {
311             System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
312         }
313 
314         return copied;
315     }
316 
317     /** {@inheritDoc} */
318     @Override
319     public FieldMatrix<T> add(final FieldMatrix<T> m)
320         throws MatrixDimensionMismatchException {
321         try {
322             return add((BlockFieldMatrix<T>) m);
323         } catch (ClassCastException cce) {
324 
325             // safety check
326             checkAdditionCompatible(m);
327 
328             final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
329 
330             // perform addition block-wise, to ensure good cache behavior
331             int blockIndex = 0;
332             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
333                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
334 
335                     // perform addition on the current block
336                     final T[] outBlock = out.blocks[blockIndex];
337                     final T[] tBlock   = blocks[blockIndex];
338                     final int      pStart   = iBlock * BLOCK_SIZE;
339                     final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
340                     final int      qStart   = jBlock * BLOCK_SIZE;
341                     final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
342                     int k = 0;
343                     for (int p = pStart; p < pEnd; ++p) {
344                         for (int q = qStart; q < qEnd; ++q) {
345                             outBlock[k] = tBlock[k].add(m.getEntry(p, q));
346                             ++k;
347                         }
348                     }
349 
350                     // go to next block
351                     ++blockIndex;
352 
353                 }
354             }
355 
356             return out;
357         }
358     }
359 
360     /**
361      * Compute the sum of {@code this} and {@code m}.
362      *
363      * @param m matrix to be added
364      * @return {@code this + m}
365      * @throws MatrixDimensionMismatchException if {@code m} is not the same
366      * size as {@code this}
367      */
368     public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
369         throws MatrixDimensionMismatchException {
370 
371         // safety check
372         checkAdditionCompatible(m);
373 
374         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
375 
376         // perform addition block-wise, to ensure good cache behavior
377         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
378             final T[] outBlock = out.blocks[blockIndex];
379             final T[] tBlock   = blocks[blockIndex];
380             final T[] mBlock   = m.blocks[blockIndex];
381             for (int k = 0; k < outBlock.length; ++k) {
382                 outBlock[k] = tBlock[k].add(mBlock[k]);
383             }
384         }
385 
386         return out;
387     }
388 
389     /** {@inheritDoc} */
390     @Override
391     public FieldMatrix<T> subtract(final FieldMatrix<T> m)
392         throws MatrixDimensionMismatchException {
393         try {
394             return subtract((BlockFieldMatrix<T>) m);
395         } catch (ClassCastException cce) {
396 
397             // safety check
398             checkSubtractionCompatible(m);
399 
400             final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
401 
402             // perform subtraction block-wise, to ensure good cache behavior
403             int blockIndex = 0;
404             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
405                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
406 
407                     // perform subtraction on the current block
408                     final T[] outBlock = out.blocks[blockIndex];
409                     final T[] tBlock   = blocks[blockIndex];
410                     final int      pStart   = iBlock * BLOCK_SIZE;
411                     final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
412                     final int      qStart   = jBlock * BLOCK_SIZE;
413                     final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
414                     int k = 0;
415                     for (int p = pStart; p < pEnd; ++p) {
416                         for (int q = qStart; q < qEnd; ++q) {
417                             outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
418                             ++k;
419                         }
420                     }
421 
422                     // go to next block
423                     ++blockIndex;
424 
425                 }
426             }
427 
428             return out;
429         }
430     }
431 
432     /**
433      * Compute {@code this - m}.
434      *
435      * @param m matrix to be subtracted
436      * @return {@code this - m}
437      * @throws MatrixDimensionMismatchException if {@code m} is not the same
438      * size as {@code this}
439      */
440     public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m) throws MatrixDimensionMismatchException {
441         // safety check
442         checkSubtractionCompatible(m);
443 
444         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
445 
446         // perform subtraction block-wise, to ensure good cache behavior
447         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
448             final T[] outBlock = out.blocks[blockIndex];
449             final T[] tBlock   = blocks[blockIndex];
450             final T[] mBlock   = m.blocks[blockIndex];
451             for (int k = 0; k < outBlock.length; ++k) {
452                 outBlock[k] = tBlock[k].subtract(mBlock[k]);
453             }
454         }
455 
456         return out;
457     }
458 
459     /** {@inheritDoc} */
460     @Override
461     public FieldMatrix<T> scalarAdd(final T d) {
462         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
463 
464         // perform subtraction block-wise, to ensure good cache behavior
465         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
466             final T[] outBlock = out.blocks[blockIndex];
467             final T[] tBlock   = blocks[blockIndex];
468             for (int k = 0; k < outBlock.length; ++k) {
469                 outBlock[k] = tBlock[k].add(d);
470             }
471         }
472 
473         return out;
474     }
475 
476     /** {@inheritDoc} */
477     @Override
478     public FieldMatrix<T> scalarMultiply(final T d) {
479 
480         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
481 
482         // perform subtraction block-wise, to ensure good cache behavior
483         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
484             final T[] outBlock = out.blocks[blockIndex];
485             final T[] tBlock   = blocks[blockIndex];
486             for (int k = 0; k < outBlock.length; ++k) {
487                 outBlock[k] = tBlock[k].multiply(d);
488             }
489         }
490 
491         return out;
492     }
493 
494     /** {@inheritDoc} */
495     @Override
496     public FieldMatrix<T> multiply(final FieldMatrix<T> m)
497         throws DimensionMismatchException {
498         try {
499             return multiply((BlockFieldMatrix<T>) m);
500         } catch (ClassCastException cce) {
501 
502             // safety check
503             checkMultiplicationCompatible(m);
504 
505             final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.getColumnDimension());
506             final T zero = getField().getZero();
507 
508             // perform multiplication block-wise, to ensure good cache behavior
509             int blockIndex = 0;
510             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
511 
512                 final int pStart = iBlock * BLOCK_SIZE;
513                 final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
514 
515                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
516 
517                     final int qStart = jBlock * BLOCK_SIZE;
518                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
519 
520                     // select current block
521                     final T[] outBlock = out.blocks[blockIndex];
522 
523                     // perform multiplication on current block
524                     for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
525                         final int kWidth      = blockWidth(kBlock);
526                         final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
527                         final int rStart      = kBlock * BLOCK_SIZE;
528                         int k = 0;
529                         for (int p = pStart; p < pEnd; ++p) {
530                             final int lStart = (p - pStart) * kWidth;
531                             final int lEnd   = lStart + kWidth;
532                             for (int q = qStart; q < qEnd; ++q) {
533                                 T sum = zero;
534                                 int r = rStart;
535                                 for (int l = lStart; l < lEnd; ++l) {
536                                     sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
537                                     ++r;
538                                 }
539                                 outBlock[k] = outBlock[k].add(sum);
540                                 ++k;
541                             }
542                         }
543                     }
544 
545                     // go to next block
546                     ++blockIndex;
547 
548                 }
549             }
550 
551             return out;
552         }
553     }
554 
555     /**
556      * Returns the result of postmultiplying {@code this} by {@code m}.
557      *
558      * @param m matrix to postmultiply by
559      * @return {@code this * m}
560      * @throws DimensionMismatchException if the matrices are not compatible.
561      */
562     public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m)
563         throws DimensionMismatchException {
564 
565         // safety check
566         checkMultiplicationCompatible(m);
567 
568         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.columns);
569         final T zero = getField().getZero();
570 
571         // perform multiplication block-wise, to ensure good cache behavior
572         int blockIndex = 0;
573         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
574 
575             final int pStart = iBlock * BLOCK_SIZE;
576             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
577 
578             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
579                 final int jWidth = out.blockWidth(jBlock);
580                 final int jWidth2 = jWidth  + jWidth;
581                 final int jWidth3 = jWidth2 + jWidth;
582                 final int jWidth4 = jWidth3 + jWidth;
583 
584                 // select current block
585                 final T[] outBlock = out.blocks[blockIndex];
586 
587                 // perform multiplication on current block
588                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
589                     final int kWidth = blockWidth(kBlock);
590                     final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
591                     final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
592                     int k = 0;
593                     for (int p = pStart; p < pEnd; ++p) {
594                         final int lStart = (p - pStart) * kWidth;
595                         final int lEnd   = lStart + kWidth;
596                         for (int nStart = 0; nStart < jWidth; ++nStart) {
597                             T sum = zero;
598                             int l = lStart;
599                             int n = nStart;
600                             while (l < lEnd - 3) {
601                                 sum = sum.
602                                       add(tBlock[l].multiply(mBlock[n])).
603                                       add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
604                                       add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
605                                       add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
606                                 l += 4;
607                                 n += jWidth4;
608                             }
609                             while (l < lEnd) {
610                                 sum = sum.add(tBlock[l++].multiply(mBlock[n]));
611                                 n += jWidth;
612                             }
613                             outBlock[k] = outBlock[k].add(sum);
614                             ++k;
615                         }
616                     }
617                 }
618 
619                 // go to next block
620                 ++blockIndex;
621             }
622         }
623 
624         return out;
625     }
626 
627     /** {@inheritDoc} */
628     @Override
629     public T[][] getData() {
630 
631         final T[][] data = MathArrays.buildArray(getField(), getRowDimension(), getColumnDimension());
632         final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
633 
634         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
635             final int pStart = iBlock * BLOCK_SIZE;
636             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
637             int regularPos   = 0;
638             int lastPos      = 0;
639             for (int p = pStart; p < pEnd; ++p) {
640                 final T[] dataP = data[p];
641                 int blockIndex = iBlock * blockColumns;
642                 int dataPos    = 0;
643                 for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
644                     System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
645                     dataPos += BLOCK_SIZE;
646                 }
647                 System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
648                 regularPos += BLOCK_SIZE;
649                 lastPos    += lastColumns;
650             }
651         }
652 
653         return data;
654     }
655 
656     /** {@inheritDoc} */
657     @Override
658     public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
659                                        final int startColumn,
660                                        final int endColumn)
661         throws OutOfRangeException, NumberIsTooSmallException {
662         // safety checks
663         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
664 
665         // create the output matrix
666         final BlockFieldMatrix<T> out =
667             new BlockFieldMatrix<T>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
668 
669         // compute blocks shifts
670         final int blockStartRow    = startRow    / BLOCK_SIZE;
671         final int rowsShift        = startRow    % BLOCK_SIZE;
672         final int blockStartColumn = startColumn / BLOCK_SIZE;
673         final int columnsShift     = startColumn % BLOCK_SIZE;
674 
675         // perform extraction block-wise, to ensure good cache behavior
676         int pBlock = blockStartRow;
677         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
678             final int iHeight = out.blockHeight(iBlock);
679             int qBlock = blockStartColumn;
680             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
681                 final int jWidth = out.blockWidth(jBlock);
682 
683                 // handle one block of the output matrix
684                 final int      outIndex = iBlock * out.blockColumns + jBlock;
685                 final T[] outBlock = out.blocks[outIndex];
686                 final int      index    = pBlock * blockColumns + qBlock;
687                 final int      width    = blockWidth(qBlock);
688 
689                 final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
690                 final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
691                 if (heightExcess > 0) {
692                     // the submatrix block spans on two blocks rows from the original matrix
693                     if (widthExcess > 0) {
694                         // the submatrix block spans on two blocks columns from the original matrix
695                         final int width2 = blockWidth(qBlock + 1);
696                         copyBlockPart(blocks[index], width,
697                                       rowsShift, BLOCK_SIZE,
698                                       columnsShift, BLOCK_SIZE,
699                                       outBlock, jWidth, 0, 0);
700                         copyBlockPart(blocks[index + 1], width2,
701                                       rowsShift, BLOCK_SIZE,
702                                       0, widthExcess,
703                                       outBlock, jWidth, 0, jWidth - widthExcess);
704                         copyBlockPart(blocks[index + blockColumns], width,
705                                       0, heightExcess,
706                                       columnsShift, BLOCK_SIZE,
707                                       outBlock, jWidth, iHeight - heightExcess, 0);
708                         copyBlockPart(blocks[index + blockColumns + 1], width2,
709                                       0, heightExcess,
710                                       0, widthExcess,
711                                       outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
712                     } else {
713                         // the submatrix block spans on one block column from the original matrix
714                         copyBlockPart(blocks[index], width,
715                                       rowsShift, BLOCK_SIZE,
716                                       columnsShift, jWidth + columnsShift,
717                                       outBlock, jWidth, 0, 0);
718                         copyBlockPart(blocks[index + blockColumns], width,
719                                       0, heightExcess,
720                                       columnsShift, jWidth + columnsShift,
721                                       outBlock, jWidth, iHeight - heightExcess, 0);
722                     }
723                 } else {
724                     // the submatrix block spans on one block row from the original matrix
725                     if (widthExcess > 0) {
726                         // the submatrix block spans on two blocks columns from the original matrix
727                         final int width2 = blockWidth(qBlock + 1);
728                         copyBlockPart(blocks[index], width,
729                                       rowsShift, iHeight + rowsShift,
730                                       columnsShift, BLOCK_SIZE,
731                                       outBlock, jWidth, 0, 0);
732                         copyBlockPart(blocks[index + 1], width2,
733                                       rowsShift, iHeight + rowsShift,
734                                       0, widthExcess,
735                                       outBlock, jWidth, 0, jWidth - widthExcess);
736                     } else {
737                         // the submatrix block spans on one block column from the original matrix
738                         copyBlockPart(blocks[index], width,
739                                       rowsShift, iHeight + rowsShift,
740                                       columnsShift, jWidth + columnsShift,
741                                       outBlock, jWidth, 0, 0);
742                     }
743                }
744                 ++qBlock;
745             }
746             ++pBlock;
747         }
748 
749         return out;
750     }
751 
752     /**
753      * Copy a part of a block into another one
754      * <p>This method can be called only when the specified part fits in both
755      * blocks, no verification is done here.</p>
756      * @param srcBlock source block
757      * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
758      * @param srcStartRow start row in the source block
759      * @param srcEndRow end row (exclusive) in the source block
760      * @param srcStartColumn start column in the source block
761      * @param srcEndColumn end column (exclusive) in the source block
762      * @param dstBlock destination block
763      * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
764      * @param dstStartRow start row in the destination block
765      * @param dstStartColumn start column in the destination block
766      */
767     private void copyBlockPart(final T[] srcBlock, final int srcWidth,
768                                final int srcStartRow, final int srcEndRow,
769                                final int srcStartColumn, final int srcEndColumn,
770                                final T[] dstBlock, final int dstWidth,
771                                final int dstStartRow, final int dstStartColumn) {
772         final int length = srcEndColumn - srcStartColumn;
773         int srcPos = srcStartRow * srcWidth + srcStartColumn;
774         int dstPos = dstStartRow * dstWidth + dstStartColumn;
775         for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
776             System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
777             srcPos += srcWidth;
778             dstPos += dstWidth;
779         }
780     }
781 
782     /** {@inheritDoc} */
783     @Override
784     public void setSubMatrix(final T[][] subMatrix, final int row,
785                              final int column)
786         throws DimensionMismatchException, OutOfRangeException,
787         NoDataException, NullArgumentException {
788         // safety checks
789         MathUtils.checkNotNull(subMatrix);
790         final int refLength = subMatrix[0].length;
791         if (refLength == 0) {
792             throw new NoDataException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
793         }
794         final int endRow    = row + subMatrix.length - 1;
795         final int endColumn = column + refLength - 1;
796         checkSubMatrixIndex(row, endRow, column, endColumn);
797         for (final T[] subRow : subMatrix) {
798             if (subRow.length != refLength) {
799                 throw new DimensionMismatchException(refLength, subRow.length);
800             }
801         }
802 
803         // compute blocks bounds
804         final int blockStartRow    = row / BLOCK_SIZE;
805         final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
806         final int blockStartColumn = column / BLOCK_SIZE;
807         final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
808 
809         // perform copy block-wise, to ensure good cache behavior
810         for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
811             final int iHeight  = blockHeight(iBlock);
812             final int firstRow = iBlock * BLOCK_SIZE;
813             final int iStart   = FastMath.max(row,    firstRow);
814             final int iEnd     = FastMath.min(endRow + 1, firstRow + iHeight);
815 
816             for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
817                 final int jWidth      = blockWidth(jBlock);
818                 final int firstColumn = jBlock * BLOCK_SIZE;
819                 final int jStart      = FastMath.max(column,    firstColumn);
820                 final int jEnd        = FastMath.min(endColumn + 1, firstColumn + jWidth);
821                 final int jLength     = jEnd - jStart;
822 
823                 // handle one block, row by row
824                 final T[] block = blocks[iBlock * blockColumns + jBlock];
825                 for (int i = iStart; i < iEnd; ++i) {
826                     System.arraycopy(subMatrix[i - row], jStart - column,
827                                      block, (i - firstRow) * jWidth + (jStart - firstColumn),
828                                      jLength);
829                 }
830 
831             }
832         }
833     }
834 
835     /** {@inheritDoc} */
836     @Override
837     public FieldMatrix<T> getRowMatrix(final int row)
838         throws OutOfRangeException {
839         checkRowIndex(row);
840         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), 1, columns);
841 
842         // perform copy block-wise, to ensure good cache behavior
843         final int iBlock  = row / BLOCK_SIZE;
844         final int iRow    = row - iBlock * BLOCK_SIZE;
845         int outBlockIndex = 0;
846         int outIndex      = 0;
847         T[] outBlock = out.blocks[outBlockIndex];
848         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
849             final int jWidth     = blockWidth(jBlock);
850             final T[] block = blocks[iBlock * blockColumns + jBlock];
851             final int available  = outBlock.length - outIndex;
852             if (jWidth > available) {
853                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
854                 outBlock = out.blocks[++outBlockIndex];
855                 System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
856                 outIndex = jWidth - available;
857             } else {
858                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
859                 outIndex += jWidth;
860             }
861         }
862 
863         return out;
864     }
865 
866     /** {@inheritDoc} */
867     @Override
868     public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
869         throws MatrixDimensionMismatchException, OutOfRangeException {
870         try {
871             setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
872         } catch (ClassCastException cce) {
873             super.setRowMatrix(row, matrix);
874         }
875     }
876 
877     /**
878      * Sets the entries in row number <code>row</code>
879      * as a row matrix.  Row indices start at 0.
880      *
881      * @param row the row to be set
882      * @param matrix row matrix (must have one row and the same number of columns
883      * as the instance)
884      * @throws MatrixDimensionMismatchException if the matrix dimensions do
885      * not match one instance row.
886      * @throws OutOfRangeException if the specified row index is invalid.
887      */
888     public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
889         throws MatrixDimensionMismatchException, OutOfRangeException {
890         checkRowIndex(row);
891         final int nCols = getColumnDimension();
892         if ((matrix.getRowDimension() != 1) ||
893             (matrix.getColumnDimension() != nCols)) {
894             throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
895                                                        matrix.getColumnDimension(),
896                                                        1, nCols);
897         }
898 
899         // perform copy block-wise, to ensure good cache behavior
900         final int iBlock = row / BLOCK_SIZE;
901         final int iRow   = row - iBlock * BLOCK_SIZE;
902         int mBlockIndex  = 0;
903         int mIndex       = 0;
904         T[] mBlock  = matrix.blocks[mBlockIndex];
905         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
906             final int jWidth     = blockWidth(jBlock);
907             final T[] block = blocks[iBlock * blockColumns + jBlock];
908             final int available  = mBlock.length - mIndex;
909             if (jWidth > available) {
910                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
911                 mBlock = matrix.blocks[++mBlockIndex];
912                 System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
913                 mIndex = jWidth - available;
914             } else {
915                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
916                 mIndex += jWidth;
917            }
918         }
919     }
920 
921     /** {@inheritDoc} */
922     @Override
923     public FieldMatrix<T> getColumnMatrix(final int column)
924         throws OutOfRangeException {
925         checkColumnIndex(column);
926         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, 1);
927 
928         // perform copy block-wise, to ensure good cache behavior
929         final int jBlock  = column / BLOCK_SIZE;
930         final int jColumn = column - jBlock * BLOCK_SIZE;
931         final int jWidth  = blockWidth(jBlock);
932         int outBlockIndex = 0;
933         int outIndex      = 0;
934         T[] outBlock = out.blocks[outBlockIndex];
935         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
936             final int iHeight = blockHeight(iBlock);
937             final T[] block = blocks[iBlock * blockColumns + jBlock];
938             for (int i = 0; i < iHeight; ++i) {
939                 if (outIndex >= outBlock.length) {
940                     outBlock = out.blocks[++outBlockIndex];
941                     outIndex = 0;
942                 }
943                 outBlock[outIndex++] = block[i * jWidth + jColumn];
944             }
945         }
946 
947         return out;
948     }
949 
950     /** {@inheritDoc} */
951     @Override
952     public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
953         throws MatrixDimensionMismatchException, OutOfRangeException {
954         try {
955             setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
956         } catch (ClassCastException cce) {
957             super.setColumnMatrix(column, matrix);
958         }
959     }
960 
961     /**
962      * Sets the entries in column number {@code column}
963      * as a column matrix.  Column indices start at 0.
964      *
965      * @param column Column to be set.
966      * @param matrix Column matrix (must have one column and the same number of rows
967      * as the instance).
968      * @throws MatrixDimensionMismatchException if the matrix dimensions do
969      * not match one instance column.
970      * @throws OutOfRangeException if the specified column index is invalid.
971      */
972     void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
973         throws MatrixDimensionMismatchException, OutOfRangeException {
974         checkColumnIndex(column);
975         final int nRows = getRowDimension();
976         if ((matrix.getRowDimension() != nRows) ||
977             (matrix.getColumnDimension() != 1)) {
978             throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
979                                                        matrix.getColumnDimension(),
980                                                        nRows, 1);
981         }
982 
983         // perform copy block-wise, to ensure good cache behavior
984         final int jBlock  = column / BLOCK_SIZE;
985         final int jColumn = column - jBlock * BLOCK_SIZE;
986         final int jWidth  = blockWidth(jBlock);
987         int mBlockIndex = 0;
988         int mIndex      = 0;
989         T[] mBlock = matrix.blocks[mBlockIndex];
990         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
991             final int iHeight = blockHeight(iBlock);
992             final T[] block = blocks[iBlock * blockColumns + jBlock];
993             for (int i = 0; i < iHeight; ++i) {
994                 if (mIndex >= mBlock.length) {
995                     mBlock = matrix.blocks[++mBlockIndex];
996                     mIndex = 0;
997                 }
998                 block[i * jWidth + jColumn] = mBlock[mIndex++];
999             }
1000         }
1001     }
1002 
1003     /** {@inheritDoc} */
1004     @Override
1005     public FieldVector<T> getRowVector(final int row)
1006         throws OutOfRangeException {
1007         checkRowIndex(row);
1008         final T[] outData = MathArrays.buildArray(getField(), columns);
1009 
1010         // perform copy block-wise, to ensure good cache behavior
1011         final int iBlock  = row / BLOCK_SIZE;
1012         final int iRow    = row - iBlock * BLOCK_SIZE;
1013         int outIndex      = 0;
1014         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1015             final int jWidth     = blockWidth(jBlock);
1016             final T[] block = blocks[iBlock * blockColumns + jBlock];
1017             System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1018             outIndex += jWidth;
1019         }
1020 
1021         return new ArrayFieldVector<T>(getField(), outData, false);
1022     }
1023 
1024     /** {@inheritDoc} */
1025     @Override
1026     public void setRowVector(final int row, final FieldVector<T> vector)
1027         throws MatrixDimensionMismatchException, OutOfRangeException {
1028         try {
1029             setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1030         } catch (ClassCastException cce) {
1031             super.setRowVector(row, vector);
1032         }
1033     }
1034 
1035     /** {@inheritDoc} */
1036     @Override
1037     public FieldVector<T> getColumnVector(final int column)
1038         throws OutOfRangeException {
1039         checkColumnIndex(column);
1040         final T[] outData = MathArrays.buildArray(getField(), rows);
1041 
1042         // perform copy block-wise, to ensure good cache behavior
1043         final int jBlock  = column / BLOCK_SIZE;
1044         final int jColumn = column - jBlock * BLOCK_SIZE;
1045         final int jWidth  = blockWidth(jBlock);
1046         int outIndex      = 0;
1047         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1048             final int iHeight = blockHeight(iBlock);
1049             final T[] block = blocks[iBlock * blockColumns + jBlock];
1050             for (int i = 0; i < iHeight; ++i) {
1051                 outData[outIndex++] = block[i * jWidth + jColumn];
1052             }
1053         }
1054 
1055         return new ArrayFieldVector<T>(getField(), outData, false);
1056     }
1057 
1058     /** {@inheritDoc} */
1059     @Override
1060     public void setColumnVector(final int column, final FieldVector<T> vector)
1061         throws OutOfRangeException, MatrixDimensionMismatchException {
1062         try {
1063             setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1064         } catch (ClassCastException cce) {
1065             super.setColumnVector(column, vector);
1066         }
1067     }
1068 
1069     /** {@inheritDoc} */
1070     @Override
1071     public T[] getRow(final int row) throws OutOfRangeException {
1072         checkRowIndex(row);
1073         final T[] out = MathArrays.buildArray(getField(), columns);
1074 
1075         // perform copy block-wise, to ensure good cache behavior
1076         final int iBlock  = row / BLOCK_SIZE;
1077         final int iRow    = row - iBlock * BLOCK_SIZE;
1078         int outIndex      = 0;
1079         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1080             final int jWidth     = blockWidth(jBlock);
1081             final T[] block = blocks[iBlock * blockColumns + jBlock];
1082             System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1083             outIndex += jWidth;
1084         }
1085 
1086         return out;
1087     }
1088 
1089     /** {@inheritDoc} */
1090     @Override
1091     public void setRow(final int row, final T[] array)
1092         throws OutOfRangeException, MatrixDimensionMismatchException {
1093         checkRowIndex(row);
1094         final int nCols = getColumnDimension();
1095         if (array.length != nCols) {
1096             throw new MatrixDimensionMismatchException(1, array.length, 1, nCols);
1097         }
1098 
1099         // perform copy block-wise, to ensure good cache behavior
1100         final int iBlock  = row / BLOCK_SIZE;
1101         final int iRow    = row - iBlock * BLOCK_SIZE;
1102         int outIndex      = 0;
1103         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1104             final int jWidth     = blockWidth(jBlock);
1105             final T[] block = blocks[iBlock * blockColumns + jBlock];
1106             System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1107             outIndex += jWidth;
1108         }
1109     }
1110 
1111     /** {@inheritDoc} */
1112     @Override
1113     public T[] getColumn(final int column) throws OutOfRangeException {
1114         checkColumnIndex(column);
1115         final T[] out = MathArrays.buildArray(getField(), rows);
1116 
1117         // perform copy block-wise, to ensure good cache behavior
1118         final int jBlock  = column / BLOCK_SIZE;
1119         final int jColumn = column - jBlock * BLOCK_SIZE;
1120         final int jWidth  = blockWidth(jBlock);
1121         int outIndex      = 0;
1122         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1123             final int iHeight = blockHeight(iBlock);
1124             final T[] block = blocks[iBlock * blockColumns + jBlock];
1125             for (int i = 0; i < iHeight; ++i) {
1126                 out[outIndex++] = block[i * jWidth + jColumn];
1127             }
1128         }
1129 
1130         return out;
1131     }
1132 
1133     /** {@inheritDoc} */
1134     @Override
1135     public void setColumn(final int column, final T[] array)
1136         throws MatrixDimensionMismatchException, OutOfRangeException {
1137         checkColumnIndex(column);
1138         final int nRows = getRowDimension();
1139         if (array.length != nRows) {
1140             throw new MatrixDimensionMismatchException(array.length, 1, nRows, 1);
1141         }
1142 
1143         // perform copy block-wise, to ensure good cache behavior
1144         final int jBlock  = column / BLOCK_SIZE;
1145         final int jColumn = column - jBlock * BLOCK_SIZE;
1146         final int jWidth  = blockWidth(jBlock);
1147         int outIndex      = 0;
1148         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1149             final int iHeight = blockHeight(iBlock);
1150             final T[] block = blocks[iBlock * blockColumns + jBlock];
1151             for (int i = 0; i < iHeight; ++i) {
1152                 block[i * jWidth + jColumn] = array[outIndex++];
1153             }
1154         }
1155     }
1156 
1157     /** {@inheritDoc} */
1158     @Override
1159     public T getEntry(final int row, final int column)
1160         throws OutOfRangeException {
1161         checkRowIndex(row);
1162         checkColumnIndex(column);
1163 
1164         final int iBlock = row    / BLOCK_SIZE;
1165         final int jBlock = column / BLOCK_SIZE;
1166         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1167             (column - jBlock * BLOCK_SIZE);
1168 
1169         return blocks[iBlock * blockColumns + jBlock][k];
1170     }
1171 
1172     /** {@inheritDoc} */
1173     @Override
1174     public void setEntry(final int row, final int column, final T value)
1175         throws OutOfRangeException {
1176         checkRowIndex(row);
1177         checkColumnIndex(column);
1178 
1179         final int iBlock = row    / BLOCK_SIZE;
1180         final int jBlock = column / BLOCK_SIZE;
1181         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1182             (column - jBlock * BLOCK_SIZE);
1183 
1184         blocks[iBlock * blockColumns + jBlock][k] = value;
1185     }
1186 
1187     /** {@inheritDoc} */
1188     @Override
1189     public void addToEntry(final int row, final int column, final T increment)
1190         throws OutOfRangeException {
1191         checkRowIndex(row);
1192         checkColumnIndex(column);
1193 
1194         final int iBlock = row    / BLOCK_SIZE;
1195         final int jBlock = column / BLOCK_SIZE;
1196         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1197             (column - jBlock * BLOCK_SIZE);
1198         final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1199 
1200         blockIJ[k] = blockIJ[k].add(increment);
1201     }
1202 
1203     /** {@inheritDoc} */
1204     @Override
1205     public void multiplyEntry(final int row, final int column, final T factor)
1206         throws OutOfRangeException {
1207         checkRowIndex(row);
1208         checkColumnIndex(column);
1209 
1210         final int iBlock = row    / BLOCK_SIZE;
1211         final int jBlock = column / BLOCK_SIZE;
1212         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1213             (column - jBlock * BLOCK_SIZE);
1214         final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1215 
1216         blockIJ[k] = blockIJ[k].multiply(factor);
1217     }
1218 
1219     /** {@inheritDoc} */
1220     @Override
1221     public FieldMatrix<T> transpose() {
1222         final int nRows = getRowDimension();
1223         final int nCols = getColumnDimension();
1224         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), nCols, nRows);
1225 
1226         // perform transpose block-wise, to ensure good cache behavior
1227         int blockIndex = 0;
1228         for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1229             for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1230 
1231                 // transpose current block
1232                 final T[] outBlock = out.blocks[blockIndex];
1233                 final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1234                 final int      pStart   = iBlock * BLOCK_SIZE;
1235                 final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, columns);
1236                 final int      qStart   = jBlock * BLOCK_SIZE;
1237                 final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, rows);
1238                 int k = 0;
1239                 for (int p = pStart; p < pEnd; ++p) {
1240                     final int lInc = pEnd - pStart;
1241                     int l = p - pStart;
1242                     for (int q = qStart; q < qEnd; ++q) {
1243                         outBlock[k] = tBlock[l];
1244                         ++k;
1245                         l+= lInc;
1246                     }
1247                 }
1248 
1249                 // go to next block
1250                 ++blockIndex;
1251 
1252             }
1253         }
1254 
1255         return out;
1256     }
1257 
1258     /** {@inheritDoc} */
1259     @Override
1260     public int getRowDimension() {
1261         return rows;
1262     }
1263 
1264     /** {@inheritDoc} */
1265     @Override
1266     public int getColumnDimension() {
1267         return columns;
1268     }
1269 
1270     /** {@inheritDoc} */
1271     @Override
1272     public T[] operate(final T[] v) throws DimensionMismatchException {
1273         if (v.length != columns) {
1274             throw new DimensionMismatchException(v.length, columns);
1275         }
1276         final T[] out = MathArrays.buildArray(getField(), rows);
1277         final T zero = getField().getZero();
1278 
1279         // perform multiplication block-wise, to ensure good cache behavior
1280         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1281             final int pStart = iBlock * BLOCK_SIZE;
1282             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1283             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1284                 final T[] block  = blocks[iBlock * blockColumns + jBlock];
1285                 final int      qStart = jBlock * BLOCK_SIZE;
1286                 final int      qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1287                 int k = 0;
1288                 for (int p = pStart; p < pEnd; ++p) {
1289                     T sum = zero;
1290                     int q = qStart;
1291                     while (q < qEnd - 3) {
1292                         sum = sum.
1293                               add(block[k].multiply(v[q])).
1294                               add(block[k + 1].multiply(v[q + 1])).
1295                               add(block[k + 2].multiply(v[q + 2])).
1296                               add(block[k + 3].multiply(v[q + 3]));
1297                         k += 4;
1298                         q += 4;
1299                     }
1300                     while (q < qEnd) {
1301                         sum = sum.add(block[k++].multiply(v[q++]));
1302                     }
1303                     out[p] = out[p].add(sum);
1304                 }
1305             }
1306         }
1307 
1308         return out;
1309     }
1310 
1311     /** {@inheritDoc} */
1312     @Override
1313     public T[] preMultiply(final T[] v) throws DimensionMismatchException {
1314 
1315         if (v.length != rows) {
1316             throw new DimensionMismatchException(v.length, rows);
1317         }
1318         final T[] out = MathArrays.buildArray(getField(), columns);
1319         final T zero = getField().getZero();
1320 
1321         // perform multiplication block-wise, to ensure good cache behavior
1322         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1323             final int jWidth  = blockWidth(jBlock);
1324             final int jWidth2 = jWidth  + jWidth;
1325             final int jWidth3 = jWidth2 + jWidth;
1326             final int jWidth4 = jWidth3 + jWidth;
1327             final int qStart = jBlock * BLOCK_SIZE;
1328             final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1329             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1330                 final T[] block  = blocks[iBlock * blockColumns + jBlock];
1331                 final int      pStart = iBlock * BLOCK_SIZE;
1332                 final int      pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1333                 for (int q = qStart; q < qEnd; ++q) {
1334                     int k = q - qStart;
1335                     T sum = zero;
1336                     int p = pStart;
1337                     while (p < pEnd - 3) {
1338                         sum = sum.
1339                               add(block[k].multiply(v[p])).
1340                               add(block[k + jWidth].multiply(v[p + 1])).
1341                               add(block[k + jWidth2].multiply(v[p + 2])).
1342                               add(block[k + jWidth3].multiply(v[p + 3]));
1343                         k += jWidth4;
1344                         p += 4;
1345                     }
1346                     while (p < pEnd) {
1347                         sum = sum.add(block[k].multiply(v[p++]));
1348                         k += jWidth;
1349                     }
1350                     out[q] = out[q].add(sum);
1351                 }
1352             }
1353         }
1354 
1355         return out;
1356     }
1357 
1358     /** {@inheritDoc} */
1359     @Override
1360     public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor) {
1361         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1362         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1363             final int pStart = iBlock * BLOCK_SIZE;
1364             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1365             for (int p = pStart; p < pEnd; ++p) {
1366                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1367                     final int jWidth = blockWidth(jBlock);
1368                     final int qStart = jBlock * BLOCK_SIZE;
1369                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1370                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1371                     int k = (p - pStart) * jWidth;
1372                     for (int q = qStart; q < qEnd; ++q) {
1373                         block[k] = visitor.visit(p, q, block[k]);
1374                         ++k;
1375                     }
1376                 }
1377              }
1378         }
1379         return visitor.end();
1380     }
1381 
1382     /** {@inheritDoc} */
1383     @Override
1384     public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1385         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1386         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1387             final int pStart = iBlock * BLOCK_SIZE;
1388             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1389             for (int p = pStart; p < pEnd; ++p) {
1390                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1391                     final int jWidth = blockWidth(jBlock);
1392                     final int qStart = jBlock * BLOCK_SIZE;
1393                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1394                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1395                     int k = (p - pStart) * jWidth;
1396                     for (int q = qStart; q < qEnd; ++q) {
1397                         visitor.visit(p, q, block[k]);
1398                         ++k;
1399                     }
1400                 }
1401              }
1402         }
1403         return visitor.end();
1404     }
1405 
1406     /** {@inheritDoc} */
1407     @Override
1408     public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1409                             final int startRow, final int endRow,
1410                             final int startColumn, final int endColumn)
1411         throws OutOfRangeException, NumberIsTooSmallException {
1412         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1413         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1414         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1415             final int p0     = iBlock * BLOCK_SIZE;
1416             final int pStart = FastMath.max(startRow, p0);
1417             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1418             for (int p = pStart; p < pEnd; ++p) {
1419                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1420                     final int jWidth = blockWidth(jBlock);
1421                     final int q0     = jBlock * BLOCK_SIZE;
1422                     final int qStart = FastMath.max(startColumn, q0);
1423                     final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1424                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1425                     int k = (p - p0) * jWidth + qStart - q0;
1426                     for (int q = qStart; q < qEnd; ++q) {
1427                         block[k] = visitor.visit(p, q, block[k]);
1428                         ++k;
1429                     }
1430                 }
1431              }
1432         }
1433         return visitor.end();
1434     }
1435 
1436     /** {@inheritDoc} */
1437     @Override
1438     public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1439                             final int startRow, final int endRow,
1440                             final int startColumn, final int endColumn)
1441         throws OutOfRangeException, NumberIsTooSmallException {
1442         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1443         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1444         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1445             final int p0     = iBlock * BLOCK_SIZE;
1446             final int pStart = FastMath.max(startRow, p0);
1447             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1448             for (int p = pStart; p < pEnd; ++p) {
1449                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1450                     final int jWidth = blockWidth(jBlock);
1451                     final int q0     = jBlock * BLOCK_SIZE;
1452                     final int qStart = FastMath.max(startColumn, q0);
1453                     final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1454                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1455                     int k = (p - p0) * jWidth + qStart - q0;
1456                     for (int q = qStart; q < qEnd; ++q) {
1457                         visitor.visit(p, q, block[k]);
1458                         ++k;
1459                     }
1460                 }
1461              }
1462         }
1463         return visitor.end();
1464     }
1465 
1466     /** {@inheritDoc} */
1467     @Override
1468     public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor) {
1469         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1470         int blockIndex = 0;
1471         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1472             final int pStart = iBlock * BLOCK_SIZE;
1473             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1474             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1475                 final int qStart = jBlock * BLOCK_SIZE;
1476                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1477                 final T[] block = blocks[blockIndex];
1478                 int k = 0;
1479                 for (int p = pStart; p < pEnd; ++p) {
1480                     for (int q = qStart; q < qEnd; ++q) {
1481                         block[k] = visitor.visit(p, q, block[k]);
1482                         ++k;
1483                     }
1484                 }
1485                 ++blockIndex;
1486             }
1487         }
1488         return visitor.end();
1489     }
1490 
1491     /** {@inheritDoc} */
1492     @Override
1493     public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1494         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1495         int blockIndex = 0;
1496         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1497             final int pStart = iBlock * BLOCK_SIZE;
1498             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1499             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1500                 final int qStart = jBlock * BLOCK_SIZE;
1501                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1502                 final T[] block = blocks[blockIndex];
1503                 int k = 0;
1504                 for (int p = pStart; p < pEnd; ++p) {
1505                     for (int q = qStart; q < qEnd; ++q) {
1506                         visitor.visit(p, q, block[k]);
1507                         ++k;
1508                     }
1509                 }
1510                 ++blockIndex;
1511             }
1512         }
1513         return visitor.end();
1514     }
1515 
1516     /** {@inheritDoc} */
1517     @Override
1518     public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1519                                   final int startRow, final int endRow,
1520                                   final int startColumn, final int endColumn)
1521         throws OutOfRangeException, NumberIsTooSmallException {
1522         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1523         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1524         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1525             final int p0     = iBlock * BLOCK_SIZE;
1526             final int pStart = FastMath.max(startRow, p0);
1527             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1528             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1529                 final int jWidth = blockWidth(jBlock);
1530                 final int q0     = jBlock * BLOCK_SIZE;
1531                 final int qStart = FastMath.max(startColumn, q0);
1532                 final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1533                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1534                 for (int p = pStart; p < pEnd; ++p) {
1535                     int k = (p - p0) * jWidth + qStart - q0;
1536                     for (int q = qStart; q < qEnd; ++q) {
1537                         block[k] = visitor.visit(p, q, block[k]);
1538                         ++k;
1539                     }
1540                 }
1541             }
1542         }
1543         return visitor.end();
1544     }
1545 
1546     /** {@inheritDoc} */
1547     @Override
1548     public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1549                                   final int startRow, final int endRow,
1550                                   final int startColumn, final int endColumn)
1551         throws OutOfRangeException, NumberIsTooSmallException {
1552         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1553         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1554         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1555             final int p0     = iBlock * BLOCK_SIZE;
1556             final int pStart = FastMath.max(startRow, p0);
1557             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1558             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1559                 final int jWidth = blockWidth(jBlock);
1560                 final int q0     = jBlock * BLOCK_SIZE;
1561                 final int qStart = FastMath.max(startColumn, q0);
1562                 final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1563                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1564                 for (int p = pStart; p < pEnd; ++p) {
1565                     int k = (p - p0) * jWidth + qStart - q0;
1566                     for (int q = qStart; q < qEnd; ++q) {
1567                         visitor.visit(p, q, block[k]);
1568                         ++k;
1569                     }
1570                 }
1571             }
1572         }
1573         return visitor.end();
1574     }
1575 
1576     /**
1577      * Get the height of a block.
1578      * @param blockRow row index (in block sense) of the block
1579      * @return height (number of rows) of the block
1580      */
1581     private int blockHeight(final int blockRow) {
1582         return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1583     }
1584 
1585     /**
1586      * Get the width of a block.
1587      * @param blockColumn column index (in block sense) of the block
1588      * @return width (number of columns) of the block
1589      */
1590     private int blockWidth(final int blockColumn) {
1591         return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1592     }
1593 }