View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math4.legacy.linear;
19  
20  import java.io.Serializable;
21  
22  import org.apache.commons.math4.legacy.core.Field;
23  import org.apache.commons.math4.legacy.core.FieldElement;
24  import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
25  import org.apache.commons.math4.legacy.exception.NoDataException;
26  import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
27  import org.apache.commons.math4.legacy.exception.NullArgumentException;
28  import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
29  import org.apache.commons.math4.legacy.exception.OutOfRangeException;
30  import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
31  import org.apache.commons.math4.core.jdkmath.JdkMath;
32  import org.apache.commons.math4.legacy.core.MathArrays;
33  
34  /**
35   * Cache-friendly implementation of FieldMatrix using a flat arrays to store
36   * square blocks of the matrix.
37   * <p>
38   * This implementation is specially designed to be cache-friendly. Square blocks are
39   * stored as small arrays and allow efficient traversal of data both in row major direction
40   * and columns major direction, one block at a time. This greatly increases performances
41   * for algorithms that use crossed directions loops like multiplication or transposition.
42   * </p>
43   * <p>
44   * The size of square blocks is a static parameter. It may be tuned according to the cache
45   * size of the target computer processor. As a rule of thumbs, it should be the largest
46   * value that allows three blocks to be simultaneously cached (this is necessary for example
47   * for matrix multiplication). The default value is to use 36x36 blocks.
48   * </p>
49   * <p>
50   * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
51   * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
52   * blocks are flattened in row major order in single dimension arrays which are therefore
53   * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
54   * organized in row major order.
55   * </p>
56   * <p>
57   * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
58   * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
59   * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
60   * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
61   * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
62   * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
63   * rectangle.
64   * </p>
65   * <p>
66   * The layout complexity overhead versus simple mapping of matrices to java
67   * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
68   * to up to 3-fold improvements for matrices of moderate to large size.
69   * </p>
70   * @param <T> the type of the field elements
71   * @since 2.0
72   */
73  public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
74      /** Block size. */
75      public static final int BLOCK_SIZE = 36;
76      /** Serializable version identifier. */
77      private static final long serialVersionUID = -4602336630143123183L;
78      /** Blocks of matrix entries. */
79      private final T[][] blocks;
80      /** Number of rows of the matrix. */
81      private final int rows;
82      /** Number of columns of the matrix. */
83      private final int columns;
84      /** Number of block rows of the matrix. */
85      private final int blockRows;
86      /** Number of block columns of the matrix. */
87      private final int blockColumns;
88  
89      /**
90       * Create a new matrix with the supplied row and column dimensions.
91       *
92       * @param field Field to which the elements belong.
93       * @param rows Number of rows in the new matrix.
94       * @param columns Number of columns in the new matrix.
95       * @throws NotStrictlyPositiveException if row or column dimension is not
96       * positive.
97       */
98      public BlockFieldMatrix(final Field<T> field, final int rows,
99                              final int columns)
100         throws NotStrictlyPositiveException {
101         super(field, rows, columns);
102         this.rows    = rows;
103         this.columns = columns;
104 
105         // number of blocks
106         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
107         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
108 
109         // allocate storage blocks, taking care of smaller ones at right and bottom
110         blocks = createBlocksLayout(field, rows, columns);
111     }
112 
113     /**
114      * Create a new dense matrix copying entries from raw layout data.
115      * <p>The input array <em>must</em> already be in raw layout.</p>
116      * <p>Calling this constructor is equivalent to call:
117      * {@code matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
118      *                                   toBlocksLayout(rawData), false);}
119      *
120      * @param rawData Data for the new matrix, in raw layout.
121      * @throws DimensionMismatchException if the {@code blockData} shape is
122      * inconsistent with block layout.
123      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
124      */
125     public BlockFieldMatrix(final T[][] rawData)
126         throws DimensionMismatchException {
127         this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
128     }
129 
130     /**
131      * Create a new dense matrix copying entries from block layout data.
132      * <p>The input array <em>must</em> already be in blocks layout.</p>
133      * @param rows  the number of rows in the new matrix
134      * @param columns  the number of columns in the new matrix
135      * @param blockData data for new matrix
136      * @param copyArray if true, the input array will be copied, otherwise
137      * it will be referenced
138      *
139      * @throws DimensionMismatchException if the {@code blockData} shape is
140      * inconsistent with block layout.
141      * @throws NotStrictlyPositiveException if row or column dimension is not
142      * positive.
143      * @see #createBlocksLayout(Field, int, int)
144      * @see #toBlocksLayout(FieldElement[][])
145      * @see #BlockFieldMatrix(FieldElement[][])
146      */
147     public BlockFieldMatrix(final int rows, final int columns,
148                             final T[][] blockData, final boolean copyArray)
149         throws DimensionMismatchException, NotStrictlyPositiveException {
150         super(extractField(blockData), rows, columns);
151         this.rows    = rows;
152         this.columns = columns;
153 
154         // number of blocks
155         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
156         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
157 
158         if (copyArray) {
159             // allocate storage blocks, taking care of smaller ones at right and bottom
160             blocks = MathArrays.buildArray(getField(), blockRows * blockColumns, -1);
161         } else {
162             // reference existing array
163             blocks = blockData;
164         }
165 
166         int index = 0;
167         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
168             final int iHeight = blockHeight(iBlock);
169             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
170                 if (blockData[index].length != iHeight * blockWidth(jBlock)) {
171                     throw new DimensionMismatchException(blockData[index].length,
172                                                          iHeight * blockWidth(jBlock));
173                 }
174                 if (copyArray) {
175                     blocks[index] = blockData[index].clone();
176                 }
177             }
178         }
179     }
180 
181     /**
182      * Convert a data array from raw layout to blocks layout.
183      * <p>
184      * Raw layout is the straightforward layout where element at row i and
185      * column j is in array element <code>rawData[i][j]</code>. Blocks layout
186      * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
187      * is split in square blocks (except at right and bottom side where blocks may
188      * be rectangular to fit matrix size) and each block is stored in a flattened
189      * one-dimensional array.
190      * </p>
191      * <p>
192      * This method creates an array in blocks layout from an input array in raw layout.
193      * It can be used to provide the array argument of the {@link
194      * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
195      * constructor.
196      * </p>
197      * @param <T> Type of the field elements.
198      * @param rawData Data array in raw layout.
199      * @return a new data array containing the same entries but in blocks layout
200      * @throws DimensionMismatchException if {@code rawData} is not rectangular
201      *  (not all rows have the same length).
202      * @see #createBlocksLayout(Field, int, int)
203      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
204      */
205     public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
206         throws DimensionMismatchException {
207 
208         final int rows         = rawData.length;
209         final int columns      = rawData[0].length;
210         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
211         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
212 
213         // safety checks
214         for (int i = 0; i < rawData.length; ++i) {
215             final int length = rawData[i].length;
216             if (length != columns) {
217                 throw new DimensionMismatchException(columns, length);
218             }
219         }
220 
221         // convert array
222         final Field<T> field = extractField(rawData);
223         final T[][] blocks = MathArrays.buildArray(field, blockRows * blockColumns, -1);
224         int blockIndex = 0;
225         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
226             final int pStart  = iBlock * BLOCK_SIZE;
227             final int pEnd    = JdkMath.min(pStart + BLOCK_SIZE, rows);
228             final int iHeight = pEnd - pStart;
229             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
230                 final int qStart = jBlock * BLOCK_SIZE;
231                 final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
232                 final int jWidth = qEnd - qStart;
233 
234                 // allocate new block
235                 final T[] block = MathArrays.buildArray(field, iHeight * jWidth);
236                 blocks[blockIndex] = block;
237 
238                 // copy data
239                 int index = 0;
240                 for (int p = pStart; p < pEnd; ++p) {
241                     System.arraycopy(rawData[p], qStart, block, index, jWidth);
242                     index += jWidth;
243                 }
244 
245                 ++blockIndex;
246             }
247         }
248 
249         return blocks;
250     }
251 
252     /**
253      * Create a data array in blocks layout.
254      * <p>
255      * This method can be used to create the array argument of the {@link
256      * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
257      * constructor.
258      * </p>
259      * @param <T> Type of the field elements.
260      * @param field Field to which the elements belong.
261      * @param rows Number of rows in the new matrix.
262      * @param columns Number of columns in the new matrix.
263      * @return a new data array in blocks layout.
264      * @see #toBlocksLayout(FieldElement[][])
265      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
266      */
267     public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
268                                                                        final int rows, final int columns) {
269         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
270         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
271 
272         final T[][] blocks = MathArrays.buildArray(field, blockRows * blockColumns, -1);
273         int blockIndex = 0;
274         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
275             final int pStart  = iBlock * BLOCK_SIZE;
276             final int pEnd    = JdkMath.min(pStart + BLOCK_SIZE, rows);
277             final int iHeight = pEnd - pStart;
278             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
279                 final int qStart = jBlock * BLOCK_SIZE;
280                 final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
281                 final int jWidth = qEnd - qStart;
282                 blocks[blockIndex] = MathArrays.buildArray(field, iHeight * jWidth);
283                 ++blockIndex;
284             }
285         }
286 
287         return blocks;
288     }
289 
290     /** {@inheritDoc} */
291     @Override
292     public FieldMatrix<T> createMatrix(final int rowDimension,
293                                        final int columnDimension)
294         throws NotStrictlyPositiveException {
295         return new BlockFieldMatrix<>(getField(), rowDimension,
296                                        columnDimension);
297     }
298 
299     /** {@inheritDoc} */
300     @Override
301     public FieldMatrix<T> copy() {
302 
303         // create an empty matrix
304         BlockFieldMatrix<T> copied = new BlockFieldMatrix<>(getField(), rows, columns);
305 
306         // copy the blocks
307         for (int i = 0; i < blocks.length; ++i) {
308             System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
309         }
310 
311         return copied;
312     }
313 
314     /** {@inheritDoc} */
315     @Override
316     public FieldMatrix<T> add(final FieldMatrix<T> m)
317         throws MatrixDimensionMismatchException {
318         if (m instanceof BlockFieldMatrix) {
319             return add((BlockFieldMatrix<T>) m);
320         }
321 
322         // safety check
323         checkAdd(m);
324 
325         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
326 
327         // perform addition block-wise, to ensure good cache behavior
328         int blockIndex = 0;
329         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
330             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
331 
332                 // perform addition on the current block
333                 final T[] outBlock = out.blocks[blockIndex];
334                 final T[] tBlock   = blocks[blockIndex];
335                 final int      pStart   = iBlock * BLOCK_SIZE;
336                 final int      pEnd     = JdkMath.min(pStart + BLOCK_SIZE, rows);
337                 final int      qStart   = jBlock * BLOCK_SIZE;
338                 final int      qEnd     = JdkMath.min(qStart + BLOCK_SIZE, columns);
339                 int k = 0;
340                 for (int p = pStart; p < pEnd; ++p) {
341                     for (int q = qStart; q < qEnd; ++q) {
342                         outBlock[k] = tBlock[k].add(m.getEntry(p, q));
343                         ++k;
344                     }
345                 }
346 
347                 // go to next block
348                 ++blockIndex;
349             }
350         }
351 
352         return out;
353     }
354 
355     /**
356      * Compute the sum of {@code this} and {@code m}.
357      *
358      * @param m matrix to be added
359      * @return {@code this + m}
360      * @throws MatrixDimensionMismatchException if {@code m} is not the same
361      * size as {@code this}
362      */
363     public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
364         throws MatrixDimensionMismatchException {
365 
366         // safety check
367         checkAdd(m);
368 
369         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
370 
371         // perform addition block-wise, to ensure good cache behavior
372         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
373             final T[] outBlock = out.blocks[blockIndex];
374             final T[] tBlock   = blocks[blockIndex];
375             final T[] mBlock   = m.blocks[blockIndex];
376             for (int k = 0; k < outBlock.length; ++k) {
377                 outBlock[k] = tBlock[k].add(mBlock[k]);
378             }
379         }
380 
381         return out;
382     }
383 
384     /** {@inheritDoc} */
385     @Override
386     public FieldMatrix<T> subtract(final FieldMatrix<T> m)
387         throws MatrixDimensionMismatchException {
388         if (m instanceof BlockFieldMatrix) {
389             return subtract((BlockFieldMatrix<T>) m);
390         }
391 
392         // safety check
393         checkAdd(m);
394 
395         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
396 
397         // perform subtraction block-wise, to ensure good cache behavior
398         int blockIndex = 0;
399         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
400             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
401 
402                 // perform subtraction on the current block
403                 final T[] outBlock = out.blocks[blockIndex];
404                 final T[] tBlock   = blocks[blockIndex];
405                 final int      pStart   = iBlock * BLOCK_SIZE;
406                 final int      pEnd     = JdkMath.min(pStart + BLOCK_SIZE, rows);
407                 final int      qStart   = jBlock * BLOCK_SIZE;
408                 final int      qEnd     = JdkMath.min(qStart + BLOCK_SIZE, columns);
409                 int k = 0;
410                 for (int p = pStart; p < pEnd; ++p) {
411                     for (int q = qStart; q < qEnd; ++q) {
412                         outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
413                         ++k;
414                     }
415                 }
416 
417                 // go to next block
418                 ++blockIndex;
419             }
420         }
421 
422         return out;
423     }
424 
425     /**
426      * Compute {@code this - m}.
427      *
428      * @param m matrix to be subtracted
429      * @return {@code this - m}
430      * @throws MatrixDimensionMismatchException if {@code m} is not the same
431      * size as {@code this}
432      */
433     public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m) throws MatrixDimensionMismatchException {
434         // safety check
435         checkAdd(m);
436 
437         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
438 
439         // perform subtraction block-wise, to ensure good cache behavior
440         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
441             final T[] outBlock = out.blocks[blockIndex];
442             final T[] tBlock   = blocks[blockIndex];
443             final T[] mBlock   = m.blocks[blockIndex];
444             for (int k = 0; k < outBlock.length; ++k) {
445                 outBlock[k] = tBlock[k].subtract(mBlock[k]);
446             }
447         }
448 
449         return out;
450     }
451 
452     /** {@inheritDoc} */
453     @Override
454     public FieldMatrix<T> scalarAdd(final T d) {
455         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
456 
457         // perform subtraction block-wise, to ensure good cache behavior
458         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
459             final T[] outBlock = out.blocks[blockIndex];
460             final T[] tBlock   = blocks[blockIndex];
461             for (int k = 0; k < outBlock.length; ++k) {
462                 outBlock[k] = tBlock[k].add(d);
463             }
464         }
465 
466         return out;
467     }
468 
469     /** {@inheritDoc} */
470     @Override
471     public FieldMatrix<T> scalarMultiply(final T d) {
472 
473         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
474 
475         // perform subtraction block-wise, to ensure good cache behavior
476         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
477             final T[] outBlock = out.blocks[blockIndex];
478             final T[] tBlock   = blocks[blockIndex];
479             for (int k = 0; k < outBlock.length; ++k) {
480                 outBlock[k] = tBlock[k].multiply(d);
481             }
482         }
483 
484         return out;
485     }
486 
487     /** {@inheritDoc} */
488     @Override
489     public FieldMatrix<T> multiply(final FieldMatrix<T> m)
490         throws DimensionMismatchException {
491         if (m instanceof BlockFieldMatrix) {
492             return multiply((BlockFieldMatrix<T>) m);
493         }
494 
495         // safety check
496         checkMultiply(m);
497 
498         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, m.getColumnDimension());
499         final T zero = getField().getZero();
500 
501         // perform multiplication block-wise, to ensure good cache behavior
502         int blockIndex = 0;
503         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
504 
505             final int pStart = iBlock * BLOCK_SIZE;
506             final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
507 
508             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
509 
510                 final int qStart = jBlock * BLOCK_SIZE;
511                 final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
512 
513                 // select current block
514                 final T[] outBlock = out.blocks[blockIndex];
515 
516                 // perform multiplication on current block
517                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
518                     final int kWidth      = blockWidth(kBlock);
519                     final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
520                     final int rStart      = kBlock * BLOCK_SIZE;
521                     int k = 0;
522                     for (int p = pStart; p < pEnd; ++p) {
523                         final int lStart = (p - pStart) * kWidth;
524                         final int lEnd   = lStart + kWidth;
525                         for (int q = qStart; q < qEnd; ++q) {
526                             T sum = zero;
527                             int r = rStart;
528                             for (int l = lStart; l < lEnd; ++l) {
529                                 sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
530                                 ++r;
531                             }
532                             outBlock[k] = outBlock[k].add(sum);
533                             ++k;
534                         }
535                     }
536                 }
537 
538                 // go to next block
539                 ++blockIndex;
540             }
541         }
542 
543         return out;
544     }
545 
546     /**
547      * Returns the result of postmultiplying {@code this} by {@code m}.
548      *
549      * @param m matrix to postmultiply by
550      * @return {@code this * m}
551      * @throws DimensionMismatchException if the matrices are not compatible.
552      */
553     public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m)
554         throws DimensionMismatchException {
555 
556         // safety check
557         checkMultiply(m);
558 
559         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, m.columns);
560         final T zero = getField().getZero();
561 
562         // perform multiplication block-wise, to ensure good cache behavior
563         int blockIndex = 0;
564         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
565 
566             final int pStart = iBlock * BLOCK_SIZE;
567             final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
568 
569             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
570                 final int jWidth = out.blockWidth(jBlock);
571                 final int jWidth2 = jWidth  + jWidth;
572                 final int jWidth3 = jWidth2 + jWidth;
573                 final int jWidth4 = jWidth3 + jWidth;
574 
575                 // select current block
576                 final T[] outBlock = out.blocks[blockIndex];
577 
578                 // perform multiplication on current block
579                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
580                     final int kWidth = blockWidth(kBlock);
581                     final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
582                     final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
583                     int k = 0;
584                     for (int p = pStart; p < pEnd; ++p) {
585                         final int lStart = (p - pStart) * kWidth;
586                         final int lEnd   = lStart + kWidth;
587                         for (int nStart = 0; nStart < jWidth; ++nStart) {
588                             T sum = zero;
589                             int l = lStart;
590                             int n = nStart;
591                             while (l < lEnd - 3) {
592                                 sum = sum.
593                                       add(tBlock[l].multiply(mBlock[n])).
594                                       add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
595                                       add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
596                                       add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
597                                 l += 4;
598                                 n += jWidth4;
599                             }
600                             while (l < lEnd) {
601                                 sum = sum.add(tBlock[l++].multiply(mBlock[n]));
602                                 n += jWidth;
603                             }
604                             outBlock[k] = outBlock[k].add(sum);
605                             ++k;
606                         }
607                     }
608                 }
609 
610                 // go to next block
611                 ++blockIndex;
612             }
613         }
614 
615         return out;
616     }
617 
618     /** {@inheritDoc} */
619     @Override
620     public T[][] getData() {
621 
622         final T[][] data = MathArrays.buildArray(getField(), getRowDimension(), getColumnDimension());
623         final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
624 
625         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
626             final int pStart = iBlock * BLOCK_SIZE;
627             final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
628             int regularPos   = 0;
629             int lastPos      = 0;
630             for (int p = pStart; p < pEnd; ++p) {
631                 final T[] dataP = data[p];
632                 int blockIndex = iBlock * blockColumns;
633                 int dataPos    = 0;
634                 for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
635                     System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
636                     dataPos += BLOCK_SIZE;
637                 }
638                 System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
639                 regularPos += BLOCK_SIZE;
640                 lastPos    += lastColumns;
641             }
642         }
643 
644         return data;
645     }
646 
647     /** {@inheritDoc} */
648     @Override
649     public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
650                                        final int startColumn,
651                                        final int endColumn)
652         throws OutOfRangeException, NumberIsTooSmallException {
653         // safety checks
654         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
655 
656         // create the output matrix
657         final BlockFieldMatrix<T> out =
658             new BlockFieldMatrix<>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
659 
660         // compute blocks shifts
661         final int blockStartRow    = startRow    / BLOCK_SIZE;
662         final int rowsShift        = startRow    % BLOCK_SIZE;
663         final int blockStartColumn = startColumn / BLOCK_SIZE;
664         final int columnsShift     = startColumn % BLOCK_SIZE;
665 
666         // perform extraction block-wise, to ensure good cache behavior
667         int pBlock = blockStartRow;
668         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
669             final int iHeight = out.blockHeight(iBlock);
670             int qBlock = blockStartColumn;
671             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
672                 final int jWidth = out.blockWidth(jBlock);
673 
674                 // handle one block of the output matrix
675                 final int      outIndex = iBlock * out.blockColumns + jBlock;
676                 final T[] outBlock = out.blocks[outIndex];
677                 final int      index    = pBlock * blockColumns + qBlock;
678                 final int      width    = blockWidth(qBlock);
679 
680                 final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
681                 final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
682                 if (heightExcess > 0) {
683                     // the submatrix block spans on two blocks rows from the original matrix
684                     if (widthExcess > 0) {
685                         // the submatrix block spans on two blocks columns from the original matrix
686                         final int width2 = blockWidth(qBlock + 1);
687                         copyBlockPart(blocks[index], width,
688                                       rowsShift, BLOCK_SIZE,
689                                       columnsShift, BLOCK_SIZE,
690                                       outBlock, jWidth, 0, 0);
691                         copyBlockPart(blocks[index + 1], width2,
692                                       rowsShift, BLOCK_SIZE,
693                                       0, widthExcess,
694                                       outBlock, jWidth, 0, jWidth - widthExcess);
695                         copyBlockPart(blocks[index + blockColumns], width,
696                                       0, heightExcess,
697                                       columnsShift, BLOCK_SIZE,
698                                       outBlock, jWidth, iHeight - heightExcess, 0);
699                         copyBlockPart(blocks[index + blockColumns + 1], width2,
700                                       0, heightExcess,
701                                       0, widthExcess,
702                                       outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
703                     } else {
704                         // the submatrix block spans on one block column from the original matrix
705                         copyBlockPart(blocks[index], width,
706                                       rowsShift, BLOCK_SIZE,
707                                       columnsShift, jWidth + columnsShift,
708                                       outBlock, jWidth, 0, 0);
709                         copyBlockPart(blocks[index + blockColumns], width,
710                                       0, heightExcess,
711                                       columnsShift, jWidth + columnsShift,
712                                       outBlock, jWidth, iHeight - heightExcess, 0);
713                     }
714                 } else {
715                     // the submatrix block spans on one block row from the original matrix
716                     if (widthExcess > 0) {
717                         // the submatrix block spans on two blocks columns from the original matrix
718                         final int width2 = blockWidth(qBlock + 1);
719                         copyBlockPart(blocks[index], width,
720                                       rowsShift, iHeight + rowsShift,
721                                       columnsShift, BLOCK_SIZE,
722                                       outBlock, jWidth, 0, 0);
723                         copyBlockPart(blocks[index + 1], width2,
724                                       rowsShift, iHeight + rowsShift,
725                                       0, widthExcess,
726                                       outBlock, jWidth, 0, jWidth - widthExcess);
727                     } else {
728                         // the submatrix block spans on one block column from the original matrix
729                         copyBlockPart(blocks[index], width,
730                                       rowsShift, iHeight + rowsShift,
731                                       columnsShift, jWidth + columnsShift,
732                                       outBlock, jWidth, 0, 0);
733                     }
734                }
735                 ++qBlock;
736             }
737             ++pBlock;
738         }
739 
740         return out;
741     }
742 
743     /**
744      * Copy a part of a block into another one
745      * <p>This method can be called only when the specified part fits in both
746      * blocks, no verification is done here.</p>
747      * @param srcBlock source block
748      * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
749      * @param srcStartRow start row in the source block
750      * @param srcEndRow end row (exclusive) in the source block
751      * @param srcStartColumn start column in the source block
752      * @param srcEndColumn end column (exclusive) in the source block
753      * @param dstBlock destination block
754      * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
755      * @param dstStartRow start row in the destination block
756      * @param dstStartColumn start column in the destination block
757      */
758     private void copyBlockPart(final T[] srcBlock, final int srcWidth,
759                                final int srcStartRow, final int srcEndRow,
760                                final int srcStartColumn, final int srcEndColumn,
761                                final T[] dstBlock, final int dstWidth,
762                                final int dstStartRow, final int dstStartColumn) {
763         final int length = srcEndColumn - srcStartColumn;
764         int srcPos = srcStartRow * srcWidth + srcStartColumn;
765         int dstPos = dstStartRow * dstWidth + dstStartColumn;
766         for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
767             System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
768             srcPos += srcWidth;
769             dstPos += dstWidth;
770         }
771     }
772 
773     /** {@inheritDoc} */
774     @Override
775     public void setSubMatrix(final T[][] subMatrix, final int row,
776                              final int column)
777         throws DimensionMismatchException, OutOfRangeException,
778         NoDataException, NullArgumentException {
779         // safety checks
780         NullArgumentException.check(subMatrix);
781         final int refLength = subMatrix[0].length;
782         if (refLength == 0) {
783             throw new NoDataException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
784         }
785         final int endRow    = row + subMatrix.length - 1;
786         final int endColumn = column + refLength - 1;
787         checkSubMatrixIndex(row, endRow, column, endColumn);
788         for (final T[] subRow : subMatrix) {
789             if (subRow.length != refLength) {
790                 throw new DimensionMismatchException(refLength, subRow.length);
791             }
792         }
793 
794         // compute blocks bounds
795         final int blockStartRow    = row / BLOCK_SIZE;
796         final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
797         final int blockStartColumn = column / BLOCK_SIZE;
798         final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
799 
800         // perform copy block-wise, to ensure good cache behavior
801         for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
802             final int iHeight  = blockHeight(iBlock);
803             final int firstRow = iBlock * BLOCK_SIZE;
804             final int iStart   = JdkMath.max(row,    firstRow);
805             final int iEnd     = JdkMath.min(endRow + 1, firstRow + iHeight);
806 
807             for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
808                 final int jWidth      = blockWidth(jBlock);
809                 final int firstColumn = jBlock * BLOCK_SIZE;
810                 final int jStart      = JdkMath.max(column,    firstColumn);
811                 final int jEnd        = JdkMath.min(endColumn + 1, firstColumn + jWidth);
812                 final int jLength     = jEnd - jStart;
813 
814                 // handle one block, row by row
815                 final T[] block = blocks[iBlock * blockColumns + jBlock];
816                 for (int i = iStart; i < iEnd; ++i) {
817                     System.arraycopy(subMatrix[i - row], jStart - column,
818                                      block, (i - firstRow) * jWidth + (jStart - firstColumn),
819                                      jLength);
820                 }
821             }
822         }
823     }
824 
825     /** {@inheritDoc} */
826     @Override
827     public FieldMatrix<T> getRowMatrix(final int row)
828         throws OutOfRangeException {
829         checkRowIndex(row);
830         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), 1, columns);
831 
832         // perform copy block-wise, to ensure good cache behavior
833         final int iBlock  = row / BLOCK_SIZE;
834         final int iRow    = row - iBlock * BLOCK_SIZE;
835         int outBlockIndex = 0;
836         int outIndex      = 0;
837         T[] outBlock = out.blocks[outBlockIndex];
838         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
839             final int jWidth     = blockWidth(jBlock);
840             final T[] block = blocks[iBlock * blockColumns + jBlock];
841             final int available  = outBlock.length - outIndex;
842             if (jWidth > available) {
843                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
844                 outBlock = out.blocks[++outBlockIndex];
845                 System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
846                 outIndex = jWidth - available;
847             } else {
848                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
849                 outIndex += jWidth;
850             }
851         }
852 
853         return out;
854     }
855 
856     /** {@inheritDoc} */
857     @Override
858     public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
859         throws MatrixDimensionMismatchException, OutOfRangeException {
860         if (matrix instanceof BlockFieldMatrix) {
861             setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
862         } else {
863             super.setRowMatrix(row, matrix);
864         }
865     }
866 
867     /**
868      * Sets the entries in row number <code>row</code>
869      * as a row matrix.  Row indices start at 0.
870      *
871      * @param row the row to be set
872      * @param matrix row matrix (must have one row and the same number of columns
873      * as the instance)
874      * @throws MatrixDimensionMismatchException if the matrix dimensions do
875      * not match one instance row.
876      * @throws OutOfRangeException if the specified row index is invalid.
877      */
878     public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
879         throws MatrixDimensionMismatchException, OutOfRangeException {
880         checkRowIndex(row);
881         final int nCols = getColumnDimension();
882         if (matrix.getRowDimension() != 1 ||
883             matrix.getColumnDimension() != nCols) {
884             throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
885                                                        matrix.getColumnDimension(),
886                                                        1, nCols);
887         }
888 
889         // perform copy block-wise, to ensure good cache behavior
890         final int iBlock = row / BLOCK_SIZE;
891         final int iRow   = row - iBlock * BLOCK_SIZE;
892         int mBlockIndex  = 0;
893         int mIndex       = 0;
894         T[] mBlock  = matrix.blocks[mBlockIndex];
895         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
896             final int jWidth     = blockWidth(jBlock);
897             final T[] block = blocks[iBlock * blockColumns + jBlock];
898             final int available  = mBlock.length - mIndex;
899             if (jWidth > available) {
900                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
901                 mBlock = matrix.blocks[++mBlockIndex];
902                 System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
903                 mIndex = jWidth - available;
904             } else {
905                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
906                 mIndex += jWidth;
907            }
908         }
909     }
910 
911     /** {@inheritDoc} */
912     @Override
913     public FieldMatrix<T> getColumnMatrix(final int column)
914         throws OutOfRangeException {
915         checkColumnIndex(column);
916         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, 1);
917 
918         // perform copy block-wise, to ensure good cache behavior
919         final int jBlock  = column / BLOCK_SIZE;
920         final int jColumn = column - jBlock * BLOCK_SIZE;
921         final int jWidth  = blockWidth(jBlock);
922         int outBlockIndex = 0;
923         int outIndex      = 0;
924         T[] outBlock = out.blocks[outBlockIndex];
925         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
926             final int iHeight = blockHeight(iBlock);
927             final T[] block = blocks[iBlock * blockColumns + jBlock];
928             for (int i = 0; i < iHeight; ++i) {
929                 if (outIndex >= outBlock.length) {
930                     outBlock = out.blocks[++outBlockIndex];
931                     outIndex = 0;
932                 }
933                 outBlock[outIndex++] = block[i * jWidth + jColumn];
934             }
935         }
936 
937         return out;
938     }
939 
940     /** {@inheritDoc} */
941     @Override
942     public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
943         throws MatrixDimensionMismatchException, OutOfRangeException {
944         if (matrix instanceof BlockFieldMatrix) {
945             setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
946         } else {
947             super.setColumnMatrix(column, matrix);
948         }
949     }
950 
951     /**
952      * Sets the entries in column number {@code column}
953      * as a column matrix.  Column indices start at 0.
954      *
955      * @param column Column to be set.
956      * @param matrix Column matrix (must have one column and the same number of rows
957      * as the instance).
958      * @throws MatrixDimensionMismatchException if the matrix dimensions do
959      * not match one instance column.
960      * @throws OutOfRangeException if the specified column index is invalid.
961      */
962     void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
963         throws MatrixDimensionMismatchException, OutOfRangeException {
964         checkColumnIndex(column);
965         final int nRows = getRowDimension();
966         if (matrix.getRowDimension() != nRows ||
967             matrix.getColumnDimension() != 1) {
968             throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
969                                                        matrix.getColumnDimension(),
970                                                        nRows, 1);
971         }
972 
973         // perform copy block-wise, to ensure good cache behavior
974         final int jBlock  = column / BLOCK_SIZE;
975         final int jColumn = column - jBlock * BLOCK_SIZE;
976         final int jWidth  = blockWidth(jBlock);
977         int mBlockIndex = 0;
978         int mIndex      = 0;
979         T[] mBlock = matrix.blocks[mBlockIndex];
980         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
981             final int iHeight = blockHeight(iBlock);
982             final T[] block = blocks[iBlock * blockColumns + jBlock];
983             for (int i = 0; i < iHeight; ++i) {
984                 if (mIndex >= mBlock.length) {
985                     mBlock = matrix.blocks[++mBlockIndex];
986                     mIndex = 0;
987                 }
988                 block[i * jWidth + jColumn] = mBlock[mIndex++];
989             }
990         }
991     }
992 
993     /** {@inheritDoc} */
994     @Override
995     public FieldVector<T> getRowVector(final int row)
996         throws OutOfRangeException {
997         checkRowIndex(row);
998         final T[] outData = MathArrays.buildArray(getField(), columns);
999 
1000         // perform copy block-wise, to ensure good cache behavior
1001         final int iBlock  = row / BLOCK_SIZE;
1002         final int iRow    = row - iBlock * BLOCK_SIZE;
1003         int outIndex      = 0;
1004         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1005             final int jWidth     = blockWidth(jBlock);
1006             final T[] block = blocks[iBlock * blockColumns + jBlock];
1007             System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1008             outIndex += jWidth;
1009         }
1010 
1011         return new ArrayFieldVector<>(getField(), outData, false);
1012     }
1013 
1014     /** {@inheritDoc} */
1015     @Override
1016     public void setRowVector(final int row, final FieldVector<T> vector)
1017         throws MatrixDimensionMismatchException, OutOfRangeException {
1018         if (vector instanceof ArrayFieldVector) {
1019             setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1020         } else {
1021             super.setRowVector(row, vector);
1022         }
1023     }
1024 
1025     /** {@inheritDoc} */
1026     @Override
1027     public FieldVector<T> getColumnVector(final int column)
1028         throws OutOfRangeException {
1029         checkColumnIndex(column);
1030         final T[] outData = MathArrays.buildArray(getField(), rows);
1031 
1032         // perform copy block-wise, to ensure good cache behavior
1033         final int jBlock  = column / BLOCK_SIZE;
1034         final int jColumn = column - jBlock * BLOCK_SIZE;
1035         final int jWidth  = blockWidth(jBlock);
1036         int outIndex      = 0;
1037         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1038             final int iHeight = blockHeight(iBlock);
1039             final T[] block = blocks[iBlock * blockColumns + jBlock];
1040             for (int i = 0; i < iHeight; ++i) {
1041                 outData[outIndex++] = block[i * jWidth + jColumn];
1042             }
1043         }
1044 
1045         return new ArrayFieldVector<>(getField(), outData, false);
1046     }
1047 
1048     /** {@inheritDoc} */
1049     @Override
1050     public void setColumnVector(final int column, final FieldVector<T> vector)
1051         throws OutOfRangeException, MatrixDimensionMismatchException {
1052         if (vector instanceof ArrayFieldVector) {
1053             setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1054         } else {
1055             super.setColumnVector(column, vector);
1056         }
1057     }
1058 
1059     /** {@inheritDoc} */
1060     @Override
1061     public T[] getRow(final int row) throws OutOfRangeException {
1062         checkRowIndex(row);
1063         final T[] out = MathArrays.buildArray(getField(), columns);
1064 
1065         // perform copy block-wise, to ensure good cache behavior
1066         final int iBlock  = row / BLOCK_SIZE;
1067         final int iRow    = row - iBlock * BLOCK_SIZE;
1068         int outIndex      = 0;
1069         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1070             final int jWidth     = blockWidth(jBlock);
1071             final T[] block = blocks[iBlock * blockColumns + jBlock];
1072             System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1073             outIndex += jWidth;
1074         }
1075 
1076         return out;
1077     }
1078 
1079     /** {@inheritDoc} */
1080     @Override
1081     public void setRow(final int row, final T[] array)
1082         throws OutOfRangeException, MatrixDimensionMismatchException {
1083         checkRowIndex(row);
1084         final int nCols = getColumnDimension();
1085         if (array.length != nCols) {
1086             throw new MatrixDimensionMismatchException(1, array.length, 1, nCols);
1087         }
1088 
1089         // perform copy block-wise, to ensure good cache behavior
1090         final int iBlock  = row / BLOCK_SIZE;
1091         final int iRow    = row - iBlock * BLOCK_SIZE;
1092         int outIndex      = 0;
1093         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1094             final int jWidth     = blockWidth(jBlock);
1095             final T[] block = blocks[iBlock * blockColumns + jBlock];
1096             System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1097             outIndex += jWidth;
1098         }
1099     }
1100 
1101     /** {@inheritDoc} */
1102     @Override
1103     public T[] getColumn(final int column) throws OutOfRangeException {
1104         checkColumnIndex(column);
1105         final T[] out = MathArrays.buildArray(getField(), rows);
1106 
1107         // perform copy block-wise, to ensure good cache behavior
1108         final int jBlock  = column / BLOCK_SIZE;
1109         final int jColumn = column - jBlock * BLOCK_SIZE;
1110         final int jWidth  = blockWidth(jBlock);
1111         int outIndex      = 0;
1112         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1113             final int iHeight = blockHeight(iBlock);
1114             final T[] block = blocks[iBlock * blockColumns + jBlock];
1115             for (int i = 0; i < iHeight; ++i) {
1116                 out[outIndex++] = block[i * jWidth + jColumn];
1117             }
1118         }
1119 
1120         return out;
1121     }
1122 
1123     /** {@inheritDoc} */
1124     @Override
1125     public void setColumn(final int column, final T[] array)
1126         throws MatrixDimensionMismatchException, OutOfRangeException {
1127         checkColumnIndex(column);
1128         final int nRows = getRowDimension();
1129         if (array.length != nRows) {
1130             throw new MatrixDimensionMismatchException(array.length, 1, nRows, 1);
1131         }
1132 
1133         // perform copy block-wise, to ensure good cache behavior
1134         final int jBlock  = column / BLOCK_SIZE;
1135         final int jColumn = column - jBlock * BLOCK_SIZE;
1136         final int jWidth  = blockWidth(jBlock);
1137         int outIndex      = 0;
1138         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1139             final int iHeight = blockHeight(iBlock);
1140             final T[] block = blocks[iBlock * blockColumns + jBlock];
1141             for (int i = 0; i < iHeight; ++i) {
1142                 block[i * jWidth + jColumn] = array[outIndex++];
1143             }
1144         }
1145     }
1146 
1147     /** {@inheritDoc} */
1148     @Override
1149     public T getEntry(final int row, final int column)
1150         throws OutOfRangeException {
1151         checkRowIndex(row);
1152         checkColumnIndex(column);
1153 
1154         final int iBlock = row    / BLOCK_SIZE;
1155         final int jBlock = column / BLOCK_SIZE;
1156         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1157             (column - jBlock * BLOCK_SIZE);
1158 
1159         return blocks[iBlock * blockColumns + jBlock][k];
1160     }
1161 
1162     /** {@inheritDoc} */
1163     @Override
1164     public void setEntry(final int row, final int column, final T value)
1165         throws OutOfRangeException {
1166         checkRowIndex(row);
1167         checkColumnIndex(column);
1168 
1169         final int iBlock = row    / BLOCK_SIZE;
1170         final int jBlock = column / BLOCK_SIZE;
1171         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1172             (column - jBlock * BLOCK_SIZE);
1173 
1174         blocks[iBlock * blockColumns + jBlock][k] = value;
1175     }
1176 
1177     /** {@inheritDoc} */
1178     @Override
1179     public void addToEntry(final int row, final int column, final T increment)
1180         throws OutOfRangeException {
1181         checkRowIndex(row);
1182         checkColumnIndex(column);
1183 
1184         final int iBlock = row    / BLOCK_SIZE;
1185         final int jBlock = column / BLOCK_SIZE;
1186         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1187             (column - jBlock * BLOCK_SIZE);
1188         final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1189 
1190         blockIJ[k] = blockIJ[k].add(increment);
1191     }
1192 
1193     /** {@inheritDoc} */
1194     @Override
1195     public void multiplyEntry(final int row, final int column, final T factor)
1196         throws OutOfRangeException {
1197         checkRowIndex(row);
1198         checkColumnIndex(column);
1199 
1200         final int iBlock = row    / BLOCK_SIZE;
1201         final int jBlock = column / BLOCK_SIZE;
1202         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1203             (column - jBlock * BLOCK_SIZE);
1204         final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1205 
1206         blockIJ[k] = blockIJ[k].multiply(factor);
1207     }
1208 
1209     /** {@inheritDoc} */
1210     @Override
1211     public FieldMatrix<T> transpose() {
1212         final int nRows = getRowDimension();
1213         final int nCols = getColumnDimension();
1214         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), nCols, nRows);
1215 
1216         // perform transpose block-wise, to ensure good cache behavior
1217         int blockIndex = 0;
1218         for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1219             for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1220 
1221                 // transpose current block
1222                 final T[] outBlock = out.blocks[blockIndex];
1223                 final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1224                 final int      pStart   = iBlock * BLOCK_SIZE;
1225                 final int      pEnd     = JdkMath.min(pStart + BLOCK_SIZE, columns);
1226                 final int      qStart   = jBlock * BLOCK_SIZE;
1227                 final int      qEnd     = JdkMath.min(qStart + BLOCK_SIZE, rows);
1228                 int k = 0;
1229                 for (int p = pStart; p < pEnd; ++p) {
1230                     final int lInc = pEnd - pStart;
1231                     int l = p - pStart;
1232                     for (int q = qStart; q < qEnd; ++q) {
1233                         outBlock[k] = tBlock[l];
1234                         ++k;
1235                         l+= lInc;
1236                     }
1237                 }
1238 
1239                 // go to next block
1240                 ++blockIndex;
1241             }
1242         }
1243 
1244         return out;
1245     }
1246 
1247     /** {@inheritDoc} */
1248     @Override
1249     public int getRowDimension() {
1250         return rows;
1251     }
1252 
1253     /** {@inheritDoc} */
1254     @Override
1255     public int getColumnDimension() {
1256         return columns;
1257     }
1258 
1259     /** {@inheritDoc} */
1260     @Override
1261     public T[] operate(final T[] v) throws DimensionMismatchException {
1262         if (v.length != columns) {
1263             throw new DimensionMismatchException(v.length, columns);
1264         }
1265         final T[] out = MathArrays.buildArray(getField(), rows);
1266         final T zero = getField().getZero();
1267 
1268         // perform multiplication block-wise, to ensure good cache behavior
1269         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1270             final int pStart = iBlock * BLOCK_SIZE;
1271             final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
1272             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1273                 final T[] block  = blocks[iBlock * blockColumns + jBlock];
1274                 final int      qStart = jBlock * BLOCK_SIZE;
1275                 final int      qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
1276                 int k = 0;
1277                 for (int p = pStart; p < pEnd; ++p) {
1278                     T sum = zero;
1279                     int q = qStart;
1280                     while (q < qEnd - 3) {
1281                         sum = sum.
1282                               add(block[k].multiply(v[q])).
1283                               add(block[k + 1].multiply(v[q + 1])).
1284                               add(block[k + 2].multiply(v[q + 2])).
1285                               add(block[k + 3].multiply(v[q + 3]));
1286                         k += 4;
1287                         q += 4;
1288                     }
1289                     while (q < qEnd) {
1290                         sum = sum.add(block[k++].multiply(v[q++]));
1291                     }
1292                     out[p] = out[p].add(sum);
1293                 }
1294             }
1295         }
1296 
1297         return out;
1298     }
1299 
1300     /** {@inheritDoc} */
1301     @Override
1302     public T[] preMultiply(final T[] v) throws DimensionMismatchException {
1303 
1304         if (v.length != rows) {
1305             throw new DimensionMismatchException(v.length, rows);
1306         }
1307         final T[] out = MathArrays.buildArray(getField(), columns);
1308         final T zero = getField().getZero();
1309 
1310         // perform multiplication block-wise, to ensure good cache behavior
1311         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1312             final int jWidth  = blockWidth(jBlock);
1313             final int jWidth2 = jWidth  + jWidth;
1314             final int jWidth3 = jWidth2 + jWidth;
1315             final int jWidth4 = jWidth3 + jWidth;
1316             final int qStart = jBlock * BLOCK_SIZE;
1317             final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
1318             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1319                 final T[] block  = blocks[iBlock * blockColumns + jBlock];
1320                 final int      pStart = iBlock * BLOCK_SIZE;
1321                 final int      pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
1322                 for (int q = qStart; q < qEnd; ++q) {
1323                     int k = q - qStart;
1324                     T sum = zero;
1325                     int p = pStart;
1326                     while (p < pEnd - 3) {
1327                         sum = sum.
1328                               add(block[k].multiply(v[p])).
1329                               add(block[k + jWidth].multiply(v[p + 1])).
1330                               add(block[k + jWidth2].multiply(v[p + 2])).
1331                               add(block[k + jWidth3].multiply(v[p + 3]));
1332                         k += jWidth4;
1333                         p += 4;
1334                     }
1335                     while (p < pEnd) {
1336                         sum = sum.add(block[k].multiply(v[p++]));
1337                         k += jWidth;
1338                     }
1339                     out[q] = out[q].add(sum);
1340                 }
1341             }
1342         }
1343 
1344         return out;
1345     }
1346 
1347     /** {@inheritDoc} */
1348     @Override
1349     public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor) {
1350         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1351         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1352             final int pStart = iBlock * BLOCK_SIZE;
1353             final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
1354             for (int p = pStart; p < pEnd; ++p) {
1355                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1356                     final int jWidth = blockWidth(jBlock);
1357                     final int qStart = jBlock * BLOCK_SIZE;
1358                     final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
1359                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1360                     int k = (p - pStart) * jWidth;
1361                     for (int q = qStart; q < qEnd; ++q) {
1362                         block[k] = visitor.visit(p, q, block[k]);
1363                         ++k;
1364                     }
1365                 }
1366              }
1367         }
1368         return visitor.end();
1369     }
1370 
1371     /** {@inheritDoc} */
1372     @Override
1373     public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1374         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1375         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1376             final int pStart = iBlock * BLOCK_SIZE;
1377             final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
1378             for (int p = pStart; p < pEnd; ++p) {
1379                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1380                     final int jWidth = blockWidth(jBlock);
1381                     final int qStart = jBlock * BLOCK_SIZE;
1382                     final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
1383                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1384                     int k = (p - pStart) * jWidth;
1385                     for (int q = qStart; q < qEnd; ++q) {
1386                         visitor.visit(p, q, block[k]);
1387                         ++k;
1388                     }
1389                 }
1390              }
1391         }
1392         return visitor.end();
1393     }
1394 
1395     /** {@inheritDoc} */
1396     @Override
1397     public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1398                             final int startRow, final int endRow,
1399                             final int startColumn, final int endColumn)
1400         throws OutOfRangeException, NumberIsTooSmallException {
1401         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1402         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1403         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1404             final int p0     = iBlock * BLOCK_SIZE;
1405             final int pStart = JdkMath.max(startRow, p0);
1406             final int pEnd   = JdkMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1407             for (int p = pStart; p < pEnd; ++p) {
1408                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1409                     final int jWidth = blockWidth(jBlock);
1410                     final int q0     = jBlock * BLOCK_SIZE;
1411                     final int qStart = JdkMath.max(startColumn, q0);
1412                     final int qEnd   = JdkMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1413                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1414                     int k = (p - p0) * jWidth + qStart - q0;
1415                     for (int q = qStart; q < qEnd; ++q) {
1416                         block[k] = visitor.visit(p, q, block[k]);
1417                         ++k;
1418                     }
1419                 }
1420              }
1421         }
1422         return visitor.end();
1423     }
1424 
1425     /** {@inheritDoc} */
1426     @Override
1427     public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1428                             final int startRow, final int endRow,
1429                             final int startColumn, final int endColumn)
1430         throws OutOfRangeException, NumberIsTooSmallException {
1431         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1432         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1433         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1434             final int p0     = iBlock * BLOCK_SIZE;
1435             final int pStart = JdkMath.max(startRow, p0);
1436             final int pEnd   = JdkMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1437             for (int p = pStart; p < pEnd; ++p) {
1438                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1439                     final int jWidth = blockWidth(jBlock);
1440                     final int q0     = jBlock * BLOCK_SIZE;
1441                     final int qStart = JdkMath.max(startColumn, q0);
1442                     final int qEnd   = JdkMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1443                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1444                     int k = (p - p0) * jWidth + qStart - q0;
1445                     for (int q = qStart; q < qEnd; ++q) {
1446                         visitor.visit(p, q, block[k]);
1447                         ++k;
1448                     }
1449                 }
1450              }
1451         }
1452         return visitor.end();
1453     }
1454 
1455     /** {@inheritDoc} */
1456     @Override
1457     public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor) {
1458         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1459         int blockIndex = 0;
1460         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1461             final int pStart = iBlock * BLOCK_SIZE;
1462             final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
1463             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1464                 final int qStart = jBlock * BLOCK_SIZE;
1465                 final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
1466                 final T[] block = blocks[blockIndex];
1467                 int k = 0;
1468                 for (int p = pStart; p < pEnd; ++p) {
1469                     for (int q = qStart; q < qEnd; ++q) {
1470                         block[k] = visitor.visit(p, q, block[k]);
1471                         ++k;
1472                     }
1473                 }
1474                 ++blockIndex;
1475             }
1476         }
1477         return visitor.end();
1478     }
1479 
1480     /** {@inheritDoc} */
1481     @Override
1482     public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1483         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1484         int blockIndex = 0;
1485         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1486             final int pStart = iBlock * BLOCK_SIZE;
1487             final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
1488             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1489                 final int qStart = jBlock * BLOCK_SIZE;
1490                 final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
1491                 final T[] block = blocks[blockIndex];
1492                 int k = 0;
1493                 for (int p = pStart; p < pEnd; ++p) {
1494                     for (int q = qStart; q < qEnd; ++q) {
1495                         visitor.visit(p, q, block[k]);
1496                         ++k;
1497                     }
1498                 }
1499                 ++blockIndex;
1500             }
1501         }
1502         return visitor.end();
1503     }
1504 
1505     /** {@inheritDoc} */
1506     @Override
1507     public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1508                                   final int startRow, final int endRow,
1509                                   final int startColumn, final int endColumn)
1510         throws OutOfRangeException, NumberIsTooSmallException {
1511         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1512         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1513         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1514             final int p0     = iBlock * BLOCK_SIZE;
1515             final int pStart = JdkMath.max(startRow, p0);
1516             final int pEnd   = JdkMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1517             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1518                 final int jWidth = blockWidth(jBlock);
1519                 final int q0     = jBlock * BLOCK_SIZE;
1520                 final int qStart = JdkMath.max(startColumn, q0);
1521                 final int qEnd   = JdkMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1522                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1523                 for (int p = pStart; p < pEnd; ++p) {
1524                     int k = (p - p0) * jWidth + qStart - q0;
1525                     for (int q = qStart; q < qEnd; ++q) {
1526                         block[k] = visitor.visit(p, q, block[k]);
1527                         ++k;
1528                     }
1529                 }
1530             }
1531         }
1532         return visitor.end();
1533     }
1534 
1535     /** {@inheritDoc} */
1536     @Override
1537     public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1538                                   final int startRow, final int endRow,
1539                                   final int startColumn, final int endColumn)
1540         throws OutOfRangeException, NumberIsTooSmallException {
1541         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1542         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1543         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1544             final int p0     = iBlock * BLOCK_SIZE;
1545             final int pStart = JdkMath.max(startRow, p0);
1546             final int pEnd   = JdkMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1547             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1548                 final int jWidth = blockWidth(jBlock);
1549                 final int q0     = jBlock * BLOCK_SIZE;
1550                 final int qStart = JdkMath.max(startColumn, q0);
1551                 final int qEnd   = JdkMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1552                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1553                 for (int p = pStart; p < pEnd; ++p) {
1554                     int k = (p - p0) * jWidth + qStart - q0;
1555                     for (int q = qStart; q < qEnd; ++q) {
1556                         visitor.visit(p, q, block[k]);
1557                         ++k;
1558                     }
1559                 }
1560             }
1561         }
1562         return visitor.end();
1563     }
1564 
1565     /**
1566      * Get the height of a block.
1567      * @param blockRow row index (in block sense) of the block
1568      * @return height (number of rows) of the block
1569      */
1570     private int blockHeight(final int blockRow) {
1571         return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1572     }
1573 
1574     /**
1575      * Get the width of a block.
1576      * @param blockColumn column index (in block sense) of the block
1577      * @return width (number of columns) of the block
1578      */
1579     private int blockWidth(final int blockColumn) {
1580         return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1581     }
1582 }