001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math4.legacy.linear;
019
020import java.io.Serializable;
021
022import org.apache.commons.math4.legacy.core.Field;
023import org.apache.commons.math4.legacy.core.FieldElement;
024import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
025import org.apache.commons.math4.legacy.exception.NoDataException;
026import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
027import org.apache.commons.math4.legacy.exception.NullArgumentException;
028import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
029import org.apache.commons.math4.legacy.exception.OutOfRangeException;
030import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
031import org.apache.commons.math4.core.jdkmath.JdkMath;
032import org.apache.commons.math4.legacy.core.MathArrays;
033
034/**
035 * Cache-friendly implementation of FieldMatrix using a flat arrays to store
036 * square blocks of the matrix.
037 * <p>
038 * This implementation is specially designed to be cache-friendly. Square blocks are
039 * stored as small arrays and allow efficient traversal of data both in row major direction
040 * and columns major direction, one block at a time. This greatly increases performances
041 * for algorithms that use crossed directions loops like multiplication or transposition.
042 * </p>
043 * <p>
044 * The size of square blocks is a static parameter. It may be tuned according to the cache
045 * size of the target computer processor. As a rule of thumbs, it should be the largest
046 * value that allows three blocks to be simultaneously cached (this is necessary for example
047 * for matrix multiplication). The default value is to use 36x36 blocks.
048 * </p>
049 * <p>
050 * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
051 * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
052 * blocks are flattened in row major order in single dimension arrays which are therefore
053 * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
054 * organized in row major order.
055 * </p>
056 * <p>
057 * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
058 * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
059 * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
060 * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
061 * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
062 * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
063 * rectangle.
064 * </p>
065 * <p>
066 * The layout complexity overhead versus simple mapping of matrices to java
067 * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
068 * to up to 3-fold improvements for matrices of moderate to large size.
069 * </p>
070 * @param <T> the type of the field elements
071 * @since 2.0
072 */
073public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
074    /** Block size. */
075    public static final int BLOCK_SIZE = 36;
076    /** Serializable version identifier. */
077    private static final long serialVersionUID = -4602336630143123183L;
078    /** Blocks of matrix entries. */
079    private final T[][] blocks;
080    /** Number of rows of the matrix. */
081    private final int rows;
082    /** Number of columns of the matrix. */
083    private final int columns;
084    /** Number of block rows of the matrix. */
085    private final int blockRows;
086    /** Number of block columns of the matrix. */
087    private final int blockColumns;
088
089    /**
090     * Create a new matrix with the supplied row and column dimensions.
091     *
092     * @param field Field to which the elements belong.
093     * @param rows Number of rows in the new matrix.
094     * @param columns Number of columns in the new matrix.
095     * @throws NotStrictlyPositiveException if row or column dimension is not
096     * positive.
097     */
098    public BlockFieldMatrix(final Field<T> field, final int rows,
099                            final int columns)
100        throws NotStrictlyPositiveException {
101        super(field, rows, columns);
102        this.rows    = rows;
103        this.columns = columns;
104
105        // number of blocks
106        blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
107        blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
108
109        // allocate storage blocks, taking care of smaller ones at right and bottom
110        blocks = createBlocksLayout(field, rows, columns);
111    }
112
113    /**
114     * Create a new dense matrix copying entries from raw layout data.
115     * <p>The input array <em>must</em> already be in raw layout.</p>
116     * <p>Calling this constructor is equivalent to call:
117     * {@code matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
118     *                                   toBlocksLayout(rawData), false);}
119     *
120     * @param rawData Data for the new matrix, in raw layout.
121     * @throws DimensionMismatchException if the {@code blockData} shape is
122     * inconsistent with block layout.
123     * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
124     */
125    public BlockFieldMatrix(final T[][] rawData)
126        throws DimensionMismatchException {
127        this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
128    }
129
130    /**
131     * Create a new dense matrix copying entries from block layout data.
132     * <p>The input array <em>must</em> already be in blocks layout.</p>
133     * @param rows  the number of rows in the new matrix
134     * @param columns  the number of columns in the new matrix
135     * @param blockData data for new matrix
136     * @param copyArray if true, the input array will be copied, otherwise
137     * it will be referenced
138     *
139     * @throws DimensionMismatchException if the {@code blockData} shape is
140     * inconsistent with block layout.
141     * @throws NotStrictlyPositiveException if row or column dimension is not
142     * positive.
143     * @see #createBlocksLayout(Field, int, int)
144     * @see #toBlocksLayout(FieldElement[][])
145     * @see #BlockFieldMatrix(FieldElement[][])
146     */
147    public BlockFieldMatrix(final int rows, final int columns,
148                            final T[][] blockData, final boolean copyArray)
149        throws DimensionMismatchException, NotStrictlyPositiveException {
150        super(extractField(blockData), rows, columns);
151        this.rows    = rows;
152        this.columns = columns;
153
154        // number of blocks
155        blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
156        blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
157
158        if (copyArray) {
159            // allocate storage blocks, taking care of smaller ones at right and bottom
160            blocks = MathArrays.buildArray(getField(), blockRows * blockColumns, -1);
161        } else {
162            // reference existing array
163            blocks = blockData;
164        }
165
166        int index = 0;
167        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
168            final int iHeight = blockHeight(iBlock);
169            for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
170                if (blockData[index].length != iHeight * blockWidth(jBlock)) {
171                    throw new DimensionMismatchException(blockData[index].length,
172                                                         iHeight * blockWidth(jBlock));
173                }
174                if (copyArray) {
175                    blocks[index] = blockData[index].clone();
176                }
177            }
178        }
179    }
180
181    /**
182     * Convert a data array from raw layout to blocks layout.
183     * <p>
184     * Raw layout is the straightforward layout where element at row i and
185     * column j is in array element <code>rawData[i][j]</code>. Blocks layout
186     * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
187     * is split in square blocks (except at right and bottom side where blocks may
188     * be rectangular to fit matrix size) and each block is stored in a flattened
189     * one-dimensional array.
190     * </p>
191     * <p>
192     * This method creates an array in blocks layout from an input array in raw layout.
193     * It can be used to provide the array argument of the {@link
194     * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
195     * constructor.
196     * </p>
197     * @param <T> Type of the field elements.
198     * @param rawData Data array in raw layout.
199     * @return a new data array containing the same entries but in blocks layout
200     * @throws DimensionMismatchException if {@code rawData} is not rectangular
201     *  (not all rows have the same length).
202     * @see #createBlocksLayout(Field, int, int)
203     * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
204     */
205    public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
206        throws DimensionMismatchException {
207
208        final int rows         = rawData.length;
209        final int columns      = rawData[0].length;
210        final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
211        final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
212
213        // safety checks
214        for (int i = 0; i < rawData.length; ++i) {
215            final int length = rawData[i].length;
216            if (length != columns) {
217                throw new DimensionMismatchException(columns, length);
218            }
219        }
220
221        // convert array
222        final Field<T> field = extractField(rawData);
223        final T[][] blocks = MathArrays.buildArray(field, blockRows * blockColumns, -1);
224        int blockIndex = 0;
225        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
226            final int pStart  = iBlock * BLOCK_SIZE;
227            final int pEnd    = JdkMath.min(pStart + BLOCK_SIZE, rows);
228            final int iHeight = pEnd - pStart;
229            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
230                final int qStart = jBlock * BLOCK_SIZE;
231                final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
232                final int jWidth = qEnd - qStart;
233
234                // allocate new block
235                final T[] block = MathArrays.buildArray(field, iHeight * jWidth);
236                blocks[blockIndex] = block;
237
238                // copy data
239                int index = 0;
240                for (int p = pStart; p < pEnd; ++p) {
241                    System.arraycopy(rawData[p], qStart, block, index, jWidth);
242                    index += jWidth;
243                }
244
245                ++blockIndex;
246            }
247        }
248
249        return blocks;
250    }
251
252    /**
253     * Create a data array in blocks layout.
254     * <p>
255     * This method can be used to create the array argument of the {@link
256     * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
257     * constructor.
258     * </p>
259     * @param <T> Type of the field elements.
260     * @param field Field to which the elements belong.
261     * @param rows Number of rows in the new matrix.
262     * @param columns Number of columns in the new matrix.
263     * @return a new data array in blocks layout.
264     * @see #toBlocksLayout(FieldElement[][])
265     * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
266     */
267    public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
268                                                                       final int rows, final int columns) {
269        final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
270        final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
271
272        final T[][] blocks = MathArrays.buildArray(field, blockRows * blockColumns, -1);
273        int blockIndex = 0;
274        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
275            final int pStart  = iBlock * BLOCK_SIZE;
276            final int pEnd    = JdkMath.min(pStart + BLOCK_SIZE, rows);
277            final int iHeight = pEnd - pStart;
278            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
279                final int qStart = jBlock * BLOCK_SIZE;
280                final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
281                final int jWidth = qEnd - qStart;
282                blocks[blockIndex] = MathArrays.buildArray(field, iHeight * jWidth);
283                ++blockIndex;
284            }
285        }
286
287        return blocks;
288    }
289
290    /** {@inheritDoc} */
291    @Override
292    public FieldMatrix<T> createMatrix(final int rowDimension,
293                                       final int columnDimension)
294        throws NotStrictlyPositiveException {
295        return new BlockFieldMatrix<>(getField(), rowDimension,
296                                       columnDimension);
297    }
298
299    /** {@inheritDoc} */
300    @Override
301    public FieldMatrix<T> copy() {
302
303        // create an empty matrix
304        BlockFieldMatrix<T> copied = new BlockFieldMatrix<>(getField(), rows, columns);
305
306        // copy the blocks
307        for (int i = 0; i < blocks.length; ++i) {
308            System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
309        }
310
311        return copied;
312    }
313
314    /** {@inheritDoc} */
315    @Override
316    public FieldMatrix<T> add(final FieldMatrix<T> m)
317        throws MatrixDimensionMismatchException {
318        if (m instanceof BlockFieldMatrix) {
319            return add((BlockFieldMatrix<T>) m);
320        }
321
322        // safety check
323        checkAdd(m);
324
325        final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
326
327        // perform addition block-wise, to ensure good cache behavior
328        int blockIndex = 0;
329        for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
330            for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
331
332                // perform addition on the current block
333                final T[] outBlock = out.blocks[blockIndex];
334                final T[] tBlock   = blocks[blockIndex];
335                final int      pStart   = iBlock * BLOCK_SIZE;
336                final int      pEnd     = JdkMath.min(pStart + BLOCK_SIZE, rows);
337                final int      qStart   = jBlock * BLOCK_SIZE;
338                final int      qEnd     = JdkMath.min(qStart + BLOCK_SIZE, columns);
339                int k = 0;
340                for (int p = pStart; p < pEnd; ++p) {
341                    for (int q = qStart; q < qEnd; ++q) {
342                        outBlock[k] = tBlock[k].add(m.getEntry(p, q));
343                        ++k;
344                    }
345                }
346
347                // go to next block
348                ++blockIndex;
349            }
350        }
351
352        return out;
353    }
354
355    /**
356     * Compute the sum of {@code this} and {@code m}.
357     *
358     * @param m matrix to be added
359     * @return {@code this + m}
360     * @throws MatrixDimensionMismatchException if {@code m} is not the same
361     * size as {@code this}
362     */
363    public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
364        throws MatrixDimensionMismatchException {
365
366        // safety check
367        checkAdd(m);
368
369        final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
370
371        // perform addition block-wise, to ensure good cache behavior
372        for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
373            final T[] outBlock = out.blocks[blockIndex];
374            final T[] tBlock   = blocks[blockIndex];
375            final T[] mBlock   = m.blocks[blockIndex];
376            for (int k = 0; k < outBlock.length; ++k) {
377                outBlock[k] = tBlock[k].add(mBlock[k]);
378            }
379        }
380
381        return out;
382    }
383
384    /** {@inheritDoc} */
385    @Override
386    public FieldMatrix<T> subtract(final FieldMatrix<T> m)
387        throws MatrixDimensionMismatchException {
388        if (m instanceof BlockFieldMatrix) {
389            return subtract((BlockFieldMatrix<T>) m);
390        }
391
392        // safety check
393        checkAdd(m);
394
395        final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
396
397        // perform subtraction block-wise, to ensure good cache behavior
398        int blockIndex = 0;
399        for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
400            for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
401
402                // perform subtraction on the current block
403                final T[] outBlock = out.blocks[blockIndex];
404                final T[] tBlock   = blocks[blockIndex];
405                final int      pStart   = iBlock * BLOCK_SIZE;
406                final int      pEnd     = JdkMath.min(pStart + BLOCK_SIZE, rows);
407                final int      qStart   = jBlock * BLOCK_SIZE;
408                final int      qEnd     = JdkMath.min(qStart + BLOCK_SIZE, columns);
409                int k = 0;
410                for (int p = pStart; p < pEnd; ++p) {
411                    for (int q = qStart; q < qEnd; ++q) {
412                        outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
413                        ++k;
414                    }
415                }
416
417                // go to next block
418                ++blockIndex;
419            }
420        }
421
422        return out;
423    }
424
425    /**
426     * Compute {@code this - m}.
427     *
428     * @param m matrix to be subtracted
429     * @return {@code this - m}
430     * @throws MatrixDimensionMismatchException if {@code m} is not the same
431     * size as {@code this}
432     */
433    public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m) throws MatrixDimensionMismatchException {
434        // safety check
435        checkAdd(m);
436
437        final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
438
439        // perform subtraction block-wise, to ensure good cache behavior
440        for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
441            final T[] outBlock = out.blocks[blockIndex];
442            final T[] tBlock   = blocks[blockIndex];
443            final T[] mBlock   = m.blocks[blockIndex];
444            for (int k = 0; k < outBlock.length; ++k) {
445                outBlock[k] = tBlock[k].subtract(mBlock[k]);
446            }
447        }
448
449        return out;
450    }
451
452    /** {@inheritDoc} */
453    @Override
454    public FieldMatrix<T> scalarAdd(final T d) {
455        final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
456
457        // perform subtraction block-wise, to ensure good cache behavior
458        for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
459            final T[] outBlock = out.blocks[blockIndex];
460            final T[] tBlock   = blocks[blockIndex];
461            for (int k = 0; k < outBlock.length; ++k) {
462                outBlock[k] = tBlock[k].add(d);
463            }
464        }
465
466        return out;
467    }
468
469    /** {@inheritDoc} */
470    @Override
471    public FieldMatrix<T> scalarMultiply(final T d) {
472
473        final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
474
475        // perform subtraction block-wise, to ensure good cache behavior
476        for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
477            final T[] outBlock = out.blocks[blockIndex];
478            final T[] tBlock   = blocks[blockIndex];
479            for (int k = 0; k < outBlock.length; ++k) {
480                outBlock[k] = tBlock[k].multiply(d);
481            }
482        }
483
484        return out;
485    }
486
487    /** {@inheritDoc} */
488    @Override
489    public FieldMatrix<T> multiply(final FieldMatrix<T> m)
490        throws DimensionMismatchException {
491        if (m instanceof BlockFieldMatrix) {
492            return multiply((BlockFieldMatrix<T>) m);
493        }
494
495        // safety check
496        checkMultiply(m);
497
498        final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, m.getColumnDimension());
499        final T zero = getField().getZero();
500
501        // perform multiplication block-wise, to ensure good cache behavior
502        int blockIndex = 0;
503        for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
504
505            final int pStart = iBlock * BLOCK_SIZE;
506            final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
507
508            for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
509
510                final int qStart = jBlock * BLOCK_SIZE;
511                final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
512
513                // select current block
514                final T[] outBlock = out.blocks[blockIndex];
515
516                // perform multiplication on current block
517                for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
518                    final int kWidth      = blockWidth(kBlock);
519                    final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
520                    final int rStart      = kBlock * BLOCK_SIZE;
521                    int k = 0;
522                    for (int p = pStart; p < pEnd; ++p) {
523                        final int lStart = (p - pStart) * kWidth;
524                        final int lEnd   = lStart + kWidth;
525                        for (int q = qStart; q < qEnd; ++q) {
526                            T sum = zero;
527                            int r = rStart;
528                            for (int l = lStart; l < lEnd; ++l) {
529                                sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
530                                ++r;
531                            }
532                            outBlock[k] = outBlock[k].add(sum);
533                            ++k;
534                        }
535                    }
536                }
537
538                // go to next block
539                ++blockIndex;
540            }
541        }
542
543        return out;
544    }
545
546    /**
547     * Returns the result of postmultiplying {@code this} by {@code m}.
548     *
549     * @param m matrix to postmultiply by
550     * @return {@code this * m}
551     * @throws DimensionMismatchException if the matrices are not compatible.
552     */
553    public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m)
554        throws DimensionMismatchException {
555
556        // safety check
557        checkMultiply(m);
558
559        final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, m.columns);
560        final T zero = getField().getZero();
561
562        // perform multiplication block-wise, to ensure good cache behavior
563        int blockIndex = 0;
564        for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
565
566            final int pStart = iBlock * BLOCK_SIZE;
567            final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
568
569            for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
570                final int jWidth = out.blockWidth(jBlock);
571                final int jWidth2 = jWidth  + jWidth;
572                final int jWidth3 = jWidth2 + jWidth;
573                final int jWidth4 = jWidth3 + jWidth;
574
575                // select current block
576                final T[] outBlock = out.blocks[blockIndex];
577
578                // perform multiplication on current block
579                for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
580                    final int kWidth = blockWidth(kBlock);
581                    final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
582                    final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
583                    int k = 0;
584                    for (int p = pStart; p < pEnd; ++p) {
585                        final int lStart = (p - pStart) * kWidth;
586                        final int lEnd   = lStart + kWidth;
587                        for (int nStart = 0; nStart < jWidth; ++nStart) {
588                            T sum = zero;
589                            int l = lStart;
590                            int n = nStart;
591                            while (l < lEnd - 3) {
592                                sum = sum.
593                                      add(tBlock[l].multiply(mBlock[n])).
594                                      add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
595                                      add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
596                                      add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
597                                l += 4;
598                                n += jWidth4;
599                            }
600                            while (l < lEnd) {
601                                sum = sum.add(tBlock[l++].multiply(mBlock[n]));
602                                n += jWidth;
603                            }
604                            outBlock[k] = outBlock[k].add(sum);
605                            ++k;
606                        }
607                    }
608                }
609
610                // go to next block
611                ++blockIndex;
612            }
613        }
614
615        return out;
616    }
617
618    /** {@inheritDoc} */
619    @Override
620    public T[][] getData() {
621
622        final T[][] data = MathArrays.buildArray(getField(), getRowDimension(), getColumnDimension());
623        final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
624
625        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
626            final int pStart = iBlock * BLOCK_SIZE;
627            final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
628            int regularPos   = 0;
629            int lastPos      = 0;
630            for (int p = pStart; p < pEnd; ++p) {
631                final T[] dataP = data[p];
632                int blockIndex = iBlock * blockColumns;
633                int dataPos    = 0;
634                for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
635                    System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
636                    dataPos += BLOCK_SIZE;
637                }
638                System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
639                regularPos += BLOCK_SIZE;
640                lastPos    += lastColumns;
641            }
642        }
643
644        return data;
645    }
646
647    /** {@inheritDoc} */
648    @Override
649    public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
650                                       final int startColumn,
651                                       final int endColumn)
652        throws OutOfRangeException, NumberIsTooSmallException {
653        // safety checks
654        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
655
656        // create the output matrix
657        final BlockFieldMatrix<T> out =
658            new BlockFieldMatrix<>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
659
660        // compute blocks shifts
661        final int blockStartRow    = startRow    / BLOCK_SIZE;
662        final int rowsShift        = startRow    % BLOCK_SIZE;
663        final int blockStartColumn = startColumn / BLOCK_SIZE;
664        final int columnsShift     = startColumn % BLOCK_SIZE;
665
666        // perform extraction block-wise, to ensure good cache behavior
667        int pBlock = blockStartRow;
668        for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
669            final int iHeight = out.blockHeight(iBlock);
670            int qBlock = blockStartColumn;
671            for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
672                final int jWidth = out.blockWidth(jBlock);
673
674                // handle one block of the output matrix
675                final int      outIndex = iBlock * out.blockColumns + jBlock;
676                final T[] outBlock = out.blocks[outIndex];
677                final int      index    = pBlock * blockColumns + qBlock;
678                final int      width    = blockWidth(qBlock);
679
680                final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
681                final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
682                if (heightExcess > 0) {
683                    // the submatrix block spans on two blocks rows from the original matrix
684                    if (widthExcess > 0) {
685                        // the submatrix block spans on two blocks columns from the original matrix
686                        final int width2 = blockWidth(qBlock + 1);
687                        copyBlockPart(blocks[index], width,
688                                      rowsShift, BLOCK_SIZE,
689                                      columnsShift, BLOCK_SIZE,
690                                      outBlock, jWidth, 0, 0);
691                        copyBlockPart(blocks[index + 1], width2,
692                                      rowsShift, BLOCK_SIZE,
693                                      0, widthExcess,
694                                      outBlock, jWidth, 0, jWidth - widthExcess);
695                        copyBlockPart(blocks[index + blockColumns], width,
696                                      0, heightExcess,
697                                      columnsShift, BLOCK_SIZE,
698                                      outBlock, jWidth, iHeight - heightExcess, 0);
699                        copyBlockPart(blocks[index + blockColumns + 1], width2,
700                                      0, heightExcess,
701                                      0, widthExcess,
702                                      outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
703                    } else {
704                        // the submatrix block spans on one block column from the original matrix
705                        copyBlockPart(blocks[index], width,
706                                      rowsShift, BLOCK_SIZE,
707                                      columnsShift, jWidth + columnsShift,
708                                      outBlock, jWidth, 0, 0);
709                        copyBlockPart(blocks[index + blockColumns], width,
710                                      0, heightExcess,
711                                      columnsShift, jWidth + columnsShift,
712                                      outBlock, jWidth, iHeight - heightExcess, 0);
713                    }
714                } else {
715                    // the submatrix block spans on one block row from the original matrix
716                    if (widthExcess > 0) {
717                        // the submatrix block spans on two blocks columns from the original matrix
718                        final int width2 = blockWidth(qBlock + 1);
719                        copyBlockPart(blocks[index], width,
720                                      rowsShift, iHeight + rowsShift,
721                                      columnsShift, BLOCK_SIZE,
722                                      outBlock, jWidth, 0, 0);
723                        copyBlockPart(blocks[index + 1], width2,
724                                      rowsShift, iHeight + rowsShift,
725                                      0, widthExcess,
726                                      outBlock, jWidth, 0, jWidth - widthExcess);
727                    } else {
728                        // the submatrix block spans on one block column from the original matrix
729                        copyBlockPart(blocks[index], width,
730                                      rowsShift, iHeight + rowsShift,
731                                      columnsShift, jWidth + columnsShift,
732                                      outBlock, jWidth, 0, 0);
733                    }
734               }
735                ++qBlock;
736            }
737            ++pBlock;
738        }
739
740        return out;
741    }
742
743    /**
744     * Copy a part of a block into another one
745     * <p>This method can be called only when the specified part fits in both
746     * blocks, no verification is done here.</p>
747     * @param srcBlock source block
748     * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
749     * @param srcStartRow start row in the source block
750     * @param srcEndRow end row (exclusive) in the source block
751     * @param srcStartColumn start column in the source block
752     * @param srcEndColumn end column (exclusive) in the source block
753     * @param dstBlock destination block
754     * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
755     * @param dstStartRow start row in the destination block
756     * @param dstStartColumn start column in the destination block
757     */
758    private void copyBlockPart(final T[] srcBlock, final int srcWidth,
759                               final int srcStartRow, final int srcEndRow,
760                               final int srcStartColumn, final int srcEndColumn,
761                               final T[] dstBlock, final int dstWidth,
762                               final int dstStartRow, final int dstStartColumn) {
763        final int length = srcEndColumn - srcStartColumn;
764        int srcPos = srcStartRow * srcWidth + srcStartColumn;
765        int dstPos = dstStartRow * dstWidth + dstStartColumn;
766        for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
767            System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
768            srcPos += srcWidth;
769            dstPos += dstWidth;
770        }
771    }
772
773    /** {@inheritDoc} */
774    @Override
775    public void setSubMatrix(final T[][] subMatrix, final int row,
776                             final int column)
777        throws DimensionMismatchException, OutOfRangeException,
778        NoDataException, NullArgumentException {
779        // safety checks
780        NullArgumentException.check(subMatrix);
781        final int refLength = subMatrix[0].length;
782        if (refLength == 0) {
783            throw new NoDataException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
784        }
785        final int endRow    = row + subMatrix.length - 1;
786        final int endColumn = column + refLength - 1;
787        checkSubMatrixIndex(row, endRow, column, endColumn);
788        for (final T[] subRow : subMatrix) {
789            if (subRow.length != refLength) {
790                throw new DimensionMismatchException(refLength, subRow.length);
791            }
792        }
793
794        // compute blocks bounds
795        final int blockStartRow    = row / BLOCK_SIZE;
796        final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
797        final int blockStartColumn = column / BLOCK_SIZE;
798        final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
799
800        // perform copy block-wise, to ensure good cache behavior
801        for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
802            final int iHeight  = blockHeight(iBlock);
803            final int firstRow = iBlock * BLOCK_SIZE;
804            final int iStart   = JdkMath.max(row,    firstRow);
805            final int iEnd     = JdkMath.min(endRow + 1, firstRow + iHeight);
806
807            for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
808                final int jWidth      = blockWidth(jBlock);
809                final int firstColumn = jBlock * BLOCK_SIZE;
810                final int jStart      = JdkMath.max(column,    firstColumn);
811                final int jEnd        = JdkMath.min(endColumn + 1, firstColumn + jWidth);
812                final int jLength     = jEnd - jStart;
813
814                // handle one block, row by row
815                final T[] block = blocks[iBlock * blockColumns + jBlock];
816                for (int i = iStart; i < iEnd; ++i) {
817                    System.arraycopy(subMatrix[i - row], jStart - column,
818                                     block, (i - firstRow) * jWidth + (jStart - firstColumn),
819                                     jLength);
820                }
821            }
822        }
823    }
824
825    /** {@inheritDoc} */
826    @Override
827    public FieldMatrix<T> getRowMatrix(final int row)
828        throws OutOfRangeException {
829        checkRowIndex(row);
830        final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), 1, columns);
831
832        // perform copy block-wise, to ensure good cache behavior
833        final int iBlock  = row / BLOCK_SIZE;
834        final int iRow    = row - iBlock * BLOCK_SIZE;
835        int outBlockIndex = 0;
836        int outIndex      = 0;
837        T[] outBlock = out.blocks[outBlockIndex];
838        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
839            final int jWidth     = blockWidth(jBlock);
840            final T[] block = blocks[iBlock * blockColumns + jBlock];
841            final int available  = outBlock.length - outIndex;
842            if (jWidth > available) {
843                System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
844                outBlock = out.blocks[++outBlockIndex];
845                System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
846                outIndex = jWidth - available;
847            } else {
848                System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
849                outIndex += jWidth;
850            }
851        }
852
853        return out;
854    }
855
856    /** {@inheritDoc} */
857    @Override
858    public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
859        throws MatrixDimensionMismatchException, OutOfRangeException {
860        if (matrix instanceof BlockFieldMatrix) {
861            setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
862        } else {
863            super.setRowMatrix(row, matrix);
864        }
865    }
866
867    /**
868     * Sets the entries in row number <code>row</code>
869     * as a row matrix.  Row indices start at 0.
870     *
871     * @param row the row to be set
872     * @param matrix row matrix (must have one row and the same number of columns
873     * as the instance)
874     * @throws MatrixDimensionMismatchException if the matrix dimensions do
875     * not match one instance row.
876     * @throws OutOfRangeException if the specified row index is invalid.
877     */
878    public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
879        throws MatrixDimensionMismatchException, OutOfRangeException {
880        checkRowIndex(row);
881        final int nCols = getColumnDimension();
882        if (matrix.getRowDimension() != 1 ||
883            matrix.getColumnDimension() != nCols) {
884            throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
885                                                       matrix.getColumnDimension(),
886                                                       1, nCols);
887        }
888
889        // perform copy block-wise, to ensure good cache behavior
890        final int iBlock = row / BLOCK_SIZE;
891        final int iRow   = row - iBlock * BLOCK_SIZE;
892        int mBlockIndex  = 0;
893        int mIndex       = 0;
894        T[] mBlock  = matrix.blocks[mBlockIndex];
895        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
896            final int jWidth     = blockWidth(jBlock);
897            final T[] block = blocks[iBlock * blockColumns + jBlock];
898            final int available  = mBlock.length - mIndex;
899            if (jWidth > available) {
900                System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
901                mBlock = matrix.blocks[++mBlockIndex];
902                System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
903                mIndex = jWidth - available;
904            } else {
905                System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
906                mIndex += jWidth;
907           }
908        }
909    }
910
911    /** {@inheritDoc} */
912    @Override
913    public FieldMatrix<T> getColumnMatrix(final int column)
914        throws OutOfRangeException {
915        checkColumnIndex(column);
916        final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, 1);
917
918        // perform copy block-wise, to ensure good cache behavior
919        final int jBlock  = column / BLOCK_SIZE;
920        final int jColumn = column - jBlock * BLOCK_SIZE;
921        final int jWidth  = blockWidth(jBlock);
922        int outBlockIndex = 0;
923        int outIndex      = 0;
924        T[] outBlock = out.blocks[outBlockIndex];
925        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
926            final int iHeight = blockHeight(iBlock);
927            final T[] block = blocks[iBlock * blockColumns + jBlock];
928            for (int i = 0; i < iHeight; ++i) {
929                if (outIndex >= outBlock.length) {
930                    outBlock = out.blocks[++outBlockIndex];
931                    outIndex = 0;
932                }
933                outBlock[outIndex++] = block[i * jWidth + jColumn];
934            }
935        }
936
937        return out;
938    }
939
940    /** {@inheritDoc} */
941    @Override
942    public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
943        throws MatrixDimensionMismatchException, OutOfRangeException {
944        if (matrix instanceof BlockFieldMatrix) {
945            setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
946        } else {
947            super.setColumnMatrix(column, matrix);
948        }
949    }
950
951    /**
952     * Sets the entries in column number {@code column}
953     * as a column matrix.  Column indices start at 0.
954     *
955     * @param column Column to be set.
956     * @param matrix Column matrix (must have one column and the same number of rows
957     * as the instance).
958     * @throws MatrixDimensionMismatchException if the matrix dimensions do
959     * not match one instance column.
960     * @throws OutOfRangeException if the specified column index is invalid.
961     */
962    void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
963        throws MatrixDimensionMismatchException, OutOfRangeException {
964        checkColumnIndex(column);
965        final int nRows = getRowDimension();
966        if (matrix.getRowDimension() != nRows ||
967            matrix.getColumnDimension() != 1) {
968            throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
969                                                       matrix.getColumnDimension(),
970                                                       nRows, 1);
971        }
972
973        // perform copy block-wise, to ensure good cache behavior
974        final int jBlock  = column / BLOCK_SIZE;
975        final int jColumn = column - jBlock * BLOCK_SIZE;
976        final int jWidth  = blockWidth(jBlock);
977        int mBlockIndex = 0;
978        int mIndex      = 0;
979        T[] mBlock = matrix.blocks[mBlockIndex];
980        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
981            final int iHeight = blockHeight(iBlock);
982            final T[] block = blocks[iBlock * blockColumns + jBlock];
983            for (int i = 0; i < iHeight; ++i) {
984                if (mIndex >= mBlock.length) {
985                    mBlock = matrix.blocks[++mBlockIndex];
986                    mIndex = 0;
987                }
988                block[i * jWidth + jColumn] = mBlock[mIndex++];
989            }
990        }
991    }
992
993    /** {@inheritDoc} */
994    @Override
995    public FieldVector<T> getRowVector(final int row)
996        throws OutOfRangeException {
997        checkRowIndex(row);
998        final T[] outData = MathArrays.buildArray(getField(), columns);
999
1000        // perform copy block-wise, to ensure good cache behavior
1001        final int iBlock  = row / BLOCK_SIZE;
1002        final int iRow    = row - iBlock * BLOCK_SIZE;
1003        int outIndex      = 0;
1004        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1005            final int jWidth     = blockWidth(jBlock);
1006            final T[] block = blocks[iBlock * blockColumns + jBlock];
1007            System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1008            outIndex += jWidth;
1009        }
1010
1011        return new ArrayFieldVector<>(getField(), outData, false);
1012    }
1013
1014    /** {@inheritDoc} */
1015    @Override
1016    public void setRowVector(final int row, final FieldVector<T> vector)
1017        throws MatrixDimensionMismatchException, OutOfRangeException {
1018        if (vector instanceof ArrayFieldVector) {
1019            setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1020        } else {
1021            super.setRowVector(row, vector);
1022        }
1023    }
1024
1025    /** {@inheritDoc} */
1026    @Override
1027    public FieldVector<T> getColumnVector(final int column)
1028        throws OutOfRangeException {
1029        checkColumnIndex(column);
1030        final T[] outData = MathArrays.buildArray(getField(), rows);
1031
1032        // perform copy block-wise, to ensure good cache behavior
1033        final int jBlock  = column / BLOCK_SIZE;
1034        final int jColumn = column - jBlock * BLOCK_SIZE;
1035        final int jWidth  = blockWidth(jBlock);
1036        int outIndex      = 0;
1037        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1038            final int iHeight = blockHeight(iBlock);
1039            final T[] block = blocks[iBlock * blockColumns + jBlock];
1040            for (int i = 0; i < iHeight; ++i) {
1041                outData[outIndex++] = block[i * jWidth + jColumn];
1042            }
1043        }
1044
1045        return new ArrayFieldVector<>(getField(), outData, false);
1046    }
1047
1048    /** {@inheritDoc} */
1049    @Override
1050    public void setColumnVector(final int column, final FieldVector<T> vector)
1051        throws OutOfRangeException, MatrixDimensionMismatchException {
1052        if (vector instanceof ArrayFieldVector) {
1053            setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1054        } else {
1055            super.setColumnVector(column, vector);
1056        }
1057    }
1058
1059    /** {@inheritDoc} */
1060    @Override
1061    public T[] getRow(final int row) throws OutOfRangeException {
1062        checkRowIndex(row);
1063        final T[] out = MathArrays.buildArray(getField(), columns);
1064
1065        // perform copy block-wise, to ensure good cache behavior
1066        final int iBlock  = row / BLOCK_SIZE;
1067        final int iRow    = row - iBlock * BLOCK_SIZE;
1068        int outIndex      = 0;
1069        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1070            final int jWidth     = blockWidth(jBlock);
1071            final T[] block = blocks[iBlock * blockColumns + jBlock];
1072            System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1073            outIndex += jWidth;
1074        }
1075
1076        return out;
1077    }
1078
1079    /** {@inheritDoc} */
1080    @Override
1081    public void setRow(final int row, final T[] array)
1082        throws OutOfRangeException, MatrixDimensionMismatchException {
1083        checkRowIndex(row);
1084        final int nCols = getColumnDimension();
1085        if (array.length != nCols) {
1086            throw new MatrixDimensionMismatchException(1, array.length, 1, nCols);
1087        }
1088
1089        // perform copy block-wise, to ensure good cache behavior
1090        final int iBlock  = row / BLOCK_SIZE;
1091        final int iRow    = row - iBlock * BLOCK_SIZE;
1092        int outIndex      = 0;
1093        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1094            final int jWidth     = blockWidth(jBlock);
1095            final T[] block = blocks[iBlock * blockColumns + jBlock];
1096            System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1097            outIndex += jWidth;
1098        }
1099    }
1100
1101    /** {@inheritDoc} */
1102    @Override
1103    public T[] getColumn(final int column) throws OutOfRangeException {
1104        checkColumnIndex(column);
1105        final T[] out = MathArrays.buildArray(getField(), rows);
1106
1107        // perform copy block-wise, to ensure good cache behavior
1108        final int jBlock  = column / BLOCK_SIZE;
1109        final int jColumn = column - jBlock * BLOCK_SIZE;
1110        final int jWidth  = blockWidth(jBlock);
1111        int outIndex      = 0;
1112        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1113            final int iHeight = blockHeight(iBlock);
1114            final T[] block = blocks[iBlock * blockColumns + jBlock];
1115            for (int i = 0; i < iHeight; ++i) {
1116                out[outIndex++] = block[i * jWidth + jColumn];
1117            }
1118        }
1119
1120        return out;
1121    }
1122
1123    /** {@inheritDoc} */
1124    @Override
1125    public void setColumn(final int column, final T[] array)
1126        throws MatrixDimensionMismatchException, OutOfRangeException {
1127        checkColumnIndex(column);
1128        final int nRows = getRowDimension();
1129        if (array.length != nRows) {
1130            throw new MatrixDimensionMismatchException(array.length, 1, nRows, 1);
1131        }
1132
1133        // perform copy block-wise, to ensure good cache behavior
1134        final int jBlock  = column / BLOCK_SIZE;
1135        final int jColumn = column - jBlock * BLOCK_SIZE;
1136        final int jWidth  = blockWidth(jBlock);
1137        int outIndex      = 0;
1138        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1139            final int iHeight = blockHeight(iBlock);
1140            final T[] block = blocks[iBlock * blockColumns + jBlock];
1141            for (int i = 0; i < iHeight; ++i) {
1142                block[i * jWidth + jColumn] = array[outIndex++];
1143            }
1144        }
1145    }
1146
1147    /** {@inheritDoc} */
1148    @Override
1149    public T getEntry(final int row, final int column)
1150        throws OutOfRangeException {
1151        checkRowIndex(row);
1152        checkColumnIndex(column);
1153
1154        final int iBlock = row    / BLOCK_SIZE;
1155        final int jBlock = column / BLOCK_SIZE;
1156        final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1157            (column - jBlock * BLOCK_SIZE);
1158
1159        return blocks[iBlock * blockColumns + jBlock][k];
1160    }
1161
1162    /** {@inheritDoc} */
1163    @Override
1164    public void setEntry(final int row, final int column, final T value)
1165        throws OutOfRangeException {
1166        checkRowIndex(row);
1167        checkColumnIndex(column);
1168
1169        final int iBlock = row    / BLOCK_SIZE;
1170        final int jBlock = column / BLOCK_SIZE;
1171        final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1172            (column - jBlock * BLOCK_SIZE);
1173
1174        blocks[iBlock * blockColumns + jBlock][k] = value;
1175    }
1176
1177    /** {@inheritDoc} */
1178    @Override
1179    public void addToEntry(final int row, final int column, final T increment)
1180        throws OutOfRangeException {
1181        checkRowIndex(row);
1182        checkColumnIndex(column);
1183
1184        final int iBlock = row    / BLOCK_SIZE;
1185        final int jBlock = column / BLOCK_SIZE;
1186        final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1187            (column - jBlock * BLOCK_SIZE);
1188        final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1189
1190        blockIJ[k] = blockIJ[k].add(increment);
1191    }
1192
1193    /** {@inheritDoc} */
1194    @Override
1195    public void multiplyEntry(final int row, final int column, final T factor)
1196        throws OutOfRangeException {
1197        checkRowIndex(row);
1198        checkColumnIndex(column);
1199
1200        final int iBlock = row    / BLOCK_SIZE;
1201        final int jBlock = column / BLOCK_SIZE;
1202        final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1203            (column - jBlock * BLOCK_SIZE);
1204        final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1205
1206        blockIJ[k] = blockIJ[k].multiply(factor);
1207    }
1208
1209    /** {@inheritDoc} */
1210    @Override
1211    public FieldMatrix<T> transpose() {
1212        final int nRows = getRowDimension();
1213        final int nCols = getColumnDimension();
1214        final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), nCols, nRows);
1215
1216        // perform transpose block-wise, to ensure good cache behavior
1217        int blockIndex = 0;
1218        for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1219            for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1220
1221                // transpose current block
1222                final T[] outBlock = out.blocks[blockIndex];
1223                final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1224                final int      pStart   = iBlock * BLOCK_SIZE;
1225                final int      pEnd     = JdkMath.min(pStart + BLOCK_SIZE, columns);
1226                final int      qStart   = jBlock * BLOCK_SIZE;
1227                final int      qEnd     = JdkMath.min(qStart + BLOCK_SIZE, rows);
1228                int k = 0;
1229                for (int p = pStart; p < pEnd; ++p) {
1230                    final int lInc = pEnd - pStart;
1231                    int l = p - pStart;
1232                    for (int q = qStart; q < qEnd; ++q) {
1233                        outBlock[k] = tBlock[l];
1234                        ++k;
1235                        l+= lInc;
1236                    }
1237                }
1238
1239                // go to next block
1240                ++blockIndex;
1241            }
1242        }
1243
1244        return out;
1245    }
1246
1247    /** {@inheritDoc} */
1248    @Override
1249    public int getRowDimension() {
1250        return rows;
1251    }
1252
1253    /** {@inheritDoc} */
1254    @Override
1255    public int getColumnDimension() {
1256        return columns;
1257    }
1258
1259    /** {@inheritDoc} */
1260    @Override
1261    public T[] operate(final T[] v) throws DimensionMismatchException {
1262        if (v.length != columns) {
1263            throw new DimensionMismatchException(v.length, columns);
1264        }
1265        final T[] out = MathArrays.buildArray(getField(), rows);
1266        final T zero = getField().getZero();
1267
1268        // perform multiplication block-wise, to ensure good cache behavior
1269        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1270            final int pStart = iBlock * BLOCK_SIZE;
1271            final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
1272            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1273                final T[] block  = blocks[iBlock * blockColumns + jBlock];
1274                final int      qStart = jBlock * BLOCK_SIZE;
1275                final int      qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
1276                int k = 0;
1277                for (int p = pStart; p < pEnd; ++p) {
1278                    T sum = zero;
1279                    int q = qStart;
1280                    while (q < qEnd - 3) {
1281                        sum = sum.
1282                              add(block[k].multiply(v[q])).
1283                              add(block[k + 1].multiply(v[q + 1])).
1284                              add(block[k + 2].multiply(v[q + 2])).
1285                              add(block[k + 3].multiply(v[q + 3]));
1286                        k += 4;
1287                        q += 4;
1288                    }
1289                    while (q < qEnd) {
1290                        sum = sum.add(block[k++].multiply(v[q++]));
1291                    }
1292                    out[p] = out[p].add(sum);
1293                }
1294            }
1295        }
1296
1297        return out;
1298    }
1299
1300    /** {@inheritDoc} */
1301    @Override
1302    public T[] preMultiply(final T[] v) throws DimensionMismatchException {
1303
1304        if (v.length != rows) {
1305            throw new DimensionMismatchException(v.length, rows);
1306        }
1307        final T[] out = MathArrays.buildArray(getField(), columns);
1308        final T zero = getField().getZero();
1309
1310        // perform multiplication block-wise, to ensure good cache behavior
1311        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1312            final int jWidth  = blockWidth(jBlock);
1313            final int jWidth2 = jWidth  + jWidth;
1314            final int jWidth3 = jWidth2 + jWidth;
1315            final int jWidth4 = jWidth3 + jWidth;
1316            final int qStart = jBlock * BLOCK_SIZE;
1317            final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
1318            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1319                final T[] block  = blocks[iBlock * blockColumns + jBlock];
1320                final int      pStart = iBlock * BLOCK_SIZE;
1321                final int      pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
1322                for (int q = qStart; q < qEnd; ++q) {
1323                    int k = q - qStart;
1324                    T sum = zero;
1325                    int p = pStart;
1326                    while (p < pEnd - 3) {
1327                        sum = sum.
1328                              add(block[k].multiply(v[p])).
1329                              add(block[k + jWidth].multiply(v[p + 1])).
1330                              add(block[k + jWidth2].multiply(v[p + 2])).
1331                              add(block[k + jWidth3].multiply(v[p + 3]));
1332                        k += jWidth4;
1333                        p += 4;
1334                    }
1335                    while (p < pEnd) {
1336                        sum = sum.add(block[k].multiply(v[p++]));
1337                        k += jWidth;
1338                    }
1339                    out[q] = out[q].add(sum);
1340                }
1341            }
1342        }
1343
1344        return out;
1345    }
1346
1347    /** {@inheritDoc} */
1348    @Override
1349    public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor) {
1350        visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1351        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1352            final int pStart = iBlock * BLOCK_SIZE;
1353            final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
1354            for (int p = pStart; p < pEnd; ++p) {
1355                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1356                    final int jWidth = blockWidth(jBlock);
1357                    final int qStart = jBlock * BLOCK_SIZE;
1358                    final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
1359                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1360                    int k = (p - pStart) * jWidth;
1361                    for (int q = qStart; q < qEnd; ++q) {
1362                        block[k] = visitor.visit(p, q, block[k]);
1363                        ++k;
1364                    }
1365                }
1366             }
1367        }
1368        return visitor.end();
1369    }
1370
1371    /** {@inheritDoc} */
1372    @Override
1373    public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1374        visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1375        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1376            final int pStart = iBlock * BLOCK_SIZE;
1377            final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
1378            for (int p = pStart; p < pEnd; ++p) {
1379                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1380                    final int jWidth = blockWidth(jBlock);
1381                    final int qStart = jBlock * BLOCK_SIZE;
1382                    final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
1383                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1384                    int k = (p - pStart) * jWidth;
1385                    for (int q = qStart; q < qEnd; ++q) {
1386                        visitor.visit(p, q, block[k]);
1387                        ++k;
1388                    }
1389                }
1390             }
1391        }
1392        return visitor.end();
1393    }
1394
1395    /** {@inheritDoc} */
1396    @Override
1397    public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1398                            final int startRow, final int endRow,
1399                            final int startColumn, final int endColumn)
1400        throws OutOfRangeException, NumberIsTooSmallException {
1401        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1402        visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1403        for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1404            final int p0     = iBlock * BLOCK_SIZE;
1405            final int pStart = JdkMath.max(startRow, p0);
1406            final int pEnd   = JdkMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1407            for (int p = pStart; p < pEnd; ++p) {
1408                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1409                    final int jWidth = blockWidth(jBlock);
1410                    final int q0     = jBlock * BLOCK_SIZE;
1411                    final int qStart = JdkMath.max(startColumn, q0);
1412                    final int qEnd   = JdkMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1413                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1414                    int k = (p - p0) * jWidth + qStart - q0;
1415                    for (int q = qStart; q < qEnd; ++q) {
1416                        block[k] = visitor.visit(p, q, block[k]);
1417                        ++k;
1418                    }
1419                }
1420             }
1421        }
1422        return visitor.end();
1423    }
1424
1425    /** {@inheritDoc} */
1426    @Override
1427    public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1428                            final int startRow, final int endRow,
1429                            final int startColumn, final int endColumn)
1430        throws OutOfRangeException, NumberIsTooSmallException {
1431        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1432        visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1433        for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1434            final int p0     = iBlock * BLOCK_SIZE;
1435            final int pStart = JdkMath.max(startRow, p0);
1436            final int pEnd   = JdkMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1437            for (int p = pStart; p < pEnd; ++p) {
1438                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1439                    final int jWidth = blockWidth(jBlock);
1440                    final int q0     = jBlock * BLOCK_SIZE;
1441                    final int qStart = JdkMath.max(startColumn, q0);
1442                    final int qEnd   = JdkMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1443                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1444                    int k = (p - p0) * jWidth + qStart - q0;
1445                    for (int q = qStart; q < qEnd; ++q) {
1446                        visitor.visit(p, q, block[k]);
1447                        ++k;
1448                    }
1449                }
1450             }
1451        }
1452        return visitor.end();
1453    }
1454
1455    /** {@inheritDoc} */
1456    @Override
1457    public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor) {
1458        visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1459        int blockIndex = 0;
1460        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1461            final int pStart = iBlock * BLOCK_SIZE;
1462            final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
1463            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1464                final int qStart = jBlock * BLOCK_SIZE;
1465                final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
1466                final T[] block = blocks[blockIndex];
1467                int k = 0;
1468                for (int p = pStart; p < pEnd; ++p) {
1469                    for (int q = qStart; q < qEnd; ++q) {
1470                        block[k] = visitor.visit(p, q, block[k]);
1471                        ++k;
1472                    }
1473                }
1474                ++blockIndex;
1475            }
1476        }
1477        return visitor.end();
1478    }
1479
1480    /** {@inheritDoc} */
1481    @Override
1482    public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1483        visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1484        int blockIndex = 0;
1485        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1486            final int pStart = iBlock * BLOCK_SIZE;
1487            final int pEnd   = JdkMath.min(pStart + BLOCK_SIZE, rows);
1488            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1489                final int qStart = jBlock * BLOCK_SIZE;
1490                final int qEnd   = JdkMath.min(qStart + BLOCK_SIZE, columns);
1491                final T[] block = blocks[blockIndex];
1492                int k = 0;
1493                for (int p = pStart; p < pEnd; ++p) {
1494                    for (int q = qStart; q < qEnd; ++q) {
1495                        visitor.visit(p, q, block[k]);
1496                        ++k;
1497                    }
1498                }
1499                ++blockIndex;
1500            }
1501        }
1502        return visitor.end();
1503    }
1504
1505    /** {@inheritDoc} */
1506    @Override
1507    public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1508                                  final int startRow, final int endRow,
1509                                  final int startColumn, final int endColumn)
1510        throws OutOfRangeException, NumberIsTooSmallException {
1511        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1512        visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1513        for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1514            final int p0     = iBlock * BLOCK_SIZE;
1515            final int pStart = JdkMath.max(startRow, p0);
1516            final int pEnd   = JdkMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1517            for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1518                final int jWidth = blockWidth(jBlock);
1519                final int q0     = jBlock * BLOCK_SIZE;
1520                final int qStart = JdkMath.max(startColumn, q0);
1521                final int qEnd   = JdkMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1522                final T[] block = blocks[iBlock * blockColumns + jBlock];
1523                for (int p = pStart; p < pEnd; ++p) {
1524                    int k = (p - p0) * jWidth + qStart - q0;
1525                    for (int q = qStart; q < qEnd; ++q) {
1526                        block[k] = visitor.visit(p, q, block[k]);
1527                        ++k;
1528                    }
1529                }
1530            }
1531        }
1532        return visitor.end();
1533    }
1534
1535    /** {@inheritDoc} */
1536    @Override
1537    public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1538                                  final int startRow, final int endRow,
1539                                  final int startColumn, final int endColumn)
1540        throws OutOfRangeException, NumberIsTooSmallException {
1541        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1542        visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1543        for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1544            final int p0     = iBlock * BLOCK_SIZE;
1545            final int pStart = JdkMath.max(startRow, p0);
1546            final int pEnd   = JdkMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1547            for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1548                final int jWidth = blockWidth(jBlock);
1549                final int q0     = jBlock * BLOCK_SIZE;
1550                final int qStart = JdkMath.max(startColumn, q0);
1551                final int qEnd   = JdkMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1552                final T[] block = blocks[iBlock * blockColumns + jBlock];
1553                for (int p = pStart; p < pEnd; ++p) {
1554                    int k = (p - p0) * jWidth + qStart - q0;
1555                    for (int q = qStart; q < qEnd; ++q) {
1556                        visitor.visit(p, q, block[k]);
1557                        ++k;
1558                    }
1559                }
1560            }
1561        }
1562        return visitor.end();
1563    }
1564
1565    /**
1566     * Get the height of a block.
1567     * @param blockRow row index (in block sense) of the block
1568     * @return height (number of rows) of the block
1569     */
1570    private int blockHeight(final int blockRow) {
1571        return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1572    }
1573
1574    /**
1575     * Get the width of a block.
1576     * @param blockColumn column index (in block sense) of the block
1577     * @return width (number of columns) of the block
1578     */
1579    private int blockWidth(final int blockColumn) {
1580        return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1581    }
1582}