001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math3.linear;
019
020import java.io.Serializable;
021
022import org.apache.commons.math3.Field;
023import org.apache.commons.math3.FieldElement;
024import org.apache.commons.math3.exception.NoDataException;
025import org.apache.commons.math3.exception.DimensionMismatchException;
026import org.apache.commons.math3.exception.NotStrictlyPositiveException;
027import org.apache.commons.math3.exception.NullArgumentException;
028import org.apache.commons.math3.exception.NumberIsTooSmallException;
029import org.apache.commons.math3.exception.OutOfRangeException;
030import org.apache.commons.math3.exception.util.LocalizedFormats;
031import org.apache.commons.math3.util.FastMath;
032import org.apache.commons.math3.util.MathArrays;
033import org.apache.commons.math3.util.MathUtils;
034
035/**
036 * Cache-friendly implementation of FieldMatrix using a flat arrays to store
037 * square blocks of the matrix.
038 * <p>
039 * This implementation is specially designed to be cache-friendly. Square blocks are
040 * stored as small arrays and allow efficient traversal of data both in row major direction
041 * and columns major direction, one block at a time. This greatly increases performances
042 * for algorithms that use crossed directions loops like multiplication or transposition.
043 * </p>
044 * <p>
045 * The size of square blocks is a static parameter. It may be tuned according to the cache
046 * size of the target computer processor. As a rule of thumbs, it should be the largest
047 * value that allows three blocks to be simultaneously cached (this is necessary for example
048 * for matrix multiplication). The default value is to use 36x36 blocks.
049 * </p>
050 * <p>
051 * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
052 * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
053 * blocks are flattened in row major order in single dimension arrays which are therefore
054 * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
055 * organized in row major order.
056 * </p>
057 * <p>
058 * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
059 * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
060 * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
061 * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
062 * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
063 * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
064 * rectangle.
065 * </p>
066 * <p>
067 * The layout complexity overhead versus simple mapping of matrices to java
068 * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
069 * to up to 3-fold improvements for matrices of moderate to large size.
070 * </p>
071 * @param <T> the type of the field elements
072 * @since 2.0
073 */
074public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
075    /** Block size. */
076    public static final int BLOCK_SIZE = 36;
077    /** Serializable version identifier. */
078    private static final long serialVersionUID = -4602336630143123183L;
079    /** Blocks of matrix entries. */
080    private final T blocks[][];
081    /** Number of rows of the matrix. */
082    private final int rows;
083    /** Number of columns of the matrix. */
084    private final int columns;
085    /** Number of block rows of the matrix. */
086    private final int blockRows;
087    /** Number of block columns of the matrix. */
088    private final int blockColumns;
089
090    /**
091     * Create a new matrix with the supplied row and column dimensions.
092     *
093     * @param field Field to which the elements belong.
094     * @param rows Number of rows in the new matrix.
095     * @param columns Number of columns in the new matrix.
096     * @throws NotStrictlyPositiveException if row or column dimension is not
097     * positive.
098     */
099    public BlockFieldMatrix(final Field<T> field, final int rows,
100                            final int columns)
101        throws NotStrictlyPositiveException {
102        super(field, rows, columns);
103        this.rows    = rows;
104        this.columns = columns;
105
106        // number of blocks
107        blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
108        blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
109
110        // allocate storage blocks, taking care of smaller ones at right and bottom
111        blocks = createBlocksLayout(field, rows, columns);
112    }
113
114    /**
115     * Create a new dense matrix copying entries from raw layout data.
116     * <p>The input array <em>must</em> already be in raw layout.</p>
117     * <p>Calling this constructor is equivalent to call:
118     * <pre>matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
119     *                                   toBlocksLayout(rawData), false);</pre>
120     * </p>
121     *
122     * @param rawData Data for the new matrix, in raw layout.
123     * @throws DimensionMismatchException if the {@code blockData} shape is
124     * inconsistent with block layout.
125     * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
126     */
127    public BlockFieldMatrix(final T[][] rawData)
128        throws DimensionMismatchException {
129        this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
130    }
131
132    /**
133     * Create a new dense matrix copying entries from block layout data.
134     * <p>The input array <em>must</em> already be in blocks layout.</p>
135     * @param rows  the number of rows in the new matrix
136     * @param columns  the number of columns in the new matrix
137     * @param blockData data for new matrix
138     * @param copyArray if true, the input array will be copied, otherwise
139     * it will be referenced
140     *
141     * @throws DimensionMismatchException if the {@code blockData} shape is
142     * inconsistent with block layout.
143     * @throws NotStrictlyPositiveException if row or column dimension is not
144     * positive.
145     * @see #createBlocksLayout(Field, int, int)
146     * @see #toBlocksLayout(FieldElement[][])
147     * @see #BlockFieldMatrix(FieldElement[][])
148     */
149    public BlockFieldMatrix(final int rows, final int columns,
150                            final T[][] blockData, final boolean copyArray)
151        throws DimensionMismatchException, NotStrictlyPositiveException {
152        super(extractField(blockData), rows, columns);
153        this.rows    = rows;
154        this.columns = columns;
155
156        // number of blocks
157        blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
158        blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
159
160        if (copyArray) {
161            // allocate storage blocks, taking care of smaller ones at right and bottom
162            blocks = MathArrays.buildArray(getField(), blockRows * blockColumns, -1);
163        } else {
164            // reference existing array
165            blocks = blockData;
166        }
167
168        int index = 0;
169        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
170            final int iHeight = blockHeight(iBlock);
171            for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
172                if (blockData[index].length != iHeight * blockWidth(jBlock)) {
173                    throw new DimensionMismatchException(blockData[index].length,
174                                                         iHeight * blockWidth(jBlock));
175                }
176                if (copyArray) {
177                    blocks[index] = blockData[index].clone();
178                }
179            }
180        }
181    }
182
183    /**
184     * Convert a data array from raw layout to blocks layout.
185     * <p>
186     * Raw layout is the straightforward layout where element at row i and
187     * column j is in array element <code>rawData[i][j]</code>. Blocks layout
188     * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
189     * is split in square blocks (except at right and bottom side where blocks may
190     * be rectangular to fit matrix size) and each block is stored in a flattened
191     * one-dimensional array.
192     * </p>
193     * <p>
194     * This method creates an array in blocks layout from an input array in raw layout.
195     * It can be used to provide the array argument of the {@link
196     * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
197     * constructor.
198     * </p>
199     * @param <T> Type of the field elements.
200     * @param rawData Data array in raw layout.
201     * @return a new data array containing the same entries but in blocks layout
202     * @throws DimensionMismatchException if {@code rawData} is not rectangular
203     *  (not all rows have the same length).
204     * @see #createBlocksLayout(Field, int, int)
205     * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
206     */
207    public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
208        throws DimensionMismatchException {
209
210        final int rows         = rawData.length;
211        final int columns      = rawData[0].length;
212        final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
213        final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
214
215        // safety checks
216        for (int i = 0; i < rawData.length; ++i) {
217            final int length = rawData[i].length;
218            if (length != columns) {
219                throw new DimensionMismatchException(columns, length);
220            }
221        }
222
223        // convert array
224        final Field<T> field = extractField(rawData);
225        final T[][] blocks = MathArrays.buildArray(field, blockRows * blockColumns, -1);
226        int blockIndex = 0;
227        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
228            final int pStart  = iBlock * BLOCK_SIZE;
229            final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
230            final int iHeight = pEnd - pStart;
231            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
232                final int qStart = jBlock * BLOCK_SIZE;
233                final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
234                final int jWidth = qEnd - qStart;
235
236                // allocate new block
237                final T[] block = MathArrays.buildArray(field, iHeight * jWidth);
238                blocks[blockIndex] = block;
239
240                // copy data
241                int index = 0;
242                for (int p = pStart; p < pEnd; ++p) {
243                    System.arraycopy(rawData[p], qStart, block, index, jWidth);
244                    index += jWidth;
245                }
246
247                ++blockIndex;
248            }
249        }
250
251        return blocks;
252    }
253
254    /**
255     * Create a data array in blocks layout.
256     * <p>
257     * This method can be used to create the array argument of the {@link
258     * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
259     * constructor.
260     * </p>
261     * @param <T> Type of the field elements.
262     * @param field Field to which the elements belong.
263     * @param rows Number of rows in the new matrix.
264     * @param columns Number of columns in the new matrix.
265     * @return a new data array in blocks layout.
266     * @see #toBlocksLayout(FieldElement[][])
267     * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
268     */
269    public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
270                                                                       final int rows, final int columns) {
271        final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
272        final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
273
274        final T[][] blocks = MathArrays.buildArray(field, blockRows * blockColumns, -1);
275        int blockIndex = 0;
276        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
277            final int pStart  = iBlock * BLOCK_SIZE;
278            final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
279            final int iHeight = pEnd - pStart;
280            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
281                final int qStart = jBlock * BLOCK_SIZE;
282                final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
283                final int jWidth = qEnd - qStart;
284                blocks[blockIndex] = MathArrays.buildArray(field, iHeight * jWidth);
285                ++blockIndex;
286            }
287        }
288
289        return blocks;
290    }
291
292    /** {@inheritDoc} */
293    @Override
294    public FieldMatrix<T> createMatrix(final int rowDimension,
295                                       final int columnDimension)
296        throws NotStrictlyPositiveException {
297        return new BlockFieldMatrix<T>(getField(), rowDimension,
298                                       columnDimension);
299    }
300
301    /** {@inheritDoc} */
302    @Override
303    public FieldMatrix<T> copy() {
304
305        // create an empty matrix
306        BlockFieldMatrix<T> copied = new BlockFieldMatrix<T>(getField(), rows, columns);
307
308        // copy the blocks
309        for (int i = 0; i < blocks.length; ++i) {
310            System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
311        }
312
313        return copied;
314    }
315
316    /** {@inheritDoc} */
317    @Override
318    public FieldMatrix<T> add(final FieldMatrix<T> m)
319        throws MatrixDimensionMismatchException {
320        try {
321            return add((BlockFieldMatrix<T>) m);
322        } catch (ClassCastException cce) {
323
324            // safety check
325            checkAdditionCompatible(m);
326
327            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
328
329            // perform addition block-wise, to ensure good cache behavior
330            int blockIndex = 0;
331            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
332                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
333
334                    // perform addition on the current block
335                    final T[] outBlock = out.blocks[blockIndex];
336                    final T[] tBlock   = blocks[blockIndex];
337                    final int      pStart   = iBlock * BLOCK_SIZE;
338                    final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
339                    final int      qStart   = jBlock * BLOCK_SIZE;
340                    final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
341                    int k = 0;
342                    for (int p = pStart; p < pEnd; ++p) {
343                        for (int q = qStart; q < qEnd; ++q) {
344                            outBlock[k] = tBlock[k].add(m.getEntry(p, q));
345                            ++k;
346                        }
347                    }
348
349                    // go to next block
350                    ++blockIndex;
351
352                }
353            }
354
355            return out;
356        }
357    }
358
359    /**
360     * Compute the sum of {@code this} and {@code m}.
361     *
362     * @param m matrix to be added
363     * @return {@code this + m}
364     * @throws MatrixDimensionMismatchException if {@code m} is not the same
365     * size as {@code this}
366     */
367    public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
368        throws MatrixDimensionMismatchException {
369
370        // safety check
371        checkAdditionCompatible(m);
372
373        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
374
375        // perform addition block-wise, to ensure good cache behavior
376        for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
377            final T[] outBlock = out.blocks[blockIndex];
378            final T[] tBlock   = blocks[blockIndex];
379            final T[] mBlock   = m.blocks[blockIndex];
380            for (int k = 0; k < outBlock.length; ++k) {
381                outBlock[k] = tBlock[k].add(mBlock[k]);
382            }
383        }
384
385        return out;
386    }
387
388    /** {@inheritDoc} */
389    @Override
390    public FieldMatrix<T> subtract(final FieldMatrix<T> m)
391        throws MatrixDimensionMismatchException {
392        try {
393            return subtract((BlockFieldMatrix<T>) m);
394        } catch (ClassCastException cce) {
395
396            // safety check
397            checkSubtractionCompatible(m);
398
399            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
400
401            // perform subtraction block-wise, to ensure good cache behavior
402            int blockIndex = 0;
403            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
404                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
405
406                    // perform subtraction on the current block
407                    final T[] outBlock = out.blocks[blockIndex];
408                    final T[] tBlock   = blocks[blockIndex];
409                    final int      pStart   = iBlock * BLOCK_SIZE;
410                    final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
411                    final int      qStart   = jBlock * BLOCK_SIZE;
412                    final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
413                    int k = 0;
414                    for (int p = pStart; p < pEnd; ++p) {
415                        for (int q = qStart; q < qEnd; ++q) {
416                            outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
417                            ++k;
418                        }
419                    }
420
421                    // go to next block
422                    ++blockIndex;
423
424                }
425            }
426
427            return out;
428        }
429    }
430
431    /**
432     * Compute {@code this - m}.
433     *
434     * @param m matrix to be subtracted
435     * @return {@code this - m}
436     * @throws MatrixDimensionMismatchException if {@code m} is not the same
437     * size as {@code this}
438     */
439    public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m) throws MatrixDimensionMismatchException {
440        // safety check
441        checkSubtractionCompatible(m);
442
443        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
444
445        // perform subtraction block-wise, to ensure good cache behavior
446        for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
447            final T[] outBlock = out.blocks[blockIndex];
448            final T[] tBlock   = blocks[blockIndex];
449            final T[] mBlock   = m.blocks[blockIndex];
450            for (int k = 0; k < outBlock.length; ++k) {
451                outBlock[k] = tBlock[k].subtract(mBlock[k]);
452            }
453        }
454
455        return out;
456    }
457
458    /** {@inheritDoc} */
459    @Override
460    public FieldMatrix<T> scalarAdd(final T d) {
461        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
462
463        // perform subtraction block-wise, to ensure good cache behavior
464        for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
465            final T[] outBlock = out.blocks[blockIndex];
466            final T[] tBlock   = blocks[blockIndex];
467            for (int k = 0; k < outBlock.length; ++k) {
468                outBlock[k] = tBlock[k].add(d);
469            }
470        }
471
472        return out;
473    }
474
475    /** {@inheritDoc} */
476    @Override
477    public FieldMatrix<T> scalarMultiply(final T d) {
478
479        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
480
481        // perform subtraction block-wise, to ensure good cache behavior
482        for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
483            final T[] outBlock = out.blocks[blockIndex];
484            final T[] tBlock   = blocks[blockIndex];
485            for (int k = 0; k < outBlock.length; ++k) {
486                outBlock[k] = tBlock[k].multiply(d);
487            }
488        }
489
490        return out;
491    }
492
493    /** {@inheritDoc} */
494    @Override
495    public FieldMatrix<T> multiply(final FieldMatrix<T> m)
496        throws DimensionMismatchException {
497        try {
498            return multiply((BlockFieldMatrix<T>) m);
499        } catch (ClassCastException cce) {
500
501            // safety check
502            checkMultiplicationCompatible(m);
503
504            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.getColumnDimension());
505            final T zero = getField().getZero();
506
507            // perform multiplication block-wise, to ensure good cache behavior
508            int blockIndex = 0;
509            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
510
511                final int pStart = iBlock * BLOCK_SIZE;
512                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
513
514                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
515
516                    final int qStart = jBlock * BLOCK_SIZE;
517                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
518
519                    // select current block
520                    final T[] outBlock = out.blocks[blockIndex];
521
522                    // perform multiplication on current block
523                    for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
524                        final int kWidth      = blockWidth(kBlock);
525                        final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
526                        final int rStart      = kBlock * BLOCK_SIZE;
527                        int k = 0;
528                        for (int p = pStart; p < pEnd; ++p) {
529                            final int lStart = (p - pStart) * kWidth;
530                            final int lEnd   = lStart + kWidth;
531                            for (int q = qStart; q < qEnd; ++q) {
532                                T sum = zero;
533                                int r = rStart;
534                                for (int l = lStart; l < lEnd; ++l) {
535                                    sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
536                                    ++r;
537                                }
538                                outBlock[k] = outBlock[k].add(sum);
539                                ++k;
540                            }
541                        }
542                    }
543
544                    // go to next block
545                    ++blockIndex;
546
547                }
548            }
549
550            return out;
551        }
552    }
553
554    /**
555     * Returns the result of postmultiplying {@code this} by {@code m}.
556     *
557     * @param m matrix to postmultiply by
558     * @return {@code this * m}
559     * @throws DimensionMismatchException if the matrices are not compatible.
560     */
561    public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m)
562        throws DimensionMismatchException {
563
564        // safety check
565        checkMultiplicationCompatible(m);
566
567        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.columns);
568        final T zero = getField().getZero();
569
570        // perform multiplication block-wise, to ensure good cache behavior
571        int blockIndex = 0;
572        for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
573
574            final int pStart = iBlock * BLOCK_SIZE;
575            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
576
577            for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
578                final int jWidth = out.blockWidth(jBlock);
579                final int jWidth2 = jWidth  + jWidth;
580                final int jWidth3 = jWidth2 + jWidth;
581                final int jWidth4 = jWidth3 + jWidth;
582
583                // select current block
584                final T[] outBlock = out.blocks[blockIndex];
585
586                // perform multiplication on current block
587                for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
588                    final int kWidth = blockWidth(kBlock);
589                    final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
590                    final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
591                    int k = 0;
592                    for (int p = pStart; p < pEnd; ++p) {
593                        final int lStart = (p - pStart) * kWidth;
594                        final int lEnd   = lStart + kWidth;
595                        for (int nStart = 0; nStart < jWidth; ++nStart) {
596                            T sum = zero;
597                            int l = lStart;
598                            int n = nStart;
599                            while (l < lEnd - 3) {
600                                sum = sum.
601                                      add(tBlock[l].multiply(mBlock[n])).
602                                      add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
603                                      add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
604                                      add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
605                                l += 4;
606                                n += jWidth4;
607                            }
608                            while (l < lEnd) {
609                                sum = sum.add(tBlock[l++].multiply(mBlock[n]));
610                                n += jWidth;
611                            }
612                            outBlock[k] = outBlock[k].add(sum);
613                            ++k;
614                        }
615                    }
616                }
617
618                // go to next block
619                ++blockIndex;
620            }
621        }
622
623        return out;
624    }
625
626    /** {@inheritDoc} */
627    @Override
628    public T[][] getData() {
629
630        final T[][] data = MathArrays.buildArray(getField(), getRowDimension(), getColumnDimension());
631        final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
632
633        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
634            final int pStart = iBlock * BLOCK_SIZE;
635            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
636            int regularPos   = 0;
637            int lastPos      = 0;
638            for (int p = pStart; p < pEnd; ++p) {
639                final T[] dataP = data[p];
640                int blockIndex = iBlock * blockColumns;
641                int dataPos    = 0;
642                for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
643                    System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
644                    dataPos += BLOCK_SIZE;
645                }
646                System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
647                regularPos += BLOCK_SIZE;
648                lastPos    += lastColumns;
649            }
650        }
651
652        return data;
653    }
654
655    /** {@inheritDoc} */
656    @Override
657    public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
658                                       final int startColumn,
659                                       final int endColumn)
660        throws OutOfRangeException, NumberIsTooSmallException {
661        // safety checks
662        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
663
664        // create the output matrix
665        final BlockFieldMatrix<T> out =
666            new BlockFieldMatrix<T>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
667
668        // compute blocks shifts
669        final int blockStartRow    = startRow    / BLOCK_SIZE;
670        final int rowsShift        = startRow    % BLOCK_SIZE;
671        final int blockStartColumn = startColumn / BLOCK_SIZE;
672        final int columnsShift     = startColumn % BLOCK_SIZE;
673
674        // perform extraction block-wise, to ensure good cache behavior
675        int pBlock = blockStartRow;
676        for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
677            final int iHeight = out.blockHeight(iBlock);
678            int qBlock = blockStartColumn;
679            for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
680                final int jWidth = out.blockWidth(jBlock);
681
682                // handle one block of the output matrix
683                final int      outIndex = iBlock * out.blockColumns + jBlock;
684                final T[] outBlock = out.blocks[outIndex];
685                final int      index    = pBlock * blockColumns + qBlock;
686                final int      width    = blockWidth(qBlock);
687
688                final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
689                final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
690                if (heightExcess > 0) {
691                    // the submatrix block spans on two blocks rows from the original matrix
692                    if (widthExcess > 0) {
693                        // the submatrix block spans on two blocks columns from the original matrix
694                        final int width2 = blockWidth(qBlock + 1);
695                        copyBlockPart(blocks[index], width,
696                                      rowsShift, BLOCK_SIZE,
697                                      columnsShift, BLOCK_SIZE,
698                                      outBlock, jWidth, 0, 0);
699                        copyBlockPart(blocks[index + 1], width2,
700                                      rowsShift, BLOCK_SIZE,
701                                      0, widthExcess,
702                                      outBlock, jWidth, 0, jWidth - widthExcess);
703                        copyBlockPart(blocks[index + blockColumns], width,
704                                      0, heightExcess,
705                                      columnsShift, BLOCK_SIZE,
706                                      outBlock, jWidth, iHeight - heightExcess, 0);
707                        copyBlockPart(blocks[index + blockColumns + 1], width2,
708                                      0, heightExcess,
709                                      0, widthExcess,
710                                      outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
711                    } else {
712                        // the submatrix block spans on one block column from the original matrix
713                        copyBlockPart(blocks[index], width,
714                                      rowsShift, BLOCK_SIZE,
715                                      columnsShift, jWidth + columnsShift,
716                                      outBlock, jWidth, 0, 0);
717                        copyBlockPart(blocks[index + blockColumns], width,
718                                      0, heightExcess,
719                                      columnsShift, jWidth + columnsShift,
720                                      outBlock, jWidth, iHeight - heightExcess, 0);
721                    }
722                } else {
723                    // the submatrix block spans on one block row from the original matrix
724                    if (widthExcess > 0) {
725                        // the submatrix block spans on two blocks columns from the original matrix
726                        final int width2 = blockWidth(qBlock + 1);
727                        copyBlockPart(blocks[index], width,
728                                      rowsShift, iHeight + rowsShift,
729                                      columnsShift, BLOCK_SIZE,
730                                      outBlock, jWidth, 0, 0);
731                        copyBlockPart(blocks[index + 1], width2,
732                                      rowsShift, iHeight + rowsShift,
733                                      0, widthExcess,
734                                      outBlock, jWidth, 0, jWidth - widthExcess);
735                    } else {
736                        // the submatrix block spans on one block column from the original matrix
737                        copyBlockPart(blocks[index], width,
738                                      rowsShift, iHeight + rowsShift,
739                                      columnsShift, jWidth + columnsShift,
740                                      outBlock, jWidth, 0, 0);
741                    }
742               }
743                ++qBlock;
744            }
745            ++pBlock;
746        }
747
748        return out;
749    }
750
751    /**
752     * Copy a part of a block into another one
753     * <p>This method can be called only when the specified part fits in both
754     * blocks, no verification is done here.</p>
755     * @param srcBlock source block
756     * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
757     * @param srcStartRow start row in the source block
758     * @param srcEndRow end row (exclusive) in the source block
759     * @param srcStartColumn start column in the source block
760     * @param srcEndColumn end column (exclusive) in the source block
761     * @param dstBlock destination block
762     * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
763     * @param dstStartRow start row in the destination block
764     * @param dstStartColumn start column in the destination block
765     */
766    private void copyBlockPart(final T[] srcBlock, final int srcWidth,
767                               final int srcStartRow, final int srcEndRow,
768                               final int srcStartColumn, final int srcEndColumn,
769                               final T[] dstBlock, final int dstWidth,
770                               final int dstStartRow, final int dstStartColumn) {
771        final int length = srcEndColumn - srcStartColumn;
772        int srcPos = srcStartRow * srcWidth + srcStartColumn;
773        int dstPos = dstStartRow * dstWidth + dstStartColumn;
774        for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
775            System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
776            srcPos += srcWidth;
777            dstPos += dstWidth;
778        }
779    }
780
781    /** {@inheritDoc} */
782    @Override
783    public void setSubMatrix(final T[][] subMatrix, final int row,
784                             final int column)
785        throws DimensionMismatchException, OutOfRangeException,
786        NoDataException, NullArgumentException {
787        // safety checks
788        MathUtils.checkNotNull(subMatrix);
789        final int refLength = subMatrix[0].length;
790        if (refLength == 0) {
791            throw new NoDataException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
792        }
793        final int endRow    = row + subMatrix.length - 1;
794        final int endColumn = column + refLength - 1;
795        checkSubMatrixIndex(row, endRow, column, endColumn);
796        for (final T[] subRow : subMatrix) {
797            if (subRow.length != refLength) {
798                throw new DimensionMismatchException(refLength, subRow.length);
799            }
800        }
801
802        // compute blocks bounds
803        final int blockStartRow    = row / BLOCK_SIZE;
804        final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
805        final int blockStartColumn = column / BLOCK_SIZE;
806        final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
807
808        // perform copy block-wise, to ensure good cache behavior
809        for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
810            final int iHeight  = blockHeight(iBlock);
811            final int firstRow = iBlock * BLOCK_SIZE;
812            final int iStart   = FastMath.max(row,    firstRow);
813            final int iEnd     = FastMath.min(endRow + 1, firstRow + iHeight);
814
815            for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
816                final int jWidth      = blockWidth(jBlock);
817                final int firstColumn = jBlock * BLOCK_SIZE;
818                final int jStart      = FastMath.max(column,    firstColumn);
819                final int jEnd        = FastMath.min(endColumn + 1, firstColumn + jWidth);
820                final int jLength     = jEnd - jStart;
821
822                // handle one block, row by row
823                final T[] block = blocks[iBlock * blockColumns + jBlock];
824                for (int i = iStart; i < iEnd; ++i) {
825                    System.arraycopy(subMatrix[i - row], jStart - column,
826                                     block, (i - firstRow) * jWidth + (jStart - firstColumn),
827                                     jLength);
828                }
829
830            }
831        }
832    }
833
834    /** {@inheritDoc} */
835    @Override
836    public FieldMatrix<T> getRowMatrix(final int row)
837        throws OutOfRangeException {
838        checkRowIndex(row);
839        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), 1, columns);
840
841        // perform copy block-wise, to ensure good cache behavior
842        final int iBlock  = row / BLOCK_SIZE;
843        final int iRow    = row - iBlock * BLOCK_SIZE;
844        int outBlockIndex = 0;
845        int outIndex      = 0;
846        T[] outBlock = out.blocks[outBlockIndex];
847        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
848            final int jWidth     = blockWidth(jBlock);
849            final T[] block = blocks[iBlock * blockColumns + jBlock];
850            final int available  = outBlock.length - outIndex;
851            if (jWidth > available) {
852                System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
853                outBlock = out.blocks[++outBlockIndex];
854                System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
855                outIndex = jWidth - available;
856            } else {
857                System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
858                outIndex += jWidth;
859            }
860        }
861
862        return out;
863    }
864
865    /** {@inheritDoc} */
866    @Override
867    public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
868        throws MatrixDimensionMismatchException, OutOfRangeException {
869        try {
870            setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
871        } catch (ClassCastException cce) {
872            super.setRowMatrix(row, matrix);
873        }
874    }
875
876    /**
877     * Sets the entries in row number <code>row</code>
878     * as a row matrix.  Row indices start at 0.
879     *
880     * @param row the row to be set
881     * @param matrix row matrix (must have one row and the same number of columns
882     * as the instance)
883     * @throws MatrixDimensionMismatchException if the matrix dimensions do
884     * not match one instance row.
885     * @throws OutOfRangeException if the specified row index is invalid.
886     */
887    public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
888        throws MatrixDimensionMismatchException, OutOfRangeException {
889        checkRowIndex(row);
890        final int nCols = getColumnDimension();
891        if ((matrix.getRowDimension() != 1) ||
892            (matrix.getColumnDimension() != nCols)) {
893            throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
894                                                       matrix.getColumnDimension(),
895                                                       1, nCols);
896        }
897
898        // perform copy block-wise, to ensure good cache behavior
899        final int iBlock = row / BLOCK_SIZE;
900        final int iRow   = row - iBlock * BLOCK_SIZE;
901        int mBlockIndex  = 0;
902        int mIndex       = 0;
903        T[] mBlock  = matrix.blocks[mBlockIndex];
904        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
905            final int jWidth     = blockWidth(jBlock);
906            final T[] block = blocks[iBlock * blockColumns + jBlock];
907            final int available  = mBlock.length - mIndex;
908            if (jWidth > available) {
909                System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
910                mBlock = matrix.blocks[++mBlockIndex];
911                System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
912                mIndex = jWidth - available;
913            } else {
914                System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
915                mIndex += jWidth;
916           }
917        }
918    }
919
920    /** {@inheritDoc} */
921    @Override
922    public FieldMatrix<T> getColumnMatrix(final int column)
923        throws OutOfRangeException {
924        checkColumnIndex(column);
925        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, 1);
926
927        // perform copy block-wise, to ensure good cache behavior
928        final int jBlock  = column / BLOCK_SIZE;
929        final int jColumn = column - jBlock * BLOCK_SIZE;
930        final int jWidth  = blockWidth(jBlock);
931        int outBlockIndex = 0;
932        int outIndex      = 0;
933        T[] outBlock = out.blocks[outBlockIndex];
934        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
935            final int iHeight = blockHeight(iBlock);
936            final T[] block = blocks[iBlock * blockColumns + jBlock];
937            for (int i = 0; i < iHeight; ++i) {
938                if (outIndex >= outBlock.length) {
939                    outBlock = out.blocks[++outBlockIndex];
940                    outIndex = 0;
941                }
942                outBlock[outIndex++] = block[i * jWidth + jColumn];
943            }
944        }
945
946        return out;
947    }
948
949    /** {@inheritDoc} */
950    @Override
951    public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
952        throws MatrixDimensionMismatchException, OutOfRangeException {
953        try {
954            setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
955        } catch (ClassCastException cce) {
956            super.setColumnMatrix(column, matrix);
957        }
958    }
959
960    /**
961     * Sets the entries in column number {@code column}
962     * as a column matrix.  Column indices start at 0.
963     *
964     * @param column Column to be set.
965     * @param matrix Column matrix (must have one column and the same number of rows
966     * as the instance).
967     * @throws MatrixDimensionMismatchException if the matrix dimensions do
968     * not match one instance column.
969     * @throws OutOfRangeException if the specified column index is invalid.
970     */
971    void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
972        throws MatrixDimensionMismatchException, OutOfRangeException {
973        checkColumnIndex(column);
974        final int nRows = getRowDimension();
975        if ((matrix.getRowDimension() != nRows) ||
976            (matrix.getColumnDimension() != 1)) {
977            throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
978                                                       matrix.getColumnDimension(),
979                                                       nRows, 1);
980        }
981
982        // perform copy block-wise, to ensure good cache behavior
983        final int jBlock  = column / BLOCK_SIZE;
984        final int jColumn = column - jBlock * BLOCK_SIZE;
985        final int jWidth  = blockWidth(jBlock);
986        int mBlockIndex = 0;
987        int mIndex      = 0;
988        T[] mBlock = matrix.blocks[mBlockIndex];
989        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
990            final int iHeight = blockHeight(iBlock);
991            final T[] block = blocks[iBlock * blockColumns + jBlock];
992            for (int i = 0; i < iHeight; ++i) {
993                if (mIndex >= mBlock.length) {
994                    mBlock = matrix.blocks[++mBlockIndex];
995                    mIndex = 0;
996                }
997                block[i * jWidth + jColumn] = mBlock[mIndex++];
998            }
999        }
1000    }
1001
1002    /** {@inheritDoc} */
1003    @Override
1004    public FieldVector<T> getRowVector(final int row)
1005        throws OutOfRangeException {
1006        checkRowIndex(row);
1007        final T[] outData = MathArrays.buildArray(getField(), columns);
1008
1009        // perform copy block-wise, to ensure good cache behavior
1010        final int iBlock  = row / BLOCK_SIZE;
1011        final int iRow    = row - iBlock * BLOCK_SIZE;
1012        int outIndex      = 0;
1013        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1014            final int jWidth     = blockWidth(jBlock);
1015            final T[] block = blocks[iBlock * blockColumns + jBlock];
1016            System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1017            outIndex += jWidth;
1018        }
1019
1020        return new ArrayFieldVector<T>(getField(), outData, false);
1021    }
1022
1023    /** {@inheritDoc} */
1024    @Override
1025    public void setRowVector(final int row, final FieldVector<T> vector)
1026        throws MatrixDimensionMismatchException, OutOfRangeException {
1027        try {
1028            setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1029        } catch (ClassCastException cce) {
1030            super.setRowVector(row, vector);
1031        }
1032    }
1033
1034    /** {@inheritDoc} */
1035    @Override
1036    public FieldVector<T> getColumnVector(final int column)
1037        throws OutOfRangeException {
1038        checkColumnIndex(column);
1039        final T[] outData = MathArrays.buildArray(getField(), rows);
1040
1041        // perform copy block-wise, to ensure good cache behavior
1042        final int jBlock  = column / BLOCK_SIZE;
1043        final int jColumn = column - jBlock * BLOCK_SIZE;
1044        final int jWidth  = blockWidth(jBlock);
1045        int outIndex      = 0;
1046        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1047            final int iHeight = blockHeight(iBlock);
1048            final T[] block = blocks[iBlock * blockColumns + jBlock];
1049            for (int i = 0; i < iHeight; ++i) {
1050                outData[outIndex++] = block[i * jWidth + jColumn];
1051            }
1052        }
1053
1054        return new ArrayFieldVector<T>(getField(), outData, false);
1055    }
1056
1057    /** {@inheritDoc} */
1058    @Override
1059    public void setColumnVector(final int column, final FieldVector<T> vector)
1060        throws OutOfRangeException, MatrixDimensionMismatchException {
1061        try {
1062            setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1063        } catch (ClassCastException cce) {
1064            super.setColumnVector(column, vector);
1065        }
1066    }
1067
1068    /** {@inheritDoc} */
1069    @Override
1070    public T[] getRow(final int row) throws OutOfRangeException {
1071        checkRowIndex(row);
1072        final T[] out = MathArrays.buildArray(getField(), columns);
1073
1074        // perform copy block-wise, to ensure good cache behavior
1075        final int iBlock  = row / BLOCK_SIZE;
1076        final int iRow    = row - iBlock * BLOCK_SIZE;
1077        int outIndex      = 0;
1078        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1079            final int jWidth     = blockWidth(jBlock);
1080            final T[] block = blocks[iBlock * blockColumns + jBlock];
1081            System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1082            outIndex += jWidth;
1083        }
1084
1085        return out;
1086    }
1087
1088    /** {@inheritDoc} */
1089    @Override
1090    public void setRow(final int row, final T[] array)
1091        throws OutOfRangeException, MatrixDimensionMismatchException {
1092        checkRowIndex(row);
1093        final int nCols = getColumnDimension();
1094        if (array.length != nCols) {
1095            throw new MatrixDimensionMismatchException(1, array.length, 1, nCols);
1096        }
1097
1098        // perform copy block-wise, to ensure good cache behavior
1099        final int iBlock  = row / BLOCK_SIZE;
1100        final int iRow    = row - iBlock * BLOCK_SIZE;
1101        int outIndex      = 0;
1102        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1103            final int jWidth     = blockWidth(jBlock);
1104            final T[] block = blocks[iBlock * blockColumns + jBlock];
1105            System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1106            outIndex += jWidth;
1107        }
1108    }
1109
1110    /** {@inheritDoc} */
1111    @Override
1112    public T[] getColumn(final int column) throws OutOfRangeException {
1113        checkColumnIndex(column);
1114        final T[] out = MathArrays.buildArray(getField(), rows);
1115
1116        // perform copy block-wise, to ensure good cache behavior
1117        final int jBlock  = column / BLOCK_SIZE;
1118        final int jColumn = column - jBlock * BLOCK_SIZE;
1119        final int jWidth  = blockWidth(jBlock);
1120        int outIndex      = 0;
1121        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1122            final int iHeight = blockHeight(iBlock);
1123            final T[] block = blocks[iBlock * blockColumns + jBlock];
1124            for (int i = 0; i < iHeight; ++i) {
1125                out[outIndex++] = block[i * jWidth + jColumn];
1126            }
1127        }
1128
1129        return out;
1130    }
1131
1132    /** {@inheritDoc} */
1133    @Override
1134    public void setColumn(final int column, final T[] array)
1135        throws MatrixDimensionMismatchException, OutOfRangeException {
1136        checkColumnIndex(column);
1137        final int nRows = getRowDimension();
1138        if (array.length != nRows) {
1139            throw new MatrixDimensionMismatchException(array.length, 1, nRows, 1);
1140        }
1141
1142        // perform copy block-wise, to ensure good cache behavior
1143        final int jBlock  = column / BLOCK_SIZE;
1144        final int jColumn = column - jBlock * BLOCK_SIZE;
1145        final int jWidth  = blockWidth(jBlock);
1146        int outIndex      = 0;
1147        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1148            final int iHeight = blockHeight(iBlock);
1149            final T[] block = blocks[iBlock * blockColumns + jBlock];
1150            for (int i = 0; i < iHeight; ++i) {
1151                block[i * jWidth + jColumn] = array[outIndex++];
1152            }
1153        }
1154    }
1155
1156    /** {@inheritDoc} */
1157    @Override
1158    public T getEntry(final int row, final int column)
1159        throws OutOfRangeException {
1160        checkRowIndex(row);
1161        checkColumnIndex(column);
1162
1163        final int iBlock = row    / BLOCK_SIZE;
1164        final int jBlock = column / BLOCK_SIZE;
1165        final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1166            (column - jBlock * BLOCK_SIZE);
1167
1168        return blocks[iBlock * blockColumns + jBlock][k];
1169    }
1170
1171    /** {@inheritDoc} */
1172    @Override
1173    public void setEntry(final int row, final int column, final T value)
1174        throws OutOfRangeException {
1175        checkRowIndex(row);
1176        checkColumnIndex(column);
1177
1178        final int iBlock = row    / BLOCK_SIZE;
1179        final int jBlock = column / BLOCK_SIZE;
1180        final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1181            (column - jBlock * BLOCK_SIZE);
1182
1183        blocks[iBlock * blockColumns + jBlock][k] = value;
1184    }
1185
1186    /** {@inheritDoc} */
1187    @Override
1188    public void addToEntry(final int row, final int column, final T increment)
1189        throws OutOfRangeException {
1190        checkRowIndex(row);
1191        checkColumnIndex(column);
1192
1193        final int iBlock = row    / BLOCK_SIZE;
1194        final int jBlock = column / BLOCK_SIZE;
1195        final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1196            (column - jBlock * BLOCK_SIZE);
1197        final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1198
1199        blockIJ[k] = blockIJ[k].add(increment);
1200    }
1201
1202    /** {@inheritDoc} */
1203    @Override
1204    public void multiplyEntry(final int row, final int column, final T factor)
1205        throws OutOfRangeException {
1206        checkRowIndex(row);
1207        checkColumnIndex(column);
1208
1209        final int iBlock = row    / BLOCK_SIZE;
1210        final int jBlock = column / BLOCK_SIZE;
1211        final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1212            (column - jBlock * BLOCK_SIZE);
1213        final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1214
1215        blockIJ[k] = blockIJ[k].multiply(factor);
1216    }
1217
1218    /** {@inheritDoc} */
1219    @Override
1220    public FieldMatrix<T> transpose() {
1221        final int nRows = getRowDimension();
1222        final int nCols = getColumnDimension();
1223        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), nCols, nRows);
1224
1225        // perform transpose block-wise, to ensure good cache behavior
1226        int blockIndex = 0;
1227        for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1228            for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1229
1230                // transpose current block
1231                final T[] outBlock = out.blocks[blockIndex];
1232                final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1233                final int      pStart   = iBlock * BLOCK_SIZE;
1234                final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, columns);
1235                final int      qStart   = jBlock * BLOCK_SIZE;
1236                final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, rows);
1237                int k = 0;
1238                for (int p = pStart; p < pEnd; ++p) {
1239                    final int lInc = pEnd - pStart;
1240                    int l = p - pStart;
1241                    for (int q = qStart; q < qEnd; ++q) {
1242                        outBlock[k] = tBlock[l];
1243                        ++k;
1244                        l+= lInc;
1245                    }
1246                }
1247
1248                // go to next block
1249                ++blockIndex;
1250
1251            }
1252        }
1253
1254        return out;
1255    }
1256
1257    /** {@inheritDoc} */
1258    @Override
1259    public int getRowDimension() {
1260        return rows;
1261    }
1262
1263    /** {@inheritDoc} */
1264    @Override
1265    public int getColumnDimension() {
1266        return columns;
1267    }
1268
1269    /** {@inheritDoc} */
1270    @Override
1271    public T[] operate(final T[] v) throws DimensionMismatchException {
1272        if (v.length != columns) {
1273            throw new DimensionMismatchException(v.length, columns);
1274        }
1275        final T[] out = MathArrays.buildArray(getField(), rows);
1276        final T zero = getField().getZero();
1277
1278        // perform multiplication block-wise, to ensure good cache behavior
1279        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1280            final int pStart = iBlock * BLOCK_SIZE;
1281            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1282            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1283                final T[] block  = blocks[iBlock * blockColumns + jBlock];
1284                final int      qStart = jBlock * BLOCK_SIZE;
1285                final int      qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1286                int k = 0;
1287                for (int p = pStart; p < pEnd; ++p) {
1288                    T sum = zero;
1289                    int q = qStart;
1290                    while (q < qEnd - 3) {
1291                        sum = sum.
1292                              add(block[k].multiply(v[q])).
1293                              add(block[k + 1].multiply(v[q + 1])).
1294                              add(block[k + 2].multiply(v[q + 2])).
1295                              add(block[k + 3].multiply(v[q + 3]));
1296                        k += 4;
1297                        q += 4;
1298                    }
1299                    while (q < qEnd) {
1300                        sum = sum.add(block[k++].multiply(v[q++]));
1301                    }
1302                    out[p] = out[p].add(sum);
1303                }
1304            }
1305        }
1306
1307        return out;
1308    }
1309
1310    /** {@inheritDoc} */
1311    @Override
1312    public T[] preMultiply(final T[] v) throws DimensionMismatchException {
1313
1314        if (v.length != rows) {
1315            throw new DimensionMismatchException(v.length, rows);
1316        }
1317        final T[] out = MathArrays.buildArray(getField(), columns);
1318        final T zero = getField().getZero();
1319
1320        // perform multiplication block-wise, to ensure good cache behavior
1321        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1322            final int jWidth  = blockWidth(jBlock);
1323            final int jWidth2 = jWidth  + jWidth;
1324            final int jWidth3 = jWidth2 + jWidth;
1325            final int jWidth4 = jWidth3 + jWidth;
1326            final int qStart = jBlock * BLOCK_SIZE;
1327            final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1328            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1329                final T[] block  = blocks[iBlock * blockColumns + jBlock];
1330                final int      pStart = iBlock * BLOCK_SIZE;
1331                final int      pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1332                for (int q = qStart; q < qEnd; ++q) {
1333                    int k = q - qStart;
1334                    T sum = zero;
1335                    int p = pStart;
1336                    while (p < pEnd - 3) {
1337                        sum = sum.
1338                              add(block[k].multiply(v[p])).
1339                              add(block[k + jWidth].multiply(v[p + 1])).
1340                              add(block[k + jWidth2].multiply(v[p + 2])).
1341                              add(block[k + jWidth3].multiply(v[p + 3]));
1342                        k += jWidth4;
1343                        p += 4;
1344                    }
1345                    while (p < pEnd) {
1346                        sum = sum.add(block[k].multiply(v[p++]));
1347                        k += jWidth;
1348                    }
1349                    out[q] = out[q].add(sum);
1350                }
1351            }
1352        }
1353
1354        return out;
1355    }
1356
1357    /** {@inheritDoc} */
1358    @Override
1359    public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor) {
1360        visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1361        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1362            final int pStart = iBlock * BLOCK_SIZE;
1363            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1364            for (int p = pStart; p < pEnd; ++p) {
1365                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1366                    final int jWidth = blockWidth(jBlock);
1367                    final int qStart = jBlock * BLOCK_SIZE;
1368                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1369                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1370                    int k = (p - pStart) * jWidth;
1371                    for (int q = qStart; q < qEnd; ++q) {
1372                        block[k] = visitor.visit(p, q, block[k]);
1373                        ++k;
1374                    }
1375                }
1376             }
1377        }
1378        return visitor.end();
1379    }
1380
1381    /** {@inheritDoc} */
1382    @Override
1383    public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1384        visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1385        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1386            final int pStart = iBlock * BLOCK_SIZE;
1387            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1388            for (int p = pStart; p < pEnd; ++p) {
1389                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1390                    final int jWidth = blockWidth(jBlock);
1391                    final int qStart = jBlock * BLOCK_SIZE;
1392                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1393                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1394                    int k = (p - pStart) * jWidth;
1395                    for (int q = qStart; q < qEnd; ++q) {
1396                        visitor.visit(p, q, block[k]);
1397                        ++k;
1398                    }
1399                }
1400             }
1401        }
1402        return visitor.end();
1403    }
1404
1405    /** {@inheritDoc} */
1406    @Override
1407    public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1408                            final int startRow, final int endRow,
1409                            final int startColumn, final int endColumn)
1410        throws OutOfRangeException, NumberIsTooSmallException {
1411        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1412        visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1413        for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1414            final int p0     = iBlock * BLOCK_SIZE;
1415            final int pStart = FastMath.max(startRow, p0);
1416            final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1417            for (int p = pStart; p < pEnd; ++p) {
1418                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1419                    final int jWidth = blockWidth(jBlock);
1420                    final int q0     = jBlock * BLOCK_SIZE;
1421                    final int qStart = FastMath.max(startColumn, q0);
1422                    final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1423                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1424                    int k = (p - p0) * jWidth + qStart - q0;
1425                    for (int q = qStart; q < qEnd; ++q) {
1426                        block[k] = visitor.visit(p, q, block[k]);
1427                        ++k;
1428                    }
1429                }
1430             }
1431        }
1432        return visitor.end();
1433    }
1434
1435    /** {@inheritDoc} */
1436    @Override
1437    public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1438                            final int startRow, final int endRow,
1439                            final int startColumn, final int endColumn)
1440        throws OutOfRangeException, NumberIsTooSmallException {
1441        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1442        visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1443        for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1444            final int p0     = iBlock * BLOCK_SIZE;
1445            final int pStart = FastMath.max(startRow, p0);
1446            final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1447            for (int p = pStart; p < pEnd; ++p) {
1448                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1449                    final int jWidth = blockWidth(jBlock);
1450                    final int q0     = jBlock * BLOCK_SIZE;
1451                    final int qStart = FastMath.max(startColumn, q0);
1452                    final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1453                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1454                    int k = (p - p0) * jWidth + qStart - q0;
1455                    for (int q = qStart; q < qEnd; ++q) {
1456                        visitor.visit(p, q, block[k]);
1457                        ++k;
1458                    }
1459                }
1460             }
1461        }
1462        return visitor.end();
1463    }
1464
1465    /** {@inheritDoc} */
1466    @Override
1467    public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor) {
1468        visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1469        int blockIndex = 0;
1470        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1471            final int pStart = iBlock * BLOCK_SIZE;
1472            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1473            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1474                final int qStart = jBlock * BLOCK_SIZE;
1475                final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1476                final T[] block = blocks[blockIndex];
1477                int k = 0;
1478                for (int p = pStart; p < pEnd; ++p) {
1479                    for (int q = qStart; q < qEnd; ++q) {
1480                        block[k] = visitor.visit(p, q, block[k]);
1481                        ++k;
1482                    }
1483                }
1484                ++blockIndex;
1485            }
1486        }
1487        return visitor.end();
1488    }
1489
1490    /** {@inheritDoc} */
1491    @Override
1492    public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1493        visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1494        int blockIndex = 0;
1495        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1496            final int pStart = iBlock * BLOCK_SIZE;
1497            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1498            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1499                final int qStart = jBlock * BLOCK_SIZE;
1500                final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1501                final T[] block = blocks[blockIndex];
1502                int k = 0;
1503                for (int p = pStart; p < pEnd; ++p) {
1504                    for (int q = qStart; q < qEnd; ++q) {
1505                        visitor.visit(p, q, block[k]);
1506                        ++k;
1507                    }
1508                }
1509                ++blockIndex;
1510            }
1511        }
1512        return visitor.end();
1513    }
1514
1515    /** {@inheritDoc} */
1516    @Override
1517    public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1518                                  final int startRow, final int endRow,
1519                                  final int startColumn, final int endColumn)
1520        throws OutOfRangeException, NumberIsTooSmallException {
1521        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1522        visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1523        for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1524            final int p0     = iBlock * BLOCK_SIZE;
1525            final int pStart = FastMath.max(startRow, p0);
1526            final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1527            for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1528                final int jWidth = blockWidth(jBlock);
1529                final int q0     = jBlock * BLOCK_SIZE;
1530                final int qStart = FastMath.max(startColumn, q0);
1531                final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1532                final T[] block = blocks[iBlock * blockColumns + jBlock];
1533                for (int p = pStart; p < pEnd; ++p) {
1534                    int k = (p - p0) * jWidth + qStart - q0;
1535                    for (int q = qStart; q < qEnd; ++q) {
1536                        block[k] = visitor.visit(p, q, block[k]);
1537                        ++k;
1538                    }
1539                }
1540            }
1541        }
1542        return visitor.end();
1543    }
1544
1545    /** {@inheritDoc} */
1546    @Override
1547    public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1548                                  final int startRow, final int endRow,
1549                                  final int startColumn, final int endColumn)
1550        throws OutOfRangeException, NumberIsTooSmallException {
1551        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1552        visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1553        for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1554            final int p0     = iBlock * BLOCK_SIZE;
1555            final int pStart = FastMath.max(startRow, p0);
1556            final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1557            for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1558                final int jWidth = blockWidth(jBlock);
1559                final int q0     = jBlock * BLOCK_SIZE;
1560                final int qStart = FastMath.max(startColumn, q0);
1561                final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1562                final T[] block = blocks[iBlock * blockColumns + jBlock];
1563                for (int p = pStart; p < pEnd; ++p) {
1564                    int k = (p - p0) * jWidth + qStart - q0;
1565                    for (int q = qStart; q < qEnd; ++q) {
1566                        visitor.visit(p, q, block[k]);
1567                        ++k;
1568                    }
1569                }
1570            }
1571        }
1572        return visitor.end();
1573    }
1574
1575    /**
1576     * Get the height of a block.
1577     * @param blockRow row index (in block sense) of the block
1578     * @return height (number of rows) of the block
1579     */
1580    private int blockHeight(final int blockRow) {
1581        return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1582    }
1583
1584    /**
1585     * Get the width of a block.
1586     * @param blockColumn column index (in block sense) of the block
1587     * @return width (number of columns) of the block
1588     */
1589    private int blockWidth(final int blockColumn) {
1590        return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1591    }
1592}