View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math.linear;
19  
20  import java.io.Serializable;
21  
22  import org.apache.commons.math.Field;
23  import org.apache.commons.math.FieldElement;
24  import org.apache.commons.math.MathRuntimeException;
25  
26  /**
27   * Cache-friendly implementation of FieldMatrix using a flat arrays to store
28   * square blocks of the matrix.
29   * <p>
30   * This implementation is specially designed to be cache-friendly. Square blocks are
31   * stored as small arrays and allow efficient traversal of data both in row major direction
32   * and columns major direction, one block at a time. This greatly increases performances
33   * for algorithms that use crossed directions loops like multiplication or transposition.
34   * </p>
35   * <p>
36   * The size of square blocks is a static parameter. It may be tuned according to the cache
37   * size of the target computer processor. As a rule of thumbs, it should be the largest
38   * value that allows three blocks to be simultaneously cached (this is necessary for example
39   * for matrix multiplication). The default value is to use 36x36 blocks.
40   * </p>
41   * <p>
42   * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
43   * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
44   * blocks are flattened in row major order in single dimension arrays which are therefore
45   * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
46   * organized in row major order.
47   * </p>
48   * <p>
49   * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
50   * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
51   * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
52   * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
53   * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
54   * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
55   * rectangle.
56   * </p>
57   * <p>
58   * The layout complexity overhead versus simple mapping of matrices to java
59   * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
60   * to up to 3-fold improvements for matrices of moderate to large size.
61   * </p>
62   * @param <T> the type of the field elements
63   * @version $Revision: 825919 $ $Date: 2009-10-16 10:51:55 -0400 (Fri, 16 Oct 2009) $
64   * @since 2.0
65   */
66  public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
67  
68      /** Block size. */
69      public static final int BLOCK_SIZE = 36;
70  
71      /** Serializable version identifier */
72      private static final long serialVersionUID = -4602336630143123183L;
73  
74      /** Blocks of matrix entries. */
75      private final T blocks[][];
76  
77      /** Number of rows of the matrix. */
78      private final int rows;
79  
80      /** Number of columns of the matrix. */
81      private final int columns;
82  
83      /** Number of block rows of the matrix. */
84      private final int blockRows;
85  
86      /** Number of block columns of the matrix. */
87      private final int blockColumns;
88  
89      /**
90       * Create a new matrix with the supplied row and column dimensions.
91       *
92       * @param field field to which the elements belong
93       * @param rows  the number of rows in the new matrix
94       * @param columns  the number of columns in the new matrix
95       * @throws IllegalArgumentException if row or column dimension is not
96       *  positive
97       */
98      public BlockFieldMatrix(final Field<T> field, final int rows, final int columns)
99          throws IllegalArgumentException {
100 
101         super(field, rows, columns);
102         this.rows    = rows;
103         this.columns = columns;
104 
105         // number of blocks
106         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
107         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
108 
109         // allocate storage blocks, taking care of smaller ones at right and bottom
110         blocks = createBlocksLayout(field, rows, columns);
111 
112     }
113 
114     /**
115      * Create a new dense matrix copying entries from raw layout data.
116      * <p>The input array <em>must</em> already be in raw layout.</p>
117      * <p>Calling this constructor is equivalent to call:
118      * <pre>matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
119      *                                   toBlocksLayout(rawData), false);</pre>
120      * </p>
121      * @param rawData data for new matrix, in raw layout
122      *
123      * @exception IllegalArgumentException if <code>blockData</code> shape is
124      * inconsistent with block layout
125      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
126      */
127     public BlockFieldMatrix(final T[][] rawData)
128         throws IllegalArgumentException {
129         this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
130     }
131 
132     /**
133      * Create a new dense matrix copying entries from block layout data.
134      * <p>The input array <em>must</em> already be in blocks layout.</p>
135      * @param rows  the number of rows in the new matrix
136      * @param columns  the number of columns in the new matrix
137      * @param blockData data for new matrix
138      * @param copyArray if true, the input array will be copied, otherwise
139      * it will be referenced
140      *
141      * @exception IllegalArgumentException if <code>blockData</code> shape is
142      * inconsistent with block layout
143      * @see #createBlocksLayout(Field, int, int)
144      * @see #toBlocksLayout(FieldElement[][])
145      * @see #BlockFieldMatrix(FieldElement[][])
146      */
147     public BlockFieldMatrix(final int rows, final int columns,
148                             final T[][] blockData, final boolean copyArray)
149         throws IllegalArgumentException {
150 
151         super(extractField(blockData), rows, columns);
152         this.rows    = rows;
153         this.columns = columns;
154 
155         // number of blocks
156         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
157         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
158 
159         if (copyArray) {
160             // allocate storage blocks, taking care of smaller ones at right and bottom
161             blocks = buildArray(getField(), blockRows * blockColumns, -1);
162         } else {
163             // reference existing array
164             blocks = blockData;
165         }
166 
167         int index = 0;
168         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
169             final int iHeight = blockHeight(iBlock);
170             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
171                 if (blockData[index].length != iHeight * blockWidth(jBlock)) {
172                     throw MathRuntimeException.createIllegalArgumentException(
173                             "wrong array shape (block length = {0}, expected {1})",
174                             blockData[index].length, iHeight * blockWidth(jBlock));
175                 }
176                 if (copyArray) {
177                     blocks[index] = blockData[index].clone();
178                 }
179             }
180         }
181 
182     }
183 
184     /**
185      * Convert a data array from raw layout to blocks layout.
186      * <p>
187      * Raw layout is the straightforward layout where element at row i and
188      * column j is in array element <code>rawData[i][j]</code>. Blocks layout
189      * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
190      * is split in square blocks (except at right and bottom side where blocks may
191      * be rectangular to fit matrix size) and each block is stored in a flattened
192      * one-dimensional array.
193      * </p>
194      * <p>
195      * This method creates an array in blocks layout from an input array in raw layout.
196      * It can be used to provide the array argument of the {@link
197      * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
198      * constructor.
199      * </p>
200      * @param <T> the type of the field elements
201      * @param rawData data array in raw layout
202      * @return a new data array containing the same entries but in blocks layout
203      * @exception IllegalArgumentException if <code>rawData</code> is not rectangular
204      *  (not all rows have the same length)
205      * @see #createBlocksLayout(Field, int, int)
206      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
207      */
208     public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
209         throws IllegalArgumentException {
210 
211         final int rows         = rawData.length;
212         final int columns      = rawData[0].length;
213         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
214         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
215 
216         // safety checks
217         for (int i = 0; i < rawData.length; ++i) {
218             final int length = rawData[i].length;
219             if (length != columns) {
220                 throw MathRuntimeException.createIllegalArgumentException(
221                         "some rows have length {0} while others have length {1}",
222                         columns, length);
223             }
224         }
225 
226         // convert array
227         final Field<T> field = extractField(rawData);
228         final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
229         int blockIndex = 0;
230         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
231             final int pStart  = iBlock * BLOCK_SIZE;
232             final int pEnd    = Math.min(pStart + BLOCK_SIZE, rows);
233             final int iHeight = pEnd - pStart;
234             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
235                 final int qStart = jBlock * BLOCK_SIZE;
236                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
237                 final int jWidth = qEnd - qStart;
238 
239                 // allocate new block
240                 final T[] block = buildArray(field, iHeight * jWidth);
241                 blocks[blockIndex] = block;
242 
243                 // copy data
244                 int index = 0;
245                 for (int p = pStart; p < pEnd; ++p) {
246                     System.arraycopy(rawData[p], qStart, block, index, jWidth);
247                     index += jWidth;
248                 }
249 
250                 ++blockIndex;
251 
252             }
253         }
254 
255         return blocks;
256 
257     }
258 
259     /**
260      * Create a data array in blocks layout.
261      * <p>
262      * This method can be used to create the array argument of the {@link
263      * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
264      * constructor.
265      * </p>
266      * @param <T> the type of the field elements
267      * @param field field to which the elements belong
268      * @param rows  the number of rows in the new matrix
269      * @param columns  the number of columns in the new matrix
270      * @return a new data array in blocks layout
271      * @see #toBlocksLayout(FieldElement[][])
272      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
273      */
274     public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
275                                                                        final int rows, final int columns) {
276 
277         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
278         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
279 
280         final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
281         int blockIndex = 0;
282         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
283             final int pStart  = iBlock * BLOCK_SIZE;
284             final int pEnd    = Math.min(pStart + BLOCK_SIZE, rows);
285             final int iHeight = pEnd - pStart;
286             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
287                 final int qStart = jBlock * BLOCK_SIZE;
288                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
289                 final int jWidth = qEnd - qStart;
290                 blocks[blockIndex] = buildArray(field, iHeight * jWidth);
291                 ++blockIndex;
292             }
293         }
294 
295         return blocks;
296 
297     }
298 
299     /** {@inheritDoc} */
300     @Override
301     public FieldMatrix<T> createMatrix(final int rowDimension, final int columnDimension)
302         throws IllegalArgumentException {
303         return new BlockFieldMatrix<T>(getField(), rowDimension, columnDimension);
304     }
305 
306     /** {@inheritDoc} */
307     @Override
308     public FieldMatrix<T> copy() {
309 
310         // create an empty matrix
311         BlockFieldMatrix<T> copied = new BlockFieldMatrix<T>(getField(), rows, columns);
312 
313         // copy the blocks
314         for (int i = 0; i < blocks.length; ++i) {
315             System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
316         }
317 
318         return copied;
319 
320     }
321 
322     /** {@inheritDoc} */
323     @Override
324     public FieldMatrix<T> add(final FieldMatrix<T> m)
325         throws IllegalArgumentException {
326         try {
327             return add((BlockFieldMatrix<T>) m);
328         } catch (ClassCastException cce) {
329 
330             // safety check
331             checkAdditionCompatible(m);
332 
333             final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
334 
335             // perform addition block-wise, to ensure good cache behavior
336             int blockIndex = 0;
337             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
338                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
339 
340                     // perform addition on the current block
341                     final T[] outBlock = out.blocks[blockIndex];
342                     final T[] tBlock   = blocks[blockIndex];
343                     final int      pStart   = iBlock * BLOCK_SIZE;
344                     final int      pEnd     = Math.min(pStart + BLOCK_SIZE, rows);
345                     final int      qStart   = jBlock * BLOCK_SIZE;
346                     final int      qEnd     = Math.min(qStart + BLOCK_SIZE, columns);
347                     int k = 0;
348                     for (int p = pStart; p < pEnd; ++p) {
349                         for (int q = qStart; q < qEnd; ++q) {
350                             outBlock[k] = tBlock[k].add(m.getEntry(p, q));
351                             ++k;
352                         }
353                     }
354 
355                     // go to next block
356                     ++blockIndex;
357 
358                 }
359             }
360 
361             return out;
362 
363         }
364     }
365 
366     /**
367      * Compute the sum of this and <code>m</code>.
368      *
369      * @param m    matrix to be added
370      * @return     this + m
371      * @throws  IllegalArgumentException if m is not the same size as this
372      */
373     public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
374         throws IllegalArgumentException {
375 
376         // safety check
377         checkAdditionCompatible(m);
378 
379         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
380 
381         // perform addition block-wise, to ensure good cache behavior
382         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
383             final T[] outBlock = out.blocks[blockIndex];
384             final T[] tBlock   = blocks[blockIndex];
385             final T[] mBlock   = m.blocks[blockIndex];
386             for (int k = 0; k < outBlock.length; ++k) {
387                 outBlock[k] = tBlock[k].add(mBlock[k]);
388             }
389         }
390 
391         return out;
392 
393     }
394 
395     /** {@inheritDoc} */
396     @Override
397     public FieldMatrix<T> subtract(final FieldMatrix<T> m)
398         throws IllegalArgumentException {
399         try {
400             return subtract((BlockFieldMatrix<T>) m);
401         } catch (ClassCastException cce) {
402 
403             // safety check
404             checkSubtractionCompatible(m);
405 
406             final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
407 
408             // perform subtraction block-wise, to ensure good cache behavior
409             int blockIndex = 0;
410             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
411                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
412 
413                     // perform subtraction on the current block
414                     final T[] outBlock = out.blocks[blockIndex];
415                     final T[] tBlock   = blocks[blockIndex];
416                     final int      pStart   = iBlock * BLOCK_SIZE;
417                     final int      pEnd     = Math.min(pStart + BLOCK_SIZE, rows);
418                     final int      qStart   = jBlock * BLOCK_SIZE;
419                     final int      qEnd     = Math.min(qStart + BLOCK_SIZE, columns);
420                     int k = 0;
421                     for (int p = pStart; p < pEnd; ++p) {
422                         for (int q = qStart; q < qEnd; ++q) {
423                             outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
424                             ++k;
425                         }
426                     }
427 
428                     // go to next block
429                     ++blockIndex;
430 
431                 }
432             }
433 
434             return out;
435 
436         }
437     }
438 
439     /**
440      * Compute this minus <code>m</code>.
441      *
442      * @param m    matrix to be subtracted
443      * @return     this - m
444      * @throws  IllegalArgumentException if m is not the same size as this
445      */
446     public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m)
447         throws IllegalArgumentException {
448 
449         // safety check
450         checkSubtractionCompatible(m);
451 
452         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
453 
454         // perform subtraction block-wise, to ensure good cache behavior
455         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
456             final T[] outBlock = out.blocks[blockIndex];
457             final T[] tBlock   = blocks[blockIndex];
458             final T[] mBlock   = m.blocks[blockIndex];
459             for (int k = 0; k < outBlock.length; ++k) {
460                 outBlock[k] = tBlock[k].subtract(mBlock[k]);
461             }
462         }
463 
464         return out;
465 
466     }
467 
468     /** {@inheritDoc} */
469     @Override
470     public FieldMatrix<T> scalarAdd(final T d)
471         throws IllegalArgumentException {
472 
473         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
474 
475         // perform subtraction block-wise, to ensure good cache behavior
476         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
477             final T[] outBlock = out.blocks[blockIndex];
478             final T[] tBlock   = blocks[blockIndex];
479             for (int k = 0; k < outBlock.length; ++k) {
480                 outBlock[k] = tBlock[k].add(d);
481             }
482         }
483 
484         return out;
485 
486     }
487 
488     /** {@inheritDoc} */
489     @Override
490     public FieldMatrix<T> scalarMultiply(final T d)
491         throws IllegalArgumentException {
492 
493         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
494 
495         // perform subtraction block-wise, to ensure good cache behavior
496         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
497             final T[] outBlock = out.blocks[blockIndex];
498             final T[] tBlock   = blocks[blockIndex];
499             for (int k = 0; k < outBlock.length; ++k) {
500                 outBlock[k] = tBlock[k].multiply(d);
501             }
502         }
503 
504         return out;
505 
506     }
507 
508     /** {@inheritDoc} */
509     @Override
510     public FieldMatrix<T> multiply(final FieldMatrix<T> m)
511         throws IllegalArgumentException {
512         try {
513             return multiply((BlockFieldMatrix<T>) m);
514         } catch (ClassCastException cce) {
515 
516             // safety check
517             checkMultiplicationCompatible(m);
518 
519             final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.getColumnDimension());
520             final T zero = getField().getZero();
521 
522             // perform multiplication block-wise, to ensure good cache behavior
523             int blockIndex = 0;
524             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
525 
526                 final int pStart = iBlock * BLOCK_SIZE;
527                 final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
528 
529                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
530 
531                     final int qStart = jBlock * BLOCK_SIZE;
532                     final int qEnd   = Math.min(qStart + BLOCK_SIZE, m.getColumnDimension());
533 
534                     // select current block
535                     final T[] outBlock = out.blocks[blockIndex];
536 
537                     // perform multiplication on current block
538                     for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
539                         final int kWidth      = blockWidth(kBlock);
540                         final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
541                         final int rStart      = kBlock * BLOCK_SIZE;
542                         int k = 0;
543                         for (int p = pStart; p < pEnd; ++p) {
544                             final int lStart = (p - pStart) * kWidth;
545                             final int lEnd   = lStart + kWidth;
546                             for (int q = qStart; q < qEnd; ++q) {
547                                 T sum = zero;
548                                 int r = rStart;
549                                 for (int l = lStart; l < lEnd; ++l) {
550                                     sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
551                                     ++r;
552                                 }
553                                 outBlock[k] = outBlock[k].add(sum);
554                                 ++k;
555                             }
556                         }
557                     }
558 
559                     // go to next block
560                     ++blockIndex;
561 
562                 }
563             }
564 
565             return out;
566 
567         }
568     }
569 
570     /**
571      * Returns the result of postmultiplying this by m.
572      *
573      * @param m    matrix to postmultiply by
574      * @return     this * m
575      * @throws     IllegalArgumentException
576      *             if columnDimension(this) != rowDimension(m)
577      */
578     public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m) throws IllegalArgumentException {
579 
580         // safety check
581         checkMultiplicationCompatible(m);
582 
583         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.columns);
584         final T zero = getField().getZero();
585 
586         // perform multiplication block-wise, to ensure good cache behavior
587         int blockIndex = 0;
588         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
589 
590             final int pStart = iBlock * BLOCK_SIZE;
591             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
592 
593             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
594                 final int jWidth = out.blockWidth(jBlock);
595                 final int jWidth2 = jWidth  + jWidth;
596                 final int jWidth3 = jWidth2 + jWidth;
597                 final int jWidth4 = jWidth3 + jWidth;
598 
599                 // select current block
600                 final T[] outBlock = out.blocks[blockIndex];
601 
602                 // perform multiplication on current block
603                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
604                     final int kWidth = blockWidth(kBlock);
605                     final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
606                     final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
607                     int k = 0;
608                     for (int p = pStart; p < pEnd; ++p) {
609                         final int lStart = (p - pStart) * kWidth;
610                         final int lEnd   = lStart + kWidth;
611                         for (int nStart = 0; nStart < jWidth; ++nStart) {
612                             T sum = zero;
613                             int l = lStart;
614                             int n = nStart;
615                             while (l < lEnd - 3) {
616                                 sum = sum.
617                                       add(tBlock[l].multiply(mBlock[n])).
618                                       add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
619                                       add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
620                                       add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
621                                 l += 4;
622                                 n += jWidth4;
623                             }
624                             while (l < lEnd) {
625                                 sum = sum.add(tBlock[l++].multiply(mBlock[n]));
626                                 n += jWidth;
627                             }
628                             outBlock[k] = outBlock[k].add(sum);
629                             ++k;
630                         }
631                     }
632                 }
633 
634                 // go to next block
635                 ++blockIndex;
636 
637             }
638         }
639 
640         return out;
641 
642     }
643 
644     /** {@inheritDoc} */
645     @Override
646     public T[][] getData() {
647 
648         final T[][] data = buildArray(getField(), getRowDimension(), getColumnDimension());
649         final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
650 
651         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
652             final int pStart = iBlock * BLOCK_SIZE;
653             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
654             int regularPos   = 0;
655             int lastPos      = 0;
656             for (int p = pStart; p < pEnd; ++p) {
657                 final T[] dataP = data[p];
658                 int blockIndex = iBlock * blockColumns;
659                 int dataPos    = 0;
660                 for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
661                     System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
662                     dataPos += BLOCK_SIZE;
663                 }
664                 System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
665                 regularPos += BLOCK_SIZE;
666                 lastPos    += lastColumns;
667             }
668         }
669 
670         return data;
671 
672     }
673 
674     /** {@inheritDoc} */
675     @Override
676     public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
677                                    final int startColumn, final int endColumn)
678         throws MatrixIndexException {
679 
680         // safety checks
681         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
682 
683         // create the output matrix
684         final BlockFieldMatrix<T> out =
685             new BlockFieldMatrix<T>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
686 
687         // compute blocks shifts
688         final int blockStartRow    = startRow    / BLOCK_SIZE;
689         final int rowsShift        = startRow    % BLOCK_SIZE;
690         final int blockStartColumn = startColumn / BLOCK_SIZE;
691         final int columnsShift     = startColumn % BLOCK_SIZE;
692 
693         // perform extraction block-wise, to ensure good cache behavior
694         int pBlock = blockStartRow;
695         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
696             final int iHeight = out.blockHeight(iBlock);
697             int qBlock = blockStartColumn;
698             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
699                 final int jWidth = out.blockWidth(jBlock);
700 
701                 // handle one block of the output matrix
702                 final int      outIndex = iBlock * out.blockColumns + jBlock;
703                 final T[] outBlock = out.blocks[outIndex];
704                 final int      index    = pBlock * blockColumns + qBlock;
705                 final int      width    = blockWidth(qBlock);
706 
707                 final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
708                 final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
709                 if (heightExcess > 0) {
710                     // the submatrix block spans on two blocks rows from the original matrix
711                     if (widthExcess > 0) {
712                         // the submatrix block spans on two blocks columns from the original matrix
713                         final int width2 = blockWidth(qBlock + 1);
714                         copyBlockPart(blocks[index], width,
715                                       rowsShift, BLOCK_SIZE,
716                                       columnsShift, BLOCK_SIZE,
717                                       outBlock, jWidth, 0, 0);
718                         copyBlockPart(blocks[index + 1], width2,
719                                       rowsShift, BLOCK_SIZE,
720                                       0, widthExcess,
721                                       outBlock, jWidth, 0, jWidth - widthExcess);
722                         copyBlockPart(blocks[index + blockColumns], width,
723                                       0, heightExcess,
724                                       columnsShift, BLOCK_SIZE,
725                                       outBlock, jWidth, iHeight - heightExcess, 0);
726                         copyBlockPart(blocks[index + blockColumns + 1], width2,
727                                       0, heightExcess,
728                                       0, widthExcess,
729                                       outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
730                     } else {
731                         // the submatrix block spans on one block column from the original matrix
732                         copyBlockPart(blocks[index], width,
733                                       rowsShift, BLOCK_SIZE,
734                                       columnsShift, jWidth + columnsShift,
735                                       outBlock, jWidth, 0, 0);
736                         copyBlockPart(blocks[index + blockColumns], width,
737                                       0, heightExcess,
738                                       columnsShift, jWidth + columnsShift,
739                                       outBlock, jWidth, iHeight - heightExcess, 0);
740                     }
741                 } else {
742                     // the submatrix block spans on one block row from the original matrix
743                     if (widthExcess > 0) {
744                         // the submatrix block spans on two blocks columns from the original matrix
745                         final int width2 = blockWidth(qBlock + 1);
746                         copyBlockPart(blocks[index], width,
747                                       rowsShift, iHeight + rowsShift,
748                                       columnsShift, BLOCK_SIZE,
749                                       outBlock, jWidth, 0, 0);
750                         copyBlockPart(blocks[index + 1], width2,
751                                       rowsShift, iHeight + rowsShift,
752                                       0, widthExcess,
753                                       outBlock, jWidth, 0, jWidth - widthExcess);
754                     } else {
755                         // the submatrix block spans on one block column from the original matrix
756                         copyBlockPart(blocks[index], width,
757                                       rowsShift, iHeight + rowsShift,
758                                       columnsShift, jWidth + columnsShift,
759                                       outBlock, jWidth, 0, 0);
760                     }
761                }
762 
763                 ++qBlock;
764             }
765 
766             ++pBlock;
767 
768         }
769 
770         return out;
771 
772     }
773 
774     /**
775      * Copy a part of a block into another one
776      * <p>This method can be called only when the specified part fits in both
777      * blocks, no verification is done here.</p>
778      * @param srcBlock source block
779      * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
780      * @param srcStartRow start row in the source block
781      * @param srcEndRow end row (exclusive) in the source block
782      * @param srcStartColumn start column in the source block
783      * @param srcEndColumn end column (exclusive) in the source block
784      * @param dstBlock destination block
785      * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
786      * @param dstStartRow start row in the destination block
787      * @param dstStartColumn start column in the destination block
788      */
789     private void copyBlockPart(final T[] srcBlock, final int srcWidth,
790                                final int srcStartRow, final int srcEndRow,
791                                final int srcStartColumn, final int srcEndColumn,
792                                final T[] dstBlock, final int dstWidth,
793                                final int dstStartRow, final int dstStartColumn) {
794         final int length = srcEndColumn - srcStartColumn;
795         int srcPos = srcStartRow * srcWidth + srcStartColumn;
796         int dstPos = dstStartRow * dstWidth + dstStartColumn;
797         for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
798             System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
799             srcPos += srcWidth;
800             dstPos += dstWidth;
801         }
802     }
803 
804     /** {@inheritDoc} */
805     @Override
806     public void setSubMatrix(final T[][] subMatrix, final int row, final int column)
807         throws MatrixIndexException {
808 
809         // safety checks
810         final int refLength = subMatrix[0].length;
811         if (refLength < 1) {
812             throw MathRuntimeException.createIllegalArgumentException("matrix must have at least one column");
813         }
814         final int endRow    = row + subMatrix.length - 1;
815         final int endColumn = column + refLength - 1;
816         checkSubMatrixIndex(row, endRow, column, endColumn);
817         for (final T[] subRow : subMatrix) {
818             if (subRow.length != refLength) {
819                 throw MathRuntimeException.createIllegalArgumentException(
820                         "some rows have length {0} while others have length {1}",
821                         refLength, subRow.length);
822             }
823         }
824 
825         // compute blocks bounds
826         final int blockStartRow    = row / BLOCK_SIZE;
827         final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
828         final int blockStartColumn = column / BLOCK_SIZE;
829         final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
830 
831         // perform copy block-wise, to ensure good cache behavior
832         for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
833             final int iHeight  = blockHeight(iBlock);
834             final int firstRow = iBlock * BLOCK_SIZE;
835             final int iStart   = Math.max(row,    firstRow);
836             final int iEnd     = Math.min(endRow + 1, firstRow + iHeight);
837 
838             for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
839                 final int jWidth      = blockWidth(jBlock);
840                 final int firstColumn = jBlock * BLOCK_SIZE;
841                 final int jStart      = Math.max(column,    firstColumn);
842                 final int jEnd        = Math.min(endColumn + 1, firstColumn + jWidth);
843                 final int jLength     = jEnd - jStart;
844 
845                 // handle one block, row by row
846                 final T[] block = blocks[iBlock * blockColumns + jBlock];
847                 for (int i = iStart; i < iEnd; ++i) {
848                     System.arraycopy(subMatrix[i - row], jStart - column,
849                                      block, (i - firstRow) * jWidth + (jStart - firstColumn),
850                                      jLength);
851                 }
852 
853             }
854         }
855     }
856 
857     /** {@inheritDoc} */
858     @Override
859     public FieldMatrix<T> getRowMatrix(final int row)
860         throws MatrixIndexException {
861 
862         checkRowIndex(row);
863         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), 1, columns);
864 
865         // perform copy block-wise, to ensure good cache behavior
866         final int iBlock  = row / BLOCK_SIZE;
867         final int iRow    = row - iBlock * BLOCK_SIZE;
868         int outBlockIndex = 0;
869         int outIndex      = 0;
870         T[] outBlock = out.blocks[outBlockIndex];
871         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
872             final int jWidth     = blockWidth(jBlock);
873             final T[] block = blocks[iBlock * blockColumns + jBlock];
874             final int available  = outBlock.length - outIndex;
875             if (jWidth > available) {
876                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
877                 outBlock = out.blocks[++outBlockIndex];
878                 System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
879                 outIndex = jWidth - available;
880             } else {
881                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
882                 outIndex += jWidth;
883             }
884         }
885 
886         return out;
887 
888     }
889 
890     /** {@inheritDoc} */
891     @Override
892     public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
893         throws MatrixIndexException, InvalidMatrixException {
894         try {
895             setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
896         } catch (ClassCastException cce) {
897             super.setRowMatrix(row, matrix);
898         }
899     }
900 
901     /**
902      * Sets the entries in row number <code>row</code>
903      * as a row matrix.  Row indices start at 0.
904      *
905      * @param row the row to be set
906      * @param matrix row matrix (must have one row and the same number of columns
907      * as the instance)
908      * @throws MatrixIndexException if the specified row index is invalid
909      * @throws InvalidMatrixException if the matrix dimensions do not match one
910      * instance row
911      */
912     public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
913         throws MatrixIndexException, InvalidMatrixException {
914 
915         checkRowIndex(row);
916         final int nCols = getColumnDimension();
917         if ((matrix.getRowDimension() != 1) ||
918             (matrix.getColumnDimension() != nCols)) {
919             throw new InvalidMatrixException(
920                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
921                     matrix.getRowDimension(), matrix.getColumnDimension(),
922                     1, nCols);
923         }
924 
925         // perform copy block-wise, to ensure good cache behavior
926         final int iBlock = row / BLOCK_SIZE;
927         final int iRow   = row - iBlock * BLOCK_SIZE;
928         int mBlockIndex  = 0;
929         int mIndex       = 0;
930         T[] mBlock  = matrix.blocks[mBlockIndex];
931         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
932             final int jWidth     = blockWidth(jBlock);
933             final T[] block = blocks[iBlock * blockColumns + jBlock];
934             final int available  = mBlock.length - mIndex;
935             if (jWidth > available) {
936                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
937                 mBlock = matrix.blocks[++mBlockIndex];
938                 System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
939                 mIndex = jWidth - available;
940             } else {
941                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
942                 mIndex += jWidth;
943            }
944         }
945 
946     }
947 
948     /** {@inheritDoc} */
949     @Override
950     public FieldMatrix<T> getColumnMatrix(final int column)
951         throws MatrixIndexException {
952 
953         checkColumnIndex(column);
954         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, 1);
955 
956         // perform copy block-wise, to ensure good cache behavior
957         final int jBlock  = column / BLOCK_SIZE;
958         final int jColumn = column - jBlock * BLOCK_SIZE;
959         final int jWidth  = blockWidth(jBlock);
960         int outBlockIndex = 0;
961         int outIndex      = 0;
962         T[] outBlock = out.blocks[outBlockIndex];
963         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
964             final int iHeight = blockHeight(iBlock);
965             final T[] block = blocks[iBlock * blockColumns + jBlock];
966             for (int i = 0; i < iHeight; ++i) {
967                 if (outIndex >= outBlock.length) {
968                     outBlock = out.blocks[++outBlockIndex];
969                     outIndex = 0;
970                 }
971                 outBlock[outIndex++] = block[i * jWidth + jColumn];
972             }
973         }
974 
975         return out;
976 
977     }
978 
979     /** {@inheritDoc} */
980     @Override
981     public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
982         throws MatrixIndexException, InvalidMatrixException {
983         try {
984             setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
985         } catch (ClassCastException cce) {
986             super.setColumnMatrix(column, matrix);
987         }
988     }
989 
990     /**
991      * Sets the entries in column number <code>column</code>
992      * as a column matrix.  Column indices start at 0.
993      *
994      * @param column the column to be set
995      * @param matrix column matrix (must have one column and the same number of rows
996      * as the instance)
997      * @throws MatrixIndexException if the specified column index is invalid
998      * @throws InvalidMatrixException if the matrix dimensions do not match one
999      * instance column
1000      */
1001     void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
1002         throws MatrixIndexException, InvalidMatrixException {
1003 
1004         checkColumnIndex(column);
1005         final int nRows = getRowDimension();
1006         if ((matrix.getRowDimension() != nRows) ||
1007             (matrix.getColumnDimension() != 1)) {
1008             throw new InvalidMatrixException(
1009                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1010                     matrix.getRowDimension(), matrix.getColumnDimension(),
1011                     nRows, 1);
1012         }
1013 
1014         // perform copy block-wise, to ensure good cache behavior
1015         final int jBlock  = column / BLOCK_SIZE;
1016         final int jColumn = column - jBlock * BLOCK_SIZE;
1017         final int jWidth  = blockWidth(jBlock);
1018         int mBlockIndex = 0;
1019         int mIndex      = 0;
1020         T[] mBlock = matrix.blocks[mBlockIndex];
1021         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1022             final int iHeight = blockHeight(iBlock);
1023             final T[] block = blocks[iBlock * blockColumns + jBlock];
1024             for (int i = 0; i < iHeight; ++i) {
1025                 if (mIndex >= mBlock.length) {
1026                     mBlock = matrix.blocks[++mBlockIndex];
1027                     mIndex = 0;
1028                 }
1029                 block[i * jWidth + jColumn] = mBlock[mIndex++];
1030             }
1031         }
1032 
1033     }
1034 
1035     /** {@inheritDoc} */
1036     @Override
1037     public FieldVector<T> getRowVector(final int row)
1038         throws MatrixIndexException {
1039 
1040         checkRowIndex(row);
1041         final T[] outData = buildArray(getField(), columns);
1042 
1043         // perform copy block-wise, to ensure good cache behavior
1044         final int iBlock  = row / BLOCK_SIZE;
1045         final int iRow    = row - iBlock * BLOCK_SIZE;
1046         int outIndex      = 0;
1047         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1048             final int jWidth     = blockWidth(jBlock);
1049             final T[] block = blocks[iBlock * blockColumns + jBlock];
1050             System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1051             outIndex += jWidth;
1052         }
1053 
1054         return new ArrayFieldVector<T>(outData, false);
1055 
1056     }
1057 
1058     /** {@inheritDoc} */
1059     @Override
1060     public void setRowVector(final int row, final FieldVector<T> vector)
1061         throws MatrixIndexException, InvalidMatrixException {
1062         try {
1063             setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1064         } catch (ClassCastException cce) {
1065             super.setRowVector(row, vector);
1066         }
1067     }
1068 
1069     /** {@inheritDoc} */
1070     @Override
1071     public FieldVector<T> getColumnVector(final int column)
1072         throws MatrixIndexException {
1073 
1074         checkColumnIndex(column);
1075         final T[] outData = buildArray(getField(), rows);
1076 
1077         // perform copy block-wise, to ensure good cache behavior
1078         final int jBlock  = column / BLOCK_SIZE;
1079         final int jColumn = column - jBlock * BLOCK_SIZE;
1080         final int jWidth  = blockWidth(jBlock);
1081         int outIndex      = 0;
1082         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1083             final int iHeight = blockHeight(iBlock);
1084             final T[] block = blocks[iBlock * blockColumns + jBlock];
1085             for (int i = 0; i < iHeight; ++i) {
1086                 outData[outIndex++] = block[i * jWidth + jColumn];
1087             }
1088         }
1089 
1090         return new ArrayFieldVector<T>(outData, false);
1091 
1092     }
1093 
1094     /** {@inheritDoc} */
1095     @Override
1096     public void setColumnVector(final int column, final FieldVector<T> vector)
1097         throws MatrixIndexException, InvalidMatrixException {
1098         try {
1099             setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1100         } catch (ClassCastException cce) {
1101             super.setColumnVector(column, vector);
1102         }
1103     }
1104 
1105     /** {@inheritDoc} */
1106     @Override
1107     public T[] getRow(final int row)
1108         throws MatrixIndexException {
1109 
1110         checkRowIndex(row);
1111         final T[] out = buildArray(getField(), columns);
1112 
1113         // perform copy block-wise, to ensure good cache behavior
1114         final int iBlock  = row / BLOCK_SIZE;
1115         final int iRow    = row - iBlock * BLOCK_SIZE;
1116         int outIndex      = 0;
1117         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1118             final int jWidth     = blockWidth(jBlock);
1119             final T[] block = blocks[iBlock * blockColumns + jBlock];
1120             System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1121             outIndex += jWidth;
1122         }
1123 
1124         return out;
1125 
1126     }
1127 
1128     /** {@inheritDoc} */
1129     @Override
1130     public void setRow(final int row, final T[] array)
1131         throws MatrixIndexException, InvalidMatrixException {
1132 
1133         checkRowIndex(row);
1134         final int nCols = getColumnDimension();
1135         if (array.length != nCols) {
1136             throw new InvalidMatrixException(
1137                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1138                     1, array.length, 1, nCols);
1139         }
1140 
1141         // perform copy block-wise, to ensure good cache behavior
1142         final int iBlock  = row / BLOCK_SIZE;
1143         final int iRow    = row - iBlock * BLOCK_SIZE;
1144         int outIndex      = 0;
1145         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1146             final int jWidth     = blockWidth(jBlock);
1147             final T[] block = blocks[iBlock * blockColumns + jBlock];
1148             System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1149             outIndex += jWidth;
1150         }
1151 
1152     }
1153 
1154     /** {@inheritDoc} */
1155     @Override
1156     public T[] getColumn(final int column)
1157         throws MatrixIndexException {
1158 
1159         checkColumnIndex(column);
1160         final T[] out = buildArray(getField(), rows);
1161 
1162         // perform copy block-wise, to ensure good cache behavior
1163         final int jBlock  = column / BLOCK_SIZE;
1164         final int jColumn = column - jBlock * BLOCK_SIZE;
1165         final int jWidth  = blockWidth(jBlock);
1166         int outIndex      = 0;
1167         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1168             final int iHeight = blockHeight(iBlock);
1169             final T[] block = blocks[iBlock * blockColumns + jBlock];
1170             for (int i = 0; i < iHeight; ++i) {
1171                 out[outIndex++] = block[i * jWidth + jColumn];
1172             }
1173         }
1174 
1175         return out;
1176 
1177     }
1178 
1179     /** {@inheritDoc} */
1180     @Override
1181     public void setColumn(final int column, final T[] array)
1182         throws MatrixIndexException, InvalidMatrixException {
1183 
1184         checkColumnIndex(column);
1185         final int nRows = getRowDimension();
1186         if (array.length != nRows) {
1187             throw new InvalidMatrixException(
1188                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1189                     array.length, 1, nRows, 1);
1190         }
1191 
1192         // perform copy block-wise, to ensure good cache behavior
1193         final int jBlock  = column / BLOCK_SIZE;
1194         final int jColumn = column - jBlock * BLOCK_SIZE;
1195         final int jWidth  = blockWidth(jBlock);
1196         int outIndex      = 0;
1197         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1198             final int iHeight = blockHeight(iBlock);
1199             final T[] block = blocks[iBlock * blockColumns + jBlock];
1200             for (int i = 0; i < iHeight; ++i) {
1201                 block[i * jWidth + jColumn] = array[outIndex++];
1202             }
1203         }
1204 
1205     }
1206 
1207     /** {@inheritDoc} */
1208     @Override
1209     public T getEntry(final int row, final int column)
1210         throws MatrixIndexException {
1211         try {
1212             final int iBlock = row    / BLOCK_SIZE;
1213             final int jBlock = column / BLOCK_SIZE;
1214             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1215                                (column - jBlock * BLOCK_SIZE);
1216             return blocks[iBlock * blockColumns + jBlock][k];
1217         } catch (ArrayIndexOutOfBoundsException e) {
1218             throw new MatrixIndexException(
1219                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1220                     row, column, getRowDimension(), getColumnDimension());
1221         }
1222     }
1223 
1224     /** {@inheritDoc} */
1225     @Override
1226     public void setEntry(final int row, final int column, final T value)
1227         throws MatrixIndexException {
1228         try {
1229             final int iBlock = row    / BLOCK_SIZE;
1230             final int jBlock = column / BLOCK_SIZE;
1231             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1232                                (column - jBlock * BLOCK_SIZE);
1233             blocks[iBlock * blockColumns + jBlock][k] = value;
1234         } catch (ArrayIndexOutOfBoundsException e) {
1235             throw new MatrixIndexException(
1236                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1237                     row, column, getRowDimension(), getColumnDimension());
1238         }
1239     }
1240 
1241     /** {@inheritDoc} */
1242     @Override
1243     public void addToEntry(final int row, final int column, final T increment)
1244         throws MatrixIndexException {
1245         try {
1246             final int iBlock = row    / BLOCK_SIZE;
1247             final int jBlock = column / BLOCK_SIZE;
1248             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1249                                (column - jBlock * BLOCK_SIZE);
1250             final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1251             blockIJ[k] = blockIJ[k].add(increment);
1252         } catch (ArrayIndexOutOfBoundsException e) {
1253             throw new MatrixIndexException(
1254                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1255                     row, column, getRowDimension(), getColumnDimension());
1256         }
1257     }
1258 
1259     /** {@inheritDoc} */
1260     @Override
1261     public void multiplyEntry(final int row, final int column, final T factor)
1262         throws MatrixIndexException {
1263         try {
1264             final int iBlock = row    / BLOCK_SIZE;
1265             final int jBlock = column / BLOCK_SIZE;
1266             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1267                                (column - jBlock * BLOCK_SIZE);
1268             final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1269             blockIJ[k] = blockIJ[k].multiply(factor);
1270         } catch (ArrayIndexOutOfBoundsException e) {
1271             throw new MatrixIndexException(
1272                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1273                     row, column, getRowDimension(), getColumnDimension());
1274         }
1275     }
1276 
1277     /** {@inheritDoc} */
1278     @Override
1279     public FieldMatrix<T> transpose() {
1280 
1281         final int nRows = getRowDimension();
1282         final int nCols = getColumnDimension();
1283         final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), nCols, nRows);
1284 
1285         // perform transpose block-wise, to ensure good cache behavior
1286         int blockIndex = 0;
1287         for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1288             for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1289 
1290                 // transpose current block
1291                 final T[] outBlock = out.blocks[blockIndex];
1292                 final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1293                 final int      pStart   = iBlock * BLOCK_SIZE;
1294                 final int      pEnd     = Math.min(pStart + BLOCK_SIZE, columns);
1295                 final int      qStart   = jBlock * BLOCK_SIZE;
1296                 final int      qEnd     = Math.min(qStart + BLOCK_SIZE, rows);
1297                 int k = 0;
1298                 for (int p = pStart; p < pEnd; ++p) {
1299                     final int lInc = pEnd - pStart;
1300                     int l = p - pStart;
1301                     for (int q = qStart; q < qEnd; ++q) {
1302                         outBlock[k] = tBlock[l];
1303                         ++k;
1304                         l+= lInc;
1305                     }
1306                 }
1307 
1308                 // go to next block
1309                 ++blockIndex;
1310 
1311             }
1312         }
1313 
1314         return out;
1315 
1316     }
1317 
1318     /** {@inheritDoc} */
1319     @Override
1320     public int getRowDimension() {
1321         return rows;
1322     }
1323 
1324     /** {@inheritDoc} */
1325     @Override
1326     public int getColumnDimension() {
1327         return columns;
1328     }
1329 
1330     /** {@inheritDoc} */
1331     @Override
1332     public T[] operate(final T[] v)
1333         throws IllegalArgumentException {
1334 
1335         if (v.length != columns) {
1336             throw MathRuntimeException.createIllegalArgumentException(
1337                     "vector length mismatch: got {0} but expected {1}",
1338                     v.length, columns);
1339         }
1340         final T[] out = buildArray(getField(), rows);
1341         final T zero = getField().getZero();
1342 
1343         // perform multiplication block-wise, to ensure good cache behavior
1344         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1345             final int pStart = iBlock * BLOCK_SIZE;
1346             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1347             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1348                 final T[] block  = blocks[iBlock * blockColumns + jBlock];
1349                 final int      qStart = jBlock * BLOCK_SIZE;
1350                 final int      qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1351                 int k = 0;
1352                 for (int p = pStart; p < pEnd; ++p) {
1353                     T sum = zero;
1354                     int q = qStart;
1355                     while (q < qEnd - 3) {
1356                         sum = sum.
1357                               add(block[k].multiply(v[q])).
1358                               add(block[k + 1].multiply(v[q + 1])).
1359                               add(block[k + 2].multiply(v[q + 2])).
1360                               add(block[k + 3].multiply(v[q + 3]));
1361                         k += 4;
1362                         q += 4;
1363                     }
1364                     while (q < qEnd) {
1365                         sum = sum.add(block[k++].multiply(v[q++]));
1366                     }
1367                     out[p] = out[p].add(sum);
1368                 }
1369             }
1370         }
1371 
1372         return out;
1373 
1374     }
1375 
1376     /** {@inheritDoc} */
1377     @Override
1378     public T[] preMultiply(final T[] v)
1379         throws IllegalArgumentException {
1380 
1381         if (v.length != rows) {
1382             throw MathRuntimeException.createIllegalArgumentException(
1383                     "vector length mismatch: got {0} but expected {1}",
1384                     v.length, rows);
1385         }
1386         final T[] out = buildArray(getField(), columns);
1387         final T zero = getField().getZero();
1388 
1389         // perform multiplication block-wise, to ensure good cache behavior
1390         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1391             final int jWidth  = blockWidth(jBlock);
1392             final int jWidth2 = jWidth  + jWidth;
1393             final int jWidth3 = jWidth2 + jWidth;
1394             final int jWidth4 = jWidth3 + jWidth;
1395             final int qStart = jBlock * BLOCK_SIZE;
1396             final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1397             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1398                 final T[] block  = blocks[iBlock * blockColumns + jBlock];
1399                 final int      pStart = iBlock * BLOCK_SIZE;
1400                 final int      pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1401                 for (int q = qStart; q < qEnd; ++q) {
1402                     int k = q - qStart;
1403                     T sum = zero;
1404                     int p = pStart;
1405                     while (p < pEnd - 3) {
1406                         sum = sum.
1407                               add(block[k].multiply(v[p])).
1408                               add(block[k + jWidth].multiply(v[p + 1])).
1409                               add(block[k + jWidth2].multiply(v[p + 2])).
1410                               add(block[k + jWidth3].multiply(v[p + 3]));
1411                         k += jWidth4;
1412                         p += 4;
1413                     }
1414                     while (p < pEnd) {
1415                         sum = sum.add(block[k].multiply(v[p++]));
1416                         k += jWidth;
1417                     }
1418                     out[q] = out[q].add(sum);
1419                 }
1420             }
1421         }
1422 
1423         return out;
1424 
1425     }
1426 
1427     /** {@inheritDoc} */
1428     @Override
1429     public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor)
1430         throws MatrixVisitorException {
1431         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1432         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1433             final int pStart = iBlock * BLOCK_SIZE;
1434             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1435             for (int p = pStart; p < pEnd; ++p) {
1436                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1437                     final int jWidth = blockWidth(jBlock);
1438                     final int qStart = jBlock * BLOCK_SIZE;
1439                     final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1440                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1441                     int k = (p - pStart) * jWidth;
1442                     for (int q = qStart; q < qEnd; ++q) {
1443                         block[k] = visitor.visit(p, q, block[k]);
1444                         ++k;
1445                     }
1446                 }
1447              }
1448         }
1449         return visitor.end();
1450     }
1451 
1452     /** {@inheritDoc} */
1453     @Override
1454     public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor)
1455         throws MatrixVisitorException {
1456         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1457         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1458             final int pStart = iBlock * BLOCK_SIZE;
1459             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1460             for (int p = pStart; p < pEnd; ++p) {
1461                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1462                     final int jWidth = blockWidth(jBlock);
1463                     final int qStart = jBlock * BLOCK_SIZE;
1464                     final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1465                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1466                     int k = (p - pStart) * jWidth;
1467                     for (int q = qStart; q < qEnd; ++q) {
1468                         visitor.visit(p, q, block[k]);
1469                         ++k;
1470                     }
1471                 }
1472              }
1473         }
1474         return visitor.end();
1475     }
1476 
1477     /** {@inheritDoc} */
1478     @Override
1479     public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1480                                  final int startRow, final int endRow,
1481                                  final int startColumn, final int endColumn)
1482         throws MatrixIndexException, MatrixVisitorException {
1483         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1484         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1485         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1486             final int p0     = iBlock * BLOCK_SIZE;
1487             final int pStart = Math.max(startRow, p0);
1488             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1489             for (int p = pStart; p < pEnd; ++p) {
1490                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1491                     final int jWidth = blockWidth(jBlock);
1492                     final int q0     = jBlock * BLOCK_SIZE;
1493                     final int qStart = Math.max(startColumn, q0);
1494                     final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1495                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1496                     int k = (p - p0) * jWidth + qStart - q0;
1497                     for (int q = qStart; q < qEnd; ++q) {
1498                         block[k] = visitor.visit(p, q, block[k]);
1499                         ++k;
1500                     }
1501                 }
1502              }
1503         }
1504         return visitor.end();
1505     }
1506 
1507     /** {@inheritDoc} */
1508     @Override
1509     public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1510                                  final int startRow, final int endRow,
1511                                  final int startColumn, final int endColumn)
1512         throws MatrixIndexException, MatrixVisitorException {
1513         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1514         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1515         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1516             final int p0     = iBlock * BLOCK_SIZE;
1517             final int pStart = Math.max(startRow, p0);
1518             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1519             for (int p = pStart; p < pEnd; ++p) {
1520                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1521                     final int jWidth = blockWidth(jBlock);
1522                     final int q0     = jBlock * BLOCK_SIZE;
1523                     final int qStart = Math.max(startColumn, q0);
1524                     final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1525                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1526                     int k = (p - p0) * jWidth + qStart - q0;
1527                     for (int q = qStart; q < qEnd; ++q) {
1528                         visitor.visit(p, q, block[k]);
1529                         ++k;
1530                     }
1531                 }
1532              }
1533         }
1534         return visitor.end();
1535     }
1536 
1537     /** {@inheritDoc} */
1538     @Override
1539     public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor)
1540         throws MatrixVisitorException {
1541         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1542         int blockIndex = 0;
1543         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1544             final int pStart = iBlock * BLOCK_SIZE;
1545             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1546             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1547                 final int qStart = jBlock * BLOCK_SIZE;
1548                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1549                 final T[] block = blocks[blockIndex];
1550                 int k = 0;
1551                 for (int p = pStart; p < pEnd; ++p) {
1552                     for (int q = qStart; q < qEnd; ++q) {
1553                         block[k] = visitor.visit(p, q, block[k]);
1554                         ++k;
1555                     }
1556                 }
1557                 ++blockIndex;
1558             }
1559         }
1560         return visitor.end();
1561     }
1562 
1563     /** {@inheritDoc} */
1564     @Override
1565     public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor)
1566         throws MatrixVisitorException {
1567         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1568         int blockIndex = 0;
1569         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1570             final int pStart = iBlock * BLOCK_SIZE;
1571             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1572             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1573                 final int qStart = jBlock * BLOCK_SIZE;
1574                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1575                 final T[] block = blocks[blockIndex];
1576                 int k = 0;
1577                 for (int p = pStart; p < pEnd; ++p) {
1578                     for (int q = qStart; q < qEnd; ++q) {
1579                         visitor.visit(p, q, block[k]);
1580                         ++k;
1581                     }
1582                 }
1583                 ++blockIndex;
1584             }
1585         }
1586         return visitor.end();
1587     }
1588 
1589     /** {@inheritDoc} */
1590     @Override
1591     public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1592                                        final int startRow, final int endRow,
1593                                        final int startColumn, final int endColumn)
1594         throws MatrixIndexException, MatrixVisitorException {
1595         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1596         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1597         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1598             final int p0     = iBlock * BLOCK_SIZE;
1599             final int pStart = Math.max(startRow, p0);
1600             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1601             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1602                 final int jWidth = blockWidth(jBlock);
1603                 final int q0     = jBlock * BLOCK_SIZE;
1604                 final int qStart = Math.max(startColumn, q0);
1605                 final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1606                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1607                 for (int p = pStart; p < pEnd; ++p) {
1608                     int k = (p - p0) * jWidth + qStart - q0;
1609                     for (int q = qStart; q < qEnd; ++q) {
1610                         block[k] = visitor.visit(p, q, block[k]);
1611                         ++k;
1612                     }
1613                 }
1614             }
1615         }
1616         return visitor.end();
1617     }
1618 
1619     /** {@inheritDoc} */
1620     @Override
1621     public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1622                                        final int startRow, final int endRow,
1623                                        final int startColumn, final int endColumn)
1624         throws MatrixIndexException, MatrixVisitorException {
1625         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1626         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1627         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1628             final int p0     = iBlock * BLOCK_SIZE;
1629             final int pStart = Math.max(startRow, p0);
1630             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1631             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1632                 final int jWidth = blockWidth(jBlock);
1633                 final int q0     = jBlock * BLOCK_SIZE;
1634                 final int qStart = Math.max(startColumn, q0);
1635                 final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1636                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1637                 for (int p = pStart; p < pEnd; ++p) {
1638                     int k = (p - p0) * jWidth + qStart - q0;
1639                     for (int q = qStart; q < qEnd; ++q) {
1640                         visitor.visit(p, q, block[k]);
1641                         ++k;
1642                     }
1643                 }
1644             }
1645         }
1646         return visitor.end();
1647     }
1648 
1649     /**
1650      * Get the height of a block.
1651      * @param blockRow row index (in block sense) of the block
1652      * @return height (number of rows) of the block
1653      */
1654     private int blockHeight(final int blockRow) {
1655         return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1656     }
1657 
1658     /**
1659      * Get the width of a block.
1660      * @param blockColumn column index (in block sense) of the block
1661      * @return width (number of columns) of the block
1662      */
1663     private int blockWidth(final int blockColumn) {
1664         return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1665     }
1666 
1667 }