001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math.linear;
019
020 import java.io.Serializable;
021
022 import org.apache.commons.math.Field;
023 import org.apache.commons.math.FieldElement;
024 import org.apache.commons.math.exception.NoDataException;
025 import org.apache.commons.math.exception.DimensionMismatchException;
026 import org.apache.commons.math.exception.util.LocalizedFormats;
027 import org.apache.commons.math.util.FastMath;
028 import org.apache.commons.math.util.MathUtils;
029
030 /**
031 * Cache-friendly implementation of FieldMatrix using a flat arrays to store
032 * square blocks of the matrix.
033 * <p>
034 * This implementation is specially designed to be cache-friendly. Square blocks are
035 * stored as small arrays and allow efficient traversal of data both in row major direction
036 * and columns major direction, one block at a time. This greatly increases performances
037 * for algorithms that use crossed directions loops like multiplication or transposition.
038 * </p>
039 * <p>
040 * The size of square blocks is a static parameter. It may be tuned according to the cache
041 * size of the target computer processor. As a rule of thumbs, it should be the largest
042 * value that allows three blocks to be simultaneously cached (this is necessary for example
043 * for matrix multiplication). The default value is to use 36x36 blocks.
044 * </p>
045 * <p>
046 * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
047 * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
048 * blocks are flattened in row major order in single dimension arrays which are therefore
049 * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
050 * organized in row major order.
051 * </p>
052 * <p>
053 * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
054 * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
055 * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
056 * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
057 * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
058 * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
059 * rectangle.
060 * </p>
061 * <p>
062 * The layout complexity overhead versus simple mapping of matrices to java
063 * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
064 * to up to 3-fold improvements for matrices of moderate to large size.
065 * </p>
066 * @param <T> the type of the field elements
067 * @version $Id: BlockFieldMatrix.java 1132432 2011-06-05 14:59:29Z luc $
068 * @since 2.0
069 */
070 public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
071 /** Block size. */
072 public static final int BLOCK_SIZE = 36;
073 /** Serializable version identifier. */
074 private static final long serialVersionUID = -4602336630143123183L;
075 /** Blocks of matrix entries. */
076 private final T blocks[][];
077 /** Number of rows of the matrix. */
078 private final int rows;
079 /** Number of columns of the matrix. */
080 private final int columns;
081 /** Number of block rows of the matrix. */
082 private final int blockRows;
083 /** Number of block columns of the matrix. */
084 private final int blockColumns;
085
086 /**
087 * Create a new matrix with the supplied row and column dimensions.
088 *
089 * @param field Field to which the elements belong.
090 * @param rows Number of rows in the new matrix.
091 * @param columns Number of columns in the new matrix.
092 * @throws org.apache.commons.math.exception.NotStrictlyPositiveException
093 * if row or column dimension is not positive.
094 */
095 public BlockFieldMatrix(final Field<T> field, final int rows, final int columns) {
096 super(field, rows, columns);
097 this.rows = rows;
098 this.columns = columns;
099
100 // number of blocks
101 blockRows = (rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
102 blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
103
104 // allocate storage blocks, taking care of smaller ones at right and bottom
105 blocks = createBlocksLayout(field, rows, columns);
106 }
107
108 /**
109 * Create a new dense matrix copying entries from raw layout data.
110 * <p>The input array <em>must</em> already be in raw layout.</p>
111 * <p>Calling this constructor is equivalent to call:
112 * <pre>matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
113 * toBlocksLayout(rawData), false);</pre>
114 * </p>
115 * @param rawData Data for the new matrix, in raw layout.
116 *
117 * @exception DimensionMismatchException if the {@code blockData} shape is
118 * inconsistent with block layout.
119 * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
120 */
121 public BlockFieldMatrix(final T[][] rawData) {
122 this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
123 }
124
125 /**
126 * Create a new dense matrix copying entries from block layout data.
127 * <p>The input array <em>must</em> already be in blocks layout.</p>
128 * @param rows the number of rows in the new matrix
129 * @param columns the number of columns in the new matrix
130 * @param blockData data for new matrix
131 * @param copyArray if true, the input array will be copied, otherwise
132 * it will be referenced
133 *
134 * @exception DimensionMismatchException if the {@code blockData} shape is
135 * inconsistent with block layout.
136 * @see #createBlocksLayout(Field, int, int)
137 * @see #toBlocksLayout(FieldElement[][])
138 * @see #BlockFieldMatrix(FieldElement[][])
139 */
140 public BlockFieldMatrix(final int rows, final int columns,
141 final T[][] blockData, final boolean copyArray) {
142 super(extractField(blockData), rows, columns);
143 this.rows = rows;
144 this.columns = columns;
145
146 // number of blocks
147 blockRows = (rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
148 blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
149
150 if (copyArray) {
151 // allocate storage blocks, taking care of smaller ones at right and bottom
152 blocks = buildArray(getField(), blockRows * blockColumns, -1);
153 } else {
154 // reference existing array
155 blocks = blockData;
156 }
157
158 int index = 0;
159 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
160 final int iHeight = blockHeight(iBlock);
161 for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
162 if (blockData[index].length != iHeight * blockWidth(jBlock)) {
163 throw new DimensionMismatchException(blockData[index].length,
164 iHeight * blockWidth(jBlock));
165 }
166 if (copyArray) {
167 blocks[index] = blockData[index].clone();
168 }
169 }
170 }
171 }
172
173 /**
174 * Convert a data array from raw layout to blocks layout.
175 * <p>
176 * Raw layout is the straightforward layout where element at row i and
177 * column j is in array element <code>rawData[i][j]</code>. Blocks layout
178 * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
179 * is split in square blocks (except at right and bottom side where blocks may
180 * be rectangular to fit matrix size) and each block is stored in a flattened
181 * one-dimensional array.
182 * </p>
183 * <p>
184 * This method creates an array in blocks layout from an input array in raw layout.
185 * It can be used to provide the array argument of the {@link
186 * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
187 * constructor.
188 * </p>
189 * @param <T> Type of the field elements.
190 * @param rawData Data array in raw layout.
191 * @return a new data array containing the same entries but in blocks layout
192 * @throws DimensionMismatchException if {@code rawData} is not rectangular
193 * (not all rows have the same length).
194 * @see #createBlocksLayout(Field, int, int)
195 * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
196 */
197 public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData) {
198
199 final int rows = rawData.length;
200 final int columns = rawData[0].length;
201 final int blockRows = (rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
202 final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
203
204 // safety checks
205 for (int i = 0; i < rawData.length; ++i) {
206 final int length = rawData[i].length;
207 if (length != columns) {
208 throw new DimensionMismatchException(columns, length);
209 }
210 }
211
212 // convert array
213 final Field<T> field = extractField(rawData);
214 final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
215 int blockIndex = 0;
216 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
217 final int pStart = iBlock * BLOCK_SIZE;
218 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
219 final int iHeight = pEnd - pStart;
220 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
221 final int qStart = jBlock * BLOCK_SIZE;
222 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
223 final int jWidth = qEnd - qStart;
224
225 // allocate new block
226 final T[] block = buildArray(field, iHeight * jWidth);
227 blocks[blockIndex] = block;
228
229 // copy data
230 int index = 0;
231 for (int p = pStart; p < pEnd; ++p) {
232 System.arraycopy(rawData[p], qStart, block, index, jWidth);
233 index += jWidth;
234 }
235
236 ++blockIndex;
237 }
238 }
239
240 return blocks;
241 }
242
243 /**
244 * Create a data array in blocks layout.
245 * <p>
246 * This method can be used to create the array argument of the {@link
247 * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
248 * constructor.
249 * </p>
250 * @param <T> Type of the field elements.
251 * @param field Field to which the elements belong.
252 * @param rows Number of rows in the new matrix.
253 * @param columns Number of columns in the new matrix.
254 * @return a new data array in blocks layout.
255 * @see #toBlocksLayout(FieldElement[][])
256 * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
257 */
258 public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
259 final int rows, final int columns) {
260 final int blockRows = (rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
261 final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
262
263 final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
264 int blockIndex = 0;
265 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
266 final int pStart = iBlock * BLOCK_SIZE;
267 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
268 final int iHeight = pEnd - pStart;
269 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
270 final int qStart = jBlock * BLOCK_SIZE;
271 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
272 final int jWidth = qEnd - qStart;
273 blocks[blockIndex] = buildArray(field, iHeight * jWidth);
274 ++blockIndex;
275 }
276 }
277
278 return blocks;
279 }
280
281 /** {@inheritDoc} */
282 @Override
283 public FieldMatrix<T> createMatrix(final int rowDimension, final int columnDimension) {
284 return new BlockFieldMatrix<T>(getField(), rowDimension, columnDimension);
285 }
286
287 /** {@inheritDoc} */
288 @Override
289 public FieldMatrix<T> copy() {
290
291 // create an empty matrix
292 BlockFieldMatrix<T> copied = new BlockFieldMatrix<T>(getField(), rows, columns);
293
294 // copy the blocks
295 for (int i = 0; i < blocks.length; ++i) {
296 System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
297 }
298
299 return copied;
300 }
301
302 /** {@inheritDoc} */
303 @Override
304 public FieldMatrix<T> add(final FieldMatrix<T> m) {
305 try {
306 return add((BlockFieldMatrix<T>) m);
307 } catch (ClassCastException cce) {
308
309 // safety check
310 checkAdditionCompatible(m);
311
312 final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
313
314 // perform addition block-wise, to ensure good cache behavior
315 int blockIndex = 0;
316 for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
317 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
318
319 // perform addition on the current block
320 final T[] outBlock = out.blocks[blockIndex];
321 final T[] tBlock = blocks[blockIndex];
322 final int pStart = iBlock * BLOCK_SIZE;
323 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
324 final int qStart = jBlock * BLOCK_SIZE;
325 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
326 int k = 0;
327 for (int p = pStart; p < pEnd; ++p) {
328 for (int q = qStart; q < qEnd; ++q) {
329 outBlock[k] = tBlock[k].add(m.getEntry(p, q));
330 ++k;
331 }
332 }
333
334 // go to next block
335 ++blockIndex;
336
337 }
338 }
339
340 return out;
341 }
342 }
343
344 /**
345 * Compute the sum of this and <code>m</code>.
346 *
347 * @param m matrix to be added
348 * @return this + m
349 * @throws IllegalArgumentException if m is not the same size as this
350 */
351 public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m) {
352
353 // safety check
354 checkAdditionCompatible(m);
355
356 final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
357
358 // perform addition block-wise, to ensure good cache behavior
359 for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
360 final T[] outBlock = out.blocks[blockIndex];
361 final T[] tBlock = blocks[blockIndex];
362 final T[] mBlock = m.blocks[blockIndex];
363 for (int k = 0; k < outBlock.length; ++k) {
364 outBlock[k] = tBlock[k].add(mBlock[k]);
365 }
366 }
367
368 return out;
369 }
370
371 /** {@inheritDoc} */
372 @Override
373 public FieldMatrix<T> subtract(final FieldMatrix<T> m) {
374 try {
375 return subtract((BlockFieldMatrix<T>) m);
376 } catch (ClassCastException cce) {
377
378 // safety check
379 checkSubtractionCompatible(m);
380
381 final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
382
383 // perform subtraction block-wise, to ensure good cache behavior
384 int blockIndex = 0;
385 for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
386 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
387
388 // perform subtraction on the current block
389 final T[] outBlock = out.blocks[blockIndex];
390 final T[] tBlock = blocks[blockIndex];
391 final int pStart = iBlock * BLOCK_SIZE;
392 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
393 final int qStart = jBlock * BLOCK_SIZE;
394 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
395 int k = 0;
396 for (int p = pStart; p < pEnd; ++p) {
397 for (int q = qStart; q < qEnd; ++q) {
398 outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
399 ++k;
400 }
401 }
402
403 // go to next block
404 ++blockIndex;
405
406 }
407 }
408
409 return out;
410 }
411 }
412
413 /**
414 * Compute this minus <code>m</code>.
415 *
416 * @param m matrix to be subtracted
417 * @return this - m
418 * @throws IllegalArgumentException if m is not the same size as this
419 */
420 public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m) {
421 // safety check
422 checkSubtractionCompatible(m);
423
424 final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
425
426 // perform subtraction block-wise, to ensure good cache behavior
427 for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
428 final T[] outBlock = out.blocks[blockIndex];
429 final T[] tBlock = blocks[blockIndex];
430 final T[] mBlock = m.blocks[blockIndex];
431 for (int k = 0; k < outBlock.length; ++k) {
432 outBlock[k] = tBlock[k].subtract(mBlock[k]);
433 }
434 }
435
436 return out;
437 }
438
439 /** {@inheritDoc} */
440 @Override
441 public FieldMatrix<T> scalarAdd(final T d) {
442 final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
443
444 // perform subtraction block-wise, to ensure good cache behavior
445 for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
446 final T[] outBlock = out.blocks[blockIndex];
447 final T[] tBlock = blocks[blockIndex];
448 for (int k = 0; k < outBlock.length; ++k) {
449 outBlock[k] = tBlock[k].add(d);
450 }
451 }
452
453 return out;
454 }
455
456 /** {@inheritDoc} */
457 @Override
458 public FieldMatrix<T> scalarMultiply(final T d) {
459
460 final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
461
462 // perform subtraction block-wise, to ensure good cache behavior
463 for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
464 final T[] outBlock = out.blocks[blockIndex];
465 final T[] tBlock = blocks[blockIndex];
466 for (int k = 0; k < outBlock.length; ++k) {
467 outBlock[k] = tBlock[k].multiply(d);
468 }
469 }
470
471 return out;
472 }
473
474 /** {@inheritDoc} */
475 @Override
476 public FieldMatrix<T> multiply(final FieldMatrix<T> m) {
477 try {
478 return multiply((BlockFieldMatrix<T>) m);
479 } catch (ClassCastException cce) {
480
481 // safety check
482 checkMultiplicationCompatible(m);
483
484 final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.getColumnDimension());
485 final T zero = getField().getZero();
486
487 // perform multiplication block-wise, to ensure good cache behavior
488 int blockIndex = 0;
489 for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
490
491 final int pStart = iBlock * BLOCK_SIZE;
492 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
493
494 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
495
496 final int qStart = jBlock * BLOCK_SIZE;
497 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
498
499 // select current block
500 final T[] outBlock = out.blocks[blockIndex];
501
502 // perform multiplication on current block
503 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
504 final int kWidth = blockWidth(kBlock);
505 final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
506 final int rStart = kBlock * BLOCK_SIZE;
507 int k = 0;
508 for (int p = pStart; p < pEnd; ++p) {
509 final int lStart = (p - pStart) * kWidth;
510 final int lEnd = lStart + kWidth;
511 for (int q = qStart; q < qEnd; ++q) {
512 T sum = zero;
513 int r = rStart;
514 for (int l = lStart; l < lEnd; ++l) {
515 sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
516 ++r;
517 }
518 outBlock[k] = outBlock[k].add(sum);
519 ++k;
520 }
521 }
522 }
523
524 // go to next block
525 ++blockIndex;
526
527 }
528 }
529
530 return out;
531 }
532 }
533
534 /**
535 * Returns the result of postmultiplying this by m.
536 *
537 * @param m matrix to postmultiply by
538 * @return this * m
539 * @throws IllegalArgumentException
540 * if columnDimension(this) != rowDimension(m)
541 */
542 public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m) {
543
544 // safety check
545 checkMultiplicationCompatible(m);
546
547 final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.columns);
548 final T zero = getField().getZero();
549
550 // perform multiplication block-wise, to ensure good cache behavior
551 int blockIndex = 0;
552 for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
553
554 final int pStart = iBlock * BLOCK_SIZE;
555 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
556
557 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
558 final int jWidth = out.blockWidth(jBlock);
559 final int jWidth2 = jWidth + jWidth;
560 final int jWidth3 = jWidth2 + jWidth;
561 final int jWidth4 = jWidth3 + jWidth;
562
563 // select current block
564 final T[] outBlock = out.blocks[blockIndex];
565
566 // perform multiplication on current block
567 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
568 final int kWidth = blockWidth(kBlock);
569 final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
570 final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
571 int k = 0;
572 for (int p = pStart; p < pEnd; ++p) {
573 final int lStart = (p - pStart) * kWidth;
574 final int lEnd = lStart + kWidth;
575 for (int nStart = 0; nStart < jWidth; ++nStart) {
576 T sum = zero;
577 int l = lStart;
578 int n = nStart;
579 while (l < lEnd - 3) {
580 sum = sum.
581 add(tBlock[l].multiply(mBlock[n])).
582 add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
583 add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
584 add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
585 l += 4;
586 n += jWidth4;
587 }
588 while (l < lEnd) {
589 sum = sum.add(tBlock[l++].multiply(mBlock[n]));
590 n += jWidth;
591 }
592 outBlock[k] = outBlock[k].add(sum);
593 ++k;
594 }
595 }
596 }
597
598 // go to next block
599 ++blockIndex;
600 }
601 }
602
603 return out;
604 }
605
606 /** {@inheritDoc} */
607 @Override
608 public T[][] getData() {
609
610 final T[][] data = buildArray(getField(), getRowDimension(), getColumnDimension());
611 final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
612
613 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
614 final int pStart = iBlock * BLOCK_SIZE;
615 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
616 int regularPos = 0;
617 int lastPos = 0;
618 for (int p = pStart; p < pEnd; ++p) {
619 final T[] dataP = data[p];
620 int blockIndex = iBlock * blockColumns;
621 int dataPos = 0;
622 for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
623 System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
624 dataPos += BLOCK_SIZE;
625 }
626 System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
627 regularPos += BLOCK_SIZE;
628 lastPos += lastColumns;
629 }
630 }
631
632 return data;
633 }
634
635 /** {@inheritDoc} */
636 @Override
637 public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
638 final int startColumn, final int endColumn) {
639 // safety checks
640 checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
641
642 // create the output matrix
643 final BlockFieldMatrix<T> out =
644 new BlockFieldMatrix<T>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
645
646 // compute blocks shifts
647 final int blockStartRow = startRow / BLOCK_SIZE;
648 final int rowsShift = startRow % BLOCK_SIZE;
649 final int blockStartColumn = startColumn / BLOCK_SIZE;
650 final int columnsShift = startColumn % BLOCK_SIZE;
651
652 // perform extraction block-wise, to ensure good cache behavior
653 int pBlock = blockStartRow;
654 for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
655 final int iHeight = out.blockHeight(iBlock);
656 int qBlock = blockStartColumn;
657 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
658 final int jWidth = out.blockWidth(jBlock);
659
660 // handle one block of the output matrix
661 final int outIndex = iBlock * out.blockColumns + jBlock;
662 final T[] outBlock = out.blocks[outIndex];
663 final int index = pBlock * blockColumns + qBlock;
664 final int width = blockWidth(qBlock);
665
666 final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
667 final int widthExcess = jWidth + columnsShift - BLOCK_SIZE;
668 if (heightExcess > 0) {
669 // the submatrix block spans on two blocks rows from the original matrix
670 if (widthExcess > 0) {
671 // the submatrix block spans on two blocks columns from the original matrix
672 final int width2 = blockWidth(qBlock + 1);
673 copyBlockPart(blocks[index], width,
674 rowsShift, BLOCK_SIZE,
675 columnsShift, BLOCK_SIZE,
676 outBlock, jWidth, 0, 0);
677 copyBlockPart(blocks[index + 1], width2,
678 rowsShift, BLOCK_SIZE,
679 0, widthExcess,
680 outBlock, jWidth, 0, jWidth - widthExcess);
681 copyBlockPart(blocks[index + blockColumns], width,
682 0, heightExcess,
683 columnsShift, BLOCK_SIZE,
684 outBlock, jWidth, iHeight - heightExcess, 0);
685 copyBlockPart(blocks[index + blockColumns + 1], width2,
686 0, heightExcess,
687 0, widthExcess,
688 outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
689 } else {
690 // the submatrix block spans on one block column from the original matrix
691 copyBlockPart(blocks[index], width,
692 rowsShift, BLOCK_SIZE,
693 columnsShift, jWidth + columnsShift,
694 outBlock, jWidth, 0, 0);
695 copyBlockPart(blocks[index + blockColumns], width,
696 0, heightExcess,
697 columnsShift, jWidth + columnsShift,
698 outBlock, jWidth, iHeight - heightExcess, 0);
699 }
700 } else {
701 // the submatrix block spans on one block row from the original matrix
702 if (widthExcess > 0) {
703 // the submatrix block spans on two blocks columns from the original matrix
704 final int width2 = blockWidth(qBlock + 1);
705 copyBlockPart(blocks[index], width,
706 rowsShift, iHeight + rowsShift,
707 columnsShift, BLOCK_SIZE,
708 outBlock, jWidth, 0, 0);
709 copyBlockPart(blocks[index + 1], width2,
710 rowsShift, iHeight + rowsShift,
711 0, widthExcess,
712 outBlock, jWidth, 0, jWidth - widthExcess);
713 } else {
714 // the submatrix block spans on one block column from the original matrix
715 copyBlockPart(blocks[index], width,
716 rowsShift, iHeight + rowsShift,
717 columnsShift, jWidth + columnsShift,
718 outBlock, jWidth, 0, 0);
719 }
720 }
721 ++qBlock;
722 }
723 ++pBlock;
724 }
725
726 return out;
727 }
728
729 /**
730 * Copy a part of a block into another one
731 * <p>This method can be called only when the specified part fits in both
732 * blocks, no verification is done here.</p>
733 * @param srcBlock source block
734 * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
735 * @param srcStartRow start row in the source block
736 * @param srcEndRow end row (exclusive) in the source block
737 * @param srcStartColumn start column in the source block
738 * @param srcEndColumn end column (exclusive) in the source block
739 * @param dstBlock destination block
740 * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
741 * @param dstStartRow start row in the destination block
742 * @param dstStartColumn start column in the destination block
743 */
744 private void copyBlockPart(final T[] srcBlock, final int srcWidth,
745 final int srcStartRow, final int srcEndRow,
746 final int srcStartColumn, final int srcEndColumn,
747 final T[] dstBlock, final int dstWidth,
748 final int dstStartRow, final int dstStartColumn) {
749 final int length = srcEndColumn - srcStartColumn;
750 int srcPos = srcStartRow * srcWidth + srcStartColumn;
751 int dstPos = dstStartRow * dstWidth + dstStartColumn;
752 for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
753 System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
754 srcPos += srcWidth;
755 dstPos += dstWidth;
756 }
757 }
758
759 /** {@inheritDoc} */
760 @Override
761 public void setSubMatrix(final T[][] subMatrix, final int row, final int column) {
762 // safety checks
763 MathUtils.checkNotNull(subMatrix);
764 final int refLength = subMatrix[0].length;
765 if (refLength == 0) {
766 throw new NoDataException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
767 }
768 final int endRow = row + subMatrix.length - 1;
769 final int endColumn = column + refLength - 1;
770 checkSubMatrixIndex(row, endRow, column, endColumn);
771 for (final T[] subRow : subMatrix) {
772 if (subRow.length != refLength) {
773 throw new DimensionMismatchException(refLength, subRow.length);
774 }
775 }
776
777 // compute blocks bounds
778 final int blockStartRow = row / BLOCK_SIZE;
779 final int blockEndRow = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
780 final int blockStartColumn = column / BLOCK_SIZE;
781 final int blockEndColumn = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
782
783 // perform copy block-wise, to ensure good cache behavior
784 for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
785 final int iHeight = blockHeight(iBlock);
786 final int firstRow = iBlock * BLOCK_SIZE;
787 final int iStart = FastMath.max(row, firstRow);
788 final int iEnd = FastMath.min(endRow + 1, firstRow + iHeight);
789
790 for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
791 final int jWidth = blockWidth(jBlock);
792 final int firstColumn = jBlock * BLOCK_SIZE;
793 final int jStart = FastMath.max(column, firstColumn);
794 final int jEnd = FastMath.min(endColumn + 1, firstColumn + jWidth);
795 final int jLength = jEnd - jStart;
796
797 // handle one block, row by row
798 final T[] block = blocks[iBlock * blockColumns + jBlock];
799 for (int i = iStart; i < iEnd; ++i) {
800 System.arraycopy(subMatrix[i - row], jStart - column,
801 block, (i - firstRow) * jWidth + (jStart - firstColumn),
802 jLength);
803 }
804
805 }
806 }
807 }
808
809 /** {@inheritDoc} */
810 @Override
811 public FieldMatrix<T> getRowMatrix(final int row) {
812 checkRowIndex(row);
813 final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), 1, columns);
814
815 // perform copy block-wise, to ensure good cache behavior
816 final int iBlock = row / BLOCK_SIZE;
817 final int iRow = row - iBlock * BLOCK_SIZE;
818 int outBlockIndex = 0;
819 int outIndex = 0;
820 T[] outBlock = out.blocks[outBlockIndex];
821 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
822 final int jWidth = blockWidth(jBlock);
823 final T[] block = blocks[iBlock * blockColumns + jBlock];
824 final int available = outBlock.length - outIndex;
825 if (jWidth > available) {
826 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
827 outBlock = out.blocks[++outBlockIndex];
828 System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
829 outIndex = jWidth - available;
830 } else {
831 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
832 outIndex += jWidth;
833 }
834 }
835
836 return out;
837 }
838
839 /** {@inheritDoc} */
840 @Override
841 public void setRowMatrix(final int row, final FieldMatrix<T> matrix) {
842 try {
843 setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
844 } catch (ClassCastException cce) {
845 super.setRowMatrix(row, matrix);
846 }
847 }
848
849 /**
850 * Sets the entries in row number <code>row</code>
851 * as a row matrix. Row indices start at 0.
852 *
853 * @param row the row to be set
854 * @param matrix row matrix (must have one row and the same number of columns
855 * as the instance)
856 * @throws org.apache.commons.math.exception.OutOfRangeException if the
857 * specified row index is invalid.
858 * @throws MatrixDimensionMismatchException if the matrix dimensions do
859 * not match one instance row.
860 */
861 public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix) {
862 checkRowIndex(row);
863 final int nCols = getColumnDimension();
864 if ((matrix.getRowDimension() != 1) ||
865 (matrix.getColumnDimension() != nCols)) {
866 throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
867 matrix.getColumnDimension(),
868 1, nCols);
869 }
870
871 // perform copy block-wise, to ensure good cache behavior
872 final int iBlock = row / BLOCK_SIZE;
873 final int iRow = row - iBlock * BLOCK_SIZE;
874 int mBlockIndex = 0;
875 int mIndex = 0;
876 T[] mBlock = matrix.blocks[mBlockIndex];
877 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
878 final int jWidth = blockWidth(jBlock);
879 final T[] block = blocks[iBlock * blockColumns + jBlock];
880 final int available = mBlock.length - mIndex;
881 if (jWidth > available) {
882 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
883 mBlock = matrix.blocks[++mBlockIndex];
884 System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
885 mIndex = jWidth - available;
886 } else {
887 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
888 mIndex += jWidth;
889 }
890 }
891 }
892
893 /** {@inheritDoc} */
894 @Override
895 public FieldMatrix<T> getColumnMatrix(final int column) {
896 checkColumnIndex(column);
897 final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, 1);
898
899 // perform copy block-wise, to ensure good cache behavior
900 final int jBlock = column / BLOCK_SIZE;
901 final int jColumn = column - jBlock * BLOCK_SIZE;
902 final int jWidth = blockWidth(jBlock);
903 int outBlockIndex = 0;
904 int outIndex = 0;
905 T[] outBlock = out.blocks[outBlockIndex];
906 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
907 final int iHeight = blockHeight(iBlock);
908 final T[] block = blocks[iBlock * blockColumns + jBlock];
909 for (int i = 0; i < iHeight; ++i) {
910 if (outIndex >= outBlock.length) {
911 outBlock = out.blocks[++outBlockIndex];
912 outIndex = 0;
913 }
914 outBlock[outIndex++] = block[i * jWidth + jColumn];
915 }
916 }
917
918 return out;
919 }
920
921 /** {@inheritDoc} */
922 @Override
923 public void setColumnMatrix(final int column, final FieldMatrix<T> matrix) {
924 try {
925 setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
926 } catch (ClassCastException cce) {
927 super.setColumnMatrix(column, matrix);
928 }
929 }
930
931 /**
932 * Sets the entries in column number {@code column}
933 * as a column matrix. Column indices start at 0.
934 *
935 * @param column Column to be set.
936 * @param matrix Column matrix (must have one column and the same number of rows
937 * as the instance).
938 * @throws org.apache.commons.math.exception.OutOfRangeException if
939 * the specified column index is invalid.
940 * @throws MatrixDimensionMismatchException if the matrix dimensions do
941 * not match one instance column.
942 */
943 void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix) {
944 checkColumnIndex(column);
945 final int nRows = getRowDimension();
946 if ((matrix.getRowDimension() != nRows) ||
947 (matrix.getColumnDimension() != 1)) {
948 throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
949 matrix.getColumnDimension(),
950 nRows, 1);
951 }
952
953 // perform copy block-wise, to ensure good cache behavior
954 final int jBlock = column / BLOCK_SIZE;
955 final int jColumn = column - jBlock * BLOCK_SIZE;
956 final int jWidth = blockWidth(jBlock);
957 int mBlockIndex = 0;
958 int mIndex = 0;
959 T[] mBlock = matrix.blocks[mBlockIndex];
960 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
961 final int iHeight = blockHeight(iBlock);
962 final T[] block = blocks[iBlock * blockColumns + jBlock];
963 for (int i = 0; i < iHeight; ++i) {
964 if (mIndex >= mBlock.length) {
965 mBlock = matrix.blocks[++mBlockIndex];
966 mIndex = 0;
967 }
968 block[i * jWidth + jColumn] = mBlock[mIndex++];
969 }
970 }
971 }
972
973 /** {@inheritDoc} */
974 @Override
975 public FieldVector<T> getRowVector(final int row) {
976 checkRowIndex(row);
977 final T[] outData = buildArray(getField(), columns);
978
979 // perform copy block-wise, to ensure good cache behavior
980 final int iBlock = row / BLOCK_SIZE;
981 final int iRow = row - iBlock * BLOCK_SIZE;
982 int outIndex = 0;
983 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
984 final int jWidth = blockWidth(jBlock);
985 final T[] block = blocks[iBlock * blockColumns + jBlock];
986 System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
987 outIndex += jWidth;
988 }
989
990 return new ArrayFieldVector<T>(getField(), outData, false);
991 }
992
993 /** {@inheritDoc} */
994 @Override
995 public void setRowVector(final int row, final FieldVector<T> vector) {
996 try {
997 setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
998 } catch (ClassCastException cce) {
999 super.setRowVector(row, vector);
1000 }
1001 }
1002
1003 /** {@inheritDoc} */
1004 @Override
1005 public FieldVector<T> getColumnVector(final int column) {
1006 checkColumnIndex(column);
1007 final T[] outData = buildArray(getField(), rows);
1008
1009 // perform copy block-wise, to ensure good cache behavior
1010 final int jBlock = column / BLOCK_SIZE;
1011 final int jColumn = column - jBlock * BLOCK_SIZE;
1012 final int jWidth = blockWidth(jBlock);
1013 int outIndex = 0;
1014 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1015 final int iHeight = blockHeight(iBlock);
1016 final T[] block = blocks[iBlock * blockColumns + jBlock];
1017 for (int i = 0; i < iHeight; ++i) {
1018 outData[outIndex++] = block[i * jWidth + jColumn];
1019 }
1020 }
1021
1022 return new ArrayFieldVector<T>(getField(), outData, false);
1023 }
1024
1025 /** {@inheritDoc} */
1026 @Override
1027 public void setColumnVector(final int column, final FieldVector<T> vector) {
1028 try {
1029 setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1030 } catch (ClassCastException cce) {
1031 super.setColumnVector(column, vector);
1032 }
1033 }
1034
1035 /** {@inheritDoc} */
1036 @Override
1037 public T[] getRow(final int row) {
1038 checkRowIndex(row);
1039 final T[] out = buildArray(getField(), columns);
1040
1041 // perform copy block-wise, to ensure good cache behavior
1042 final int iBlock = row / BLOCK_SIZE;
1043 final int iRow = row - iBlock * BLOCK_SIZE;
1044 int outIndex = 0;
1045 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1046 final int jWidth = blockWidth(jBlock);
1047 final T[] block = blocks[iBlock * blockColumns + jBlock];
1048 System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1049 outIndex += jWidth;
1050 }
1051
1052 return out;
1053 }
1054
1055 /** {@inheritDoc} */
1056 @Override
1057 public void setRow(final int row, final T[] array) {
1058 checkRowIndex(row);
1059 final int nCols = getColumnDimension();
1060 if (array.length != nCols) {
1061 throw new MatrixDimensionMismatchException(1, array.length, 1, nCols);
1062 }
1063
1064 // perform copy block-wise, to ensure good cache behavior
1065 final int iBlock = row / BLOCK_SIZE;
1066 final int iRow = row - iBlock * BLOCK_SIZE;
1067 int outIndex = 0;
1068 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1069 final int jWidth = blockWidth(jBlock);
1070 final T[] block = blocks[iBlock * blockColumns + jBlock];
1071 System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1072 outIndex += jWidth;
1073 }
1074 }
1075
1076 /** {@inheritDoc} */
1077 @Override
1078 public T[] getColumn(final int column) {
1079 checkColumnIndex(column);
1080 final T[] out = buildArray(getField(), rows);
1081
1082 // perform copy block-wise, to ensure good cache behavior
1083 final int jBlock = column / BLOCK_SIZE;
1084 final int jColumn = column - jBlock * BLOCK_SIZE;
1085 final int jWidth = blockWidth(jBlock);
1086 int outIndex = 0;
1087 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1088 final int iHeight = blockHeight(iBlock);
1089 final T[] block = blocks[iBlock * blockColumns + jBlock];
1090 for (int i = 0; i < iHeight; ++i) {
1091 out[outIndex++] = block[i * jWidth + jColumn];
1092 }
1093 }
1094
1095 return out;
1096 }
1097
1098 /** {@inheritDoc} */
1099 @Override
1100 public void setColumn(final int column, final T[] array) {
1101 checkColumnIndex(column);
1102 final int nRows = getRowDimension();
1103 if (array.length != nRows) {
1104 throw new MatrixDimensionMismatchException(array.length, 1, nRows, 1);
1105 }
1106
1107 // perform copy block-wise, to ensure good cache behavior
1108 final int jBlock = column / BLOCK_SIZE;
1109 final int jColumn = column - jBlock * BLOCK_SIZE;
1110 final int jWidth = blockWidth(jBlock);
1111 int outIndex = 0;
1112 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1113 final int iHeight = blockHeight(iBlock);
1114 final T[] block = blocks[iBlock * blockColumns + jBlock];
1115 for (int i = 0; i < iHeight; ++i) {
1116 block[i * jWidth + jColumn] = array[outIndex++];
1117 }
1118 }
1119 }
1120
1121 /** {@inheritDoc} */
1122 @Override
1123 public T getEntry(final int row, final int column) {
1124 checkRowIndex(row);
1125 checkColumnIndex(column);
1126
1127 final int iBlock = row / BLOCK_SIZE;
1128 final int jBlock = column / BLOCK_SIZE;
1129 final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1130 (column - jBlock * BLOCK_SIZE);
1131
1132 return blocks[iBlock * blockColumns + jBlock][k];
1133 }
1134
1135 /** {@inheritDoc} */
1136 @Override
1137 public void setEntry(final int row, final int column, final T value) {
1138 checkRowIndex(row);
1139 checkColumnIndex(column);
1140
1141 final int iBlock = row / BLOCK_SIZE;
1142 final int jBlock = column / BLOCK_SIZE;
1143 final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1144 (column - jBlock * BLOCK_SIZE);
1145
1146 blocks[iBlock * blockColumns + jBlock][k] = value;
1147 }
1148
1149 /** {@inheritDoc} */
1150 @Override
1151 public void addToEntry(final int row, final int column, final T increment) {
1152 checkRowIndex(row);
1153 checkColumnIndex(column);
1154
1155 final int iBlock = row / BLOCK_SIZE;
1156 final int jBlock = column / BLOCK_SIZE;
1157 final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1158 (column - jBlock * BLOCK_SIZE);
1159 final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1160
1161 blockIJ[k] = blockIJ[k].add(increment);
1162 }
1163
1164 /** {@inheritDoc} */
1165 @Override
1166 public void multiplyEntry(final int row, final int column, final T factor) {
1167 checkRowIndex(row);
1168 checkColumnIndex(column);
1169
1170 final int iBlock = row / BLOCK_SIZE;
1171 final int jBlock = column / BLOCK_SIZE;
1172 final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1173 (column - jBlock * BLOCK_SIZE);
1174 final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1175
1176 blockIJ[k] = blockIJ[k].multiply(factor);
1177 }
1178
1179 /** {@inheritDoc} */
1180 @Override
1181 public FieldMatrix<T> transpose() {
1182 final int nRows = getRowDimension();
1183 final int nCols = getColumnDimension();
1184 final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), nCols, nRows);
1185
1186 // perform transpose block-wise, to ensure good cache behavior
1187 int blockIndex = 0;
1188 for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1189 for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1190
1191 // transpose current block
1192 final T[] outBlock = out.blocks[blockIndex];
1193 final T[] tBlock = blocks[jBlock * blockColumns + iBlock];
1194 final int pStart = iBlock * BLOCK_SIZE;
1195 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, columns);
1196 final int qStart = jBlock * BLOCK_SIZE;
1197 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, rows);
1198 int k = 0;
1199 for (int p = pStart; p < pEnd; ++p) {
1200 final int lInc = pEnd - pStart;
1201 int l = p - pStart;
1202 for (int q = qStart; q < qEnd; ++q) {
1203 outBlock[k] = tBlock[l];
1204 ++k;
1205 l+= lInc;
1206 }
1207 }
1208
1209 // go to next block
1210 ++blockIndex;
1211
1212 }
1213 }
1214
1215 return out;
1216 }
1217
1218 /** {@inheritDoc} */
1219 @Override
1220 public int getRowDimension() {
1221 return rows;
1222 }
1223
1224 /** {@inheritDoc} */
1225 @Override
1226 public int getColumnDimension() {
1227 return columns;
1228 }
1229
1230 /** {@inheritDoc} */
1231 @Override
1232 public T[] operate(final T[] v) {
1233 if (v.length != columns) {
1234 throw new DimensionMismatchException(v.length, columns);
1235 }
1236 final T[] out = buildArray(getField(), rows);
1237 final T zero = getField().getZero();
1238
1239 // perform multiplication block-wise, to ensure good cache behavior
1240 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1241 final int pStart = iBlock * BLOCK_SIZE;
1242 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1243 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1244 final T[] block = blocks[iBlock * blockColumns + jBlock];
1245 final int qStart = jBlock * BLOCK_SIZE;
1246 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1247 int k = 0;
1248 for (int p = pStart; p < pEnd; ++p) {
1249 T sum = zero;
1250 int q = qStart;
1251 while (q < qEnd - 3) {
1252 sum = sum.
1253 add(block[k].multiply(v[q])).
1254 add(block[k + 1].multiply(v[q + 1])).
1255 add(block[k + 2].multiply(v[q + 2])).
1256 add(block[k + 3].multiply(v[q + 3]));
1257 k += 4;
1258 q += 4;
1259 }
1260 while (q < qEnd) {
1261 sum = sum.add(block[k++].multiply(v[q++]));
1262 }
1263 out[p] = out[p].add(sum);
1264 }
1265 }
1266 }
1267
1268 return out;
1269 }
1270
1271 /** {@inheritDoc} */
1272 @Override
1273 public T[] preMultiply(final T[] v) {
1274
1275 if (v.length != rows) {
1276 throw new DimensionMismatchException(v.length, rows);
1277 }
1278 final T[] out = buildArray(getField(), columns);
1279 final T zero = getField().getZero();
1280
1281 // perform multiplication block-wise, to ensure good cache behavior
1282 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1283 final int jWidth = blockWidth(jBlock);
1284 final int jWidth2 = jWidth + jWidth;
1285 final int jWidth3 = jWidth2 + jWidth;
1286 final int jWidth4 = jWidth3 + jWidth;
1287 final int qStart = jBlock * BLOCK_SIZE;
1288 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1289 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1290 final T[] block = blocks[iBlock * blockColumns + jBlock];
1291 final int pStart = iBlock * BLOCK_SIZE;
1292 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1293 for (int q = qStart; q < qEnd; ++q) {
1294 int k = q - qStart;
1295 T sum = zero;
1296 int p = pStart;
1297 while (p < pEnd - 3) {
1298 sum = sum.
1299 add(block[k].multiply(v[p])).
1300 add(block[k + jWidth].multiply(v[p + 1])).
1301 add(block[k + jWidth2].multiply(v[p + 2])).
1302 add(block[k + jWidth3].multiply(v[p + 3]));
1303 k += jWidth4;
1304 p += 4;
1305 }
1306 while (p < pEnd) {
1307 sum = sum.add(block[k].multiply(v[p++]));
1308 k += jWidth;
1309 }
1310 out[q] = out[q].add(sum);
1311 }
1312 }
1313 }
1314
1315 return out;
1316 }
1317
1318 /** {@inheritDoc} */
1319 @Override
1320 public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor) {
1321 visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1322 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1323 final int pStart = iBlock * BLOCK_SIZE;
1324 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1325 for (int p = pStart; p < pEnd; ++p) {
1326 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1327 final int jWidth = blockWidth(jBlock);
1328 final int qStart = jBlock * BLOCK_SIZE;
1329 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1330 final T[] block = blocks[iBlock * blockColumns + jBlock];
1331 int k = (p - pStart) * jWidth;
1332 for (int q = qStart; q < qEnd; ++q) {
1333 block[k] = visitor.visit(p, q, block[k]);
1334 ++k;
1335 }
1336 }
1337 }
1338 }
1339 return visitor.end();
1340 }
1341
1342 /** {@inheritDoc} */
1343 @Override
1344 public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1345 visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1346 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1347 final int pStart = iBlock * BLOCK_SIZE;
1348 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1349 for (int p = pStart; p < pEnd; ++p) {
1350 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1351 final int jWidth = blockWidth(jBlock);
1352 final int qStart = jBlock * BLOCK_SIZE;
1353 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1354 final T[] block = blocks[iBlock * blockColumns + jBlock];
1355 int k = (p - pStart) * jWidth;
1356 for (int q = qStart; q < qEnd; ++q) {
1357 visitor.visit(p, q, block[k]);
1358 ++k;
1359 }
1360 }
1361 }
1362 }
1363 return visitor.end();
1364 }
1365
1366 /** {@inheritDoc} */
1367 @Override
1368 public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1369 final int startRow, final int endRow,
1370 final int startColumn, final int endColumn) {
1371 checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1372 visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1373 for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1374 final int p0 = iBlock * BLOCK_SIZE;
1375 final int pStart = FastMath.max(startRow, p0);
1376 final int pEnd = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1377 for (int p = pStart; p < pEnd; ++p) {
1378 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1379 final int jWidth = blockWidth(jBlock);
1380 final int q0 = jBlock * BLOCK_SIZE;
1381 final int qStart = FastMath.max(startColumn, q0);
1382 final int qEnd = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1383 final T[] block = blocks[iBlock * blockColumns + jBlock];
1384 int k = (p - p0) * jWidth + qStart - q0;
1385 for (int q = qStart; q < qEnd; ++q) {
1386 block[k] = visitor.visit(p, q, block[k]);
1387 ++k;
1388 }
1389 }
1390 }
1391 }
1392 return visitor.end();
1393 }
1394
1395 /** {@inheritDoc} */
1396 @Override
1397 public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1398 final int startRow, final int endRow,
1399 final int startColumn, final int endColumn) {
1400 checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1401 visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1402 for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1403 final int p0 = iBlock * BLOCK_SIZE;
1404 final int pStart = FastMath.max(startRow, p0);
1405 final int pEnd = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1406 for (int p = pStart; p < pEnd; ++p) {
1407 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1408 final int jWidth = blockWidth(jBlock);
1409 final int q0 = jBlock * BLOCK_SIZE;
1410 final int qStart = FastMath.max(startColumn, q0);
1411 final int qEnd = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1412 final T[] block = blocks[iBlock * blockColumns + jBlock];
1413 int k = (p - p0) * jWidth + qStart - q0;
1414 for (int q = qStart; q < qEnd; ++q) {
1415 visitor.visit(p, q, block[k]);
1416 ++k;
1417 }
1418 }
1419 }
1420 }
1421 return visitor.end();
1422 }
1423
1424 /** {@inheritDoc} */
1425 @Override
1426 public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor) {
1427 visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1428 int blockIndex = 0;
1429 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1430 final int pStart = iBlock * BLOCK_SIZE;
1431 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1432 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1433 final int qStart = jBlock * BLOCK_SIZE;
1434 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1435 final T[] block = blocks[blockIndex];
1436 int k = 0;
1437 for (int p = pStart; p < pEnd; ++p) {
1438 for (int q = qStart; q < qEnd; ++q) {
1439 block[k] = visitor.visit(p, q, block[k]);
1440 ++k;
1441 }
1442 }
1443 ++blockIndex;
1444 }
1445 }
1446 return visitor.end();
1447 }
1448
1449 /** {@inheritDoc} */
1450 @Override
1451 public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1452 visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1453 int blockIndex = 0;
1454 for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1455 final int pStart = iBlock * BLOCK_SIZE;
1456 final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1457 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1458 final int qStart = jBlock * BLOCK_SIZE;
1459 final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1460 final T[] block = blocks[blockIndex];
1461 int k = 0;
1462 for (int p = pStart; p < pEnd; ++p) {
1463 for (int q = qStart; q < qEnd; ++q) {
1464 visitor.visit(p, q, block[k]);
1465 ++k;
1466 }
1467 }
1468 ++blockIndex;
1469 }
1470 }
1471 return visitor.end();
1472 }
1473
1474 /** {@inheritDoc} */
1475 @Override
1476 public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1477 final int startRow, final int endRow,
1478 final int startColumn, final int endColumn) {
1479 checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1480 visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1481 for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1482 final int p0 = iBlock * BLOCK_SIZE;
1483 final int pStart = FastMath.max(startRow, p0);
1484 final int pEnd = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1485 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1486 final int jWidth = blockWidth(jBlock);
1487 final int q0 = jBlock * BLOCK_SIZE;
1488 final int qStart = FastMath.max(startColumn, q0);
1489 final int qEnd = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1490 final T[] block = blocks[iBlock * blockColumns + jBlock];
1491 for (int p = pStart; p < pEnd; ++p) {
1492 int k = (p - p0) * jWidth + qStart - q0;
1493 for (int q = qStart; q < qEnd; ++q) {
1494 block[k] = visitor.visit(p, q, block[k]);
1495 ++k;
1496 }
1497 }
1498 }
1499 }
1500 return visitor.end();
1501 }
1502
1503 /** {@inheritDoc} */
1504 @Override
1505 public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1506 final int startRow, final int endRow,
1507 final int startColumn, final int endColumn) {
1508 checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1509 visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1510 for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1511 final int p0 = iBlock * BLOCK_SIZE;
1512 final int pStart = FastMath.max(startRow, p0);
1513 final int pEnd = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1514 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1515 final int jWidth = blockWidth(jBlock);
1516 final int q0 = jBlock * BLOCK_SIZE;
1517 final int qStart = FastMath.max(startColumn, q0);
1518 final int qEnd = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1519 final T[] block = blocks[iBlock * blockColumns + jBlock];
1520 for (int p = pStart; p < pEnd; ++p) {
1521 int k = (p - p0) * jWidth + qStart - q0;
1522 for (int q = qStart; q < qEnd; ++q) {
1523 visitor.visit(p, q, block[k]);
1524 ++k;
1525 }
1526 }
1527 }
1528 }
1529 return visitor.end();
1530 }
1531
1532 /**
1533 * Get the height of a block.
1534 * @param blockRow row index (in block sense) of the block
1535 * @return height (number of rows) of the block
1536 */
1537 private int blockHeight(final int blockRow) {
1538 return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1539 }
1540
1541 /**
1542 * Get the width of a block.
1543 * @param blockColumn column index (in block sense) of the block
1544 * @return width (number of columns) of the block
1545 */
1546 private int blockWidth(final int blockColumn) {
1547 return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1548 }
1549 }