BlockRealMatrix.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.math4.legacy.linear;

  18. import java.io.Serializable;
  19. import java.util.Arrays;

  20. import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
  21. import org.apache.commons.math4.legacy.exception.NoDataException;
  22. import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
  23. import org.apache.commons.math4.legacy.exception.NullArgumentException;
  24. import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
  25. import org.apache.commons.math4.legacy.exception.OutOfRangeException;
  26. import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
  27. import org.apache.commons.math4.core.jdkmath.JdkMath;

  28. /**
  29.  * Cache-friendly implementation of RealMatrix using a flat arrays to store
  30.  * square blocks of the matrix.
  31.  * <p>
  32.  * This implementation is specially designed to be cache-friendly. Square blocks are
  33.  * stored as small arrays and allow efficient traversal of data both in row major direction
  34.  * and columns major direction, one block at a time. This greatly increases performances
  35.  * for algorithms that use crossed directions loops like multiplication or transposition.
  36.  * </p>
  37.  * <p>
  38.  * The size of square blocks is a static parameter. It may be tuned according to the cache
  39.  * size of the target computer processor. As a rule of thumbs, it should be the largest
  40.  * value that allows three blocks to be simultaneously cached (this is necessary for example
  41.  * for matrix multiplication). The default value is to use 52x52 blocks which is well suited
  42.  * for processors with 64k L1 cache (one block holds 2704 values or 21632 bytes). This value
  43.  * could be lowered to 36x36 for processors with 32k L1 cache.
  44.  * </p>
  45.  * <p>
  46.  * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
  47.  * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
  48.  * blocks are flattened in row major order in single dimension arrays which are therefore
  49.  * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
  50.  * organized in row major order.
  51.  * </p>
  52.  * <p>
  53.  * As an example, for a block size of 52x52, a 100x60 matrix would be stored in 4 blocks.
  54.  * Block 0 would be a double[2704] array holding the upper left 52x52 square, block 1 would be
  55.  * a double[416] array holding the upper right 52x8 rectangle, block 2 would be a double[2496]
  56.  * array holding the lower left 48x52 rectangle and block 3 would be a double[384] array
  57.  * holding the lower right 48x8 rectangle.
  58.  * </p>
  59.  * <p>
  60.  * The layout complexity overhead versus simple mapping of matrices to java
  61.  * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
  62.  * to up to 3-fold improvements for matrices of moderate to large size.
  63.  * </p>
  64.  * @since 2.0
  65.  */
  66. public class BlockRealMatrix extends AbstractRealMatrix implements Serializable {
  67.     /** Block size. */
  68.     public static final int BLOCK_SIZE = 52;
  69.     /** Serializable version identifier. */
  70.     private static final long serialVersionUID = 4991895511313664478L;
  71.     /** Blocks of matrix entries. */
  72.     private final double[][] blocks;
  73.     /** Number of rows of the matrix. */
  74.     private final int rows;
  75.     /** Number of columns of the matrix. */
  76.     private final int columns;
  77.     /** Number of block rows of the matrix. */
  78.     private final int blockRows;
  79.     /** Number of block columns of the matrix. */
  80.     private final int blockColumns;

  81.     /**
  82.      * Create a new matrix with the supplied row and column dimensions.
  83.      *
  84.      * @param rows  the number of rows in the new matrix
  85.      * @param columns  the number of columns in the new matrix
  86.      * @throws NotStrictlyPositiveException if row or column dimension is not
  87.      * positive.
  88.      */
  89.     public BlockRealMatrix(final int rows, final int columns)
  90.         throws NotStrictlyPositiveException {
  91.         super(rows, columns);
  92.         this.rows = rows;
  93.         this.columns = columns;

  94.         // number of blocks
  95.         blockRows = (rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
  96.         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;

  97.         // allocate storage blocks, taking care of smaller ones at right and bottom
  98.         blocks = createBlocksLayout(rows, columns);
  99.     }

  100.     /**
  101.      * Create a new dense matrix copying entries from raw layout data.
  102.      * <p>The input array <em>must</em> already be in raw layout.</p>
  103.      * <p>Calling this constructor is equivalent to call:
  104.      * <pre>matrix = new BlockRealMatrix(rawData.length, rawData[0].length,
  105.      *                                   toBlocksLayout(rawData), false);</pre>
  106.      *
  107.      * @param rawData data for new matrix, in raw layout
  108.      * @throws DimensionMismatchException if the shape of {@code blockData} is
  109.      * inconsistent with block layout.
  110.      * @throws NotStrictlyPositiveException if row or column dimension is not
  111.      * positive.
  112.      * @see #BlockRealMatrix(int, int, double[][], boolean)
  113.      */
  114.     public BlockRealMatrix(final double[][] rawData)
  115.         throws DimensionMismatchException, NotStrictlyPositiveException {
  116.         this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
  117.     }

  118.     /**
  119.      * Create a new dense matrix copying entries from block layout data.
  120.      * <p>The input array <em>must</em> already be in blocks layout.</p>
  121.      *
  122.      * @param rows Number of rows in the new matrix.
  123.      * @param columns Number of columns in the new matrix.
  124.      * @param blockData data for new matrix
  125.      * @param copyArray Whether the input array will be copied or referenced.
  126.      * @throws DimensionMismatchException if the shape of {@code blockData} is
  127.      * inconsistent with block layout.
  128.      * @throws NotStrictlyPositiveException if row or column dimension is not
  129.      * positive.
  130.      * @see #createBlocksLayout(int, int)
  131.      * @see #toBlocksLayout(double[][])
  132.      * @see #BlockRealMatrix(double[][])
  133.      */
  134.     public BlockRealMatrix(final int rows, final int columns,
  135.                            final double[][] blockData, final boolean copyArray)
  136.         throws DimensionMismatchException, NotStrictlyPositiveException {
  137.         super(rows, columns);
  138.         this.rows = rows;
  139.         this.columns = columns;

  140.         // number of blocks
  141.         blockRows = (rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
  142.         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;

  143.         if (copyArray) {
  144.             // allocate storage blocks, taking care of smaller ones at right and bottom
  145.             blocks = new double[blockRows * blockColumns][];
  146.         } else {
  147.             // reference existing array
  148.             blocks = blockData;
  149.         }

  150.         int index = 0;
  151.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  152.             final int iHeight = blockHeight(iBlock);
  153.             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
  154.                 if (blockData[index].length != iHeight * blockWidth(jBlock)) {
  155.                     throw new DimensionMismatchException(blockData[index].length,
  156.                                                          iHeight * blockWidth(jBlock));
  157.                 }
  158.                 if (copyArray) {
  159.                     blocks[index] = blockData[index].clone();
  160.                 }
  161.             }
  162.         }
  163.     }

  164.     /**
  165.      * Convert a data array from raw layout to blocks layout.
  166.      * <p>
  167.      * Raw layout is the straightforward layout where element at row i and
  168.      * column j is in array element <code>rawData[i][j]</code>. Blocks layout
  169.      * is the layout used in {@link BlockRealMatrix} instances, where the matrix
  170.      * is split in square blocks (except at right and bottom side where blocks may
  171.      * be rectangular to fit matrix size) and each block is stored in a flattened
  172.      * one-dimensional array.
  173.      * </p>
  174.      * <p>
  175.      * This method creates an array in blocks layout from an input array in raw layout.
  176.      * It can be used to provide the array argument of the {@link
  177.      * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
  178.      * </p>
  179.      * @param rawData Data array in raw layout.
  180.      * @return a new data array containing the same entries but in blocks layout.
  181.      * @throws DimensionMismatchException if {@code rawData} is not rectangular.
  182.      * @see #createBlocksLayout(int, int)
  183.      * @see #BlockRealMatrix(int, int, double[][], boolean)
  184.      */
  185.     public static double[][] toBlocksLayout(final double[][] rawData)
  186.         throws DimensionMismatchException {
  187.         final int rows = rawData.length;
  188.         final int columns = rawData[0].length;
  189.         final int blockRows = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
  190.         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;

  191.         // safety checks
  192.         for (int i = 0; i < rawData.length; ++i) {
  193.             final int length = rawData[i].length;
  194.             if (length != columns) {
  195.                 throw new DimensionMismatchException(columns, length);
  196.             }
  197.         }

  198.         // convert array
  199.         final double[][] blocks = new double[blockRows * blockColumns][];
  200.         int blockIndex = 0;
  201.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  202.             final int pStart = iBlock * BLOCK_SIZE;
  203.             final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);
  204.             final int iHeight = pEnd - pStart;
  205.             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  206.                 final int qStart = jBlock * BLOCK_SIZE;
  207.                 final int qEnd = JdkMath.min(qStart + BLOCK_SIZE, columns);
  208.                 final int jWidth = qEnd - qStart;

  209.                 // allocate new block
  210.                 final double[] block = new double[iHeight * jWidth];
  211.                 blocks[blockIndex] = block;

  212.                 // copy data
  213.                 int index = 0;
  214.                 for (int p = pStart; p < pEnd; ++p) {
  215.                     System.arraycopy(rawData[p], qStart, block, index, jWidth);
  216.                     index += jWidth;
  217.                 }
  218.                 ++blockIndex;
  219.             }
  220.         }

  221.         return blocks;
  222.     }

  223.     /**
  224.      * Create a data array in blocks layout.
  225.      * <p>
  226.      * This method can be used to create the array argument of the {@link
  227.      * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
  228.      * </p>
  229.      * @param rows Number of rows in the new matrix.
  230.      * @param columns Number of columns in the new matrix.
  231.      * @return a new data array in blocks layout.
  232.      * @see #toBlocksLayout(double[][])
  233.      * @see #BlockRealMatrix(int, int, double[][], boolean)
  234.      */
  235.     public static double[][] createBlocksLayout(final int rows, final int columns) {
  236.         final int blockRows = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
  237.         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;

  238.         final double[][] blocks = new double[blockRows * blockColumns][];
  239.         int blockIndex = 0;
  240.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  241.             final int pStart = iBlock * BLOCK_SIZE;
  242.             final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);
  243.             final int iHeight = pEnd - pStart;
  244.             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  245.                 final int qStart = jBlock * BLOCK_SIZE;
  246.                 final int qEnd = JdkMath.min(qStart + BLOCK_SIZE, columns);
  247.                 final int jWidth = qEnd - qStart;
  248.                 blocks[blockIndex] = new double[iHeight * jWidth];
  249.                 ++blockIndex;
  250.             }
  251.         }

  252.         return blocks;
  253.     }

  254.     /** {@inheritDoc} */
  255.     @Override
  256.     public BlockRealMatrix createMatrix(final int rowDimension,
  257.                                         final int columnDimension)
  258.         throws NotStrictlyPositiveException {
  259.         return new BlockRealMatrix(rowDimension, columnDimension);
  260.     }

  261.     /** {@inheritDoc} */
  262.     @Override
  263.     public BlockRealMatrix copy() {
  264.         // create an empty matrix
  265.         BlockRealMatrix copied = new BlockRealMatrix(rows, columns);

  266.         // copy the blocks
  267.         for (int i = 0; i < blocks.length; ++i) {
  268.             System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
  269.         }

  270.         return copied;
  271.     }

  272.     /** {@inheritDoc} */
  273.     @Override
  274.     public BlockRealMatrix add(final RealMatrix m)
  275.         throws MatrixDimensionMismatchException {
  276.         if (m instanceof BlockRealMatrix) {
  277.             return add((BlockRealMatrix) m);
  278.         }

  279.         // safety check
  280.         checkAdd(m);

  281.         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);

  282.         // perform addition block-wise, to ensure good cache behavior
  283.         int blockIndex = 0;
  284.         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
  285.             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {

  286.                 // perform addition on the current block
  287.                 final double[] outBlock = out.blocks[blockIndex];
  288.                 final double[] tBlock   = blocks[blockIndex];
  289.                 final int pStart = iBlock * BLOCK_SIZE;
  290.                 final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);
  291.                 final int qStart = jBlock * BLOCK_SIZE;
  292.                 final int qEnd = JdkMath.min(qStart + BLOCK_SIZE, columns);
  293.                 int k = 0;
  294.                 for (int p = pStart; p < pEnd; ++p) {
  295.                     for (int q = qStart; q < qEnd; ++q) {
  296.                         outBlock[k] = tBlock[k] + m.getEntry(p, q);
  297.                         ++k;
  298.                     }
  299.                 }
  300.                 // go to next block
  301.                 ++blockIndex;
  302.             }
  303.         }

  304.         return out;
  305.     }

  306.     /**
  307.      * Compute the sum of this matrix and {@code m}.
  308.      *
  309.      * @param m Matrix to be added.
  310.      * @return {@code this} + m.
  311.      * @throws MatrixDimensionMismatchException if {@code m} is not the same
  312.      * size as this matrix.
  313.      */
  314.     public BlockRealMatrix add(final BlockRealMatrix m)
  315.         throws MatrixDimensionMismatchException {
  316.         // safety check
  317.         checkAdd(m);

  318.         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);

  319.         // perform addition block-wise, to ensure good cache behavior
  320.         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
  321.             final double[] outBlock = out.blocks[blockIndex];
  322.             final double[] tBlock = blocks[blockIndex];
  323.             final double[] mBlock = m.blocks[blockIndex];
  324.             for (int k = 0; k < outBlock.length; ++k) {
  325.                 outBlock[k] = tBlock[k] + mBlock[k];
  326.             }
  327.         }

  328.         return out;
  329.     }

  330.     /** {@inheritDoc} */
  331.     @Override
  332.     public BlockRealMatrix subtract(final RealMatrix m)
  333.         throws MatrixDimensionMismatchException {
  334.         if (m instanceof BlockRealMatrix) {
  335.             return subtract((BlockRealMatrix) m);
  336.         }

  337.         // safety check
  338.         checkAdd(m);

  339.         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);

  340.         // perform subtraction block-wise, to ensure good cache behavior
  341.         int blockIndex = 0;
  342.         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
  343.             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {

  344.                 // perform subtraction on the current block
  345.                 final double[] outBlock = out.blocks[blockIndex];
  346.                 final double[] tBlock = blocks[blockIndex];
  347.                 final int pStart = iBlock * BLOCK_SIZE;
  348.                 final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);
  349.                 final int qStart = jBlock * BLOCK_SIZE;
  350.                 final int qEnd = JdkMath.min(qStart + BLOCK_SIZE, columns);
  351.                 int k = 0;
  352.                 for (int p = pStart; p < pEnd; ++p) {
  353.                     for (int q = qStart; q < qEnd; ++q) {
  354.                         outBlock[k] = tBlock[k] - m.getEntry(p, q);
  355.                         ++k;
  356.                     }
  357.                 }
  358.                 // go to next block
  359.                 ++blockIndex;
  360.             }
  361.         }

  362.         return out;
  363.     }

  364.     /**
  365.      * Subtract {@code m} from this matrix.
  366.      *
  367.      * @param m Matrix to be subtracted.
  368.      * @return {@code this} - m.
  369.      * @throws MatrixDimensionMismatchException if {@code m} is not the
  370.      * same size as this matrix.
  371.      */
  372.     public BlockRealMatrix subtract(final BlockRealMatrix m)
  373.         throws MatrixDimensionMismatchException {
  374.         // safety check
  375.         checkAdd(m);

  376.         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);

  377.         // perform subtraction block-wise, to ensure good cache behavior
  378.         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
  379.             final double[] outBlock = out.blocks[blockIndex];
  380.             final double[] tBlock = blocks[blockIndex];
  381.             final double[] mBlock = m.blocks[blockIndex];
  382.             for (int k = 0; k < outBlock.length; ++k) {
  383.                 outBlock[k] = tBlock[k] - mBlock[k];
  384.             }
  385.         }

  386.         return out;
  387.     }

  388.     /** {@inheritDoc} */
  389.     @Override
  390.     public BlockRealMatrix scalarAdd(final double d) {

  391.         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);

  392.         // perform subtraction block-wise, to ensure good cache behavior
  393.         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
  394.             final double[] outBlock = out.blocks[blockIndex];
  395.             final double[] tBlock = blocks[blockIndex];
  396.             for (int k = 0; k < outBlock.length; ++k) {
  397.                 outBlock[k] = tBlock[k] + d;
  398.             }
  399.         }

  400.         return out;
  401.     }

  402.     /** {@inheritDoc} */
  403.     @Override
  404.     public RealMatrix scalarMultiply(final double d) {
  405.         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);

  406.         // perform subtraction block-wise, to ensure good cache behavior
  407.         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
  408.             final double[] outBlock = out.blocks[blockIndex];
  409.             final double[] tBlock = blocks[blockIndex];
  410.             for (int k = 0; k < outBlock.length; ++k) {
  411.                 outBlock[k] = tBlock[k] * d;
  412.             }
  413.         }

  414.         return out;
  415.     }

  416.     /** {@inheritDoc} */
  417.     @Override
  418.     public BlockRealMatrix multiply(final RealMatrix m)
  419.         throws DimensionMismatchException {
  420.         if (m instanceof BlockRealMatrix) {
  421.             return multiply((BlockRealMatrix) m);
  422.         }

  423.         // safety check
  424.         checkMultiply(m);

  425.         final BlockRealMatrix out = new BlockRealMatrix(rows, m.getColumnDimension());

  426.         // perform multiplication block-wise, to ensure good cache behavior
  427.         int blockIndex = 0;
  428.         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
  429.             final int pStart = iBlock * BLOCK_SIZE;
  430.             final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);

  431.             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
  432.                 final int qStart = jBlock * BLOCK_SIZE;
  433.                 final int qEnd = JdkMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());

  434.                 // select current block
  435.                 final double[] outBlock = out.blocks[blockIndex];

  436.                 // perform multiplication on current block
  437.                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
  438.                     final int kWidth = blockWidth(kBlock);
  439.                     final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
  440.                     final int rStart = kBlock * BLOCK_SIZE;
  441.                     int k = 0;
  442.                     for (int p = pStart; p < pEnd; ++p) {
  443.                         final int lStart = (p - pStart) * kWidth;
  444.                         final int lEnd = lStart + kWidth;
  445.                         for (int q = qStart; q < qEnd; ++q) {
  446.                             double sum = 0;
  447.                             int r = rStart;
  448.                             for (int l = lStart; l < lEnd; ++l) {
  449.                                 sum += tBlock[l] * m.getEntry(r, q);
  450.                                 ++r;
  451.                             }
  452.                             outBlock[k] += sum;
  453.                             ++k;
  454.                         }
  455.                     }
  456.                 }
  457.                 // go to next block
  458.                 ++blockIndex;
  459.             }
  460.         }

  461.         return out;
  462.     }

  463.     /**
  464.      * Returns the result of postmultiplying this by {@code m}.
  465.      *
  466.      * @param m Matrix to postmultiply by.
  467.      * @return {@code this} * m.
  468.      * @throws DimensionMismatchException if the matrices are not compatible.
  469.      */
  470.     public BlockRealMatrix multiply(BlockRealMatrix m)
  471.         throws DimensionMismatchException {
  472.         // safety check
  473.         checkMultiply(m);

  474.         final BlockRealMatrix out = new BlockRealMatrix(rows, m.columns);

  475.         // perform multiplication block-wise, to ensure good cache behavior
  476.         int blockIndex = 0;
  477.         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {

  478.             final int pStart = iBlock * BLOCK_SIZE;
  479.             final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);

  480.             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
  481.                 final int jWidth = out.blockWidth(jBlock);
  482.                 final int jWidth2 = jWidth  + jWidth;
  483.                 final int jWidth3 = jWidth2 + jWidth;
  484.                 final int jWidth4 = jWidth3 + jWidth;

  485.                 // select current block
  486.                 final double[] outBlock = out.blocks[blockIndex];

  487.                 // perform multiplication on current block
  488.                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
  489.                     final int kWidth = blockWidth(kBlock);
  490.                     final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
  491.                     final double[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
  492.                     int k = 0;
  493.                     for (int p = pStart; p < pEnd; ++p) {
  494.                         final int lStart = (p - pStart) * kWidth;
  495.                         final int lEnd = lStart + kWidth;
  496.                         for (int nStart = 0; nStart < jWidth; ++nStart) {
  497.                             double sum = 0;
  498.                             int l = lStart;
  499.                             int n = nStart;
  500.                             while (l < lEnd - 3) {
  501.                                 sum += tBlock[l] * mBlock[n] +
  502.                                        tBlock[l + 1] * mBlock[n + jWidth] +
  503.                                        tBlock[l + 2] * mBlock[n + jWidth2] +
  504.                                        tBlock[l + 3] * mBlock[n + jWidth3];
  505.                                 l += 4;
  506.                                 n += jWidth4;
  507.                             }
  508.                             while (l < lEnd) {
  509.                                 sum += tBlock[l++] * mBlock[n];
  510.                                 n += jWidth;
  511.                             }
  512.                             outBlock[k] += sum;
  513.                             ++k;
  514.                         }
  515.                     }
  516.                 }
  517.                 // go to next block
  518.                 ++blockIndex;
  519.             }
  520.         }

  521.         return out;
  522.     }

  523.     /** {@inheritDoc} */
  524.     @Override
  525.     public double[][] getData() {
  526.         final double[][] data = new double[getRowDimension()][getColumnDimension()];
  527.         final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;

  528.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  529.             final int pStart = iBlock * BLOCK_SIZE;
  530.             final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);
  531.             int regularPos = 0;
  532.             int lastPos = 0;
  533.             for (int p = pStart; p < pEnd; ++p) {
  534.                 final double[] dataP = data[p];
  535.                 int blockIndex = iBlock * blockColumns;
  536.                 int dataPos = 0;
  537.                 for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
  538.                     System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
  539.                     dataPos += BLOCK_SIZE;
  540.                 }
  541.                 System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
  542.                 regularPos += BLOCK_SIZE;
  543.                 lastPos    += lastColumns;
  544.             }
  545.         }

  546.         return data;
  547.     }

  548.     /** {@inheritDoc} */
  549.     @Override
  550.     public double getNorm() {
  551.         final double[] colSums = new double[BLOCK_SIZE];
  552.         double maxColSum = 0;
  553.         for (int jBlock = 0; jBlock < blockColumns; jBlock++) {
  554.             final int jWidth = blockWidth(jBlock);
  555.             Arrays.fill(colSums, 0, jWidth, 0.0);
  556.             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  557.                 final int iHeight = blockHeight(iBlock);
  558.                 final double[] block = blocks[iBlock * blockColumns + jBlock];
  559.                 for (int j = 0; j < jWidth; ++j) {
  560.                     double sum = 0;
  561.                     for (int i = 0; i < iHeight; ++i) {
  562.                         sum += JdkMath.abs(block[i * jWidth + j]);
  563.                     }
  564.                     colSums[j] += sum;
  565.                 }
  566.             }
  567.             for (int j = 0; j < jWidth; ++j) {
  568.                 maxColSum = JdkMath.max(maxColSum, colSums[j]);
  569.             }
  570.         }
  571.         return maxColSum;
  572.     }

  573.     /** {@inheritDoc} */
  574.     @Override
  575.     public double getFrobeniusNorm() {
  576.         double sum2 = 0;
  577.         for (int blockIndex = 0; blockIndex < blocks.length; ++blockIndex) {
  578.             for (final double entry : blocks[blockIndex]) {
  579.                 sum2 += entry * entry;
  580.             }
  581.         }
  582.         return JdkMath.sqrt(sum2);
  583.     }

  584.     /** {@inheritDoc} */
  585.     @Override
  586.     public BlockRealMatrix getSubMatrix(final int startRow, final int endRow,
  587.                                         final int startColumn,
  588.                                         final int endColumn)
  589.         throws OutOfRangeException, NumberIsTooSmallException {
  590.         // safety checks
  591.         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);

  592.         // create the output matrix
  593.         final BlockRealMatrix out =
  594.             new BlockRealMatrix(endRow - startRow + 1, endColumn - startColumn + 1);

  595.         // compute blocks shifts
  596.         final int blockStartRow = startRow / BLOCK_SIZE;
  597.         final int rowsShift = startRow % BLOCK_SIZE;
  598.         final int blockStartColumn = startColumn / BLOCK_SIZE;
  599.         final int columnsShift = startColumn % BLOCK_SIZE;

  600.         // perform extraction block-wise, to ensure good cache behavior
  601.         int pBlock = blockStartRow;
  602.         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
  603.             final int iHeight = out.blockHeight(iBlock);
  604.             int qBlock = blockStartColumn;
  605.             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
  606.                 final int jWidth = out.blockWidth(jBlock);

  607.                 // handle one block of the output matrix
  608.                 final int outIndex = iBlock * out.blockColumns + jBlock;
  609.                 final double[] outBlock = out.blocks[outIndex];
  610.                 final int index = pBlock * blockColumns + qBlock;
  611.                 final int width = blockWidth(qBlock);

  612.                 final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
  613.                 final int widthExcess = jWidth + columnsShift - BLOCK_SIZE;
  614.                 if (heightExcess > 0) {
  615.                     // the submatrix block spans on two blocks rows from the original matrix
  616.                     if (widthExcess > 0) {
  617.                         // the submatrix block spans on two blocks columns from the original matrix
  618.                         final int width2 = blockWidth(qBlock + 1);
  619.                         copyBlockPart(blocks[index], width,
  620.                                       rowsShift, BLOCK_SIZE,
  621.                                       columnsShift, BLOCK_SIZE,
  622.                                       outBlock, jWidth, 0, 0);
  623.                         copyBlockPart(blocks[index + 1], width2,
  624.                                       rowsShift, BLOCK_SIZE,
  625.                                       0, widthExcess,
  626.                                       outBlock, jWidth, 0, jWidth - widthExcess);
  627.                         copyBlockPart(blocks[index + blockColumns], width,
  628.                                       0, heightExcess,
  629.                                       columnsShift, BLOCK_SIZE,
  630.                                       outBlock, jWidth, iHeight - heightExcess, 0);
  631.                         copyBlockPart(blocks[index + blockColumns + 1], width2,
  632.                                       0, heightExcess,
  633.                                       0, widthExcess,
  634.                                       outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
  635.                     } else {
  636.                         // the submatrix block spans on one block column from the original matrix
  637.                         copyBlockPart(blocks[index], width,
  638.                                       rowsShift, BLOCK_SIZE,
  639.                                       columnsShift, jWidth + columnsShift,
  640.                                       outBlock, jWidth, 0, 0);
  641.                         copyBlockPart(blocks[index + blockColumns], width,
  642.                                       0, heightExcess,
  643.                                       columnsShift, jWidth + columnsShift,
  644.                                       outBlock, jWidth, iHeight - heightExcess, 0);
  645.                     }
  646.                 } else {
  647.                     // the submatrix block spans on one block row from the original matrix
  648.                     if (widthExcess > 0) {
  649.                         // the submatrix block spans on two blocks columns from the original matrix
  650.                         final int width2 = blockWidth(qBlock + 1);
  651.                         copyBlockPart(blocks[index], width,
  652.                                       rowsShift, iHeight + rowsShift,
  653.                                       columnsShift, BLOCK_SIZE,
  654.                                       outBlock, jWidth, 0, 0);
  655.                         copyBlockPart(blocks[index + 1], width2,
  656.                                       rowsShift, iHeight + rowsShift,
  657.                                       0, widthExcess,
  658.                                       outBlock, jWidth, 0, jWidth - widthExcess);
  659.                     } else {
  660.                         // the submatrix block spans on one block column from the original matrix
  661.                         copyBlockPart(blocks[index], width,
  662.                                       rowsShift, iHeight + rowsShift,
  663.                                       columnsShift, jWidth + columnsShift,
  664.                                       outBlock, jWidth, 0, 0);
  665.                     }
  666.                }
  667.                 ++qBlock;
  668.             }
  669.             ++pBlock;
  670.         }

  671.         return out;
  672.     }

  673.     /**
  674.      * Copy a part of a block into another one
  675.      * <p>This method can be called only when the specified part fits in both
  676.      * blocks, no verification is done here.</p>
  677.      * @param srcBlock source block
  678.      * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
  679.      * @param srcStartRow start row in the source block
  680.      * @param srcEndRow end row (exclusive) in the source block
  681.      * @param srcStartColumn start column in the source block
  682.      * @param srcEndColumn end column (exclusive) in the source block
  683.      * @param dstBlock destination block
  684.      * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
  685.      * @param dstStartRow start row in the destination block
  686.      * @param dstStartColumn start column in the destination block
  687.      */
  688.     private void copyBlockPart(final double[] srcBlock, final int srcWidth,
  689.                                final int srcStartRow, final int srcEndRow,
  690.                                final int srcStartColumn, final int srcEndColumn,
  691.                                final double[] dstBlock, final int dstWidth,
  692.                                final int dstStartRow, final int dstStartColumn) {
  693.         final int length = srcEndColumn - srcStartColumn;
  694.         int srcPos = srcStartRow * srcWidth + srcStartColumn;
  695.         int dstPos = dstStartRow * dstWidth + dstStartColumn;
  696.         for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
  697.             System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
  698.             srcPos += srcWidth;
  699.             dstPos += dstWidth;
  700.         }
  701.     }

  702.     /** {@inheritDoc} */
  703.     @Override
  704.     public void setSubMatrix(final double[][] subMatrix, final int row,
  705.                              final int column)
  706.         throws OutOfRangeException, NoDataException, NullArgumentException,
  707.         DimensionMismatchException {
  708.         // safety checks
  709.         NullArgumentException.check(subMatrix);
  710.         final int refLength = subMatrix[0].length;
  711.         if (refLength == 0) {
  712.             throw new NoDataException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
  713.         }
  714.         final int endRow = row + subMatrix.length - 1;
  715.         final int endColumn = column + refLength - 1;
  716.         MatrixUtils.checkSubMatrixIndex(this, row, endRow, column, endColumn);
  717.         for (final double[] subRow : subMatrix) {
  718.             if (subRow.length != refLength) {
  719.                 throw new DimensionMismatchException(refLength, subRow.length);
  720.             }
  721.         }

  722.         // compute blocks bounds
  723.         final int blockStartRow = row / BLOCK_SIZE;
  724.         final int blockEndRow = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
  725.         final int blockStartColumn = column / BLOCK_SIZE;
  726.         final int blockEndColumn = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;

  727.         // perform copy block-wise, to ensure good cache behavior
  728.         for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
  729.             final int iHeight = blockHeight(iBlock);
  730.             final int firstRow = iBlock * BLOCK_SIZE;
  731.             final int iStart = JdkMath.max(row,    firstRow);
  732.             final int iEnd = JdkMath.min(endRow + 1, firstRow + iHeight);

  733.             for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
  734.                 final int jWidth = blockWidth(jBlock);
  735.                 final int firstColumn = jBlock * BLOCK_SIZE;
  736.                 final int jStart = JdkMath.max(column,    firstColumn);
  737.                 final int jEnd = JdkMath.min(endColumn + 1, firstColumn + jWidth);
  738.                 final int jLength = jEnd - jStart;

  739.                 // handle one block, row by row
  740.                 final double[] block = blocks[iBlock * blockColumns + jBlock];
  741.                 for (int i = iStart; i < iEnd; ++i) {
  742.                     System.arraycopy(subMatrix[i - row], jStart - column,
  743.                                      block, (i - firstRow) * jWidth + (jStart - firstColumn),
  744.                                      jLength);
  745.                 }
  746.             }
  747.         }
  748.     }

  749.     /** {@inheritDoc} */
  750.     @Override
  751.     public BlockRealMatrix getRowMatrix(final int row)
  752.         throws OutOfRangeException {
  753.         MatrixUtils.checkRowIndex(this, row);
  754.         final BlockRealMatrix out = new BlockRealMatrix(1, columns);

  755.         // perform copy block-wise, to ensure good cache behavior
  756.         final int iBlock = row / BLOCK_SIZE;
  757.         final int iRow = row - iBlock * BLOCK_SIZE;
  758.         int outBlockIndex = 0;
  759.         int outIndex = 0;
  760.         double[] outBlock = out.blocks[outBlockIndex];
  761.         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  762.             final int jWidth = blockWidth(jBlock);
  763.             final double[] block = blocks[iBlock * blockColumns + jBlock];
  764.             final int available = outBlock.length - outIndex;
  765.             if (jWidth > available) {
  766.                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
  767.                 outBlock = out.blocks[++outBlockIndex];
  768.                 System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
  769.                 outIndex = jWidth - available;
  770.             } else {
  771.                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
  772.                 outIndex += jWidth;
  773.             }
  774.         }

  775.         return out;
  776.     }

  777.     /** {@inheritDoc} */
  778.     @Override
  779.     public void setRowMatrix(final int row, final RealMatrix matrix)
  780.         throws OutOfRangeException, MatrixDimensionMismatchException {
  781.         if (matrix instanceof BlockRealMatrix) {
  782.             setRowMatrix(row, (BlockRealMatrix) matrix);
  783.         } else {
  784.             super.setRowMatrix(row, matrix);
  785.         }
  786.     }

  787.     /**
  788.      * Sets the entries in row number <code>row</code>
  789.      * as a row matrix.  Row indices start at 0.
  790.      *
  791.      * @param row the row to be set
  792.      * @param matrix row matrix (must have one row and the same number of columns
  793.      * as the instance)
  794.      * @throws OutOfRangeException if the specified row index is invalid.
  795.      * @throws MatrixDimensionMismatchException if the matrix dimensions do
  796.      * not match one instance row.
  797.      */
  798.     public void setRowMatrix(final int row, final BlockRealMatrix matrix)
  799.         throws OutOfRangeException, MatrixDimensionMismatchException {
  800.         MatrixUtils.checkRowIndex(this, row);
  801.         final int nCols = getColumnDimension();
  802.         if (matrix.getRowDimension() != 1 ||
  803.             matrix.getColumnDimension() != nCols) {
  804.             throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
  805.                                                        matrix.getColumnDimension(),
  806.                                                        1, nCols);
  807.         }

  808.         // perform copy block-wise, to ensure good cache behavior
  809.         final int iBlock = row / BLOCK_SIZE;
  810.         final int iRow = row - iBlock * BLOCK_SIZE;
  811.         int mBlockIndex = 0;
  812.         int mIndex = 0;
  813.         double[] mBlock = matrix.blocks[mBlockIndex];
  814.         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  815.             final int jWidth = blockWidth(jBlock);
  816.             final double[] block = blocks[iBlock * blockColumns + jBlock];
  817.             final int available  = mBlock.length - mIndex;
  818.             if (jWidth > available) {
  819.                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
  820.                 mBlock = matrix.blocks[++mBlockIndex];
  821.                 System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
  822.                 mIndex = jWidth - available;
  823.             } else {
  824.                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
  825.                 mIndex += jWidth;
  826.            }
  827.         }
  828.     }

  829.     /** {@inheritDoc} */
  830.     @Override
  831.     public BlockRealMatrix getColumnMatrix(final int column)
  832.         throws OutOfRangeException {
  833.         MatrixUtils.checkColumnIndex(this, column);
  834.         final BlockRealMatrix out = new BlockRealMatrix(rows, 1);

  835.         // perform copy block-wise, to ensure good cache behavior
  836.         final int jBlock = column / BLOCK_SIZE;
  837.         final int jColumn = column - jBlock * BLOCK_SIZE;
  838.         final int jWidth = blockWidth(jBlock);
  839.         int outBlockIndex = 0;
  840.         int outIndex = 0;
  841.         double[] outBlock = out.blocks[outBlockIndex];
  842.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  843.             final int iHeight = blockHeight(iBlock);
  844.             final double[] block = blocks[iBlock * blockColumns + jBlock];
  845.             for (int i = 0; i < iHeight; ++i) {
  846.                 if (outIndex >= outBlock.length) {
  847.                     outBlock = out.blocks[++outBlockIndex];
  848.                     outIndex = 0;
  849.                 }
  850.                 outBlock[outIndex++] = block[i * jWidth + jColumn];
  851.             }
  852.         }

  853.         return out;
  854.     }

  855.     /** {@inheritDoc} */
  856.     @Override
  857.     public void setColumnMatrix(final int column, final RealMatrix matrix)
  858.         throws OutOfRangeException, MatrixDimensionMismatchException {
  859.         if (matrix instanceof BlockRealMatrix) {
  860.             setColumnMatrix(column, (BlockRealMatrix) matrix);
  861.         } else {
  862.             super.setColumnMatrix(column, matrix);
  863.         }
  864.     }

  865.     /**
  866.      * Sets the entries in column number <code>column</code>
  867.      * as a column matrix.  Column indices start at 0.
  868.      *
  869.      * @param column the column to be set
  870.      * @param matrix column matrix (must have one column and the same number of rows
  871.      * as the instance)
  872.      * @throws OutOfRangeException if the specified column index is invalid.
  873.      * @throws MatrixDimensionMismatchException if the matrix dimensions do
  874.      * not match one instance column.
  875.      */
  876.     void setColumnMatrix(final int column, final BlockRealMatrix matrix)
  877.         throws OutOfRangeException, MatrixDimensionMismatchException {
  878.         MatrixUtils.checkColumnIndex(this, column);
  879.         final int nRows = getRowDimension();
  880.         if (matrix.getRowDimension() != nRows ||
  881.             matrix.getColumnDimension() != 1) {
  882.             throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
  883.                                                        matrix.getColumnDimension(),
  884.                                                        nRows, 1);
  885.         }

  886.         // perform copy block-wise, to ensure good cache behavior
  887.         final int jBlock = column / BLOCK_SIZE;
  888.         final int jColumn = column - jBlock * BLOCK_SIZE;
  889.         final int jWidth = blockWidth(jBlock);
  890.         int mBlockIndex = 0;
  891.         int mIndex = 0;
  892.         double[] mBlock = matrix.blocks[mBlockIndex];
  893.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  894.             final int iHeight = blockHeight(iBlock);
  895.             final double[] block = blocks[iBlock * blockColumns + jBlock];
  896.             for (int i = 0; i < iHeight; ++i) {
  897.                 if (mIndex >= mBlock.length) {
  898.                     mBlock = matrix.blocks[++mBlockIndex];
  899.                     mIndex = 0;
  900.                 }
  901.                 block[i * jWidth + jColumn] = mBlock[mIndex++];
  902.             }
  903.         }
  904.     }

  905.     /** {@inheritDoc} */
  906.     @Override
  907.     public RealVector getRowVector(final int row)
  908.         throws OutOfRangeException {
  909.         MatrixUtils.checkRowIndex(this, row);
  910.         final double[] outData = new double[columns];

  911.         // perform copy block-wise, to ensure good cache behavior
  912.         final int iBlock = row / BLOCK_SIZE;
  913.         final int iRow = row - iBlock * BLOCK_SIZE;
  914.         int outIndex = 0;
  915.         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  916.             final int jWidth = blockWidth(jBlock);
  917.             final double[] block = blocks[iBlock * blockColumns + jBlock];
  918.             System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
  919.             outIndex += jWidth;
  920.         }

  921.         return new ArrayRealVector(outData, false);
  922.     }

  923.     /** {@inheritDoc} */
  924.     @Override
  925.     public void setRowVector(final int row, final RealVector vector)
  926.         throws OutOfRangeException, MatrixDimensionMismatchException {
  927.         if (vector instanceof ArrayRealVector) {
  928.             setRow(row, ((ArrayRealVector) vector).getDataRef());
  929.         } else {
  930.             super.setRowVector(row, vector);
  931.         }
  932.     }

  933.     /** {@inheritDoc} */
  934.     @Override
  935.     public RealVector getColumnVector(final int column)
  936.         throws OutOfRangeException {
  937.         MatrixUtils.checkColumnIndex(this, column);
  938.         final double[] outData = new double[rows];

  939.         // perform copy block-wise, to ensure good cache behavior
  940.         final int jBlock = column / BLOCK_SIZE;
  941.         final int jColumn = column - jBlock * BLOCK_SIZE;
  942.         final int jWidth = blockWidth(jBlock);
  943.         int outIndex = 0;
  944.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  945.             final int iHeight = blockHeight(iBlock);
  946.             final double[] block = blocks[iBlock * blockColumns + jBlock];
  947.             for (int i = 0; i < iHeight; ++i) {
  948.                 outData[outIndex++] = block[i * jWidth + jColumn];
  949.             }
  950.         }

  951.         return new ArrayRealVector(outData, false);
  952.     }

  953.     /** {@inheritDoc} */
  954.     @Override
  955.     public void setColumnVector(final int column, final RealVector vector)
  956.         throws OutOfRangeException, MatrixDimensionMismatchException {
  957.         if (vector instanceof ArrayRealVector) {
  958.             setColumn(column, ((ArrayRealVector) vector).getDataRef());
  959.         } else {
  960.             super.setColumnVector(column, vector);
  961.         }
  962.     }

  963.     /** {@inheritDoc} */
  964.     @Override
  965.     public double[] getRow(final int row) throws OutOfRangeException {
  966.         MatrixUtils.checkRowIndex(this, row);
  967.         final double[] out = new double[columns];

  968.         // perform copy block-wise, to ensure good cache behavior
  969.         final int iBlock = row / BLOCK_SIZE;
  970.         final int iRow = row - iBlock * BLOCK_SIZE;
  971.         int outIndex = 0;
  972.         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  973.             final int jWidth     = blockWidth(jBlock);
  974.             final double[] block = blocks[iBlock * blockColumns + jBlock];
  975.             System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
  976.             outIndex += jWidth;
  977.         }

  978.         return out;
  979.     }

  980.     /** {@inheritDoc} */
  981.     @Override
  982.     public void setRow(final int row, final double[] array)
  983.         throws OutOfRangeException, MatrixDimensionMismatchException {
  984.         MatrixUtils.checkRowIndex(this, row);
  985.         final int nCols = getColumnDimension();
  986.         if (array.length != nCols) {
  987.             throw new MatrixDimensionMismatchException(1, array.length, 1, nCols);
  988.         }

  989.         // perform copy block-wise, to ensure good cache behavior
  990.         final int iBlock = row / BLOCK_SIZE;
  991.         final int iRow = row - iBlock * BLOCK_SIZE;
  992.         int outIndex = 0;
  993.         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  994.             final int jWidth     = blockWidth(jBlock);
  995.             final double[] block = blocks[iBlock * blockColumns + jBlock];
  996.             System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
  997.             outIndex += jWidth;
  998.         }
  999.     }

  1000.     /** {@inheritDoc} */
  1001.     @Override
  1002.     public double[] getColumn(final int column) throws OutOfRangeException {
  1003.         MatrixUtils.checkColumnIndex(this, column);
  1004.         final double[] out = new double[rows];

  1005.         // perform copy block-wise, to ensure good cache behavior
  1006.         final int jBlock  = column / BLOCK_SIZE;
  1007.         final int jColumn = column - jBlock * BLOCK_SIZE;
  1008.         final int jWidth  = blockWidth(jBlock);
  1009.         int outIndex = 0;
  1010.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  1011.             final int iHeight = blockHeight(iBlock);
  1012.             final double[] block = blocks[iBlock * blockColumns + jBlock];
  1013.             for (int i = 0; i < iHeight; ++i) {
  1014.                 out[outIndex++] = block[i * jWidth + jColumn];
  1015.             }
  1016.         }

  1017.         return out;
  1018.     }

  1019.     /** {@inheritDoc} */
  1020.     @Override
  1021.     public void setColumn(final int column, final double[] array)
  1022.         throws OutOfRangeException, MatrixDimensionMismatchException {
  1023.         MatrixUtils.checkColumnIndex(this, column);
  1024.         final int nRows = getRowDimension();
  1025.         if (array.length != nRows) {
  1026.             throw new MatrixDimensionMismatchException(array.length, 1, nRows, 1);
  1027.         }

  1028.         // perform copy block-wise, to ensure good cache behavior
  1029.         final int jBlock  = column / BLOCK_SIZE;
  1030.         final int jColumn = column - jBlock * BLOCK_SIZE;
  1031.         final int jWidth = blockWidth(jBlock);
  1032.         int outIndex = 0;
  1033.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  1034.             final int iHeight = blockHeight(iBlock);
  1035.             final double[] block = blocks[iBlock * blockColumns + jBlock];
  1036.             for (int i = 0; i < iHeight; ++i) {
  1037.                 block[i * jWidth + jColumn] = array[outIndex++];
  1038.             }
  1039.         }
  1040.     }

  1041.     /** {@inheritDoc} */
  1042.     @Override
  1043.     public double getEntry(final int row, final int column)
  1044.         throws OutOfRangeException {
  1045.         MatrixUtils.checkMatrixIndex(this, row, column);
  1046.         final int iBlock = row / BLOCK_SIZE;
  1047.         final int jBlock = column / BLOCK_SIZE;
  1048.         final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
  1049.             (column - jBlock * BLOCK_SIZE);
  1050.         return blocks[iBlock * blockColumns + jBlock][k];
  1051.     }

  1052.     /** {@inheritDoc} */
  1053.     @Override
  1054.     public void setEntry(final int row, final int column, final double value)
  1055.         throws OutOfRangeException {
  1056.         MatrixUtils.checkMatrixIndex(this, row, column);
  1057.         final int iBlock = row / BLOCK_SIZE;
  1058.         final int jBlock = column / BLOCK_SIZE;
  1059.         final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
  1060.             (column - jBlock * BLOCK_SIZE);
  1061.         blocks[iBlock * blockColumns + jBlock][k] = value;
  1062.     }

  1063.     /** {@inheritDoc} */
  1064.     @Override
  1065.     public void addToEntry(final int row, final int column,
  1066.                            final double increment)
  1067.         throws OutOfRangeException {
  1068.         MatrixUtils.checkMatrixIndex(this, row, column);
  1069.         final int iBlock = row    / BLOCK_SIZE;
  1070.         final int jBlock = column / BLOCK_SIZE;
  1071.         final int k = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
  1072.             (column - jBlock * BLOCK_SIZE);
  1073.         blocks[iBlock * blockColumns + jBlock][k] += increment;
  1074.     }

  1075.     /** {@inheritDoc} */
  1076.     @Override
  1077.     public void multiplyEntry(final int row, final int column,
  1078.                               final double factor)
  1079.         throws OutOfRangeException {
  1080.         MatrixUtils.checkMatrixIndex(this, row, column);
  1081.         final int iBlock = row / BLOCK_SIZE;
  1082.         final int jBlock = column / BLOCK_SIZE;
  1083.         final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
  1084.             (column - jBlock * BLOCK_SIZE);
  1085.         blocks[iBlock * blockColumns + jBlock][k] *= factor;
  1086.     }

  1087.     /** {@inheritDoc} */
  1088.     @Override
  1089.     public BlockRealMatrix transpose() {
  1090.         final int nRows = getRowDimension();
  1091.         final int nCols = getColumnDimension();
  1092.         final BlockRealMatrix out = new BlockRealMatrix(nCols, nRows);

  1093.         // perform transpose block-wise, to ensure good cache behavior
  1094.         int blockIndex = 0;
  1095.         for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
  1096.             for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
  1097.                 // transpose current block
  1098.                 final double[] outBlock = out.blocks[blockIndex];
  1099.                 final double[] tBlock = blocks[jBlock * blockColumns + iBlock];
  1100.                 final int pStart = iBlock * BLOCK_SIZE;
  1101.                 final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, columns);
  1102.                 final int qStart = jBlock * BLOCK_SIZE;
  1103.                 final int qEnd = JdkMath.min(qStart + BLOCK_SIZE, rows);
  1104.                 int k = 0;
  1105.                 for (int p = pStart; p < pEnd; ++p) {
  1106.                     final int lInc = pEnd - pStart;
  1107.                     int l = p - pStart;
  1108.                     for (int q = qStart; q < qEnd; ++q) {
  1109.                         outBlock[k] = tBlock[l];
  1110.                         ++k;
  1111.                         l+= lInc;
  1112.                     }
  1113.                 }
  1114.                 // go to next block
  1115.                 ++blockIndex;
  1116.             }
  1117.         }

  1118.         return out;
  1119.     }

  1120.     /** {@inheritDoc} */
  1121.     @Override
  1122.     public int getRowDimension() {
  1123.         return rows;
  1124.     }

  1125.     /** {@inheritDoc} */
  1126.     @Override
  1127.     public int getColumnDimension() {
  1128.         return columns;
  1129.     }

  1130.     /** {@inheritDoc} */
  1131.     @Override
  1132.     public double[] operate(final double[] v)
  1133.         throws DimensionMismatchException {
  1134.         if (v.length != columns) {
  1135.             throw new DimensionMismatchException(v.length, columns);
  1136.         }
  1137.         final double[] out = new double[rows];

  1138.         // perform multiplication block-wise, to ensure good cache behavior
  1139.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  1140.             final int pStart = iBlock * BLOCK_SIZE;
  1141.             final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);
  1142.             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  1143.                 final double[] block  = blocks[iBlock * blockColumns + jBlock];
  1144.                 final int qStart = jBlock * BLOCK_SIZE;
  1145.                 final int qEnd = JdkMath.min(qStart + BLOCK_SIZE, columns);
  1146.                 int k = 0;
  1147.                 for (int p = pStart; p < pEnd; ++p) {
  1148.                     double sum = 0;
  1149.                     int q = qStart;
  1150.                     while (q < qEnd - 3) {
  1151.                         sum += block[k]     * v[q]     +
  1152.                                block[k + 1] * v[q + 1] +
  1153.                                block[k + 2] * v[q + 2] +
  1154.                                block[k + 3] * v[q + 3];
  1155.                         k += 4;
  1156.                         q += 4;
  1157.                     }
  1158.                     while (q < qEnd) {
  1159.                         sum += block[k++] * v[q++];
  1160.                     }
  1161.                     out[p] += sum;
  1162.                 }
  1163.             }
  1164.         }

  1165.         return out;
  1166.     }

  1167.     /** {@inheritDoc} */
  1168.     @Override
  1169.     public double[] preMultiply(final double[] v)
  1170.         throws DimensionMismatchException {
  1171.         if (v.length != rows) {
  1172.             throw new DimensionMismatchException(v.length, rows);
  1173.         }
  1174.         final double[] out = new double[columns];

  1175.         // perform multiplication block-wise, to ensure good cache behavior
  1176.         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  1177.             final int jWidth  = blockWidth(jBlock);
  1178.             final int jWidth2 = jWidth  + jWidth;
  1179.             final int jWidth3 = jWidth2 + jWidth;
  1180.             final int jWidth4 = jWidth3 + jWidth;
  1181.             final int qStart = jBlock * BLOCK_SIZE;
  1182.             final int qEnd = JdkMath.min(qStart + BLOCK_SIZE, columns);
  1183.             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  1184.                 final double[] block  = blocks[iBlock * blockColumns + jBlock];
  1185.                 final int pStart = iBlock * BLOCK_SIZE;
  1186.                 final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);
  1187.                 for (int q = qStart; q < qEnd; ++q) {
  1188.                     int k = q - qStart;
  1189.                     double sum = 0;
  1190.                     int p = pStart;
  1191.                     while (p < pEnd - 3) {
  1192.                         sum += block[k]           * v[p]     +
  1193.                                block[k + jWidth]  * v[p + 1] +
  1194.                                block[k + jWidth2] * v[p + 2] +
  1195.                                block[k + jWidth3] * v[p + 3];
  1196.                         k += jWidth4;
  1197.                         p += 4;
  1198.                     }
  1199.                     while (p < pEnd) {
  1200.                         sum += block[k] * v[p++];
  1201.                         k += jWidth;
  1202.                     }
  1203.                     out[q] += sum;
  1204.                 }
  1205.             }
  1206.         }

  1207.         return out;
  1208.     }

  1209.     /** {@inheritDoc} */
  1210.     @Override
  1211.     public double walkInRowOrder(final RealMatrixChangingVisitor visitor) {
  1212.         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
  1213.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  1214.             final int pStart = iBlock * BLOCK_SIZE;
  1215.             final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);
  1216.             for (int p = pStart; p < pEnd; ++p) {
  1217.                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  1218.                     final int jWidth = blockWidth(jBlock);
  1219.                     final int qStart = jBlock * BLOCK_SIZE;
  1220.                     final int qEnd = JdkMath.min(qStart + BLOCK_SIZE, columns);
  1221.                     final double[] block = blocks[iBlock * blockColumns + jBlock];
  1222.                     int k = (p - pStart) * jWidth;
  1223.                     for (int q = qStart; q < qEnd; ++q) {
  1224.                         block[k] = visitor.visit(p, q, block[k]);
  1225.                         ++k;
  1226.                     }
  1227.                 }
  1228.              }
  1229.         }
  1230.         return visitor.end();
  1231.     }

  1232.     /** {@inheritDoc} */
  1233.     @Override
  1234.     public double walkInRowOrder(final RealMatrixPreservingVisitor visitor) {
  1235.         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
  1236.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  1237.             final int pStart = iBlock * BLOCK_SIZE;
  1238.             final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);
  1239.             for (int p = pStart; p < pEnd; ++p) {
  1240.                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  1241.                     final int jWidth = blockWidth(jBlock);
  1242.                     final int qStart = jBlock * BLOCK_SIZE;
  1243.                     final int qEnd = JdkMath.min(qStart + BLOCK_SIZE, columns);
  1244.                     final double[] block = blocks[iBlock * blockColumns + jBlock];
  1245.                     int k = (p - pStart) * jWidth;
  1246.                     for (int q = qStart; q < qEnd; ++q) {
  1247.                         visitor.visit(p, q, block[k]);
  1248.                         ++k;
  1249.                     }
  1250.                 }
  1251.              }
  1252.         }
  1253.         return visitor.end();
  1254.     }

  1255.     /** {@inheritDoc} */
  1256.     @Override
  1257.     public double walkInRowOrder(final RealMatrixChangingVisitor visitor,
  1258.                                  final int startRow, final int endRow,
  1259.                                  final int startColumn, final int endColumn)
  1260.         throws OutOfRangeException, NumberIsTooSmallException {
  1261.         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
  1262.         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
  1263.         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
  1264.             final int p0 = iBlock * BLOCK_SIZE;
  1265.             final int pStart = JdkMath.max(startRow, p0);
  1266.             final int pEnd = JdkMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
  1267.             for (int p = pStart; p < pEnd; ++p) {
  1268.                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
  1269.                     final int jWidth = blockWidth(jBlock);
  1270.                     final int q0 = jBlock * BLOCK_SIZE;
  1271.                     final int qStart = JdkMath.max(startColumn, q0);
  1272.                     final int qEnd = JdkMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
  1273.                     final double[] block = blocks[iBlock * blockColumns + jBlock];
  1274.                     int k = (p - p0) * jWidth + qStart - q0;
  1275.                     for (int q = qStart; q < qEnd; ++q) {
  1276.                         block[k] = visitor.visit(p, q, block[k]);
  1277.                         ++k;
  1278.                     }
  1279.                 }
  1280.              }
  1281.         }
  1282.         return visitor.end();
  1283.     }

  1284.     /** {@inheritDoc} */
  1285.     @Override
  1286.     public double walkInRowOrder(final RealMatrixPreservingVisitor visitor,
  1287.                                  final int startRow, final int endRow,
  1288.                                  final int startColumn, final int endColumn)
  1289.         throws OutOfRangeException, NumberIsTooSmallException {
  1290.         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
  1291.         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
  1292.         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
  1293.             final int p0 = iBlock * BLOCK_SIZE;
  1294.             final int pStart = JdkMath.max(startRow, p0);
  1295.             final int pEnd = JdkMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
  1296.             for (int p = pStart; p < pEnd; ++p) {
  1297.                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
  1298.                     final int jWidth = blockWidth(jBlock);
  1299.                     final int q0 = jBlock * BLOCK_SIZE;
  1300.                     final int qStart = JdkMath.max(startColumn, q0);
  1301.                     final int qEnd = JdkMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
  1302.                     final double[] block = blocks[iBlock * blockColumns + jBlock];
  1303.                     int k = (p - p0) * jWidth + qStart - q0;
  1304.                     for (int q = qStart; q < qEnd; ++q) {
  1305.                         visitor.visit(p, q, block[k]);
  1306.                         ++k;
  1307.                     }
  1308.                 }
  1309.              }
  1310.         }
  1311.         return visitor.end();
  1312.     }

  1313.     /** {@inheritDoc} */
  1314.     @Override
  1315.     public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor) {
  1316.         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
  1317.         int blockIndex = 0;
  1318.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  1319.             final int pStart = iBlock * BLOCK_SIZE;
  1320.             final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);
  1321.             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  1322.                 final int qStart = jBlock * BLOCK_SIZE;
  1323.                 final int qEnd = JdkMath.min(qStart + BLOCK_SIZE, columns);
  1324.                 final double[] block = blocks[blockIndex];
  1325.                 int k = 0;
  1326.                 for (int p = pStart; p < pEnd; ++p) {
  1327.                     for (int q = qStart; q < qEnd; ++q) {
  1328.                         block[k] = visitor.visit(p, q, block[k]);
  1329.                         ++k;
  1330.                     }
  1331.                 }
  1332.                 ++blockIndex;
  1333.             }
  1334.         }
  1335.         return visitor.end();
  1336.     }

  1337.     /** {@inheritDoc} */
  1338.     @Override
  1339.     public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor) {
  1340.         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
  1341.         int blockIndex = 0;
  1342.         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
  1343.             final int pStart = iBlock * BLOCK_SIZE;
  1344.             final int pEnd = JdkMath.min(pStart + BLOCK_SIZE, rows);
  1345.             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
  1346.                 final int qStart = jBlock * BLOCK_SIZE;
  1347.                 final int qEnd = JdkMath.min(qStart + BLOCK_SIZE, columns);
  1348.                 final double[] block = blocks[blockIndex];
  1349.                 int k = 0;
  1350.                 for (int p = pStart; p < pEnd; ++p) {
  1351.                     for (int q = qStart; q < qEnd; ++q) {
  1352.                         visitor.visit(p, q, block[k]);
  1353.                         ++k;
  1354.                     }
  1355.                 }
  1356.                 ++blockIndex;
  1357.             }
  1358.         }
  1359.         return visitor.end();
  1360.     }

  1361.     /** {@inheritDoc} */
  1362.     @Override
  1363.     public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor,
  1364.                                        final int startRow, final int endRow,
  1365.                                        final int startColumn,
  1366.                                        final int endColumn)
  1367.         throws OutOfRangeException, NumberIsTooSmallException {
  1368.         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
  1369.         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
  1370.         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
  1371.             final int p0 = iBlock * BLOCK_SIZE;
  1372.             final int pStart = JdkMath.max(startRow, p0);
  1373.             final int pEnd = JdkMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
  1374.             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
  1375.                 final int jWidth = blockWidth(jBlock);
  1376.                 final int q0 = jBlock * BLOCK_SIZE;
  1377.                 final int qStart = JdkMath.max(startColumn, q0);
  1378.                 final int qEnd = JdkMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
  1379.                 final double[] block = blocks[iBlock * blockColumns + jBlock];
  1380.                 for (int p = pStart; p < pEnd; ++p) {
  1381.                     int k = (p - p0) * jWidth + qStart - q0;
  1382.                     for (int q = qStart; q < qEnd; ++q) {
  1383.                         block[k] = visitor.visit(p, q, block[k]);
  1384.                         ++k;
  1385.                     }
  1386.                 }
  1387.             }
  1388.         }
  1389.         return visitor.end();
  1390.     }

  1391.     /** {@inheritDoc} */
  1392.     @Override
  1393.     public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor,
  1394.                                        final int startRow, final int endRow,
  1395.                                        final int startColumn,
  1396.                                        final int endColumn)
  1397.         throws OutOfRangeException, NumberIsTooSmallException {
  1398.         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
  1399.         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
  1400.         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
  1401.             final int p0 = iBlock * BLOCK_SIZE;
  1402.             final int pStart = JdkMath.max(startRow, p0);
  1403.             final int pEnd = JdkMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
  1404.             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
  1405.                 final int jWidth = blockWidth(jBlock);
  1406.                 final int q0 = jBlock * BLOCK_SIZE;
  1407.                 final int qStart = JdkMath.max(startColumn, q0);
  1408.                 final int qEnd = JdkMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
  1409.                 final double[] block = blocks[iBlock * blockColumns + jBlock];
  1410.                 for (int p = pStart; p < pEnd; ++p) {
  1411.                     int k = (p - p0) * jWidth + qStart - q0;
  1412.                     for (int q = qStart; q < qEnd; ++q) {
  1413.                         visitor.visit(p, q, block[k]);
  1414.                         ++k;
  1415.                     }
  1416.                 }
  1417.             }
  1418.         }
  1419.         return visitor.end();
  1420.     }

  1421.     /**
  1422.      * Get the height of a block.
  1423.      * @param blockRow row index (in block sense) of the block
  1424.      * @return height (number of rows) of the block
  1425.      */
  1426.     private int blockHeight(final int blockRow) {
  1427.         return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
  1428.     }

  1429.     /**
  1430.      * Get the width of a block.
  1431.      * @param blockColumn column index (in block sense) of the block
  1432.      * @return width (number of columns) of the block
  1433.      */
  1434.     private int blockWidth(final int blockColumn) {
  1435.         return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
  1436.     }
  1437. }