MemoryMappedFileInputStream.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.io.input;

  18. import static org.apache.commons.io.IOUtils.EOF;

  19. import java.io.BufferedInputStream;
  20. import java.io.IOException;
  21. import java.io.InputStream;
  22. import java.nio.ByteBuffer;
  23. import java.nio.channels.FileChannel;
  24. import java.nio.channels.FileChannel.MapMode;
  25. import java.nio.file.Path;
  26. import java.nio.file.StandardOpenOption;

  27. import org.apache.commons.io.build.AbstractStreamBuilder;

  28. /**
  29.  * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
  30.  * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
  31.  * configurable.
  32.  * <p>
  33.  * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
  34.  * kilobytes of data. From the standpoint of performance. it is generally only worth mapping relatively large files into
  35.  * memory.
  36.  * </p>
  37.  * <p>
  38.  * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
  39.  * use case, the use of buffering may still further improve performance. For example:
  40.  * </p>
  41.  * <p>
  42.  * To build an instance, use {@link Builder}.
  43.  * </p>
  44.  * <pre>{@code
  45.  * BufferedInputStream s = new BufferedInputStream(new GzipInputStream(
  46.  *   MemoryMappedFileInputStream.builder()
  47.  *     .setPath(path)
  48.  *     .setBufferSize(256 * 1024)
  49.  *     .get()));}
  50.  * </pre>
  51.  * <p>
  52.  * should outperform:
  53.  * </p>
  54.  * <pre>
  55.  * new GzipInputStream(new MemoryMappedFileInputStream(path))
  56.  * </pre>
  57.  * <pre>{@code
  58.  * GzipInputStream s = new GzipInputStream(
  59.  *   MemoryMappedFileInputStream.builder()
  60.  *     .setPath(path)
  61.  *     .setBufferSize(256 * 1024)
  62.  *     .get());}
  63.  * </pre>
  64.  *
  65.  * @see Builder
  66.  * @since 2.12.0
  67.  */
  68. public final class MemoryMappedFileInputStream extends AbstractInputStream {

  69.     // @formatter:off
  70.     /**
  71.      * Builds a new {@link MemoryMappedFileInputStream}.
  72.      *
  73.      * <p>
  74.      * For example:
  75.      * </p>
  76.      * <pre>{@code
  77.      * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
  78.      *   .setPath(path)
  79.      *   .setBufferSize(256 * 1024)
  80.      *   .get();}
  81.      * </pre>
  82.      *
  83.      * @see #get()
  84.      * @since 2.12.0
  85.      */
  86.     // @formatter:on
  87.     public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {

  88.         /**
  89.          * Constructs a new builder of {@link MemoryMappedFileInputStream}.
  90.          */
  91.         public Builder() {
  92.             setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
  93.             setBufferSize(DEFAULT_BUFFER_SIZE);
  94.         }

  95.         /**
  96.          * Builds a new {@link MemoryMappedFileInputStream}.
  97.          * <p>
  98.          * You must set an aspect that supports {@link #getPath()}, otherwise, this method throws an exception.
  99.          * </p>
  100.          * <p>
  101.          * This builder uses the following aspects:
  102.          * </p>
  103.          * <ul>
  104.          * <li>{@link #getPath()} gets the target aspect.</li>
  105.          * <li>{@link #getBufferSize()}</li>
  106.          * </ul>
  107.          *
  108.          * @return a new instance.
  109.          * @throws IllegalStateException         if the {@code origin} is {@code null}.
  110.          * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
  111.          * @throws IOException                   if an I/O error occurs converting to an {@link Path} using {@link #getPath()}.
  112.          * @see #getPath()
  113.          * @see #getBufferSize()
  114.          * @see #getUnchecked()
  115.          */
  116.         @Override
  117.         public MemoryMappedFileInputStream get() throws IOException {
  118.             return new MemoryMappedFileInputStream(this);
  119.         }
  120.     }

  121.     /**
  122.      * Default size of the sliding memory mapped buffer. We use 256K, equal to 65536 pages (given a 4K page size).
  123.      * Increasing the value beyond the default size will generally not provide any increase in throughput.
  124.      */
  125.     private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;

  126.     private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();

  127.     /**
  128.      * Constructs a new {@link Builder}.
  129.      *
  130.      * @return a new {@link Builder}.
  131.      * @since 2.12.0
  132.      */
  133.     public static Builder builder() {
  134.         return new Builder();
  135.     }

  136.     private final int bufferSize;
  137.     private final FileChannel channel;
  138.     private ByteBuffer buffer = EMPTY_BUFFER;

  139.     /**
  140.      * The starting position (within the file) of the next sliding buffer.
  141.      */
  142.     private long nextBufferPosition;

  143.     /**
  144.      * Constructs a new instance.
  145.      *
  146.      * @param builder The builder.
  147.      * @throws IOException If an I/O error occurs.
  148.      */
  149.     private MemoryMappedFileInputStream(final Builder builder) throws IOException {
  150.         this.bufferSize = builder.getBufferSize();
  151.         this.channel = FileChannel.open(builder.getPath(), StandardOpenOption.READ);
  152.     }

  153.     @Override
  154.     public int available() throws IOException {
  155.         //return buffer != null ? buffer.remaining(): 0;
  156.         return buffer.remaining();
  157.     }

  158.     private void cleanBuffer() {
  159.         if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
  160.             ByteBufferCleaner.clean(buffer);
  161.         }
  162.     }

  163.     @Override
  164.     public void close() throws IOException {
  165.         if (!isClosed()) {
  166.             cleanBuffer();
  167.             buffer = EMPTY_BUFFER;
  168.             channel.close();
  169.             super.close();
  170.         }
  171.     }

  172.     int getBufferSize() {
  173.         return bufferSize;
  174.     }

  175.     private void nextBuffer() throws IOException {
  176.         final long remainingInFile = channel.size() - nextBufferPosition;
  177.         if (remainingInFile > 0) {
  178.             final long amountToMap = Math.min(remainingInFile, bufferSize);
  179.             cleanBuffer();
  180.             buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
  181.             nextBufferPosition += amountToMap;
  182.         } else {
  183.             buffer = EMPTY_BUFFER;
  184.         }
  185.     }

  186.     @Override
  187.     public int read() throws IOException {
  188.         checkOpen();
  189.         if (!buffer.hasRemaining()) {
  190.             nextBuffer();
  191.             if (!buffer.hasRemaining()) {
  192.                 return EOF;
  193.             }
  194.         }
  195.         return Short.toUnsignedInt(buffer.get());
  196.     }

  197.     @Override
  198.     public int read(final byte[] b, final int off, final int len) throws IOException {
  199.         checkOpen();
  200.         if (!buffer.hasRemaining()) {
  201.             nextBuffer();
  202.             if (!buffer.hasRemaining()) {
  203.                 return EOF;
  204.             }
  205.         }
  206.         final int numBytes = Math.min(buffer.remaining(), len);
  207.         buffer.get(b, off, numBytes);
  208.         return numBytes;
  209.     }

  210.     @Override
  211.     public long skip(final long n) throws IOException {
  212.         checkOpen();
  213.         if (n <= 0) {
  214.             return 0;
  215.         }
  216.         if (n <= buffer.remaining()) {
  217.             buffer.position((int) (buffer.position() + n));
  218.             return n;
  219.         }
  220.         final long remainingInFile = channel.size() - nextBufferPosition;
  221.         final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
  222.         nextBufferPosition += skipped - buffer.remaining();
  223.         nextBuffer();
  224.         return skipped;
  225.     }

  226. }