View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.apache.commons.io.IOUtils.EOF;
20  
21  import java.io.BufferedInputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.ByteBuffer;
25  import java.nio.channels.FileChannel;
26  import java.nio.channels.FileChannel.MapMode;
27  import java.nio.file.Path;
28  import java.nio.file.StandardOpenOption;
29  
30  import org.apache.commons.io.build.AbstractStreamBuilder;
31  
32  /**
33   * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
34   * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
35   * configurable.
36   * <p>
37   * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
38   * kilobytes of data. From the standpoint of performance, it is generally only worth mapping relatively large files into
39   * memory.
40   * </p>
41   * <p>
42   * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
43   * use case, the use of buffering may still further improve performance. For example:
44   * </p>
45   * <p>
46   * To build an instance, use {@link Builder}.
47   * </p>
48   * <pre>{@code
49   * BufferedInputStream s = new BufferedInputStream(new GzipInputStream(
50   *   MemoryMappedFileInputStream.builder()
51   *     .setPath(path)
52   *     .setBufferSize(256 * 1024)
53   *     .get()));}
54   * </pre>
55   * <p>
56   * should outperform:
57   * </p>
58   * <pre>
59   * new GzipInputStream(new MemoryMappedFileInputStream(path))
60   * </pre>
61   * <pre>{@code
62   * GzipInputStream s = new GzipInputStream(
63   *   MemoryMappedFileInputStream.builder()
64   *     .setPath(path)
65   *     .setBufferSize(256 * 1024)
66   *     .get());}
67   * </pre>
68   *
69   * @see Builder
70   * @since 2.12.0
71   */
72  public final class MemoryMappedFileInputStream extends InputStream {
73  
74      // @formatter:off
75      /**
76       * Builds a new {@link MemoryMappedFileInputStream}.
77       *
78       * <p>
79       * For example:
80       * </p>
81       * <pre>{@code
82       * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
83       *   .setPath(path)
84       *   .setBufferSize(256 * 1024)
85       *   .get();}
86       * </pre>
87       *
88       * @see #get()
89       * @since 2.12.0
90       */
91      // @formatter:on
92      public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {
93  
94          /**
95           * Constructs a new {@link Builder}.
96           */
97          public Builder() {
98              setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
99              setBufferSize(DEFAULT_BUFFER_SIZE);
100         }
101 
102         /**
103          * Builds a new {@link MemoryMappedFileInputStream}.
104          * <p>
105          * You must set input that supports {@link #getPath()}, otherwise, this method throws an exception.
106          * </p>
107          * <p>
108          * This builder use the following aspects:
109          * </p>
110          * <ul>
111          * <li>{@link #getPath()}</li>
112          * <li>{@link #getBufferSize()}</li>
113          * </ul>
114          *
115          * @return a new instance.
116          * @throws IllegalStateException         if the {@code origin} is {@code null}.
117          * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
118          * @throws IOException                   if an I/O error occurs.
119          * @see #getPath()
120          * @see #getBufferSize()
121          */
122         @Override
123         public MemoryMappedFileInputStream get() throws IOException {
124             return new MemoryMappedFileInputStream(getPath(), getBufferSize());
125         }
126     }
127 
128     /**
129      * Default size of the sliding memory mapped buffer. We use 256K, equal to 65536 pages (given a 4K page size).
130      * Increasing the value beyond the default size will generally not provide any increase in throughput.
131      */
132     private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;
133 
134     private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();
135 
136     /**
137      * Constructs a new {@link Builder}.
138      *
139      * @return a new {@link Builder}.
140      * @since 2.12.0
141      */
142     public static Builder builder() {
143         return new Builder();
144     }
145 
146     private final int bufferSize;
147     private final FileChannel channel;
148     private ByteBuffer buffer = EMPTY_BUFFER;
149     private boolean closed;
150 
151     /**
152      * The starting position (within the file) of the next sliding buffer.
153      */
154     private long nextBufferPosition;
155 
156     /**
157      * Constructs a new instance.
158      *
159      * @param file The path of the file to open.
160      * @param bufferSize Size of the sliding buffer.
161      * @throws IOException If an I/O error occurs.
162      */
163     private MemoryMappedFileInputStream(final Path file, final int bufferSize) throws IOException {
164         this.bufferSize = bufferSize;
165         this.channel = FileChannel.open(file, StandardOpenOption.READ);
166     }
167 
168     @Override
169     public int available() throws IOException {
170         return buffer.remaining();
171     }
172 
173     private void cleanBuffer() {
174         if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
175             ByteBufferCleaner.clean(buffer);
176         }
177     }
178 
179     @Override
180     public void close() throws IOException {
181         if (!closed) {
182             cleanBuffer();
183             buffer = null;
184             channel.close();
185             closed = true;
186         }
187     }
188 
189     private void ensureOpen() throws IOException {
190         if (closed) {
191             throw new IOException("Stream closed");
192         }
193     }
194 
195     int getBufferSize() {
196         return bufferSize;
197     }
198 
199     private void nextBuffer() throws IOException {
200         final long remainingInFile = channel.size() - nextBufferPosition;
201         if (remainingInFile > 0) {
202             final long amountToMap = Math.min(remainingInFile, bufferSize);
203             cleanBuffer();
204             buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
205             nextBufferPosition += amountToMap;
206         } else {
207             buffer = EMPTY_BUFFER;
208         }
209     }
210 
211     @Override
212     public int read() throws IOException {
213         ensureOpen();
214         if (!buffer.hasRemaining()) {
215             nextBuffer();
216             if (!buffer.hasRemaining()) {
217                 return EOF;
218             }
219         }
220         return Short.toUnsignedInt(buffer.get());
221     }
222 
223     @Override
224     public int read(final byte[] b, final int off, final int len) throws IOException {
225         ensureOpen();
226         if (!buffer.hasRemaining()) {
227             nextBuffer();
228             if (!buffer.hasRemaining()) {
229                 return EOF;
230             }
231         }
232         final int numBytes = Math.min(buffer.remaining(), len);
233         buffer.get(b, off, numBytes);
234         return numBytes;
235     }
236 
237     @Override
238     public long skip(final long n) throws IOException {
239         ensureOpen();
240         if (n <= 0) {
241             return 0;
242         }
243         if (n <= buffer.remaining()) {
244             buffer.position((int) (buffer.position() + n));
245             return n;
246         }
247         final long remainingInFile = channel.size() - nextBufferPosition;
248         final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
249         nextBufferPosition += skipped - buffer.remaining();
250         nextBuffer();
251         return skipped;
252     }
253 
254 }