View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.apache.commons.io.IOUtils.EOF;
20  
21  import java.io.BufferedInputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.ByteBuffer;
25  import java.nio.channels.FileChannel;
26  import java.nio.channels.FileChannel.MapMode;
27  import java.nio.file.Path;
28  import java.nio.file.StandardOpenOption;
29  
30  import org.apache.commons.io.IOUtils;
31  import org.apache.commons.io.build.AbstractStreamBuilder;
32  
33  /**
34   * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
35   * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
36   * configurable.
37   * <p>
38   * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
39   * kilobytes of data. From the standpoint of performance, it is generally only worth mapping relatively large files into
40   * memory.
41   * </p>
42   * <p>
43   * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
44   * use case, the use of buffering may still further improve performance. For example:
45   * </p>
46   * <p>
47   * To build an instance, use {@link Builder}.
48   * </p>
49   * <pre>{@code
50   * BufferedInputStream s = new BufferedInputStream(new GzipInputStream(
51   *   MemoryMappedFileInputStream.builder()
52   *     .setPath(path)
53   *     .setBufferSize(256 * 1024)
54   *     .get()));}
55   * </pre>
56   * <p>
57   * should outperform:
58   * </p>
59   * <pre>
60   * new GzipInputStream(new MemoryMappedFileInputStream(path))
61   * </pre>
62   * <pre>{@code
63   * GzipInputStream s = new GzipInputStream(
64   *   MemoryMappedFileInputStream.builder()
65   *     .setPath(path)
66   *     .setBufferSize(256 * 1024)
67   *     .get());}
68   * </pre>
69   *
70   * @see Builder
71   * @since 2.12.0
72   */
73  public final class MemoryMappedFileInputStream extends AbstractInputStream {
74  
75      // @formatter:off
76      /**
77       * Builds a new {@link MemoryMappedFileInputStream}.
78       *
79       * <p>
80       * For example:
81       * </p>
82       * <pre>{@code
83       * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
84       *   .setPath(path)
85       *   .setBufferSize(256 * 1024)
86       *   .get();}
87       * </pre>
88       *
89       * @see #get()
90       * @since 2.12.0
91       */
92      // @formatter:on
93      public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {
94  
95          /**
96           * Constructs a new builder of {@link MemoryMappedFileInputStream}.
97           */
98          public Builder() {
99              setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
100             setBufferSize(DEFAULT_BUFFER_SIZE);
101         }
102 
103         /**
104          * Builds a new {@link MemoryMappedFileInputStream}.
105          * <p>
106          * You must set an aspect that supports {@link #getPath()}, otherwise, this method throws an exception.
107          * </p>
108          * <p>
109          * This builder uses the following aspects:
110          * </p>
111          * <ul>
112          * <li>{@link #getPath()} gets the target aspect.</li>
113          * <li>{@link #getBufferSize()}</li>
114          * </ul>
115          *
116          * @return a new instance.
117          * @throws IllegalStateException         if the {@code origin} is {@code null}.
118          * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
119          * @throws IOException                   if an I/O error occurs converting to an {@link Path} using {@link #getPath()}.
120          * @see #getPath()
121          * @see #getBufferSize()
122          * @see #getUnchecked()
123          */
124         @Override
125         public MemoryMappedFileInputStream get() throws IOException {
126             return new MemoryMappedFileInputStream(this);
127         }
128     }
129 
130     /**
131      * Default size of the sliding memory mapped buffer. We use 256K, equal to 65536 pages (given a 4K page size).
132      * Increasing the value beyond the default size will generally not provide any increase in throughput.
133      */
134     private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;
135 
136     private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();
137 
138     /**
139      * Constructs a new {@link Builder}.
140      *
141      * @return a new {@link Builder}.
142      * @since 2.12.0
143      */
144     public static Builder builder() {
145         return new Builder();
146     }
147 
148     private final int bufferSize;
149     private final FileChannel channel;
150     private ByteBuffer buffer = EMPTY_BUFFER;
151 
152     /**
153      * The starting position (within the file) of the next sliding buffer.
154      */
155     private long nextBufferPosition;
156 
157     /**
158      * Constructs a new instance.
159      *
160      * @param builder The builder.
161      * @throws IOException If an I/O error occurs.
162      */
163     private MemoryMappedFileInputStream(final Builder builder) throws IOException {
164         this.bufferSize = builder.getBufferSize();
165         this.channel = FileChannel.open(builder.getPath(), StandardOpenOption.READ);
166     }
167 
168     @Override
169     public int available() throws IOException {
170         //return buffer != null ? buffer.remaining(): 0;
171         return buffer.remaining();
172     }
173 
174     private void cleanBuffer() {
175         if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
176             ByteBufferCleaner.clean(buffer);
177         }
178     }
179 
180     @Override
181     public void close() throws IOException {
182         if (!isClosed()) {
183             cleanBuffer();
184             buffer = EMPTY_BUFFER;
185             channel.close();
186             super.close();
187         }
188     }
189 
190     int getBufferSize() {
191         return bufferSize;
192     }
193 
194     private void nextBuffer() throws IOException {
195         final long remainingInFile = channel.size() - nextBufferPosition;
196         if (remainingInFile > 0) {
197             final long amountToMap = Math.min(remainingInFile, bufferSize);
198             cleanBuffer();
199             buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
200             nextBufferPosition += amountToMap;
201         } else {
202             buffer = EMPTY_BUFFER;
203         }
204     }
205 
206     @Override
207     public int read() throws IOException {
208         checkOpen();
209         if (!buffer.hasRemaining()) {
210             nextBuffer();
211             if (!buffer.hasRemaining()) {
212                 return EOF;
213             }
214         }
215         return Short.toUnsignedInt(buffer.get());
216     }
217 
218     @Override
219     public int read(final byte[] b, final int off, final int len) throws IOException {
220         IOUtils.checkFromIndexSize(b, off, len);
221         if (len == 0) {
222             return 0;
223         }
224         checkOpen();
225         if (!buffer.hasRemaining()) {
226             nextBuffer();
227             if (!buffer.hasRemaining()) {
228                 return EOF;
229             }
230         }
231         final int numBytes = Math.min(buffer.remaining(), len);
232         buffer.get(b, off, numBytes);
233         return numBytes;
234     }
235 
236     @Override
237     public long skip(final long n) throws IOException {
238         checkOpen();
239         if (n <= 0) {
240             return 0;
241         }
242         if (n <= buffer.remaining()) {
243             buffer.position((int) (buffer.position() + n));
244             return n;
245         }
246         final long remainingInFile = channel.size() - nextBufferPosition;
247         final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
248         nextBufferPosition += skipped - buffer.remaining();
249         nextBuffer();
250         return skipped;
251     }
252 
253 }