1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.io.input;
18
19 import static org.apache.commons.io.IOUtils.EOF;
20
21 import java.io.BufferedInputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.nio.ByteBuffer;
25 import java.nio.channels.FileChannel;
26 import java.nio.channels.FileChannel.MapMode;
27 import java.nio.file.Path;
28 import java.nio.file.StandardOpenOption;
29
30 import org.apache.commons.io.build.AbstractStreamBuilder;
31
32 /**
33 * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
34 * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
35 * configurable.
36 * <p>
37 * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
 * kilobytes of data. From the standpoint of performance, it is generally only worth mapping relatively large files into
39 * memory.
40 * </p>
41 * <p>
42 * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
43 * use case, the use of buffering may still further improve performance. For example:
44 * </p>
45 * <p>
46 * To build an instance, use {@link Builder}.
47 * </p>
48 * <pre>{@code
49 * BufferedInputStream s = new BufferedInputStream(new GzipInputStream(
50 * MemoryMappedFileInputStream.builder()
51 * .setPath(path)
52 * .setBufferSize(256 * 1024)
53 * .get()));}
54 * </pre>
55 * <p>
56 * should outperform:
57 * </p>
58 * <pre>
59 * new GzipInputStream(new MemoryMappedFileInputStream(path))
60 * </pre>
61 * <pre>{@code
62 * GzipInputStream s = new GzipInputStream(
63 * MemoryMappedFileInputStream.builder()
64 * .setPath(path)
65 * .setBufferSize(256 * 1024)
66 * .get());}
67 * </pre>
68 *
69 * @see Builder
70 * @since 2.12.0
71 */
72 public final class MemoryMappedFileInputStream extends InputStream {
73
74 // @formatter:off
75 /**
76 * Builds a new {@link MemoryMappedFileInputStream}.
77 *
78 * <p>
79 * For example:
80 * </p>
81 * <pre>{@code
82 * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
83 * .setPath(path)
84 * .setBufferSize(256 * 1024)
85 * .get();}
86 * </pre>
87 *
88 * @see #get()
89 * @since 2.12.0
90 */
91 // @formatter:on
92 public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {
93
94 /**
95 * Constructs a new {@link Builder}.
96 */
97 public Builder() {
98 setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
99 setBufferSize(DEFAULT_BUFFER_SIZE);
100 }
101
102 /**
103 * Builds a new {@link MemoryMappedFileInputStream}.
104 * <p>
105 * You must set input that supports {@link #getPath()}, otherwise, this method throws an exception.
106 * </p>
107 * <p>
108 * This builder use the following aspects:
109 * </p>
110 * <ul>
111 * <li>{@link #getPath()}</li>
112 * <li>{@link #getBufferSize()}</li>
113 * </ul>
114 *
115 * @return a new instance.
116 * @throws IllegalStateException if the {@code origin} is {@code null}.
117 * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
118 * @throws IOException if an I/O error occurs.
119 * @see #getPath()
120 * @see #getBufferSize()
121 */
122 @Override
123 public MemoryMappedFileInputStream get() throws IOException {
124 return new MemoryMappedFileInputStream(getPath(), getBufferSize());
125 }
126 }
127
128 /**
129 * Default size of the sliding memory mapped buffer. We use 256K, equal to 65536 pages (given a 4K page size).
130 * Increasing the value beyond the default size will generally not provide any increase in throughput.
131 */
132 private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;
133
134 private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();
135
136 /**
137 * Constructs a new {@link Builder}.
138 *
139 * @return a new {@link Builder}.
140 * @since 2.12.0
141 */
142 public static Builder builder() {
143 return new Builder();
144 }
145
146 private final int bufferSize;
147 private final FileChannel channel;
148 private ByteBuffer buffer = EMPTY_BUFFER;
149 private boolean closed;
150
151 /**
152 * The starting position (within the file) of the next sliding buffer.
153 */
154 private long nextBufferPosition;
155
156 /**
157 * Constructs a new instance.
158 *
159 * @param file The path of the file to open.
160 * @param bufferSize Size of the sliding buffer.
161 * @throws IOException If an I/O error occurs.
162 */
163 private MemoryMappedFileInputStream(final Path file, final int bufferSize) throws IOException {
164 this.bufferSize = bufferSize;
165 this.channel = FileChannel.open(file, StandardOpenOption.READ);
166 }
167
168 @Override
169 public int available() throws IOException {
170 return buffer.remaining();
171 }
172
173 private void cleanBuffer() {
174 if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
175 ByteBufferCleaner.clean(buffer);
176 }
177 }
178
179 @Override
180 public void close() throws IOException {
181 if (!closed) {
182 cleanBuffer();
183 buffer = null;
184 channel.close();
185 closed = true;
186 }
187 }
188
189 private void ensureOpen() throws IOException {
190 if (closed) {
191 throw new IOException("Stream closed");
192 }
193 }
194
195 int getBufferSize() {
196 return bufferSize;
197 }
198
199 private void nextBuffer() throws IOException {
200 final long remainingInFile = channel.size() - nextBufferPosition;
201 if (remainingInFile > 0) {
202 final long amountToMap = Math.min(remainingInFile, bufferSize);
203 cleanBuffer();
204 buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
205 nextBufferPosition += amountToMap;
206 } else {
207 buffer = EMPTY_BUFFER;
208 }
209 }
210
211 @Override
212 public int read() throws IOException {
213 ensureOpen();
214 if (!buffer.hasRemaining()) {
215 nextBuffer();
216 if (!buffer.hasRemaining()) {
217 return EOF;
218 }
219 }
220 return Short.toUnsignedInt(buffer.get());
221 }
222
223 @Override
224 public int read(final byte[] b, final int off, final int len) throws IOException {
225 ensureOpen();
226 if (!buffer.hasRemaining()) {
227 nextBuffer();
228 if (!buffer.hasRemaining()) {
229 return EOF;
230 }
231 }
232 final int numBytes = Math.min(buffer.remaining(), len);
233 buffer.get(b, off, numBytes);
234 return numBytes;
235 }
236
237 @Override
238 public long skip(final long n) throws IOException {
239 ensureOpen();
240 if (n <= 0) {
241 return 0;
242 }
243 if (n <= buffer.remaining()) {
244 buffer.position((int) (buffer.position() + n));
245 return n;
246 }
247 final long remainingInFile = channel.size() - nextBufferPosition;
248 final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
249 nextBufferPosition += skipped - buffer.remaining();
250 nextBuffer();
251 return skipped;
252 }
253
254 }