001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import static org.apache.commons.io.IOUtils.EOF;
020
021import java.io.BufferedInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.ByteBuffer;
025import java.nio.channels.FileChannel;
026import java.nio.channels.FileChannel.MapMode;
027import java.nio.file.Path;
028import java.nio.file.StandardOpenOption;
029
030import org.apache.commons.io.build.AbstractStreamBuilder;
031
032/**
033 * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
034 * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
035 * configurable.
036 * <p>
037 * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
 * kilobytes of data. From the standpoint of performance, it is generally only worth mapping relatively large files into
039 * memory.
040 * </p>
 * <p>
 * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
 * use case, the use of buffering may still further improve performance.
 * </p>
 * <p>
 * To build an instance, use {@link Builder}. For example:
 * </p>
048 * <pre>{@code
049 * BufferedInputStream s = new BufferedInputStream(new GzipInputStream(
050 *   MemoryMappedFileInputStream.builder()
051 *     .setPath(path)
052 *     .setBufferSize(256 * 1024)
053 *     .get()));}
054 * </pre>
055 * <p>
056 * should outperform:
057 * </p>
058 * <pre>
059 * new GzipInputStream(new MemoryMappedFileInputStream(path))
060 * </pre>
061 * <pre>{@code
062 * GzipInputStream s = new GzipInputStream(
063 *   MemoryMappedFileInputStream.builder()
064 *     .setPath(path)
065 *     .setBufferSize(256 * 1024)
066 *     .get());}
067 * </pre>
068 *
069 * @see Builder
070 * @since 2.12.0
071 */
072public final class MemoryMappedFileInputStream extends InputStream {
073
074    // @formatter:off
075    /**
076     * Builds a new {@link MemoryMappedFileInputStream}.
077     *
078     * <p>
079     * For example:
080     * </p>
081     * <pre>{@code
082     * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
083     *   .setPath(path)
084     *   .setBufferSize(256 * 1024)
085     *   .get();}
086     * </pre>
087     *
088     * @see #get()
089     * @since 2.12.0
090     */
091    // @formatter:on
092    public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {
093
094        /**
095         * Constructs a new {@link Builder}.
096         */
097        public Builder() {
098            setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
099            setBufferSize(DEFAULT_BUFFER_SIZE);
100        }
101
102        /**
103         * Builds a new {@link MemoryMappedFileInputStream}.
104         * <p>
105         * You must set input that supports {@link #getPath()}, otherwise, this method throws an exception.
106         * </p>
107         * <p>
108         * This builder use the following aspects:
109         * </p>
110         * <ul>
111         * <li>{@link #getPath()}</li>
112         * <li>{@link #getBufferSize()}</li>
113         * </ul>
114         *
115         * @return a new instance.
116         * @throws IllegalStateException         if the {@code origin} is {@code null}.
117         * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
118         * @throws IOException                   if an I/O error occurs.
119         * @see #getPath()
120         * @see #getBufferSize()
121         */
122        @Override
123        public MemoryMappedFileInputStream get() throws IOException {
124            return new MemoryMappedFileInputStream(getPath(), getBufferSize());
125        }
126    }
127
128    /**
129     * Default size of the sliding memory mapped buffer. We use 256K, equal to 65536 pages (given a 4K page size).
130     * Increasing the value beyond the default size will generally not provide any increase in throughput.
131     */
132    private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;
133
134    private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();
135
136    /**
137     * Constructs a new {@link Builder}.
138     *
139     * @return a new {@link Builder}.
140     * @since 2.12.0
141     */
142    public static Builder builder() {
143        return new Builder();
144    }
145
146    private final int bufferSize;
147    private final FileChannel channel;
148    private ByteBuffer buffer = EMPTY_BUFFER;
149    private boolean closed;
150
151    /**
152     * The starting position (within the file) of the next sliding buffer.
153     */
154    private long nextBufferPosition;
155
156    /**
157     * Constructs a new instance.
158     *
159     * @param file The path of the file to open.
160     * @param bufferSize Size of the sliding buffer.
161     * @throws IOException If an I/O error occurs.
162     */
163    private MemoryMappedFileInputStream(final Path file, final int bufferSize) throws IOException {
164        this.bufferSize = bufferSize;
165        this.channel = FileChannel.open(file, StandardOpenOption.READ);
166    }
167
168    @Override
169    public int available() throws IOException {
170        return buffer.remaining();
171    }
172
173    private void cleanBuffer() {
174        if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
175            ByteBufferCleaner.clean(buffer);
176        }
177    }
178
179    @Override
180    public void close() throws IOException {
181        if (!closed) {
182            cleanBuffer();
183            buffer = null;
184            channel.close();
185            closed = true;
186        }
187    }
188
189    private void ensureOpen() throws IOException {
190        if (closed) {
191            throw new IOException("Stream closed");
192        }
193    }
194
195    int getBufferSize() {
196        return bufferSize;
197    }
198
199    private void nextBuffer() throws IOException {
200        final long remainingInFile = channel.size() - nextBufferPosition;
201        if (remainingInFile > 0) {
202            final long amountToMap = Math.min(remainingInFile, bufferSize);
203            cleanBuffer();
204            buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
205            nextBufferPosition += amountToMap;
206        } else {
207            buffer = EMPTY_BUFFER;
208        }
209    }
210
211    @Override
212    public int read() throws IOException {
213        ensureOpen();
214        if (!buffer.hasRemaining()) {
215            nextBuffer();
216            if (!buffer.hasRemaining()) {
217                return EOF;
218            }
219        }
220        return Short.toUnsignedInt(buffer.get());
221    }
222
223    @Override
224    public int read(final byte[] b, final int off, final int len) throws IOException {
225        ensureOpen();
226        if (!buffer.hasRemaining()) {
227            nextBuffer();
228            if (!buffer.hasRemaining()) {
229                return EOF;
230            }
231        }
232        final int numBytes = Math.min(buffer.remaining(), len);
233        buffer.get(b, off, numBytes);
234        return numBytes;
235    }
236
237    @Override
238    public long skip(final long n) throws IOException {
239        ensureOpen();
240        if (n <= 0) {
241            return 0;
242        }
243        if (n <= buffer.remaining()) {
244            buffer.position((int) (buffer.position() + n));
245            return n;
246        }
247        final long remainingInFile = channel.size() - nextBufferPosition;
248        final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
249        nextBufferPosition += skipped - buffer.remaining();
250        nextBuffer();
251        return skipped;
252    }
253
254}