001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import static org.apache.commons.io.IOUtils.EOF;
020
021import java.io.BufferedInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.ByteBuffer;
025import java.nio.channels.FileChannel;
026import java.nio.channels.FileChannel.MapMode;
027import java.nio.file.Path;
028import java.nio.file.StandardOpenOption;
029
030import org.apache.commons.io.IOUtils;
031import org.apache.commons.io.build.AbstractStreamBuilder;
032
033/**
034 * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
035 * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
036 * configurable.
037 * <p>
038 * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
 * kilobytes of data. From the standpoint of performance, it is generally only worth mapping relatively large files into
040 * memory.
041 * </p>
 * <p>
 * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
 * use case, the use of buffering may still further improve performance.
 * </p>
 * <p>
 * To build an instance, use {@link Builder}. For example:
 * </p>
049 * <pre>{@code
050 * BufferedInputStream s = new BufferedInputStream(new GzipInputStream(
051 *   MemoryMappedFileInputStream.builder()
052 *     .setPath(path)
053 *     .setBufferSize(256 * 1024)
054 *     .get()));}
055 * </pre>
056 * <p>
057 * should outperform:
058 * </p>
059 * <pre>
060 * new GzipInputStream(new MemoryMappedFileInputStream(path))
061 * </pre>
062 * <pre>{@code
063 * GzipInputStream s = new GzipInputStream(
064 *   MemoryMappedFileInputStream.builder()
065 *     .setPath(path)
066 *     .setBufferSize(256 * 1024)
067 *     .get());}
068 * </pre>
069 *
070 * @see Builder
071 * @since 2.12.0
072 */
073public final class MemoryMappedFileInputStream extends AbstractInputStream {
074
075    // @formatter:off
076    /**
077     * Builds a new {@link MemoryMappedFileInputStream}.
078     *
079     * <p>
080     * For example:
081     * </p>
082     * <pre>{@code
083     * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
084     *   .setPath(path)
085     *   .setBufferSize(256 * 1024)
086     *   .get();}
087     * </pre>
088     *
089     * @see #get()
090     * @since 2.12.0
091     */
092    // @formatter:on
093    public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {
094
095        /**
096         * Constructs a new builder of {@link MemoryMappedFileInputStream}.
097         */
098        public Builder() {
099            setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
100            setBufferSize(DEFAULT_BUFFER_SIZE);
101        }
102
103        /**
104         * Builds a new {@link MemoryMappedFileInputStream}.
105         * <p>
106         * You must set an aspect that supports {@link #getPath()}, otherwise, this method throws an exception.
107         * </p>
108         * <p>
109         * This builder uses the following aspects:
110         * </p>
111         * <ul>
112         * <li>{@link #getPath()} gets the target aspect.</li>
113         * <li>{@link #getBufferSize()}</li>
114         * </ul>
115         *
116         * @return a new instance.
117         * @throws IllegalStateException         if the {@code origin} is {@code null}.
118         * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
119         * @throws IOException                   if an I/O error occurs converting to an {@link Path} using {@link #getPath()}.
120         * @see #getPath()
121         * @see #getBufferSize()
122         * @see #getUnchecked()
123         */
124        @Override
125        public MemoryMappedFileInputStream get() throws IOException {
126            return new MemoryMappedFileInputStream(this);
127        }
128    }
129
130    /**
131     * Default size of the sliding memory mapped buffer. We use 256K, equal to 65536 pages (given a 4K page size).
132     * Increasing the value beyond the default size will generally not provide any increase in throughput.
133     */
134    private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;
135
136    private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();
137
138    /**
139     * Constructs a new {@link Builder}.
140     *
141     * @return a new {@link Builder}.
142     * @since 2.12.0
143     */
144    public static Builder builder() {
145        return new Builder();
146    }
147
148    private final int bufferSize;
149    private final FileChannel channel;
150    private ByteBuffer buffer = EMPTY_BUFFER;
151
152    /**
153     * The starting position (within the file) of the next sliding buffer.
154     */
155    private long nextBufferPosition;
156
157    /**
158     * Constructs a new instance.
159     *
160     * @param builder The builder.
161     * @throws IOException If an I/O error occurs.
162     */
163    private MemoryMappedFileInputStream(final Builder builder) throws IOException {
164        this.bufferSize = builder.getBufferSize();
165        this.channel = FileChannel.open(builder.getPath(), StandardOpenOption.READ);
166    }
167
168    @Override
169    public int available() throws IOException {
170        //return buffer != null ? buffer.remaining(): 0;
171        return buffer.remaining();
172    }
173
174    private void cleanBuffer() {
175        if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
176            ByteBufferCleaner.clean(buffer);
177        }
178    }
179
180    @Override
181    public void close() throws IOException {
182        if (!isClosed()) {
183            cleanBuffer();
184            buffer = EMPTY_BUFFER;
185            channel.close();
186            super.close();
187        }
188    }
189
190    int getBufferSize() {
191        return bufferSize;
192    }
193
194    private void nextBuffer() throws IOException {
195        final long remainingInFile = channel.size() - nextBufferPosition;
196        if (remainingInFile > 0) {
197            final long amountToMap = Math.min(remainingInFile, bufferSize);
198            cleanBuffer();
199            buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
200            nextBufferPosition += amountToMap;
201        } else {
202            buffer = EMPTY_BUFFER;
203        }
204    }
205
206    @Override
207    public int read() throws IOException {
208        checkOpen();
209        if (!buffer.hasRemaining()) {
210            nextBuffer();
211            if (!buffer.hasRemaining()) {
212                return EOF;
213            }
214        }
215        return Short.toUnsignedInt(buffer.get());
216    }
217
218    @Override
219    public int read(final byte[] b, final int off, final int len) throws IOException {
220        IOUtils.checkFromIndexSize(b, off, len);
221        if (len == 0) {
222            return 0;
223        }
224        checkOpen();
225        if (!buffer.hasRemaining()) {
226            nextBuffer();
227            if (!buffer.hasRemaining()) {
228                return EOF;
229            }
230        }
231        final int numBytes = Math.min(buffer.remaining(), len);
232        buffer.get(b, off, numBytes);
233        return numBytes;
234    }
235
236    @Override
237    public long skip(final long n) throws IOException {
238        checkOpen();
239        if (n <= 0) {
240            return 0;
241        }
242        if (n <= buffer.remaining()) {
243            buffer.position((int) (buffer.position() + n));
244            return n;
245        }
246        final long remainingInFile = channel.size() - nextBufferPosition;
247        final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
248        nextBufferPosition += skipped - buffer.remaining();
249        nextBuffer();
250        return skipped;
251    }
252
253}