001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.input; 018 019import static org.apache.commons.io.IOUtils.EOF; 020 021import java.io.BufferedInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.nio.ByteBuffer; 025import java.nio.channels.FileChannel; 026import java.nio.channels.FileChannel.MapMode; 027import java.nio.file.Path; 028import java.nio.file.StandardOpenOption; 029 030import org.apache.commons.io.IOUtils; 031import org.apache.commons.io.build.AbstractStreamBuilder; 032 033/** 034 * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is 035 * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is 036 * configurable. 037 * <p> 038 * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of 039 * kilobytes of data. From the standpoint of performance. it is generally only worth mapping relatively large files into 040 * memory. 041 * </p> 042 * <p> 043 * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the 044 * use case, the use of buffering may still further improve performance. For example: 045 * </p> 046 * <p> 047 * To build an instance, use {@link Builder}. 048 * </p> 049 * <pre>{@code 050 * BufferedInputStream s = new BufferedInputStream(new GzipInputStream( 051 * MemoryMappedFileInputStream.builder() 052 * .setPath(path) 053 * .setBufferSize(256 * 1024) 054 * .get()));} 055 * </pre> 056 * <p> 057 * should outperform: 058 * </p> 059 * <pre> 060 * new GzipInputStream(new MemoryMappedFileInputStream(path)) 061 * </pre> 062 * <pre>{@code 063 * GzipInputStream s = new GzipInputStream( 064 * MemoryMappedFileInputStream.builder() 065 * .setPath(path) 066 * .setBufferSize(256 * 1024) 067 * .get());} 068 * </pre> 069 * 070 * @see Builder 071 * @since 2.12.0 072 */ 073public final class MemoryMappedFileInputStream extends AbstractInputStream { 074 075 // @formatter:off 076 /** 077 * Builds a new {@link MemoryMappedFileInputStream}. 078 * 079 * <p> 080 * For example: 081 * </p> 082 * <pre>{@code 083 * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder() 084 * .setPath(path) 085 * .setBufferSize(256 * 1024) 086 * .get();} 087 * </pre> 088 * 089 * @see #get() 090 * @since 2.12.0 091 */ 092 // @formatter:on 093 public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> { 094 095 /** 096 * Constructs a new builder of {@link MemoryMappedFileInputStream}. 097 */ 098 public Builder() { 099 setBufferSizeDefault(DEFAULT_BUFFER_SIZE); 100 setBufferSize(DEFAULT_BUFFER_SIZE); 101 } 102 103 /** 104 * Builds a new {@link MemoryMappedFileInputStream}. 105 * <p> 106 * You must set an aspect that supports {@link #getPath()}, otherwise, this method throws an exception. 107 * </p> 108 * <p> 109 * This builder uses the following aspects: 110 * </p> 111 * <ul> 112 * <li>{@link #getPath()} gets the target aspect.</li> 113 * <li>{@link #getBufferSize()}</li> 114 * </ul> 115 * 116 * @return a new instance. 117 * @throws IllegalStateException if the {@code origin} is {@code null}. 118 * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}. 119 * @throws IOException if an I/O error occurs converting to an {@link Path} using {@link #getPath()}. 120 * @see #getPath() 121 * @see #getBufferSize() 122 * @see #getUnchecked() 123 */ 124 @Override 125 public MemoryMappedFileInputStream get() throws IOException { 126 return new MemoryMappedFileInputStream(this); 127 } 128 } 129 130 /** 131 * Default size of the sliding memory mapped buffer. We use 256K, equal to 65536 pages (given a 4K page size). 132 * Increasing the value beyond the default size will generally not provide any increase in throughput. 133 */ 134 private static final int DEFAULT_BUFFER_SIZE = 256 * 1024; 135 136 private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer(); 137 138 /** 139 * Constructs a new {@link Builder}. 140 * 141 * @return a new {@link Builder}. 142 * @since 2.12.0 143 */ 144 public static Builder builder() { 145 return new Builder(); 146 } 147 148 private final int bufferSize; 149 private final FileChannel channel; 150 private ByteBuffer buffer = EMPTY_BUFFER; 151 152 /** 153 * The starting position (within the file) of the next sliding buffer. 154 */ 155 private long nextBufferPosition; 156 157 /** 158 * Constructs a new instance. 159 * 160 * @param builder The builder. 161 * @throws IOException If an I/O error occurs. 162 */ 163 private MemoryMappedFileInputStream(final Builder builder) throws IOException { 164 this.bufferSize = builder.getBufferSize(); 165 this.channel = FileChannel.open(builder.getPath(), StandardOpenOption.READ); 166 } 167 168 @Override 169 public int available() throws IOException { 170 //return buffer != null ? buffer.remaining(): 0; 171 return buffer.remaining(); 172 } 173 174 private void cleanBuffer() { 175 if (ByteBufferCleaner.isSupported() && buffer.isDirect()) { 176 ByteBufferCleaner.clean(buffer); 177 } 178 } 179 180 @Override 181 public void close() throws IOException { 182 if (!isClosed()) { 183 cleanBuffer(); 184 buffer = EMPTY_BUFFER; 185 channel.close(); 186 super.close(); 187 } 188 } 189 190 int getBufferSize() { 191 return bufferSize; 192 } 193 194 private void nextBuffer() throws IOException { 195 final long remainingInFile = channel.size() - nextBufferPosition; 196 if (remainingInFile > 0) { 197 final long amountToMap = Math.min(remainingInFile, bufferSize); 198 cleanBuffer(); 199 buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap); 200 nextBufferPosition += amountToMap; 201 } else { 202 buffer = EMPTY_BUFFER; 203 } 204 } 205 206 @Override 207 public int read() throws IOException { 208 checkOpen(); 209 if (!buffer.hasRemaining()) { 210 nextBuffer(); 211 if (!buffer.hasRemaining()) { 212 return EOF; 213 } 214 } 215 return Short.toUnsignedInt(buffer.get()); 216 } 217 218 @Override 219 public int read(final byte[] b, final int off, final int len) throws IOException { 220 IOUtils.checkFromIndexSize(b, off, len); 221 if (len == 0) { 222 return 0; 223 } 224 checkOpen(); 225 if (!buffer.hasRemaining()) { 226 nextBuffer(); 227 if (!buffer.hasRemaining()) { 228 return EOF; 229 } 230 } 231 final int numBytes = Math.min(buffer.remaining(), len); 232 buffer.get(b, off, numBytes); 233 return numBytes; 234 } 235 236 @Override 237 public long skip(final long n) throws IOException { 238 checkOpen(); 239 if (n <= 0) { 240 return 0; 241 } 242 if (n <= buffer.remaining()) { 243 buffer.position((int) (buffer.position() + n)); 244 return n; 245 } 246 final long remainingInFile = channel.size() - nextBufferPosition; 247 final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining()); 248 nextBufferPosition += skipped - buffer.remaining(); 249 nextBuffer(); 250 return skipped; 251 } 252 253}