/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.input;

import static org.apache.commons.io.IOUtils.EOF;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

import org.apache.commons.io.build.AbstractOrigin;
import org.apache.commons.io.build.AbstractStreamBuilder;

/**
 * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
 * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
 * configurable.
 * <p>
 * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
 * kilobytes of data. From the standpoint of performance, it is generally only worth mapping relatively large files into
 * memory.
 * </p>
 * <p>
 * To build an instance, see {@link Builder}.
 * </p>
 * <p>
 * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
 * use case, the use of buffering may still further improve performance. For example:
 * </p>
 * <pre>{@code
 * BufferedInputStream s = new BufferedInputStream(new GzipInputStream(
 *   MemoryMappedFileInputStream.builder()
 *     .setPath(path)
 *     .setBufferSize(256 * 1024)
 *     .get()));}
 * </pre>
 * <p>
 * should outperform:
 * </p>
 * <pre>{@code
 * GzipInputStream s = new GzipInputStream(
 *   MemoryMappedFileInputStream.builder()
 *     .setPath(path)
 *     .setBufferSize(256 * 1024)
 *     .get());}
 * </pre>
 *
 * @since 2.12.0
 */
public final class MemoryMappedFileInputStream extends InputStream {

    /**
     * Builds a new {@link MemoryMappedFileInputStream} instance.
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
     *   .setPath(path)
     *   .setBufferSize(256 * 1024)
     *   .get();}
     * </pre>
     *
     * @since 2.12.0
     */
    public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {

        /**
         * Constructs a new Builder whose default and initial buffer sizes are both {@link #DEFAULT_BUFFER_SIZE}.
         */
        public Builder() {
            setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
            setBufferSize(DEFAULT_BUFFER_SIZE);
        }

        /**
         * Constructs a new instance.
         * <p>
         * This builder uses the aspects Path and buffer size.
         * </p>
         * <p>
         * You must provide an origin that can be converted to a Path by this builder, otherwise, this call will throw an
         * {@link UnsupportedOperationException}.
         * </p>
         *
         * @return a new instance.
         * @throws IOException                   if the file cannot be opened for reading.
         * @throws UnsupportedOperationException if the origin cannot provide a Path.
         * @see AbstractOrigin#getPath()
         */
        @Override
        public MemoryMappedFileInputStream get() throws IOException {
            return new MemoryMappedFileInputStream(getPath(), getBufferSize());
        }
    }

    /**
     * Default size of the sliding memory mapped buffer. We use 256K, equal to 64 pages (given a 4K page size).
     * Increasing the value beyond the default size will generally not provide any increase in throughput.
     */
    private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;

    /**
     * Shared, read-only, zero-length heap buffer used whenever no window of the file is currently mapped.
     */
    private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Maximum number of bytes mapped per window.
     */
    private final int bufferSize;

    /**
     * Channel on the underlying file; opened read-only by the constructor and closed by {@link #close()}.
     */
    private final FileChannel channel;

    /**
     * The current window; never {@code null}. Holds {@link #EMPTY_BUFFER} when nothing is mapped.
     */
    private ByteBuffer buffer = EMPTY_BUFFER;

    /**
     * Whether {@link #close()} has been called.
     */
    private boolean closed;

    /**
     * The starting position (within the file) of the next sliding buffer.
     */
    private long nextBufferPosition;

    /**
     * Constructs a new instance.
     *
     * @param file       The path of the file to open.
     * @param bufferSize Size of the sliding buffer.
     * @throws IOException If an I/O error occurs.
     */
    private MemoryMappedFileInputStream(final Path file, final int bufferSize) throws IOException {
        this.bufferSize = bufferSize;
        this.channel = FileChannel.open(file, StandardOpenOption.READ);
    }

    /**
     * Returns the number of bytes left in the currently mapped window. This is the amount that can be read without
     * mapping another window, not the amount left in the file. Returns 0 once the stream is closed.
     *
     * @return the number of unread bytes in the current window.
     * @throws IOException never thrown by this implementation.
     */
    @Override
    public int available() throws IOException {
        return buffer.remaining();
    }

    /**
     * Hands the current window to {@link ByteBufferCleaner} when cleaning is supported and the buffer is direct.
     * The shared {@link #EMPTY_BUFFER} is a heap buffer and is therefore never cleaned.
     */
    private void cleanBuffer() {
        if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
            ByteBufferCleaner.clean(buffer);
        }
    }

    /**
     * Releases the current window and closes the underlying channel. Calling this method more than once has no
     * further effect.
     *
     * @throws IOException if closing the channel fails.
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            // Mark closed up front so a failure below cannot leave the stream usable with a released window.
            closed = true;
            try {
                cleanBuffer();
            } finally {
                // Keep the field non-null so available() stays safe after close, and always close the channel.
                buffer = EMPTY_BUFFER;
                channel.close();
            }
        }
    }

    /**
     * Fails if this stream has been closed.
     *
     * @throws IOException if {@link #close()} has been called.
     */
    private void ensureOpen() throws IOException {
        if (closed) {
            throw new IOException("Stream closed");
        }
    }

    /**
     * Gets the configured size of the sliding buffer.
     *
     * @return the sliding buffer size in bytes.
     */
    int getBufferSize() {
        return bufferSize;
    }

    /**
     * Replaces the current window with the next one, starting at {@link #nextBufferPosition} and spanning at most
     * {@link #bufferSize} bytes. When nothing is left in the file, the current window becomes {@link #EMPTY_BUFFER}.
     *
     * @throws IOException if the file size cannot be determined or the mapping fails.
     */
    private void nextBuffer() throws IOException {
        final long remainingInFile = channel.size() - nextBufferPosition;
        // The previous window is handed to the cleaner here, so stop referencing it before attempting the next
        // mapping; if map() throws, the field must not keep pointing at a buffer that has already been cleaned.
        cleanBuffer();
        buffer = EMPTY_BUFFER;
        if (remainingInFile > 0) {
            final long amountToMap = Math.min(remainingInFile, bufferSize);
            buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
            nextBufferPosition += amountToMap;
        }
    }

    /**
     * Reads the next byte, mapping the next window of the file if the current one is exhausted.
     *
     * @return the next byte as an {@code int} in the range 0 to 255, or {@code -1} at the end of the file.
     * @throws IOException if this stream is closed or an I/O error occurs.
     */
    @Override
    public int read() throws IOException {
        ensureOpen();
        if (!buffer.hasRemaining()) {
            nextBuffer();
            if (!buffer.hasRemaining()) {
                return EOF;
            }
        }
        // Byte (not Short) conversion: widening the byte to short first would sign-extend values >= 0x80
        // and yield results above 255.
        return Byte.toUnsignedInt(buffer.get());
    }

    /**
     * Reads up to {@code len} bytes into {@code b}. At most the remainder of the current window is returned per
     * call, so fewer than {@code len} bytes may be read even when more data is left in the file.
     *
     * @param b   the destination array.
     * @param off the start offset in {@code b}.
     * @param len the maximum number of bytes to read.
     * @return the number of bytes read, 0 if {@code len} is 0, or {@code -1} at the end of the file.
     * @throws IOException if this stream is closed or an I/O error occurs.
     */
    @Override
    public int read(final byte[] b, final int off, final int len) throws IOException {
        ensureOpen();
        if (len == 0) {
            // InputStream contract: a zero-length request reads nothing and returns 0, even at end of file.
            return 0;
        }
        if (!buffer.hasRemaining()) {
            nextBuffer();
            if (!buffer.hasRemaining()) {
                return EOF;
            }
        }
        final int numBytes = Math.min(buffer.remaining(), len);
        buffer.get(b, off, numBytes);
        return numBytes;
    }

    /**
     * Skips up to {@code n} bytes. A skip that fits in the current window only advances the buffer position;
     * otherwise the file position of the next window is advanced and a new window is mapped there.
     *
     * @param n the number of bytes to skip.
     * @return the number of bytes actually skipped; 0 if {@code n} is not positive.
     * @throws IOException if this stream is closed or an I/O error occurs.
     */
    @Override
    public long skip(final long n) throws IOException {
        ensureOpen();
        if (n <= 0) {
            return 0;
        }
        if (n <= buffer.remaining()) {
            // n fits in an int here because it is bounded by buffer.remaining().
            buffer.position((int) (buffer.position() + n));
            return n;
        }
        final long remainingInFile = channel.size() - nextBufferPosition;
        // Bytes left in the current window plus as many further bytes as the file still has, capped at n.
        final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
        nextBufferPosition += skipped - buffer.remaining();
        nextBuffer();
        return skipped;
    }

}