-
-
Save pditommaso/2920468 to your computer and use it in GitHub Desktop.
Memory mapping files larger than Integer.MAX_VALUE
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.RandomAccessFile | |
import java.nio.channels.FileChannel | |
import org.jboss.netty.buffer.{ByteBufferBackedChannelBuffer, ChannelBuffer, ChannelBuffers} | |
/**
 * Maps a file into memory, read-only, as a random access sequence of bytes spread across one or
 * more buffers, so that files larger than Int.MaxValue bytes can be addressed with Long indices.
 *
 * The file is opened, mapped and closed again during construction. Closing the file does not
 * invalidate the mappings, so no file descriptor is held for the lifetime of this object.
 *
 * @param filename the file to map
 * @param maxBufferSize the maximum number of bytes to map per buffer; must be positive
 */
class MemoryMappedFile(filename: String, maxBufferSize: Int = Int.MaxValue) {
  require(maxBufferSize > 0, "maxBufferSize must be positive, got " + maxBufferSize)

  // File length and mapped regions, produced together so the file is opened exactly once.
  private[this] val mapping: (Long, Array[ChannelBuffer]) = mapFile()

  /** Length of the file in bytes, as reported when it was mapped. */
  val size: Long = mapping._1

  /** The mapped regions in file order; every region except possibly the last holds maxBufferSize bytes. */
  private[io] val buffers: Array[ChannelBuffer] = mapping._2

  /**
   * Opens the file, maps it region by region and closes the file handle again.
   *
   * @return the file length and the mapped regions in file order
   */
  private[this] def mapFile(): (Long, Array[ChannelBuffer]) = {
    val file = new RandomAccessFile(filename, "r")
    try {
      val length = file.length()
      // Ceiling division, written so that it cannot overflow for very large files.
      val count = length / maxBufferSize + (if (length % maxBufferSize == 0) 0 else 1)
      // Regions are kept in an array, so there can be at most Int.MaxValue of them.
      require(count <= Int.MaxValue, "maxBufferSize " + maxBufferSize + " is too small for a file of " + length + " bytes")
      val channel = file.getChannel
      val mapped = Array.tabulate[ChannelBuffer](count.toInt) { i =>
        // Widen to Long before multiplying: i * maxBufferSize overflows Int for large buffers.
        val start = i.toLong * maxBufferSize
        val bytes = math.min(length - start, maxBufferSize.toLong)
        new ByteBufferBackedChannelBuffer(channel.map(FileChannel.MapMode.READ_ONLY, start, bytes))
      }
      (length, mapped)
    } finally {
      // A mapping stays valid after its channel is closed, so the handle can be released here.
      file.close()
    }
  }

  /**
   * Gets a specific byte in the file. Use getBuffer if large(ish) chunks of contiguous data are
   * to be returned.
   *
   * @param index the position of the byte in the file
   * @return the byte at that position
   */
  def apply(index: Long): Byte = {
    val bufferIndex = (index / maxBufferSize).toInt
    // Remainder in Long arithmetic; bufferIndex * maxBufferSize would overflow Int.
    val indexInBuffer = (index % maxBufferSize).toInt
    buffers(bufferIndex).getByte(indexInBuffer)
  }

  /**
   * Gets a ChannelBuffer for the specified range in the file. The range may span several of the
   * underlying mapped regions; the bytes are copied into a newly allocated buffer.
   *
   * @param index the start index
   * @param size the number of bytes to return
   * @return a ChannelBuffer with the specified range, readable from position 0 up to size
   */
  def getBuffer(index: Long, size: Int): ChannelBuffer = {
    val destination = ChannelBuffers.buffer(size)
    val endIndex = index + size

    // Copies the part of the range that lies in the region containing startIndex, then carries
    // on with the following region until endIndex is reached.
    @scala.annotation.tailrec
    def copyBuffers(startIndex: Long, destinationIndex: Int): Unit = {
      val bufferIndex = (startIndex / maxBufferSize).toInt
      val buffer = buffers(bufferIndex)
      val bufferStartIndex: Long = bufferIndex.toLong * maxBufferSize
      // Where in the buffer should we start?
      val indexInBuffer = (startIndex - bufferStartIndex).toInt
      val bufferEndIndex: Long = bufferStartIndex + maxBufferSize
      // Read up to the end of the requested range or the end of this region, whichever is first.
      val bytesToRead = (math.min(endIndex, bufferEndIndex) - startIndex).toInt
      // Mutate destination buffer
      buffer.getBytes(indexInBuffer, destination, destinationIndex, bytesToRead)
      if (endIndex > bufferEndIndex) {
        copyBuffers(bufferEndIndex, destinationIndex + bytesToRead)
      }
    }

    copyBuffers(index, 0)
    // getBytes writes by absolute index and leaves the destination's indices untouched, so
    // expose the copied bytes explicitly.
    destination.readerIndex(0)
    destination.writerIndex(size)
    destination
  }
}
/** Companion factory for MemoryMappedFile. */
object MemoryMappedFile {

  /**
   * Maps the given file using the default maximum buffer size.
   *
   * @param filename the file to map
   * @return a new MemoryMappedFile for that file
   */
  def apply(filename: String): MemoryMappedFile = {
    new MemoryMappedFile(filename)
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment