Created
September 28, 2016 23:51
-
-
Save alancnet/f6e192afb05608b394809abbb4dc29f5 to your computer and use it in GitHub Desktop.
HashSet implemented against a file system.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.{File, RandomAccessFile} | |
class HashSetFile(file: File, modulus: Int = 32) { | |
val raf = new RandomAccessFile(file, "rw") | |
def filePosition = raf.getFilePointer | |
def fileLength = raf.length | |
val bytesInLong = java.lang.Long.BYTES | |
val indexesStart = bytesInLong | |
val start = System.currentTimeMillis() | |
if (fileLength == 0) { | |
initFile() | |
} | |
def incrementRecordCount() = { | |
val recordCount = getRecordCount + 1 | |
raf.seek(0) | |
raf.writeLong(recordCount) | |
println(s"UniqueCount: $recordCount") | |
} | |
def getRecordCount = { | |
raf.seek(0) | |
raf.readLong() | |
} | |
def initFile() = { | |
raf.seek(0) | |
raf.writeLong(0) | |
val emptyBuffer: Array[Byte] = scala.Array.fill(1024 * 32){0} | |
0L.until(modulus.toLong * bytesInLong / emptyBuffer.length).foreach(x => { | |
raf.write(emptyBuffer) | |
}) | |
} | |
def getOffsetOfIndexOfValue(value: Array[Byte]): Int = { | |
(bytesInLong * math.abs(java.util.Arrays.hashCode(value) % modulus)) + bytesInLong | |
} | |
def add(value: Array[Byte]): Boolean = { | |
def writeNewRecord() = { | |
raf.seek(fileLength) | |
raf.writeLong(value.length.toLong) // Write Length of value | |
raf.writeLong(-1L) // Make space for offset of next value in list | |
raf.write(value) // Write the value | |
incrementRecordCount() | |
true | |
} | |
def checkCurrentLinkedListValue(offset: Long): Boolean = { | |
raf.seek(offset) | |
val lengthOfCurrentValue = raf.readLong | |
val nextOffsetInList = raf.readLong | |
if (lengthOfCurrentValue == value.length.toLong) { // possibly the same value | |
val buffer: Array[Byte] = Array.fill(lengthOfCurrentValue.toInt) {0} | |
raf.read(buffer) | |
if (java.util.Arrays.equals(value, buffer)) { // value already exists | |
return false | |
} | |
} | |
if (nextOffsetInList == -1L) { // if here, then value is new | |
// update the nextOffsetInList | |
// then write the value to that offset | |
raf.seek(offset + bytesInLong) // Seek to where nextOffsetInListIsWritten | |
raf.writeLong(fileLength) // The next value will be written at the end of the file | |
writeNewRecord() // Write the new value | |
} else { | |
checkCurrentLinkedListValue(nextOffsetInList) | |
} | |
} | |
val indexOffset = getOffsetOfIndexOfValue(value) | |
raf.seek(indexOffset) | |
val linkedListOffset = raf.readLong() | |
if (linkedListOffset == 0L){ | |
raf.seek(indexOffset) | |
raf.writeLong(fileLength) | |
writeNewRecord() | |
} else { | |
checkCurrentLinkedListValue(linkedListOffset) | |
} | |
} | |
def exists(value: Array[Byte]): Boolean = { | |
def checkCurrentLinkedListValue(offset: Long): Boolean = { | |
raf.seek(offset) | |
val lengthOfCurrentValue = raf.readLong | |
val nextOffsetInList = raf.readLong | |
if (lengthOfCurrentValue == value.length.toLong) { | |
val buffer: Array[Byte] = Array.fill(lengthOfCurrentValue.toInt) {0} | |
raf.read(buffer) | |
if (java.util.Arrays.equals(value, buffer)) { | |
return true | |
} | |
} | |
if (nextOffsetInList == -1L) { | |
false | |
} else { | |
checkCurrentLinkedListValue(nextOffsetInList) | |
} | |
} | |
val indexOffset = getOffsetOfIndexOfValue(value) | |
raf.seek(indexOffset) | |
val nextOffsetInList = raf.readLong | |
if (nextOffsetInList == 0L) { | |
false | |
} else { | |
checkCurrentLinkedListValue(nextOffsetInList) | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment