Last active
May 2, 2016 17:47
-
-
Save taesiri/3babfaac5475f42e7451af71e77a228c to your computer and use it in GitHub Desktop.
Sequential vs Random access to File
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The following command creates a file of random data, about 1 GiB in size: 2**30 * 3/4 random bytes, base64-encoded (base64 expands the data by 4/3)
$ openssl rand -out data -base64 $((2**30 * 3/4)) | |
# you can also use dd in linux: | |
$ dd if=/dev/urandom of=data bs=64M count=16 | |
# On macOS the block-size suffix must be a lowercase m:
$ dd if=/dev/urandom of=data bs=64m count=16 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import time | |
def bench(length):
    """Time in-order versus shuffled index access over an in-memory list.

    Builds a list of ``length`` integers and two index lists of the same
    size (one in ascending order, one shuffled), reads every element of the
    data list through each index list, and prints a single line holding the
    length, the in-order time and the shuffled time (both in seconds),
    separated by tabs.

    The function runs unchanged on Python 2 and Python 3.

    Args:
        length: Number of elements to benchmark. It is converted with
            ``int()`` so that a float such as ``50.0`` is accepted.
    """
    size = int(length)

    # list(...) is needed on Python 3, where range() is a lazy sequence that
    # random.shuffle() cannot permute in place.
    data = list(range(size))
    ordered_idx = list(range(size))
    random_idx = list(range(size))
    random.shuffle(random_idx)

    start = time.time()
    for i in range(size):
        # The read itself is the work being timed; the value is discarded.
        _ = data[ordered_idx[i]]
    p1 = time.time() - start

    start = time.time()
    for i in range(size):
        _ = data[random_idx[i]]
    p2 = time.time() - start

    # A parenthesised single argument prints identically on Python 2 and 3.
    print("%d \t %.20f \t %.20f" % (size, p1, p2))
# Run the benchmark at 50, 100, 500, 1000, ... up to 10**8 elements.
for i in range(2, 9):
    # Floor division keeps the half-size an int on both Python 2 and 3;
    # plain "/" would produce a float on Python 3.
    bench(10**i // 2)
    bench(10**i)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import time | |
def bench(length):
    """Time in-order versus shuffled index access over an in-memory list.

    Builds a list of ``length`` integers and two index lists of the same
    size (one in ascending order, one shuffled), reads every element of the
    data list through each index list, and prints a single line holding the
    length, the in-order time and the shuffled time (both in seconds),
    separated by commas.

    Args:
        length: Number of elements to benchmark. It is converted with
            ``int()`` once, so a float such as ``50.0`` is accepted.
    """
    size = int(length)

    data = list(range(size))
    ordered_idx = list(range(size))
    random_idx = list(range(size))
    random.shuffle(random_idx)

    # perf_counter() is monotonic and high-resolution, unlike time.time(),
    # which follows the wall clock and can jump if the clock is adjusted.
    start = time.perf_counter()
    for i in range(size):
        # The read itself is the work being timed; the value is discarded.
        _ = data[ordered_idx[i]]
    p1 = time.perf_counter() - start

    start = time.perf_counter()
    for i in range(size):
        _ = data[random_idx[i]]
    p2 = time.perf_counter() - start

    print("%d, %.20f, %.20f" % (size, p1, p2))
# Run the benchmark at 50, 100, 500, 1000, ... up to 10**8 elements.
for i in range(2, 9):
    # Floor division passes an int length; "/" would hand bench() a float.
    bench(10**i // 2)
    bench(10**i)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mmap | |
import os | |
import time | |
import random | |
def random_read(str):
    """Time byte-by-byte reads of a memory-mapped file, in order and shuffled.

    Maps the whole file read-only, reads every byte once in ascending offset
    order and once in a shuffled order, then prints a single line holding
    the file size in bytes, the in-order time and the shuffled time (both in
    seconds), separated by tabs.

    One offset is held in memory per byte of the file, so memory use grows
    with the file size. mmap refuses to map an empty file and raises in
    that case.

    The function runs unchanged on Python 2 and Python 3.

    Args:
        str: Path of the file to read. The name shadows the builtin ``str``;
            it is kept so that existing keyword callers keep working.
    """
    # The file is only read, so open and map it read-only rather than "r+b".
    with open(str, "rb") as f:
        mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        try:
            fsize = os.fstat(f.fileno()).st_size
            # list(...) is needed on Python 3, where range() cannot be
            # shuffled in place.
            offsets = list(range(fsize))

            start = time.time()
            for i in offsets:
                # offsets is still the identity mapping here, so
                # offsets[i] == i. The read is the work being timed.
                _ = mm[offsets[i]]
            seq = time.time() - start

            random.shuffle(offsets)

            start = time.time()
            for i in offsets:
                # Looking a shuffled value up in the shuffled list still
                # yields a valid offset, since the list is a permutation.
                _ = mm[offsets[i]]
            rnd = time.time() - start
        finally:
            # Release the mapping even if a read fails.
            mm.close()

    # A parenthesised single argument prints identically on Python 2 and 3.
    print("%d \t %.20f \t %.20f" % (fsize, seq, rnd))
# Benchmark the file named "data" in the current working directory.
data_path = "data"
random_read(data_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment