Last active
November 29, 2018 15:02
-
-
Save lironsade/4426f4f821a875b8115c4fc489eab64d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
####################################################
# Measures execution times of the DB External Sort.
# Assumptions:
# - Your ExternalMemory.jar is in /tmp
# - fileGenerator.jar is in /tmp
# What does it do?
# Generates the files 250, 500, 750, 1000 in /tmp.
# Each should be about as large (in MB) as its name.
# Creates the "/tmp/sortemp" directory if it does not exist.
# Measures the running time of your program for each
# type (B, C), substring and file size, and generates
# the corresponding graphs.
# It selects on and sorts by column 1.
####################################################
import os | |
import subprocess as sp | |
import time | |
import matplotlib.pyplot as plt | |
# Jar whose running time is measured (given as a relative path).
EX3_JAR = 'ExternalMemory.jar'

# Name of the scratch directory passed to the jar.
TMP_FOLDER = 'sortemp'

# Row count requested from fileGenerator.jar for every generated file.
ROWS_PER_RANDOM = 12500000

# Names of the input files; each name is the file's approximate size in MB.
FILE_SIZES = [250, 500, 750, 1000]

# Selection substrings of length 1, 4 and 7; one graph is produced per entry.
SUBSTRS = ['a', 'xyzw', 'lironsa']
SUBSTR1, SUBSTR4, SUBSTR7 = SUBSTRS

# 1-based column numbers reported in the graph titles.
COLSORT = 1
COLSEL = 1
def genRandomFile(outFile, numCols, numRows):
    """Generate a random input file by running fileGenerator.jar.

    Args:
        outFile: Path of the file to create (converted with str()).
        numCols: Number of columns to request from the generator.
        numRows: Number of rows to request from the generator.

    Raises:
        subprocess.CalledProcessError: If the generator exits with a
            non-zero status.
    """
    command = ['java', '-jar', 'fileGenerator.jar',
               str(outFile), str(numCols), str(numRows)]
    # check=True: a failed generation must stop the script instead of letting
    # the benchmark run against a missing or partial input file.
    sp.run(command, check=True)
def runProg(ex, input_file, outFile, colNumSort, tmpFolder, colNumSel, substrSelect):
    """Run EX3_JAR once with a fixed 50 MB Java heap.

    Args:
        ex: Program variant passed as the jar's first argument ('B' or 'C'
            in this script).
        input_file: Path of the input file.
        outFile: Path of the output file.
        colNumSort: Column number to sort by.
        tmpFolder: Directory the jar may use for temporary files.
        colNumSel: Column number to select on.
        substrSelect: Substring used for the selection.

    Raises:
        subprocess.CalledProcessError: If the JVM exits with a non-zero
            status.
    """
    # JVM options go before -jar, matching the documented synopsis
    # `java [options] -jar jarfile [args...]`.
    jvm = ['java', '-Xmx50m', '-Xms50m', '-jar', EX3_JAR]
    program_args = (ex, input_file, outFile, colNumSort,
                    tmpFolder, colNumSel, substrSelect)
    # subprocess only accepts strings, so stringify every argument uniformly.
    command = jvm + [str(arg) for arg in program_args]
    # check=True: a crashed run must not be silently recorded as a timing.
    sp.run(command, check=True)
def generateFiles():
    """Generate one random input file for every size in FILE_SIZES.

    Each file is named after its size. The sizes come from FILE_SIZES so the
    generated files are exactly the ones the timing code reads later.
    """
    # One column of ROWS_PER_RANDOM rows is treated as 250 MB: the file named
    # 250 gets 1 column, 500 gets 2, and so on. Sizes are assumed to be
    # multiples of 250; anything else is rounded down to whole columns.
    mb_per_column = 250
    for size_mb in FILE_SIZES:
        print("Generating file of size {} MB".format(size_mb))
        genRandomFile(str(size_mb), size_mb // mb_per_column, ROWS_PER_RANDOM)
def timesOf(substr):
    """Time the 'B' and 'C' variants of the jar on every input file.

    For each size in FILE_SIZES the jar is run in mode 'B' and then in mode
    'C' on the input file named after that size, selecting with *substr*.
    Results are written to '<substr><size>.B' and '<substr><size>.C'.

    Args:
        substr: Selection substring passed to the jar.

    Returns:
        A pair (b_times, c_times) of lists holding the wall-clock duration
        in seconds of each run, in the same order as FILE_SIZES.
    """
    print('Generating times of substr {}'.format(substr))
    times = {'B': [], 'C': []}
    for f in FILE_SIZES:
        for ex in ('B', 'C'):
            print('Generating time for {}, size {}'.format(ex, f))
            outFile = '{}{}.{}'.format(substr, f, ex)
            # perf_counter is monotonic, so the measured interval cannot be
            # distorted by system clock adjustments the way time.time() can.
            startTime = time.perf_counter()
            # Use the module constants so the run matches the graph title.
            runProg(ex, str(f), outFile, COLSORT, TMP_FOLDER, COLSEL, substr)
            runTime = time.perf_counter() - startTime
            times[ex].append(runTime)
            print('Time was {}.'.format(runTime))
    return times['B'], times['C']
def genGraph(substr):
    """Benchmark *substr* and save the timings as '<substr>.png'.

    Plots the B and C running times returned by timesOf() against
    FILE_SIZES on the current matplotlib figure, adds a legend and a title
    stating the sort column, selection column and substring, then saves
    the figure.
    """
    timings_b, timings_c = timesOf(substr)
    # Plot B first, then C, so the legend entries line up with the series.
    for series in (timings_b, timings_c):
        plt.plot(FILE_SIZES, series, marker='o')
    plt.legend(['B', 'C'])
    heading = 'ColNumSort = {}, ColNumSel = {}, SubstrSel = {}'.format(
        COLSORT, COLSEL, substr)
    plt.title(heading)
    plt.savefig(substr + '.png')
def genGraphs():
    """Benchmark every substring in SUBSTRS, saving one graph per substring."""
    for substr in SUBSTRS:
        # Give each graph its own figure so series from different substrings
        # never share axes.
        fig = plt.figure()
        try:
            genGraph(substr)
        finally:
            # pyplot keeps figures alive until they are closed; release each
            # one once its image has been saved.
            plt.close(fig)
if __name__ == '__main__':
    # Every jar, input and output path in this script is relative, so all of
    # the work happens inside /tmp.
    os.chdir('/tmp')
    generateFiles()
    # exist_ok=True creates the scratch directory only when it is missing,
    # without a separate (race-prone) existence check.
    os.makedirs(TMP_FOLDER, exist_ok=True)
    genGraphs()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Very helpful! thanks!