-
-
Save MichaelPereira/3425950 to your computer and use it in GitHub Desktop.
Two scripts for 1. generating a git repo with lots of files and 2. generating and timing a series of small, synthetic commits to a repo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python -t | |
# Originally by Joshua Redstone
#
# Hacky script to generate gobs of files to test git operations as a function
# of the number of files in the repo.
#
# At a minimum, pass --baseDir:
#   ./loadGen.py --baseDir=foo
# This creates a git repo with lots of files in dir 'foo'.
#
import sys | |
import os | |
import random | |
import subprocess | |
import time | |
import optparse | |
def scanFiles(gitRoot):
    """Return tuple of list of all files and a list of all dirs in repo.

    Walks the tree rooted at ``gitRoot`` with ``os.walk``.  The top-level
    ``.git`` directory is pruned from the walk, so neither it nor anything
    beneath it is visited or reported.

    Args:
        gitRoot: Path of the repository root; used as the ``os.walk`` top.

    Returns:
        ``(allFiles, allDirs)``: every file path found and every directory
        path found, with ``gitRoot`` itself as the first directory.  Paths
        are built with ``os.path.join`` starting from ``gitRoot``.
    """
    print("Walking tree rooted at %s..." % gitRoot)
    # Build up list of all files in the repo currently
    allFiles = []
    allDirs = [gitRoot]
    for root, subFolders, files in os.walk(gitRoot):
        # Prune in place so os.walk never descends into the git metadata.
        # Matching the exact entry name (rather than a string prefix of the
        # path) keeps siblings such as '.github' and their contents.
        if root == gitRoot and '.git' in subFolders:
            subFolders.remove('.git')
        allFiles.extend(os.path.join(root, name) for name in files)
        allDirs.extend(os.path.join(root, name) for name in subFolders)
    return (allFiles, allDirs)
# Command-line handling.  The numeric options are declared with type='int' so
# that values given on the command line arrive as integers, just like the
# defaults; without a type optparse hands them back as strings.
parser = optparse.OptionParser()
parser.add_option('--numFiles', type='int', help='Num files to create',
                  default=1000000)
parser.add_option('--filesPerDir', type='int', help='Max files per directory',
                  default=1000)
parser.add_option('--baseDir', help='Root of git repo to create')
(options, args) = parser.parse_args()
if options.baseDir is None:
    parser.error('--baseDir is required')
if options.filesPerDir < 1:
    # Guards the division below against zero / negative bucket sizes.
    parser.error('--filesPerDir must be at least 1')

if not os.path.isdir(options.baseDir):
    os.makedirs(options.baseDir)
# Initialise whenever there is no repository yet, which also covers a
# pre-existing directory that was never turned into a git repo.
if not os.path.exists(os.path.join(options.baseDir, '.git')):
    subprocess.check_call(["git", "init", options.baseDir])

# Files already present count towards --numFiles, so a re-run only tops the
# repo up to the requested total.
(allFiles, allDirs) = scanFiles(options.baseDir)

print("Generating files....")
# xrange: avoids materialising a million-element list under Python 2.
for count in xrange(len(allFiles), options.numFiles):
    # Files are bucketed into numbered directories of filesPerDir entries.
    dirNum = count // options.filesPerDir
    dirName = os.path.join(options.baseDir, "dir-num-%08d" % dirNum)
    if not os.path.isdir(dirName):
        os.mkdir(dirName)
    fileName = os.path.join(dirName, "file-num-%08d" % count)
    with open(fileName, "w") as fh:
        fh.write("some contents for file %s" % fileName)
    if (count % 10000) == 0:
        print("Created file number %d" % count)

# Stage and commit everything from inside the repo.
os.chdir(options.baseDir)
print("Git adding new files")
subprocess.check_call(["git", "add", "."])
print("Git committing new files")
subprocess.check_call(["git", "commit", "-m", "some new files", "--no-status", "--quiet"])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python -t | |
# Originally by Joshua Redstone
#
# Script to generate small, synthetic commits to a git repo and time the
# operations. There's cruft in this script around running gdb or evaluating
# pygit2.
#
# Run with:
#   git reset --hard HEAD && simulate.py . ~/local/git/git
# where '.' is the root of the git repo and the second arg is the path to the
# git binary to use.
#
#
import sys | |
import os | |
import random | |
import subprocess | |
import time | |
import math | |
# Set to True (and enable the pygit2 import below) to stage and commit through
# pygit2 instead of shelling out to the git binary.
usePygit = False
#import pygit2

# Git binary used for every add/commit; the optional second command-line
# argument overrides it.
gitProg = "/usr/bin/git"

if len(sys.argv) < 2:
    sys.exit("usage: %s <gitRoot> [<gitProg>]" % sys.argv[0])

# Resolve paths before changing directory: after the chdir a relative path
# would be interpreted relative to the repo root instead of the caller's cwd.
gitRoot = os.path.abspath(sys.argv[1])
if len(sys.argv) > 2:
    gitProg = sys.argv[2]
    # A bare command name is left alone so it is still looked up on PATH.
    if os.path.dirname(gitProg):
        gitProg = os.path.abspath(gitProg)
os.chdir(gitRoot)

print("Reading dict...")
# All english words
with open('/usr/share/dict/words', 'r') as dictFile:
    words = [word.strip() for word in dictFile]
def genLine(maxLen=75):
    """Build a random, space-separated run of entries from ``words``.

    Entries are picked at random from the module-level ``words`` list and
    appended until the text is at least ``maxLen`` characters long, so the
    result may overshoot ``maxLen``.
    """
    text = ""
    while len(text) < maxLen:
        pick = words[random.randint(0, len(words) - 1)]
        if text:
            text = "%s %s" % (text, pick)
        else:
            text = pick
    return text
# gdb command file for the optional stack-sampling of `git commit`: it turns
# off pagination and dumps a backtrace of every thread.  It is passed to gdb
# via --command when tracing is enabled further down.
profFile = "/tmp/gdb-poorman"
# 'with' guarantees the handle is closed even if a write fails.
with open(profFile, "w") as fh:
    fh.write("set pagination 0\n")
    fh.write("thread apply all bt\n")
print("Walking tree rooted at %s..." % gitRoot)
# Build up list of all files in the repo currently
allFiles = []
allDirs = [gitRoot]
for root, subFolders, files in os.walk(gitRoot):
    # Prune the repository metadata in place so os.walk never descends into
    # it.  Matching the exact entry name (rather than a string prefix of the
    # path) keeps siblings such as '.github' and their contents.
    if root == gitRoot and '.git' in subFolders:
        subFolders.remove('.git')
    allFiles.extend(os.path.join(root, name) for name in files)
    allDirs.extend(os.path.join(root, name) for name in subFolders)
if usePygit:
    repo = pygit2.Repository(os.path.join(gitRoot, '.git'))

# Set to True to attach gdb (driven by profFile) to each `git commit` after a
# random delay and dump its stacks.  Off by default.
traceCommits = False

lastCommitSecs = 1  # duration of the previous commit; upper bound for the gdb attach delay
newFiles = []       # files created by this run; they are modification candidates too
commitCount = 0

# Endless loop of synthetic commits: touch a handful of random files, add a
# brand-new file on every third iteration, then stage, commit and report how
# long the commit took.
while True:
    tt = time.time()
    print("%s %0.3f: Bonus commit %5d" % (
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(tt)),
        tt - math.floor(tt), commitCount))

    # Per the original author's note, running an optional `git read-tree HEAD`
    # and `git update-index --refresh` here slowed things down by about 8x,
    # so those steps are not executed.

    print(time.strftime(" %H:%M:%S", time.localtime()) + ": Modifying files")
    modifiedFiles = []
    totalNumFiles = len(allFiles) + len(newFiles)
    # Nothing can be modified in an empty repo (randint(0, -1) would raise);
    # the new-file step below still produces something to commit first time.
    numFilesToMod = random.randint(1, 5) if totalNumFiles else 0
    for i in range(numFilesToMod):
        # Pick uniformly across pre-existing files and files created so far.
        fileIdx = random.randint(0, totalNumFiles - 1)
        if fileIdx < len(allFiles):
            filename = allFiles[fileIdx]
        else:
            filename = newFiles[fileIdx - len(allFiles)]
        modifiedFiles.append(filename)
        print(" Modifying file %s" % filename)
        # Mess up some data in the file: overwrite up to 76 characters at a
        # random offset (offset 0 for small files) ...
        size = os.path.getsize(filename)
        with open(filename, 'r+') as fh:
            if size <= 80:
                fh.seek(0)
            else:
                fh.seek(random.randint(0, size - 80))
            fh.write(genLine()[:75] + "\n")
            # ... then append 1-4 generated lines at the end.  Note that no
            # newline is written between them.
            fh.seek(0, os.SEEK_END)
            for j in range(random.randint(1, 4)):
                fh.write(genLine())

    # Every third commit create a new file
    if (commitCount % 3) == 0:
        idx = random.randint(0, len(allDirs) - 1)
        inDir = allDirs[idx]
        newName = os.path.join(
            inDir,
            "new-file-%010d-%010d" % (commitCount, random.randint(0, 10000000)))
        print(" Adding new file %s(%d %s)" % (newName, idx, inDir))
        with open(newName, 'w') as fh:
            for j in range(random.randint(10, 15)):
                fh.write(genLine())
                fh.write("\n")
        modifiedFiles.append(newName)
        # Remember the file so later iterations can pick it for modification;
        # without this newFiles stays empty and the lookup above is dead code.
        newFiles.append(newName)

    # Now do the actual commit
    print(time.strftime(" %H:%M:%S", time.localtime()) + ": Adding files..")
    if usePygit:
        index = repo.index
        for filen in modifiedFiles:
            # pygit2 index paths are given relative to the repo root here.
            assert filen.startswith(gitRoot + "/")
            modf = filen[(len(gitRoot)+1):]
            index.add(modf)
        index.write()
        # NOTE(review): the e-mail literal looks like an obfuscated
        # placeholder rather than a real address - confirm before enabling.
        author = pygit2.Signature('Test', '[email protected]', int(time.time()), 0)
        print(author)
        masterSha = repo.lookup_reference('refs/heads/master')
    else:
        subprocess.check_call([gitProg, "add"] + modifiedFiles)

    msg = genLine(50)
    print(time.strftime(" %H:%M:%S", time.localtime()) + ": Committing with msg \"%s\".." % msg)
    starts = time.time()
    if usePygit:
        sha = repo.create_commit("refs/heads/master", author, author, msg, index.create_tree(), [masterSha.oid])
        ends = time.time()
    else:
        comm = [gitProg, "commit", "--no-status", "-m", msg]
        proc = subprocess.Popen(comm)
        pid = proc.pid
        traceProc = None
        if traceCommits:
            # Attach at a random point within the expected commit duration.
            time.sleep(random.uniform(0.1, lastCommitSecs))
            print("popen")
            traceProc = subprocess.Popen(["gdb", "--command=" + profFile, "-batch", "--pid=%d" % (pid), "-l", "2", gitProg])
            print("About to wait for tracepid %d of pid %d" % (traceProc.pid, pid))
        # Wait outside of an assert: under `python -O` asserts are stripped,
        # which would skip the wait and make the timing meaningless.
        returnCode = proc.wait()
        ends = time.time()
        if traceProc is not None:
            traceProc.wait()  # reap the gdb helper
        if returnCode != 0:
            raise subprocess.CalledProcessError(returnCode, comm)
    lastCommitSecs = ends - starts
    print(" Commit took %.3f seconds. Whole op took %.3f seconds" % (ends - starts,
                                                                     ends - tt))
    commitCount += 1

# The loop above has no exit, so this is only a marker.
print("Should never get here")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment