Skip to content

Instantly share code, notes, and snippets.

@therealkevinard
Last active August 29, 2015 14:26
Show Gist options
  • Save therealkevinard/4f9af27c80e560222ed1 to your computer and use it in GitHub Desktop.
Save therealkevinard/4f9af27c80e560222ed1 to your computer and use it in GitHub Desktop.
A simple Python script that takes a directory of organized images and parses its structure into SQL inserts and a reorganized directory layout for upload. Useful for migrating websites that have huge image galleries.
#!/usr/bin/python
import sys
import getopt
import os
import shutil
# Run-time settings. They start out empty; main() assigns them from the
# command-line options and the other functions read them as module globals.

# Values taken directly from the options.
basedir = ""    # -b / --basedir: root of the image tree that gets walked
uploaddir = ""  # -u / --uploaddir: prefix of every path written to the SQL
outputdir = ""  # -o / --outputdir: where the generated files are written

# Paths derived from outputdir.
problempath = ""  # <outputdir>/problems.csv
sqlpath = ""      # <outputdir>/migration.sql
def main(argv):
    """Parse the command-line options and run the migration.

    Recognised options:
      -h                  print the usage line and exit
      -b, --basedir       directory tree to walk (required)
      -o, --outputdir     directory that receives problems.csv and
                          migration.sql (required)
      -u, --uploaddir     prefix used for the paths written to the SQL

    The parsed values are stored in the module globals ``basedir``,
    ``outputdir``, ``uploaddir``, ``problempath`` and ``sqlpath`` before
    welcomemessage(), prepare() and getcontents() are called.

    :param argv: argument list without the program name (sys.argv[1:]).
    :raises SystemExit: code 2 on a bad or missing option, or when the
        output directory would overlap the base directory; no code after -h.
    """
    global basedir
    global outputdir
    global sqlpath
    global problempath
    global uploaddir
    usage = 'mig-dirs-to-sql.py -b <basedir> -o <outputdir> -u <uploaddir>'
    try:
        opts, _args = getopt.getopt(
            argv, "hb:o:u:", ["basedir=", "outputdir=", "uploaddir="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    # Collect into locals first so nothing is published until validation
    # has passed.
    base = ''
    output = ''
    upload = ''
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-b", "--basedir"):
            base = arg
        elif opt in ("-o", "--outputdir"):
            output = arg
        elif opt in ("-u", "--uploaddir"):
            upload = arg

    # Without these two the run either crashes in os.makedirs('') or walks
    # nothing and silently produces empty output.
    if not base or not output:
        print(usage)
        sys.exit(2)

    # prepare() removes the output directory recursively, so it must never
    # be the base directory itself or one of its parents.
    source = os.path.abspath(base)
    target = os.path.abspath(output)
    if source == target or source.startswith(target.rstrip(os.sep) + os.sep):
        print('outputdir ' + output + ' would overwrite basedir ' + base
              + '; choose a different output directory')
        sys.exit(2)

    basedir = base
    outputdir = output
    uploaddir = upload
    problempath = outputdir + '/problems.csv'
    sqlpath = outputdir + '/migration.sql'

    welcomemessage()
    prepare()
    getcontents(basedir)
def prepare():
    """Start from an empty output directory.

    Reads the module global ``outputdir``: an existing directory at that
    path is removed together with its contents, then the directory is
    created again.
    """
    target = outputdir
    already_there = os.path.isdir(target)
    if already_there:
        # Discard whatever a previous run left behind.
        shutil.rmtree(target)
    os.makedirs(target)
def welcomemessage():
    """Print the start-up banner.

    Shows the values of the module globals ``basedir`` and ``outputdir`` and
    warns that the output directory is removed first if it exists.

    Each print() call takes a single pre-built string, so the text is the
    same whether the script runs under Python 2 or Python 3.
    """
    rule = "================================================================="
    print(rule)
    print("+ Welcome to kevin's migration utility\n"
          "+ Give me a directory, and i'll turn it into sql inserts")
    print(rule)
    print("++ Base is: %s" % basedir)
    print("++ Output directory will be: %s" % outputdir)
    print("++ BTW:I'm removing " + outputdir + " first if it exists")
    print(rule)
def getcontents(path):
    """Walk *path* and write the SQL script and the problem log.

    Each directory name is split at its first space: the part before the
    space is used as the item sku and the part after it as the owner code
    (there is no owner code when the name contains no space).  You will
    probably want a different split for your own directory naming.

    For every file in a directory whose sku consists only of digits, one
    INSERT statement (built by getsqlinsert) is written to the file at
    ``sqlpath``.  Files in any other directory are listed in the CSV at
    ``problempath`` for manual resolution later.

    Reads the module globals ``sqlpath``, ``problempath`` and ``uploaddir``.
    Only the two output files are written; no image files are copied.

    :param path: root directory of the tree to walk.
    """
    # The with-statement closes both files even if the walk raises.
    with open(sqlpath, 'w') as sqlfile, open(problempath, 'w') as problemfile:
        # Terminate the header so the first data row starts on its own line.
        problemfile.write('file,owner,sku\r\n')
        for root, _dirs, files in os.walk(path):
            curr = os.path.basename(root)
            sku, space, trail = curr.partition(' ')
            if not space:
                # Not every directory name carries an owner code.
                trail = None
            print('curr %s sku: %s trail: %s' % (curr, sku, trail))

            # Target layout: {uploaddir}/owner/{ownercode}/sku/{sku}/ or,
            # when there is no owner code, {uploaddir}/sku/{sku}/.
            # These strings end up in the SQL, so they are always joined
            # with '/' rather than the OS path separator.
            if trail:
                destdir = uploaddir + '/owner/' + trail + '/sku/' + sku + '/'
            else:
                destdir = uploaddir + '/sku/' + sku + '/'

            sku_ok = sku.isdigit()
            for f in files:
                if not sku_ok:
                    # sku failed: log all three columns named in the header.
                    row = (root + '/' + f, trail or '', sku)
                    problemfile.write(
                        ','.join(_csvfield(value) for value in row) + '\r\n')
                else:
                    # sku passed: write the sql for this file.
                    sqlfile.write(getsqlinsert(sku, destdir + f, 0) + "\n")


def _csvfield(value):
    """Return *value* as a double-quoted CSV field with inner quotes doubled."""
    return '"' + value.replace('"', '""') + '"'
def getsqlinsert(itemsku, path, sort):
    """Build one INSERT statement for the ``itemimage`` table.

    :param itemsku: value for the ``itemid`` column.  It is written into the
        statement unquoted, so callers must pass a numeric value.
    :param path: string value for the ``path`` column.
    :param sort: value for the ``sort`` column, written unquoted.
    :returns: the complete statement, ending in ``;``.  ``published`` is
        always 1.
    """
    # Double any embedded double quote so a file name containing '"' cannot
    # end the string literal early and corrupt (or inject into) the statement.
    # NOTE(review): backslashes are left untouched; whether they need escaping
    # depends on the target database - confirm.
    quoted_path = path.replace('"', '""')
    sql = 'INSERT INTO itemimage(path, sort, itemid, published)'
    sql += 'VALUES("%s",%s,%s,1);' % (quoted_path, sort, itemsku)
    return sql
# Script entry point: hand everything after the program name to main().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment