yonglai · June 3, 2017 14:02
diff --git a/bookmanager.py b/bookmanager.py
 import os
 from sets import Set
 import shutil
 import re

 # Sort every file found under the staging folder into
 # /vagrant/books/organized/<extension>/. A file whose name already exists at
 # the destination is left in place and listed in duplicates.txt instead of
 # overwriting anything.
 duplicates = []
 folder = '/vagrant/books/unorganized'
 totalNumOfFiles = 0
 numOfDupFiles = 0
 numOfNonDupFiles = 0
 # Files without an extension are not moved. They are tallied separately so
 # the consistency check at the bottom can account for every visited file.
 numOfSkippedFiles = 0

 for folderName, subfolders, filenames in os.walk(folder):
     for f in filenames:
         totalNumOfFiles += 1
         filename, file_extension = os.path.splitext(f)
         if file_extension == '':
             numOfSkippedFiles += 1
             continue

         # The destination folder is named after the extension minus its dot.
         fileTypeFolder = '/vagrant/books/organized/' + file_extension[1:]
         if not os.path.exists(fileTypeFolder):
             os.makedirs(fileTypeFolder)

         oldFilePath = os.path.join(folderName, f)
         newFilePath = os.path.join(fileTypeFolder, f)
         if os.path.exists(newFilePath):
             # Same file name already organized: record it, do not overwrite.
             duplicates.append(oldFilePath)
             numOfDupFiles += 1
         else:
             print('move ' + oldFilePath + ' to ' + newFilePath)
             numOfNonDupFiles += 1
             shutil.move(oldFilePath, newFilePath)

 if duplicates:
     # `with` guarantees the report is flushed and closed even if a write fails.
     with open('/vagrant/books/duplicates.txt', 'w') as duplicatesFile:
         for filename in duplicates:
             duplicatesFile.write(filename + '\n')

 print("total num of files={0}, num of dup files={1}, num of non dup files={2}".format(totalNumOfFiles, numOfDupFiles, numOfNonDupFiles))
 if numOfSkippedFiles:
     print("num of files skipped (no extension)={0}".format(numOfSkippedFiles))
 # Every visited file must be exactly one of: duplicate, moved, or skipped.
 if totalNumOfFiles == numOfDupFiles + numOfNonDupFiles + numOfSkippedFiles:
     print('correct counting')
 else:
     print('incorrect counting')
diff --git a/changeBookNames.py b/changeBookNames.py
 import os
 import shutil
 import re
 import csv
 from checkBookNames import *

 # Apply the renames proposed by getUpdatedNames(). When the target name is
 # already taken, the source file is deleted and its path is recorded in
 # duplicates_organized.txt.
 duplicates = []

 for (oldFilePath, newFilePath) in getUpdatedNames():
     if oldFilePath == newFilePath:
         # A no-op pair: the "new" path trivially exists, so without this guard
         # it would fall into the duplicate branch below and the file itself
         # would be deleted.
         continue

     if not os.path.exists(newFilePath):
         shutil.move(oldFilePath, newFilePath)
     else:
         duplicates.append(oldFilePath)
         os.unlink(oldFilePath)

 # Write the report once, after all renames, rather than rewriting the whole
 # file on every loop iteration.
 if duplicates:
     with open('/vagrant/books/duplicates_organized.txt', 'w') as duplicatesFile:
         for filename in duplicates:
             duplicatesFile.write(filename + '\n')

diff --git a/checkBookNames.py b/checkBookNames.py
 import os
 import shutil
 import re
 import csv

 def getUpdatedNames(folder='/vagrant/books/organized'):
     """Propose cleaned-up names for files that carry a bracketed tag.

     Walks ``folder`` recursively. For every file whose name contains a
     ``[...]`` tag, the tag (and any non-letter characters that directly
     follow it, up to the next letter, ``(`` or ``[``) is stripped. Names
     containing one of the ``special_files`` substrings only lose a tag at
     the very start of the name, so the rest of the title is left untouched.

     Args:
         folder: Root directory to scan. Defaults to the organized library.

     Returns:
         A list of ``(old_path, new_path)`` tuples. Files whose name would
         not actually change are omitted, as are files whose stripped name
         would no longer contain a ``.``.
     """
     # Compiled once instead of once per file.
     tagPattern = re.compile(r'\[.*?\][^a-zA-Z(\[]*')
     leadingTagPattern = re.compile(r'^\[.*?\]')
     special_files = ('Artifacts', 'Witchblade', 'X-Men', 'le.Dragon', 'Fables', 'TW', 'X-Force')

     result = []
     for folderName, subfolders, filenames in os.walk(folder):
         for filename in filenames:
             if tagPattern.search(filename) is None:
                 continue

             if any(x in filename for x in special_files):
                 new_name = leadingTagPattern.sub('', filename)
             else:
                 new_name = tagPattern.sub('', filename)

             # Stripping removed every '.', so the result has no extension
             # left; keep the original name.
             if '.' not in new_name:
                 continue
             # Nothing changed (e.g. a special title whose tag is not at the
             # start). Emitting old == new would make a consumer that checks
             # "does the new path exist?" treat the file as its own duplicate.
             if new_name == filename:
                 continue

             result.append((os.path.join(folderName, filename),
                            os.path.join(folderName, new_name)))
     return result


 if __name__ == "__main__":
     # When run as a script, write every (old, new) pair returned by
     # getUpdatedNames() to name_changes.csv under a two-column header.
     columns = ['old_name', 'new_name']
     with open('/vagrant/books/name_changes.csv', 'w') as out:
         writer = csv.DictWriter(out, fieldnames=columns)
         writer.writeheader()
         writer.writerows(
             {'old_name': before, 'new_name': after}
             for before, after in getUpdatedNames()
         )
diff --git a/cleanupStaging.py b/cleanupStaging.py
 import os
 import shutil
 import re

 duplicates = []
 folder = '/vagrant/books/unorganized'

 # Empty the staging folder: delete each direct child, file or directory
 # tree. A failure on one entry is printed and the loop moves on.
 for entry in os.listdir(folder):
     target = os.path.join(folder, entry)
     print(target)
     try:
         if os.path.isfile(target):
             print('remove file ' + target)
             os.unlink(target)
             continue
         if os.path.isdir(target):
             print('remove folder ' + target)
             shutil.rmtree(target)
     except Exception as err:
         print(err)
diff --git a/removingInvalidFiles.py b/removingInvalidFiles.py
 import os
 from sets import Set
 import shutil
 import re

 duplicates = []
 folder = '/vagrant/books/organized'

 # Delete every file under `folder` (recursively) whose name starts with '._'.
 for current_dir, _subdirs, names in os.walk(folder):
     for name in names:
         if not name.startswith('._'):
             continue
         doomed = '/'.join((current_dir, name))
         print('remove: ' + doomed)
         os.unlink(doomed)
	import os
	from sets import Set
	import shutil
	import re

	# Sort every file found under the staging folder into
	# /vagrant/books/organized/<extension>/. A file whose name already exists at
	# the destination is left in place and listed in duplicates.txt instead of
	# overwriting anything.
	duplicates = []
	folder = '/vagrant/books/unorganized'
	totalNumOfFiles = 0
	numOfDupFiles = 0
	numOfNonDupFiles = 0
	# Files without an extension are not moved. They are tallied separately so
	# the consistency check at the bottom can account for every visited file.
	numOfSkippedFiles = 0

	for folderName, subfolders, filenames in os.walk(folder):
	    for f in filenames:
	        totalNumOfFiles += 1
	        filename, file_extension = os.path.splitext(f)
	        if file_extension == '':
	            numOfSkippedFiles += 1
	            continue

	        # The destination folder is named after the extension minus its dot.
	        fileTypeFolder = '/vagrant/books/organized/' + file_extension[1:]
	        if not os.path.exists(fileTypeFolder):
	            os.makedirs(fileTypeFolder)

	        oldFilePath = os.path.join(folderName, f)
	        newFilePath = os.path.join(fileTypeFolder, f)
	        if os.path.exists(newFilePath):
	            # Same file name already organized: record it, do not overwrite.
	            duplicates.append(oldFilePath)
	            numOfDupFiles += 1
	        else:
	            print('move ' + oldFilePath + ' to ' + newFilePath)
	            numOfNonDupFiles += 1
	            shutil.move(oldFilePath, newFilePath)

	if duplicates:
	    # `with` guarantees the report is flushed and closed even if a write fails.
	    with open('/vagrant/books/duplicates.txt', 'w') as duplicatesFile:
	        for filename in duplicates:
	            duplicatesFile.write(filename + '\n')

	print("total num of files={0}, num of dup files={1}, num of non dup files={2}".format(totalNumOfFiles, numOfDupFiles, numOfNonDupFiles))
	if numOfSkippedFiles:
	    print("num of files skipped (no extension)={0}".format(numOfSkippedFiles))
	# Every visited file must be exactly one of: duplicate, moved, or skipped.
	if totalNumOfFiles == numOfDupFiles + numOfNonDupFiles + numOfSkippedFiles:
	    print('correct counting')
	else:
	    print('incorrect counting')
No results found