Created
November 21, 2015 13:05
-
-
Save rho333/34f5e21c80261bfbf5d2 to your computer and use it in GitHub Desktop.
Useful script for bulk renaming files on the basis of their type.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Small script which uses the "file" command to
# determine the types of all files in the current
# directory with names starting with '#'.
#
# It then sets the file extension as appropriate.
# Written to recover data from a badly damaged filesystem's
# lost+found folder, after running testdisk.
#
# Author: Richard Hofman, 2015
#
# This script is licensed under the MIT License.
#!/usr/bin/python
import os
import subprocess
# Maps a fragment of the description printed by the "file" command to the
# extension (without a leading dot) that should be appended to the file name.
# lookup_extension() compares these keys case-insensitively as substrings of
# the description.
extensions = {
    'JPEG image data': 'jpg',
    'RAR archive data': 'rar',
    'avi': 'avi',
    'ISO Media': 'iso',
    'Matroska': 'mkv',
    'MPEG sequence, v2': 'mpg',
    'XML document text': 'xml',
    'Zip archive data': 'zip',
    'PDF document': 'pdf',
    'PE32 executable': 'exe',
    'Microsoft Word 2007+': 'docx',
    'SQLite 3.x database': 'sqlite',
    'Rich Text Format': 'rtf',
}
def main():
    """Append a type-based extension to every '#'-prefixed file in the cwd.

    Each entry of the current directory whose name starts with '#' is
    identified with the "file" command. When lookup_extension() recognises
    the description, the entry is renamed to its name without the leading
    '#' plus "." and the extension. Entries with an unrecognised type, or
    whose target name already exists, are left untouched.

    Raises:
        OSError: if the "file" command cannot be executed or a rename fails.
        subprocess.CalledProcessError: if "file" exits with a non-zero status.
    """
    # List the directory directly instead of going through `ls | awk` in a
    # shell: names containing spaces or shell metacharacters stay intact, and
    # an empty match no longer invokes `file` without arguments.
    candidates = sorted(
        name for name in os.listdir(os.curdir) if name.startswith('#')
    )

    type_mappings = {}
    for filename in candidates:
        # -b (brief) omits the "name:" prefix, so file names that contain a
        # colon cannot be confused with the separator. universal_newlines
        # yields text on both Python 2.7 and Python 3.
        description = subprocess.check_output(
            ['file', '-b', '--', filename], universal_newlines=True
        ).strip()
        print("Processing %s: %s" % (filename, description))
        type_mappings[filename] = lookup_extension(description)

    for filename in candidates:
        extension = type_mappings[filename]
        if not extension:
            print("NOT renaming: %s" % filename)
            continue
        # Drop the leading '#' and append the detected extension.
        new_filename = "%s.%s" % (filename[1:], extension)
        # os.rename() silently replaces an existing file on POSIX; never
        # destroy data while recovering it.
        if os.path.lexists(new_filename):
            print("NOT renaming: %s (%s already exists)"
                  % (filename, new_filename))
            continue
        print("Renaming: %s" % filename)
        os.rename(filename, new_filename)
def lookup_extension(type_string, mapping=None):
    """Return the file extension for a "file" type description.

    Each key of *mapping* is tested as a case-insensitive substring of
    *type_string*. When several keys match, the longest (most specific) one
    wins, so the result does not depend on dict iteration order.

    Args:
        type_string: Description text, e.g. "JPEG image data, JFIF standard".
        mapping: Optional dict of description fragment -> extension.
            Defaults to the module-level ``extensions`` table.

    Returns:
        The extension without a leading dot, or "" when nothing matches.
    """
    if mapping is None:
        mapping = extensions
    haystack = type_string.lower()
    matches = [key for key in mapping if key.lower() in haystack]
    if not matches:
        return ""
    return mapping[max(matches, key=len)]
# Run the renamer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment