Created
January 2, 2019 11:23
-
-
Save whinette/a46ccb980d93ee8189c2c6629a7e2e4c to your computer and use it in GitHub Desktop.
Tailored script to clean up the mess from Pixiv.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import distutils
import logging
import os
import shutil
import sys
import tempfile
from datetime import datetime
from distutils import dir_util
from stat import ST_MTIME
# Root logger used by every function in this script. Only the level is set
# here; handlers are attached when the file is run as a script.
g_logger = logging.getLogger()
g_logger.setLevel(logging.INFO)
def merge_subfolders(subfolders):
    """Merge every folder in *subfolders* into the last one and delete it.

    The final element of the list is the destination and is never removed.
    Each earlier folder is emptied into it and then deleted. When a file
    exists on both sides, the destination copy is replaced by the source
    copy (or, for the "same name" case, only when the source is newer,
    because ``copy_tree`` is called with ``update=1``).

    Args:
        subfolders: list of directory paths. An empty or single-element
            list is a no-op.
    """
    if not subfolders:
        return
    target = subfolders[-1]
    for f in subfolders:
        if f == target:
            # Reached the destination itself: nothing left to merge.
            break
        g_logger.info('{} -> {}'.format(f, target))
        if f.lower() == target.lower():
            g_logger.info('Same name! {} | {}'.format(f, target))
            # The two names differ only by case, so the content is staged
            # through a scratch directory instead of being copied directly.
            # A private mkdtemp() directory is used (rather than a fixed
            # path under /tmp) so leftovers from an earlier run can never
            # leak into the destination, and it is always cleaned up.
            staging = tempfile.mkdtemp()
            try:
                distutils.dir_util.copy_tree(f, staging, verbose=1, update=1)
                distutils.dir_util.copy_tree(staging, target, verbose=1, update=1)
            finally:
                shutil.rmtree(staging, ignore_errors=True)
        else:
            for src_dir, _dirs, files in os.walk(f):
                # Mirror the source layout under the destination; src_dir
                # always starts with f, so only that prefix is swapped.
                dst_dir = src_dir.replace(f, target, 1)
                if not os.path.exists(dst_dir):
                    os.makedirs(dst_dir)
                for file_ in files:
                    src_file = os.path.join(src_dir, file_)
                    dst_file = os.path.join(dst_dir, file_)
                    # Remove the existing file first so the move cannot
                    # fail on an already-present destination.
                    if os.path.exists(dst_file):
                        os.remove(dst_file)
                    shutil.move(src_file, dst_dir)
        # Everything has been transferred; drop the now-redundant source.
        shutil.rmtree(f)
def merge_dupe_users_subfolder(home):
    """Collapse multiple sub-directories inside each user directory.

    For every entry of *home* except ``'.duplicates'``, the immediate
    sub-directories are listed. When there is more than one, they are
    sorted by modification time (oldest first) and handed to
    ``merge_subfolders``, which merges them into the last, i.e. the most
    recently modified, one.

    Args:
        home: list of user directory paths to process.
    """
    g_logger.info('---=== Merging subfolders ===---')
    # Count only the entries that are actually processed so the "i/total"
    # progress output ends at total/total.
    users = [d for d in home if d != '.duplicates']
    total = len(users)
    for i, d in enumerate(users, 1):
        candidates = (os.path.join(d, x) for x in os.listdir(d))
        subfolders = [p for p in candidates if os.path.isdir(p)]
        if len(subfolders) > 1:
            g_logger.info('{}/{} merging {}'.format(i, total, d))
            # Oldest first: merge_subfolders treats the last item as the
            # destination.
            subfolders.sort(key=os.path.getmtime)
            merge_subfolders(subfolders)
            g_logger.info('done!')
    g_logger.info('---=== Subfolders done! ===---')
def merge_dupe_users(home):
    """Merge consecutive directories that share the same leading id.

    The id of a directory is the text before the first space in its name.
    Each directory is compared with the one kept so far for the previous
    id; on a match, the one with the older modification time is copied
    into the newer one (``copy_tree`` with ``update=1``) and then removed.

    Args:
        home: list of directory paths, ordered so that entries with the
            same id are adjacent (only neighbours are compared).

    Returns:
        1 if at least one pair was merged, otherwise 0.
    """
    g_logger.info('---=== Merging dupes users ===---')
    total = len(home)
    ret = 0
    # (path, mtime, id) of the directory currently kept for the latest id.
    survivor = None
    for i, d in enumerate(home, 1):
        current = (d, os.stat(d)[ST_MTIME], d.split(' ')[0])
        if survivor is None or current[2] != survivor[2]:
            survivor = current
            continue
        ret = 1
        g_logger.info('{}/{} found match with {} ({}) and {} ({})'.format(i, total, current[0], datetime.utcfromtimestamp(current[1]).strftime('%Y-%m-%d %H:%M:%S'), survivor[0], datetime.utcfromtimestamp(survivor[1]).strftime('%Y-%m-%d %H:%M:%S')))
        if current[1] > survivor[1]:
            keep, drop = current, survivor
        else:
            keep, drop = survivor, current
        g_logger.info('keeping {}'.format(keep[0]))
        distutils.dir_util.copy_tree(drop[0], keep[0], verbose=1, update=1)
        shutil.rmtree(drop[0])
        g_logger.info('done!')
        # Continue comparing against the directory that still exists, so a
        # third entry with the same id is never merged with a deleted path.
        survivor = keep
    g_logger.info('---=== Dupe users done! ===---')
    return ret
def list_user_dirs(root):
    """Return the user directories directly under *root*, sorted by id.

    A user directory is a directory whose name starts with an integer
    token (whitespace separated). Anything else, such as hidden folders or
    plain files, is skipped instead of aborting the run with a ValueError
    in the sort key.

    Args:
        root: directory to scan.

    Returns:
        List of directory names (not full paths) ordered by numeric id.
    """
    def user_id(name):
        tokens = name.split()
        if not tokens:
            return None
        try:
            return int(tokens[0])
        except ValueError:
            return None

    names = [
        name for name in os.listdir(root)
        if user_id(name) is not None and os.path.isdir(os.path.join(root, name))
    ]
    return sorted(names, key=user_id)


def main():
    """Configure logging, then run both merge passes on the current directory."""
    formatter = logging.Formatter('[%(asctime)s] - %(message)s')
    # Log to a dated file and to stdout with the same format.
    fh = logging.FileHandler('cleanup_{:%Y-%m-%d}.log'.format(datetime.now()))
    ch = logging.StreamHandler(sys.stdout)
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    g_logger.addHandler(fh)
    g_logger.addHandler(ch)
    g_logger.info('-' * 80)
    home = list_user_dirs(os.getcwd())
    changed = merge_dupe_users(home)
    if changed:
        # Directories were removed by the first pass; re-scan so the
        # second pass only sees what is still on disk.
        home = list_user_dirs(os.getcwd())
    merge_dupe_users_subfolder(home)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
python2.
filenameformat = %artist% (%member_id%)/%urlFilename% - %title%
filenamemangaformat = %artist% (%member_id%)/%urlFilename% - %title%
filenameinfoformat = %artist% (%member_id%)/%urlFilename% - %title%