Created
July 3, 2017 17:03
-
-
Save ccwang002/925fb0c5496ee3a6b4b2e9b4833d4c4a to your computer and use it in GitHub Desktop.
Clean up Zotero reference directory
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import unicodedata
from pathlib import Path

import pandas as pd
# Input: the Zotero library exported as CSV, and the directory tree that
# holds the attachment files to be checked against it.
ZOTERO_LIBRARY_PTH = '/Users/liang/Desktop/My Library.csv'
REFERENCES_ROOT = Path('/Users/liang/Dropbox/References/')

df = pd.read_csv(ZOTERO_LIBRARY_PTH)

# Keep only the rows that actually have something in 'File Attachments'.
has_attachment = df['File Attachments'].notnull()
df_with_attch = df[has_attachment]

# One cell may list several paths separated by '; '.  Flatten every cell
# into a single set of path strings, skipping empty fragments.
all_attachments = {
    attachment
    for cell in df_with_attch['File Attachments']
    for attachment in cell.split('; ')
    if attachment
}
# File names that never count as real content: they are skipped when looking
# for orphaned attachments and do not keep a directory from being "empty".
IGNORED_FILES = {
    '.DS_Store',
}
# First pass: delete every file in a leaf directory that the Zotero export
# does not list as an attachment.
#
# Paths are compared in Unicode NFC form on both sides.  A file system may
# hand back decomposed names (e.g. 'u' + combining diaeresis) while the CSV
# holds the composed form; a raw string comparison would then treat a
# referenced file as unknown and delete it.  Normalising only ever makes the
# check more conservative.  The file itself is still addressed through the
# path exactly as os.walk reported it.
known_attachments = {
    unicodedata.normalize('NFC', attachment) for attachment in all_attachments
}

for root, dirs, files in os.walk(REFERENCES_ROOT):
    if dirs:
        # Only directories without sub-directories (the end of the
        # structure) are inspected.
        continue
    for f in files:
        if f in IGNORED_FILES:
            continue
        full_path = os.path.join(root, f)
        if unicodedata.normalize('NFC', full_path) in known_attachments:
            continue
        print(f'{full_path} is not in Zotero database. Removed!')
        Path(full_path).unlink()
# Second pass: remove leaf directories that hold nothing except ignorable
# files (or nothing at all).
for root, subdirs, filenames in os.walk(REFERENCES_ROOT):
    if subdirs:
        # Not at the end of the structure yet.
        continue
    if any(name not in IGNORED_FILES for name in filenames):
        # At least one real file lives here; keep the directory.
        continue

    print(f'Dir: {root} is empty. Removed')
    leaf_dir = Path(root)
    # rmdir() needs a truly empty directory, so the ignorable leftovers
    # are deleted first.
    for name in filenames:
        (leaf_dir / name).unlink()
    leaf_dir.rmdir()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment