Last active
May 19, 2025 03:09
-
-
Save specter119/b79dc35a6091d0fd0896a9536fbddb5a to your computer and use it in GitHub Desktop.
remove files not maintained by zotero
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
from __future__ import print_function | |
import configparser | |
import re | |
import shutil | |
import sqlite3 | |
import sys | |
try: | |
from pathlib import Path | |
except ImportError: | |
from pathlib2 import Path | |
if sys.version_info.major == 2: | |
reload(sys) | |
sys.setdefaultencoding('UTF8') | |
def get_zotfile_dest_and_zotero_data_dirs(): | |
''' | |
Get the Zotero data dir and the Zotfile destination dir in PosixPath type | |
''' | |
profile_dirs = { | |
'darwin': Path.home() / 'Library/Application Support/Zotero', | |
'linux': Path.home() / '.zotero/zotero', | |
'linux2': Path.home() / '.zotero/zotero', | |
'win32': Path.home() / 'AppData/Roaming/Zotero/Zotero' | |
} | |
profile_dir = profile_dirs[sys.platform] | |
config = configparser.ConfigParser() | |
config.read('{}'.format(profile_dir / 'profiles.ini')) | |
configs_loc = profile_dir / config['Profile0']['Path'] / 'prefs.js' | |
configs = configs_loc.read_text() | |
zotero_data_pat = re.compile( | |
r'user_pref\("extensions.zotero.dataDir",\ "(?P<zotero_data>.+)"\);') | |
zotero_data_dir = Path(zotero_data_pat.search( | |
configs).group('zotero_data')) | |
zotfile_dest_pat = re.compile( | |
r'user_pref\("extensions.zotfile.dest_dir",\ "(?P<zotfile_dest>.+)"\);') | |
zotfile_dest_dir = Path( | |
zotfile_dest_pat.search(configs).group('zotfile_dest')) | |
return zotero_data_dir, zotfile_dest_dir | |
def get_unmaintained_files(zotero_data_dir, | |
zotfile_dest_dir, | |
case_sensitive='auto'): | |
''' | |
Get a list of atthchment in PosixPath type that unmaintained in the Zotero | |
Args: | |
zotero_data_dir(PosixPath): Zotero data dir | |
zotfile_dest_dir(PosixPath): Zotfile destination dir | |
case_sensitive(bool or str): wether the os is case sensitive, | |
default set linux as True, and rest as False | |
''' | |
attachments_local = set(p.as_posix() for p in zotfile_dest_dir.glob('**/*') | |
if p.is_file() and p.name[0] != '.') | |
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite')) | |
with con: | |
cur = con.cursor() | |
cur.execute('SELECT path FROM itemAttachments WHERE linkMode = 2') | |
attachments_zotero = set([ | |
p.as_posix() for p in [ | |
zotfile_dest_dir / p[0].replace('attachments:', '', 1) | |
for p in cur.fetchall() | |
] | |
]) | |
if sys.platform == 'darwin': | |
import unicodedata | |
attachments_zotero = set( | |
list(attachments_zotero) + | |
[unicodedata.normalize('NFD', p) for p in attachments_zotero]) | |
if case_sensitive == 'auto': | |
case_sensitive = { | |
'darwin': False, | |
'linux': True, | |
'linux2': True, | |
'win32': False | |
}[sys.platform] | |
if not case_sensitive: | |
attachments_local = set([p.lower() for p in attachments_local]) | |
attachments_zotero = set([p.lower() for p in attachments_zotero]) | |
attachments_to_remove = attachments_local - attachments_zotero | |
return [Path(p) for p in attachments_to_remove] | |
def remove_unmaintained(attachments_to_remove): | |
''' | |
Remove the unmaintained files in PosixPath type, and clear empty dirs | |
''' | |
[p.unlink() for p in attachments_to_remove] | |
empty_dirs = [ | |
p for p in zotfile_dest_dir.glob('**/*') if (not p.is_file()) and ( | |
not len([f for f in list(p.iterdir()) if f.name[0] != '.'])) | |
] | |
[shutil.rmtree(p.as_posix(), ignore_errors=True) for p in empty_dirs] | |
if __name__ == '__main__': | |
zotero_data_dir, zotfile_dest_dir = get_zotfile_dest_and_zotero_data_dirs() | |
attachments_to_remove = get_unmaintained_files(zotero_data_dir, | |
zotfile_dest_dir) | |
try: | |
import click | |
print('The following files are no longer managed by Zotero:') | |
print('\n'.join([' {}'.format(p) for p in attachments_to_remove])) | |
if click.confirm('Do you want remove them?', default=True): | |
remove_unmaintained(attachments_to_remove) | |
except ImportError: | |
print( | |
'The following files no longer managed by Zotero will be removed:') | |
print('\n'.join([' {}'.format(p) for p in attachments_to_remove])) | |
remove_unmaintained(attachments_to_remove) |
zsc233
commented
Mar 11, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment