Last active
May 19, 2025 03:09
-
-
Save specter119/b79dc35a6091d0fd0896a9536fbddb5a to your computer and use it in GitHub Desktop.
remove files not maintained by zotero
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
from __future__ import print_function | |
import configparser | |
import re | |
import shutil | |
import sqlite3 | |
import sys | |
try: | |
from pathlib import Path | |
except ImportError: | |
from pathlib2 import Path | |
# Python 2 only: force the interpreter-wide default codec to UTF-8.
# `setdefaultencoding` is removed from `sys` at startup, so the module has
# to be reloaded to get it back. Python 3 never enters this branch.
running_python2 = sys.version_info[0] == 2
if running_python2:
    reload(sys)
    sys.setdefaultencoding('UTF8')
def _read_pref(prefs_text, pref_name):
    '''
    Return the string value of `pref_name` in a prefs.js text, or None.
    '''
    # re.escape keeps the dots in the pref name literal.
    pattern = re.compile(
        r'user_pref\("{}",\s*"(?P<value>.+)"\);'.format(re.escape(pref_name)))
    match = pattern.search(prefs_text)
    return match.group('value') if match else None


def get_zotfile_dest_and_zotero_data_dirs():
    '''
    Get the Zotero data dir and the Zotfile destination dir as Path objects.

    The first profile (`Profile0`) listed in the platform specific
    `profiles.ini` is used, and both locations are read from that profile's
    `prefs.js`.

    Returns:
        tuple: (zotero_data_dir, zotfile_dest_dir)

    Raises:
        KeyError: the platform is not supported, or `profiles.ini` has no
            `Profile0` section.
        IOError: `profiles.ini` could not be found or read.
        ValueError: Zotfile has no custom destination dir configured.
    '''
    profile_dirs = {
        'darwin': Path.home() / 'Library/Application Support/Zotero',
        'linux': Path.home() / '.zotero/zotero',
        'linux2': Path.home() / '.zotero/zotero',
        'win32': Path.home() / 'AppData/Roaming/Zotero/Zotero',
    }
    profile_dir = profile_dirs[sys.platform]

    profiles_ini = profile_dir / 'profiles.ini'
    config = configparser.ConfigParser()
    # `read` silently skips missing files, so check what was actually parsed
    # instead of failing later with an unrelated looking KeyError.
    if not config.read('{}'.format(profiles_ini), encoding='utf-8'):
        raise IOError('Cannot read Zotero profiles: {}'.format(profiles_ini))

    configs_loc = profile_dir / config['Profile0']['Path'] / 'prefs.js'
    # Read as UTF-8 explicitly so that non-ASCII paths do not depend on the
    # locale encoding.
    configs = configs_loc.read_text(encoding='utf-8')

    zotero_data = _read_pref(configs, 'extensions.zotero.dataDir')
    if zotero_data is None:
        # The pref is only written for a custom location; otherwise Zotero
        # keeps its data in the `Zotero` folder of the home directory.
        zotero_data_dir = Path.home() / 'Zotero'
    else:
        zotero_data_dir = Path(zotero_data)

    zotfile_dest = _read_pref(configs, 'extensions.zotfile.dest_dir')
    if zotfile_dest is None:
        raise ValueError(
            'extensions.zotfile.dest_dir is not set in {}'.format(configs_loc))
    zotfile_dest_dir = Path(zotfile_dest)

    return zotero_data_dir, zotfile_dest_dir
def get_unmaintained_files(zotero_data_dir,
                           zotfile_dest_dir,
                           case_sensitive='auto'):
    '''
    Get the files below the Zotfile destination dir that Zotero does not
    reference as a linked attachment.

    Args:
        zotero_data_dir(Path): Zotero data dir, containing `zotero.sqlite`
        zotfile_dest_dir(Path): Zotfile destination dir
        case_sensitive(bool or str): whether file names are compared case
            sensitively; 'auto' means True on Linux and False on macOS and
            Windows

    Returns:
        list: sorted Path objects, spelled exactly as found on disk, of the
            non-hidden files without a matching `linkMode = 2` attachment
    '''
    from contextlib import closing

    if case_sensitive == 'auto':
        case_sensitive = {
            'darwin': False,
            'linux': True,
            'linux2': True,
            'win32': False,
        }[sys.platform]

    def comparison_key(posix_path):
        # Case is only folded for the comparison; the returned paths keep
        # their on-disk spelling so they can be shown and unlinked reliably.
        return posix_path if case_sensitive else posix_path.lower()

    local_files = [
        p for p in zotfile_dest_dir.glob('**/*')
        if p.is_file() and not p.name.startswith('.')
    ]

    # `with connection:` only wraps a transaction, `closing` is what
    # actually releases the database file.
    db_path = '{}'.format(zotero_data_dir / 'zotero.sqlite')
    with closing(sqlite3.connect(db_path)) as con:
        rows = con.execute(
            'SELECT path FROM itemAttachments WHERE linkMode = 2').fetchall()

    prefix = 'attachments:'
    linked = set()
    for (stored_path,) in rows:
        if not stored_path:
            # A NULL or empty path cannot correspond to any local file.
            continue
        if stored_path.startswith(prefix):
            stored_path = stored_path[len(prefix):]
        linked.add((zotfile_dest_dir / stored_path).as_posix())

    if sys.platform == 'darwin':
        import unicodedata
        # Also accept the decomposed (NFD) spelling of every linked path on
        # macOS, in addition to the spelling stored in the database.
        linked |= {unicodedata.normalize('NFD', p) for p in linked}

    attachments_zotero = {comparison_key(p) for p in linked}
    return sorted(
        p for p in local_files
        if comparison_key(p.as_posix()) not in attachments_zotero)
def remove_unmaintained(attachments_to_remove, zotfile_dest_dir=None):
    '''
    Remove the unmaintained files, then clear the dirs left without content.

    A dir counts as empty when it holds no entry other than hidden ones
    (names starting with a dot); such a dir is removed together with its
    hidden entries.

    Args:
        attachments_to_remove(iterable of Path): files to delete
        zotfile_dest_dir(Path): root dir below which empty dirs are cleared;
            the root itself is never removed. When omitted, the module level
            `zotfile_dest_dir` set by the script entry point is used.

    Raises:
        ValueError: no destination dir was given and none is defined at
            module level; raised before anything is deleted.
    '''
    if zotfile_dest_dir is None:
        # Backward compatibility with the one-argument call used when the
        # file runs as a script.
        zotfile_dest_dir = globals().get('zotfile_dest_dir')
    if zotfile_dest_dir is None:
        raise ValueError('zotfile_dest_dir is required to clear empty dirs')

    for attachment in attachments_to_remove:
        attachment.unlink()

    # Deepest dirs first, so a parent that only held empty dirs is itself
    # found empty within the same pass.
    directories = sorted(
        (p for p in zotfile_dest_dir.glob('**/*') if p.is_dir()),
        key=lambda d: len(d.parts),
        reverse=True)
    for directory in directories:
        has_visible_entry = any(
            not entry.name.startswith('.') for entry in directory.iterdir())
        if not has_visible_entry:
            shutil.rmtree(directory.as_posix(), ignore_errors=True)
if __name__ == '__main__':
    # Script entry point: list the files Zotero no longer references and
    # delete them only after an explicit confirmation.
    zotero_data_dir, zotfile_dest_dir = get_zotfile_dest_and_zotero_data_dirs()
    attachments_to_remove = get_unmaintained_files(zotero_data_dir,
                                                   zotfile_dest_dir)
    if not attachments_to_remove:
        print('No files to remove.')
    else:
        print('The following files are no longer managed by Zotero:')
        print('\n'.join([' {}'.format(p) for p in attachments_to_remove]))

        # Only the import is guarded, so an ImportError raised anywhere else
        # is not mistaken for "click is not installed".
        try:
            import click
        except ImportError:
            click = None

        if click is not None:
            confirmed = click.confirm('Do you want remove them?', default=True)
        else:
            # Without click, still ask before deleting anything. The prompt
            # mirrors click's: an empty answer means yes.
            try:
                ask = raw_input  # Python 2
            except NameError:
                ask = input
            try:
                answer = ask('Do you want remove them? [Y/n]: ')
            except EOFError:
                # No interactive stdin: do not delete without consent.
                answer = 'n'
            confirmed = answer.strip().lower() in ('', 'y', 'yes')

        if confirmed:
            remove_unmaintained(attachments_to_remove)
请问zotero5没有profiles.ini怎么办?
我写这个脚本的时候,zotero 早就是5以后的版本了,脚本找不到文件可能是用户中文目录的问题。
有没有考虑增加删除zotero数据库无效attachment链接?
有没有考虑增加删除zotero数据库无效attachment链接?
嗯,这个脚本反过来比就行,问题是你怎么会产生大量无效的链接附件呢?
如果是你不小心删除了,或者挪地方了,不会想着先抢救下吗?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
abc abc1 abc2 不叫相同文件名不同后缀,这已经是不同文件名了。
“有些文献关联相同 pdf”这个说法不妥,zotero 或者 zotfile 都很难做到这个,除非手动。按照你的描述,更像是因导入产生的重复条目。
据我所见,造成 abc、abc1、abc2 的情况有一种:zotfile 在同一目录下产生了多个同名附件,原因可能是同一条目有多个附件,也可能是相同目录下的不同条目生成了重名附件。而 zotfile 在处理这种重名的时候,本来就有 bug。