-
-
Save alanzchen/6c30227d82f411da1baba4b7d1ec088b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3 | |
import sqlite3 | |
import re | |
import json | |
import argparse | |
def convert(db, filename):
    """Rewrite Zotero field-code citations in a Markdown file as cite keys.

    Every ``[ITEM CSL_CITATION ...](...)`` span found in *filename* is
    replaced, in place, by a pandoc-style group such as ``[@key1; @key2]``.
    Each replacement is also printed to stdout.

    Args:
        db: Path to the SQLite database that contains a ``citekeys`` table
            with an ``itemID`` column.
        filename: Path to the Markdown file; it is read and then overwritten.
            The file is treated as UTF-8.

    Raises:
        IndexError: If an item ID referenced by a citation has no row in the
            ``citekeys`` table. The Markdown file is left unmodified.
    """
    citation_re = re.compile(r'\[ITEM CSL_CITATION .*?\]\(.*?\)')
    # The item ID is normally followed by "uris". Users of this script have
    # reported documents where it is followed by "type" instead, so that
    # layout is tried only when the first pattern finds nothing.
    id_patterns = (
        re.compile(r'\\"id\\":(\d+),\\"uris'),
        re.compile(r'\\"id\\":(\d+),\\"type'),
    )

    with open(filename, "r", encoding="utf-8") as f:
        md = f.read()

    con = sqlite3.connect(db)
    try:
        cur = con.cursor()

        def to_citekeys(match):
            raw = match.group(0)
            item_ids = []
            for pattern in id_patterns:
                item_ids = pattern.findall(raw)
                if item_ids:
                    break

            keys = []
            for item_id in item_ids:
                # Bind the ID as an integer parameter instead of formatting
                # it into the SQL text; this compares the same way as the
                # bare numeric literal would.
                row = cur.execute(
                    'SELECT * FROM citekeys WHERE itemID=?',
                    (int(item_id),),
                ).fetchone()
                if row is None:
                    raise IndexError(
                        "no cite key found for item ID {} in {!r}; the "
                        "document may reference an item that is no longer "
                        "in the database".format(item_id, db)
                    )
                # The fourth column is used as the key text
                # (presumably the citekey column -- confirm against schema).
                keys.append("@" + row[3])

            replacement = "[" + "; ".join(keys) + "]"
            print(replacement)
            return replacement

        md = citation_re.sub(to_citekeys, md)
    finally:
        con.close()

    # Only reached when every citation resolved, so a failed run never
    # leaves a half-converted file behind.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(md)
def build_parser():
    """Return the argument parser with positional ``db`` and ``filename``."""
    parser = argparse.ArgumentParser(description='Convert markdown in Zotero-style citations format (e.g., convert MS Word to Markdown via pandoc) to cite-key format. Requires better-bibtex.')
    parser.add_argument('db', metavar='db', type=str,
                        help='Your better-bibtex-search.sqlite path.')
    parser.add_argument('filename', metavar='filename', type=str,
                        help='The markdown file you converted with pandoc. Note that you will need to use pandoc option --wrap=none when converting it to markdown.')
    return parser


def main(argv=None):
    """Parse command-line arguments and run the conversion.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    convert(args.db, args.filename)


# Guarded so that importing this module does not parse sys.argv or
# rewrite any file.
if __name__ == "__main__":
    main()
@phaeton6680
Hi,
this is great! Thank you. Exactly what I was looking for. But unfortunately I get an error:
Any ideas how to solve this?
Looks like there is a cite key that exists in your Word file but it is no longer in your Zotero database. Refresh your Word file with Zotero and try again?
@phaeton6680
Hi,
this is great! Thank you. Exactly what I was looking for. But unfortunately I get an error:
Any ideas how to solve this?
Looks like there is a cite key that exists in your Word file but it is no longer in your Zotero database. Refresh your Word file with Zotero and try again?
Thanks, I had manually edited some of the cite keys within the Word document. That seems to have caused the error. Thank you for your help and for this nice little script.
Thanks very much for this. Adding `encoding='utf-8', errors='ignore'` to the `open()` statements helped me overcome some issues.
Thank you ;D However, I had to adapt the script in line 13 to `citeid_match = [re.findall(r'\\"id\\":(\d*?),\\"type', i) for i in raw_match]`, changing `uris` to `type` in the regex pattern. But with this small change the script worked like a charm :)
Hi all,
Thanks for all of your feedback. I would now recommend https://retorque.re/zotero-better-bibtex/citing/migrating/ over this script.
Thank you all, I tried the Update on https://retorque.re/zotero-better-bibtex/citing/migrating/ as pandoc seems to be able to read the citations:
pandoc -f docx+citations -t markdown -i Aristotle.docx -o Aristotle.md
Nevertheless, the markdown I get does not use the correct citekeys from my Zotero library (it just uses numbers, for example @345), making it impossible to convert back for my supervisors (they just need old-school Word). Any idea what I need to add to the pandoc command so that it understands which citekeys to use? I tried adding @retorquere's zotero.lua filter, but this failed (I know, it is for using CSL to get to Zotero keys for Word... so the other way).
@schlittenhardtm that depends on the word document being created with Zotero+BBT. I don't know if you can refresh all citations in the word doc with Zotero+BBT running, otherwise you'll have to fall back to the CSL style.
Hi,
this is great! Thank you. Exactly what I was looking for. But unfortunately I get an error:
Any ideas how to solve this?