Skip to content

Instantly share code, notes, and snippets.

@himbosatsu
Created February 10, 2022 22:22
Show Gist options
  • Save himbosatsu/cc710895c3b0a23da628626c7d0a6296 to your computer and use it in GitHub Desktop.
Mining sentences to jpdb using Yomichan via AnkiConnect.
Save this patch to the folder for the AnkiConnect plugin — you can
get that by hitting "View files" in the list of add-ons — then apply it with:
patch __init__.py < ankiconnnect.patch
--- __init__.py 2022-02-11 03:47:57.262335097 +0530
+++ __init__.py 2022-02-10 22:06:13.383091229 +0530
@@ -667,6 +667,14 @@
@util.api()
def addNote(self, note):
+ word = note['fields']['front']
+ reading = note['fields']['Reading']
+ sentence = note['fields']['Sentence']
+
+ r = requests.get(f'http://127.0.0.1:5000/add_mined_sentence/{word}/{reading}/{sentence}')
+
+ print(r.status_code)
+
ankiNote = self.createNote(note)
self.addMediaFromNote(ankiNote, note)
# deps: requests furl beautifulsoup4 flask
# run with
# $ FLASK_APP=mining-server flask run
import requests
from bs4 import BeautifulSoup as Soup
# NOTE: `import http` alone does not load the `http.cookiejar` submodule —
# it only worked before because `requests` happens to import it internally.
# Same for `urllib.parse` below. Import the submodules explicitly.
import http.cookiejar
import pathlib
import furl
import urllib.parse
import re
from flask import Flask

app = Flask(__name__)

DOMAIN = "https://jpdb.io"

# Authenticate to jpdb by replaying cookies exported to a Netscape/Mozilla
# format cookie file sitting next to this script.
cookies = pathlib.Path('./jpdb-cookie.txt')
jar = http.cookiejar.MozillaCookieJar(cookies)
jar.load()
sess = requests.Session()
sess.cookies = jar  # replace the session jar wholesale so every request carries the jpdb cookies
def make_abs(rel_link, drop=None):
    """Turn a jpdb-relative link into an absolute URL.

    Folding the ability to drop a single query parameter into this function
    lets the URLs printed to the console look prettier.
    """
    parsed = furl.furl(rel_link)
    if drop is not None and drop in parsed.args:
        del parsed.args[drop]
    return urllib.parse.urljoin(DOMAIN, parsed.url)
# <path:sentence> (instead of the default string converter) lets sentences
# containing "/" still match the route; URLs without a "/" behave as before.
@app.route("/add_mined_sentence/<vocab>/<reading>/<path:sentence>")
def add_mined_sentence(vocab, reading, sentence):
    """Set `sentence` as the shown sentence for `vocab`/`reading` on jpdb
    and add the vocab to deck #20.

    Returns "ok" on success, or a short error string when the expected
    links cannot be found on the jpdb pages (instead of crashing with a 500).
    """
    # search for the vocab — quote it so non-URL-safe characters survive the query string
    search_page = sess.get(
        f'https://jpdb.io/search?q={urllib.parse.quote(vocab)}&lang=english#a')
    search_soup = Soup(search_page.content, "html.parser")
    # find a link to the page with the right reading; re.escape the inputs so
    # regex metacharacters in them can't corrupt the pattern, and use a raw
    # string so "\?" is a real regex escape rather than an invalid str escape
    vocab_link = search_soup.find(
        "a", href=re.compile(rf"{re.escape(vocab)}/{re.escape(reading)}\?"))
    if vocab_link is None:
        return f"error: no search result for {vocab} ({reading})"
    vocab_page_link_abs = make_abs(vocab_link['href'], drop='expand')
    vocab_page = sess.get(vocab_page_link_abs)
    vocab_soup = Soup(vocab_page.content, "html.parser")
    # grab the "Edit sentence" link
    # strip the origin param, since it doesn't really do anything for us
    edit_link = vocab_soup.find("a", href=re.compile(r"/edit-shown-sentence"))
    if edit_link is None:
        return f"error: no edit-sentence link for {vocab} ({reading})"
    edit_sentence_link_abs = make_abs(edit_link['href'], drop='origin')
    # then POST to it with the sentence we want
    sess.post(edit_sentence_link_abs,
              data={'sentence': sentence, 'translation': ''})
    # add to a deck (i'm using deck id 20); the vocab/spelling ids come from
    # the edit-sentence link's query string
    params = urllib.parse.parse_qs(
        urllib.parse.urlparse(edit_sentence_link_abs).query)
    payload = {
        'v': params['v'][0],
        's': params['s'][0],
        'origin': f'/search?q={vocab}&lang=english',
    }
    sess.post(make_abs("/deck/20/add"), data=payload)
    print()
    print(f'{vocab} with reading {reading} added to deck #20')
    print(f' vocab: {urllib.parse.unquote(vocab_page_link_abs)}')
    print(f' sentence: {edit_sentence_link_abs}')
    print()
    return "ok"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment