mining sentences to jpdb using yomichan via ankiconnect
save this to the folder for the ankiconnect plugin (you can get
there by hitting "View Files" in anki's list of add-ons) and run

    patch __init__.py < ankiconnect.patch

note that the patched code calls requests; if ankiconnect's
__init__.py doesn't already import it, add import requests at the
top of the file (anki's bundled python normally ships requests).
--- __init__.py	2022-02-11 03:47:57.262335097 +0530
+++ __init__.py	2022-02-10 22:06:13.383091229 +0530
@@ -667,6 +667,14 @@
 
     @util.api()
     def addNote(self, note):
+        word = note['fields']['front']
+        reading = note['fields']['Reading']
+        sentence = note['fields']['Sentence']
+
+        r = requests.get(f'http://127.0.0.1:5000/add_mined_sentence/{word}/{reading}/{sentence}')
+
+        print(r.status_code)
+
         ankiNote = self.createNote(note)
 
         self.addMediaFromNote(ankiNote, note)
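
for reference, the note dict that ankiconnect's addNote receives from
yomichan looks roughly like this (a sketch: the deck and model names
are hypothetical, but the field names are the ones the patch reads,
so your yomichan note type needs front, Reading and Sentence fields):

    note = {
        'deckName': 'Mining',      # hypothetical
        'modelName': 'Japanese',   # hypothetical
        'fields': {
            'front': '食べる',      # the mined word
            'Reading': 'たべる',
            'Sentence': 'ご飯を食べた',
        },
        'tags': ['yomichan'],
    }

the flask server below (save it as mining-server.py, to match the
FLASK_APP value in its header) receives these three fields over http
and pushes the sentence to jpdb.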
# deps: requests furl beautifulsoup4 flask
# run with
# $ FLASK_APP=mining-server flask run

import http.cookiejar
import pathlib
import re
import urllib.parse

import furl
import requests
from bs4 import BeautifulSoup as Soup
from flask import Flask

app = Flask(__name__)

DOMAIN = "https://jpdb.io"

cookies = pathlib.Path('./jpdb-cookie.txt')
jar = http.cookiejar.MozillaCookieJar(cookies)
jar.load()

sess = requests.Session()
sess.cookies = jar  # or maybe sess.cookies.update(jar)
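
# 'jpdb-cookie.txt' holds your jpdb session cookie in netscape
# cookie-file format (export it with a browser extension such as
# "cookies.txt"). a hypothetical sketch of its contents; the cookie
# name and value are placeholders, use whatever your browser exports:
#
#   # Netscape HTTP Cookie File
#   jpdb.io	FALSE	/	TRUE	1767225600	sid	0123456789abcdef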
# folding the ability to drop a url param into this function
# allows us to make the urls printed to the console look prettier
def make_abs(rel_link, drop=None):
    url = furl.furl(rel_link)
    if drop is not None:
        if drop in url.args:
            del url.args[drop]
    return urllib.parse.urljoin(DOMAIN, url.url)
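
# usage sketch, with a made-up relative link of the shape jpdb's
# search results use:
#   make_abs('/vocabulary/1323323/word?expand=1', drop='expand')
#   -> 'https://jpdb.io/vocabulary/1323323/word'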
# <path:...> so sentences containing a '/' still match the route
@app.route("/add_mined_sentence/<vocab>/<reading>/<path:sentence>")
def add_mined_sentence(vocab, reading, sentence):
    # search for the vocab
    search_page = sess.get(f'https://jpdb.io/search?q={vocab}&lang=english#a')
    search_soup = Soup(search_page.content, "html.parser")

    # find a link to the vocab page with the right reading
    vocab_page_link_rel = search_soup.find("a", href=re.compile(rf"{re.escape(vocab)}/{re.escape(reading)}\?"))['href']
    vocab_page_link_abs = make_abs(vocab_page_link_rel, drop='expand')
    vocab_page = sess.get(vocab_page_link_abs)
    vocab_soup = Soup(vocab_page.content, "html.parser")

    # grab the "Edit sentence" link
    # strip the origin param, since it doesn't really do anything for us
    edit_sentence_link_rel = vocab_soup.find("a", href=re.compile(r"/edit-shown-sentence"))['href']
    edit_sentence_link_abs = make_abs(edit_sentence_link_rel, drop='origin')

    # then POST to it with the sentence we want
    payload = {'sentence': sentence, 'translation': ''}
    resp = sess.post(edit_sentence_link_abs, data=payload)

    # add to a deck (i'm using deck id 20)
    params = urllib.parse.parse_qs(urllib.parse.urlparse(edit_sentence_link_abs).query)
    id_vocab = params['v'][0]
    id_spelling = params['s'][0]
    id_reading = params['r'][0]  # parsed but unused; the add form only needs v and s
    origin = f'/search?q={vocab}&lang=english'

    add_url_abs = make_abs("/deck/20/add")
    payload = {'v': id_vocab, 's': id_spelling, 'origin': origin}
    resp = sess.post(add_url_abs, data=payload)

    print()
    print(f'{vocab} with reading {reading} added to deck #20')
    print(f'  vocab:    {urllib.parse.unquote(vocab_page_link_abs)}')
    print(f'  sentence: {edit_sentence_link_abs}')
    print()

    return "ok"