Created
June 18, 2022 18:40
-
-
Save luizbafilho/02e6592ab27e51aa0bd73948f0b9877e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# deps: requests furl beautifulsoup4 flask
import csv
import http
import http.cookiejar
import pathlib
import re
import urllib
import urllib.parse

import furl
import requests
from bs4 import BeautifulSoup as Soup
# Base URL that relative links are resolved against (see make_abs).
DOMAIN = "https://jpdb.io"

# Cookie file read via MozillaCookieJar; it must exist next to the working
# directory, otherwise jar.load() raises before anything else runs.
cookies = pathlib.Path('./jpdb-cookie.txt')
jar = http.cookiejar.MozillaCookieJar(cookies)
jar.load()

# One shared session so every request carries the loaded cookies.
sess = requests.Session()
sess.cookies = jar  # or maybe s.cookies.update(jar)
# folding the ability to drop a url param into this function | |
# this allows us to make the urls printed to console look prettier | |
def make_abs(rel_link, drop=None):
    """Resolve *rel_link* against DOMAIN and return the resulting URL.

    If *drop* is given and names a query parameter present in the link,
    that parameter is removed before the link is resolved.
    """
    parsed = furl.furl(rel_link)
    should_strip = drop is not None and drop in parsed.args
    if should_strip:
        del parsed.args[drop]
    cleaned_link = parsed.url
    return urllib.parse.urljoin(DOMAIN, cleaned_link)
def set_custom_sentence(vocab, sentence, sentence_eng):
    """Set the custom example sentence shown for *vocab*.

    Searches the site for the word, follows the first result link whose href
    contains it, locates that page's "edit-shown-sentence" link and POSTs
    the sentence and its translation to that link.

    Args:
        vocab: the word to look up.
        sentence: the sentence to submit (sent as form field "sentence").
        sentence_eng: its translation (sent as form field "translation").

    Raises:
        ValueError: if *vocab* is empty.
        LookupError: if the vocabulary link or the edit link is not found.
        requests.HTTPError: if any of the requests returns an error status.
    """
    if not vocab:
        # An empty word would make the link pattern match almost any href.
        raise ValueError("vocab must be a non-empty string")

    # search for the vocab; let requests build the query string so that
    # characters like "&", "#" or spaces in the word cannot corrupt the URL
    search_page = sess.get(f'{DOMAIN}/search', params={'q': vocab, 'lang': 'english'})
    search_page.raise_for_status()
    search_soup = Soup(search_page.content, "html.parser")

    # find a link to the page with the right reading; the word is escaped so
    # regex metacharacters in it are matched literally
    vocab_link_pattern = re.compile(rf"{re.escape(vocab)}(/.*)?\?")
    vocab_anchor = search_soup.find("a", href=vocab_link_pattern)
    if vocab_anchor is None:
        raise LookupError(f"no vocabulary page link found for {vocab!r}")
    vocab_page_link_abs = make_abs(vocab_anchor['href'], drop='expand')

    vocab_page = sess.get(vocab_page_link_abs)
    vocab_page.raise_for_status()
    vocab_soup = Soup(vocab_page.content, "html.parser")

    # grab the "Edit sentence" link
    # strip the origin param, since it doesn't really do anything for us
    edit_anchor = vocab_soup.find("a", href=re.compile(r"/edit-shown-sentence"))
    if edit_anchor is None:
        raise LookupError(f"no edit-sentence link found for {vocab!r}")
    edit_sentence_link_abs = make_abs(edit_anchor['href'], drop='origin')

    # then POST to it with the sentence we want
    payload = {'sentence': sentence, 'translation': sentence_eng}
    resp = sess.post(edit_sentence_link_abs, data=payload)
    # Surface rejected submissions instead of silently discarding the response.
    resp.raise_for_status()
def reading_anki_notes(path='anki-notes-reduced.txt'):
    """Read a tab-separated notes export and extract the fields we need.

    Args:
        path: location of the tab-separated notes file. Defaults to
            'anki-notes-reduced.txt' in the current working directory.

    Returns:
        A list of dicts with keys 'vocab', 'sentence' and 'sentence_eng',
        taken from columns 0, 4 and 6 of each row. Rows with fewer columns
        than the highest index requires are skipped.
    """
    print("Reading Anki notes...")
    vocab_idx = 0
    sentence_idx = 4
    sentence_eng_idx = 6
    # Derive the minimum row width from the indices instead of a magic number.
    min_columns = max(vocab_idx, sentence_idx, sentence_eng_idx) + 1

    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # would fail on or garble non-ASCII text such as Japanese.
    with open(path, newline='', encoding='utf-8') as notes:
        note_reader = csv.reader(notes, delimiter='\t')
        return [
            {
                'vocab': note[vocab_idx],
                'sentence': note[sentence_idx],
                'sentence_eng': note[sentence_eng_idx],
            }
            for note in note_reader
            if len(note) >= min_columns
        ]
# Script entry point: read the exported notes, then push each note's sentence
# to the site, reporting (but not aborting on) individual failures.
if __name__ == "__main__":
    notes = reading_anki_notes()
    print("Setting custom sentences...")
    for note in notes:
        try:
            set_custom_sentence(note['vocab'], note['sentence'], note['sentence_eng'])
        # Catch Exception rather than using a bare except so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still stop the run.
        except Exception as exc:
            print(f'failed importing: {note["vocab"]}, {note["sentence"]}, {note["sentence_eng"]}')
            # Show why it failed so the note can be fixed and retried.
            print(f'  reason: {exc!r}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment