luizbafilho · June 18, 2022 18:40
diff --git a/jpdb-sentence-importer.py b/jpdb-sentence-importer.py
 # deps: requests furl beautifulsoup4 flask

 import requests
 from bs4 import BeautifulSoup as Soup
 import http
 import pathlib
 import furl
 import urllib
 import re
 import csv

 # Base origin that relative links scraped from the site are joined against.
 DOMAIN = "https://jpdb.io"

 # Cookie file in Mozilla/Netscape cookies.txt format, looked up relative to
 # the current working directory. load() reads it immediately, at import time.
 # NOTE(review): only `import http` is visible above; `http.cookiejar` is a
 # submodule and is presumably loaded as a side effect of importing requests
 # -- confirm, or import it explicitly.
 cookies = pathlib.Path('./jpdb-cookie.txt')
 jar = http.cookiejar.MozillaCookieJar(cookies)
 jar.load()

 # Shared session used for every request in this script; it sends the cookies
 # loaded above.
 sess = requests.Session()
 sess.cookies = jar # alternative: sess.cookies.update(jar)

 # folding the ability to drop a url param into this function
 # this allows us to make the urls printed to console look prettier
 def make_abs(rel_link, drop=None):
     """Join ``rel_link`` onto ``DOMAIN``, optionally stripping one query parameter.

     Args:
         rel_link: Link to resolve; the callers in this file pass hrefs
             scraped from pages.
         drop: Name of a query parameter to remove before joining. ``None``
             (the default) leaves the query string untouched.

     Returns:
         The URL produced by ``urllib.parse.urljoin(DOMAIN, ...)``.
     """
     parsed = furl.furl(rel_link)
     # Only touch the query when a parameter was named and is actually present.
     if drop is not None and drop in parsed.args:
         del parsed.args[drop]
     cleaned = parsed.url
     return urllib.parse.urljoin(DOMAIN, cleaned)

 def set_custom_sentence(vocab, sentence, sentence_eng):
     """Replace the sentence shown for ``vocab`` on jpdb with a custom one.

     Searches the site for the vocab, opens the first result link whose href
     matches it, finds that page's ``/edit-shown-sentence`` link, and POSTs
     the sentence and its translation to that link.

     Args:
         vocab: Word to search for.
         sentence: Text submitted as the ``sentence`` form field.
         sentence_eng: Text submitted as the ``translation`` form field.

     Returns:
         None.

     Raises:
         LookupError: If the vocab link or the edit-sentence link cannot be
             found in the fetched pages.
         requests.HTTPError: If any of the requests returns an error status.
     """
     # search for the vocab; passing the query through ``params`` lets requests
     # percent-encode it, so a vocab containing '&', '#' or '+' cannot corrupt
     # the URL
     search_page = sess.get(f'{DOMAIN}/search', params={'q': vocab, 'lang': 'english'})
     search_page.raise_for_status()
     search_soup = Soup(search_page.content, "html.parser")

     # find a link to the page with the right reading; the vocab is escaped so
     # regex metacharacters in it are matched literally
     vocab_href_pattern = re.compile(re.escape(vocab) + r"(/.*)?\?")
     vocab_link = search_soup.find("a", href=vocab_href_pattern)
     if vocab_link is None:
         raise LookupError(f'no vocab page link found for {vocab!r}')
     vocab_page_link_abs = make_abs(vocab_link['href'], drop='expand')
     vocab_page = sess.get(vocab_page_link_abs)
     vocab_page.raise_for_status()
     vocab_soup = Soup(vocab_page.content, "html.parser")

     # grab the "Edit sentence" link
     # strip the origin param, since it doesn't really do anything for us
     edit_link = vocab_soup.find("a", href=re.compile(r"/edit-shown-sentence"))
     if edit_link is None:
         raise LookupError(f'no edit-sentence link found for {vocab!r}')
     edit_sentence_link_abs = make_abs(edit_link['href'], drop='origin')

     # then POST to it with the sentence we want; surface HTTP errors instead
     # of silently treating a rejected request as a success
     payload = {'sentence': sentence, 'translation': sentence_eng}
     sess.post(edit_sentence_link_abs, data=payload).raise_for_status()


 def reading_anki_notes(path='anki-notes-reduced.txt'):
     """Read vocab/sentence/translation triples from a tab-separated notes file.

     Rows with fewer than seven columns are skipped.

     Args:
         path: File to read. Defaults to ``anki-notes-reduced.txt`` in the
             current working directory.

     Returns:
         A list of dicts, one per accepted row, with the keys ``vocab``
         (column 0), ``sentence`` (column 4) and ``sentence_eng`` (column 6).
     """
     print("Reading Anki notes...")

     vocab_idx = 0
     sentence_idx = 4
     sentence_eng_idx = 6
     # A row must reach the last column we read.
     min_columns = sentence_eng_idx + 1

     # Decode explicitly instead of relying on the platform default encoding,
     # which is not UTF-8 everywhere; "utf-8-sig" also drops a leading BOM so
     # it cannot end up glued to the first vocab.
     with open(path, newline='', encoding='utf-8-sig') as notes:
         return [
             {
                 'vocab': note[vocab_idx],
                 'sentence': note[sentence_idx],
                 'sentence_eng': note[sentence_eng_idx],
             }
             for note in csv.reader(notes, delimiter='\t')
             if len(note) >= min_columns
         ]


 # Script entry point: read the exported notes, then push each note's sentence
 # to the site, reporting (but not aborting on) per-note failures.
 if __name__ == "__main__":
     notes = reading_anki_notes()

     print("Setting custom sentences...")

     for note in notes:
         try:
             set_custom_sentence(note['vocab'], note['sentence'], note['sentence_eng'])
         # Catch Exception rather than using a bare except, so Ctrl-C
         # (KeyboardInterrupt) and SystemExit still stop the run; the cause is
         # printed so a failure can be diagnosed.
         except Exception as exc:
             print(f'failed importing: {note["vocab"]}, {note["sentence"]}, {note["sentence_eng"]} ({exc!r})')
	# deps: requests furl beautifulsoup4 flask

	import requests
	from bs4 import BeautifulSoup as Soup
	import http
	import pathlib
	import furl
	import urllib
	import re
	import csv

	# Base origin that relative links scraped from the site are joined against.
	DOMAIN = "https://jpdb.io"

	# Cookie file in Mozilla/Netscape cookies.txt format, looked up relative to
	# the current working directory. load() reads it immediately, at import time.
	# NOTE(review): only `import http` is visible above; `http.cookiejar` is a
	# submodule and is presumably loaded as a side effect of importing requests
	# -- confirm, or import it explicitly.
	cookies = pathlib.Path('./jpdb-cookie.txt')
	jar = http.cookiejar.MozillaCookieJar(cookies)
	jar.load()

	# Shared session used for every request in this script; it sends the cookies
	# loaded above.
	sess = requests.Session()
	sess.cookies = jar # alternative: sess.cookies.update(jar)

	# folding the ability to drop a url param into this function
	# this allows us to make the urls printed to console look prettier
	def make_abs(rel_link, drop=None):
	    """Join ``rel_link`` onto ``DOMAIN``, optionally stripping one query parameter.

	    Args:
	        rel_link: Link to resolve; the callers in this file pass hrefs
	            scraped from pages.
	        drop: Name of a query parameter to remove before joining. ``None``
	            (the default) leaves the query string untouched.

	    Returns:
	        The URL produced by ``urllib.parse.urljoin(DOMAIN, ...)``.
	    """
	    parsed = furl.furl(rel_link)
	    # Only touch the query when a parameter was named and is actually present.
	    if drop is not None and drop in parsed.args:
	        del parsed.args[drop]
	    cleaned = parsed.url
	    return urllib.parse.urljoin(DOMAIN, cleaned)

	def set_custom_sentence(vocab, sentence, sentence_eng):
	    """Replace the sentence shown for ``vocab`` on jpdb with a custom one.

	    Searches the site for the vocab, opens the first result link whose href
	    matches it, finds that page's ``/edit-shown-sentence`` link, and POSTs
	    the sentence and its translation to that link.

	    Args:
	        vocab: Word to search for.
	        sentence: Text submitted as the ``sentence`` form field.
	        sentence_eng: Text submitted as the ``translation`` form field.

	    Returns:
	        None.

	    Raises:
	        LookupError: If the vocab link or the edit-sentence link cannot be
	            found in the fetched pages.
	        requests.HTTPError: If any of the requests returns an error status.
	    """
	    # search for the vocab; passing the query through ``params`` lets requests
	    # percent-encode it, so a vocab containing '&', '#' or '+' cannot corrupt
	    # the URL
	    search_page = sess.get(f'{DOMAIN}/search', params={'q': vocab, 'lang': 'english'})
	    search_page.raise_for_status()
	    search_soup = Soup(search_page.content, "html.parser")

	    # find a link to the page with the right reading; the vocab is escaped so
	    # regex metacharacters in it are matched literally
	    vocab_href_pattern = re.compile(re.escape(vocab) + r"(/.*)?\?")
	    vocab_link = search_soup.find("a", href=vocab_href_pattern)
	    if vocab_link is None:
	        raise LookupError(f'no vocab page link found for {vocab!r}')
	    vocab_page_link_abs = make_abs(vocab_link['href'], drop='expand')
	    vocab_page = sess.get(vocab_page_link_abs)
	    vocab_page.raise_for_status()
	    vocab_soup = Soup(vocab_page.content, "html.parser")

	    # grab the "Edit sentence" link
	    # strip the origin param, since it doesn't really do anything for us
	    edit_link = vocab_soup.find("a", href=re.compile(r"/edit-shown-sentence"))
	    if edit_link is None:
	        raise LookupError(f'no edit-sentence link found for {vocab!r}')
	    edit_sentence_link_abs = make_abs(edit_link['href'], drop='origin')

	    # then POST to it with the sentence we want; surface HTTP errors instead
	    # of silently treating a rejected request as a success
	    payload = {'sentence': sentence, 'translation': sentence_eng}
	    sess.post(edit_sentence_link_abs, data=payload).raise_for_status()


	def reading_anki_notes(path='anki-notes-reduced.txt'):
	    """Read vocab/sentence/translation triples from a tab-separated notes file.

	    Rows with fewer than seven columns are skipped.

	    Args:
	        path: File to read. Defaults to ``anki-notes-reduced.txt`` in the
	            current working directory.

	    Returns:
	        A list of dicts, one per accepted row, with the keys ``vocab``
	        (column 0), ``sentence`` (column 4) and ``sentence_eng`` (column 6).
	    """
	    print("Reading Anki notes...")

	    vocab_idx = 0
	    sentence_idx = 4
	    sentence_eng_idx = 6
	    # A row must reach the last column we read.
	    min_columns = sentence_eng_idx + 1

	    # Decode explicitly instead of relying on the platform default encoding,
	    # which is not UTF-8 everywhere; "utf-8-sig" also drops a leading BOM so
	    # it cannot end up glued to the first vocab.
	    with open(path, newline='', encoding='utf-8-sig') as notes:
	        return [
	            {
	                'vocab': note[vocab_idx],
	                'sentence': note[sentence_idx],
	                'sentence_eng': note[sentence_eng_idx],
	            }
	            for note in csv.reader(notes, delimiter='\t')
	            if len(note) >= min_columns
	        ]


	# Script entry point: read the exported notes, then push each note's sentence
	# to the site, reporting (but not aborting on) per-note failures.
	if __name__ == "__main__":
	    notes = reading_anki_notes()

	    print("Setting custom sentences...")

	    for note in notes:
	        try:
	            set_custom_sentence(note['vocab'], note['sentence'], note['sentence_eng'])
	        # Catch Exception rather than using a bare except, so Ctrl-C
	        # (KeyboardInterrupt) and SystemExit still stop the run; the cause is
	        # printed so a failure can be diagnosed.
	        except Exception as exc:
	            print(f'failed importing: {note["vocab"]}, {note["sentence"]}, {note["sentence_eng"]} ({exc!r})')