Skip to content

Instantly share code, notes, and snippets.

@DrLulz
Created March 7, 2016 02:02
Show Gist options
  • Save DrLulz/115d09011c31c80163bf to your computer and use it in GitHub Desktop.
Save DrLulz/115d09011c31c80163bf to your computer and use it in GitHub Desktop.
############################################################################################
# This script creates Anki decks from a web application.
# Designed specifically for usmle-rx.com flashfacts, but can be modified for any scenario.
#
# Automation provided by /u/SYMPATHETIC_GANG_LION
# Anki import provided by /u/DrLulz
############################################################################################
import os
import sys
import wget
import ntpath
import unicodedata
import codecs
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from operator import itemgetter
from bs4 import BeautifulSoup
from anki import Collection as aopen
import linecache, sys
# ---------------------------------------------------------------------------
# User configuration. Edit these four values before running the script.
# ---------------------------------------------------------------------------
### OS X
# Path of the Anki collection file that make_cards() opens and writes to.
coll_path = "/Users/Ayman/Documents/Anki/User 1/collection.anki2"
# Directory that topic images are downloaded into before being added to Anki.
img_dir = "/Users/Ayman/Documents/tempImg/"
### WIN
# Windows equivalents of the two paths above; uncomment to use them instead.
#coll_path = os.path.abspath("C:\\Users\\your_name\\Documents\\Anki\\User 1\\collection.anki2")
#img_dir = os.path.abspath("C:/tempImg")
# Credentials typed into the site's login form by login(). Blank by default.
userName = ''
password = ''
def hyphenate(word):
    """Join the whitespace-separated pieces of *word* with hyphens.

    Leading and trailing whitespace is dropped and every run of whitespace
    collapses to a single hyphen, so ``"  Renal   Physiology "`` becomes
    ``"Renal-Physiology"``. An empty or all-whitespace string yields ``""``.
    """
    return '-'.join(word.split())
def unishit(uni):
    """Return a one-element list holding *uni* as ISO-8859-4-decoded text.

    Byte strings are decoded with the ``iso-8859-4`` codec. A value that is
    not a byte string is returned unchanged inside the list.
    """
    # The original body read an undefined name ``i`` instead of the ``uni``
    # parameter, so every call raised NameError.
    if isinstance(uni, bytes):
        # ``decode`` already yields a text object on both Python 2 and 3, so
        # the extra ``unicode(...)`` wrapper is not needed.
        uni = uni.decode("iso-8859-4")
    return [uni]
def PrintException():
    """Print details of the exception being handled, then exit the process.

    Must be called from inside an ``except`` block. Reports the file name,
    line number and source text for the first entry of the active traceback,
    followed by the exception value.

    Raises:
        SystemExit: always, with exit status 1 so that shells and calling
            scripts can tell the run failed.
    """
    _, exc_obj, tb = sys.exc_info()
    if tb is None:
        # No exception is being handled, so there is no traceback to describe.
        print('ERROR: PrintException() called with no active exception')
        sys.exit(1)
    frame = tb.tb_frame
    lineno = tb.tb_lineno
    filename = frame.f_code.co_filename
    # Drop stale cache entries so the printed source line matches the file
    # as it currently exists on disk.
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, frame.f_globals)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('EXCEPTION IN: {}'.format(filename))
    print('LINE: {}'.format(lineno))
    print('CODE: {}'.format(line.strip()))
    print('ERROR: {}'.format(exc_obj))
    sys.exit(1)
def make_cards(card_front, card_back, img, rx_tags):
    """Add one 'Basic' note, with its image, to the Anki collection.

    Opens the collection at ``coll_path``, copies the image into its media
    store, creates a note in the deck ``USMLErx::<first tag>``, saves and
    closes the collection.

    Args:
        card_front: text for the note's ``Front`` field.
        card_back: text for the note's ``Back`` field.
        img: path of the downloaded image file (byte string or text).
        rx_tags: list of tags for the note; the first one names the sub-deck.

    Any failure is reported through PrintException(), which exits the process.
    """
    a_coll = None
    try:
        # Decode only byte strings; a text path is used as-is.
        fpath = img.decode('utf-8') if isinstance(img, bytes) else img
        # basename() of a text path is already text. Decoding it a second
        # time made Python 2 ASCII-encode it first, which fails on any
        # non-ASCII file name.
        fname = ntpath.basename(fpath)

        a_coll = aopen(coll_path)
        a_coll.media.addFile(fpath)

        card_type = 'Basic'
        deck_name = 'USMLErx' + '::' + rx_tags[0]
        deck_id = a_coll.decks.id(deck_name)
        a_coll.decks.select(deck_id)

        model = a_coll.models.byName(card_type)
        # Presumably makes the target deck the default for new notes of this
        # type -- confirm against the Anki API.
        model['did'] = deck_id
        a_coll.models.save(model)
        a_coll.models.setCurrent(model)

        card = a_coll.newNote()
        card['Front'] = card_front
        card['Back'] = card_back
        # NOTE(review): assumes the 'Basic' note type has a 'Back Image'
        # field -- confirm it exists in the target collection.
        card['Back Image'] = u'<img src="%s">' % fname
        card.tags = rx_tags
        a_coll.addNote(card)

        a_coll.save()
        a_coll.close()
        a_coll = None  # closed cleanly; nothing left for ``finally`` to do
    except Exception:
        PrintException()
    finally:
        # Reached with an open collection only when something above failed
        # (PrintException raises SystemExit, which still runs this clause).
        # Close without saving so a half-built note is not committed.
        # NOTE(review): relies on Collection.close() accepting ``save`` as
        # in the Anki 2.0 API -- confirm for the installed version.
        if a_coll is not None:
            a_coll.close(save=False)
def login():
    """Open Safari, sign in to usmle-rx.com and start the scrape.

    Loads the dashboard URL, fills the login form with the module-level
    ``userName`` / ``password`` values, submits it and passes the driver and
    its 60-second wait object to load_cards(). The browser is quit when the
    run finishes or fails. Any failure is reported through PrintException(),
    which exits the process.
    """
    driver = None
    try:
        driver = webdriver.Safari()
        wait = WebDriverWait(driver, 60)
        # NOTE(review): the credentials below are sent to a plain http://
        # URL -- check whether the site offers an https:// endpoint.
        driver.get("http://usmle-rx.com/dashboard")

        # Wait for the form instead of looking it up immediately, so a slow
        # page load does not fail with "no such element".
        user = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//*[(@id = "edit-name")]')))
        # find_element(By..., ...) is available in every Selenium release,
        # unlike the find_element_by_* helpers.
        pass2 = driver.find_element(By.XPATH, '//*[(@id = "edit-pass")]')
        user.send_keys(userName)
        pass2.send_keys(password)

        loginform = driver.find_element(By.XPATH, '//*[(@id = "edit-submit")]')
        loginform.submit()

        load_cards(driver, wait)
    except Exception:
        PrintException()
    finally:
        # Runs on success and on the SystemExit raised by PrintException, so
        # the browser session is always released.
        if driver is not None:
            driver.quit()
def _wait_for_page_idle(wait):
    """Block until both flashfact loading indicators are hidden."""
    wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, '.divLoading')))
    wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, 'span#spnLoadingLabel')))


def load_cards(driver, wait):
    """Open the flashfact UI from the dashboard and start iterating topics.

    Args:
        driver: the logged-in Selenium WebDriver.
        wait: a WebDriverWait bound to ``driver``.

    Clicks the image in the first dashboard row, then the ``cmdFirstAid``
    control, waiting for the loading indicators to disappear around each
    click, and finally calls iterate_this_muthafucka(). Any failure is
    reported through PrintException(), which exits the process.
    """
    try:
        # Wait for the dashboard content area before touching anything in it.
        wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="content-inner-main"]')))

        # Click the image in the first dashboard row to load the flashfact UI.
        ff = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR,
             'tr.dashboard-row:nth-child(1) > td:nth-child(1) > a:nth-child(1) > img:nth-child(1)')))
        ff.click()
        _wait_for_page_idle(wait)

        faLink = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'span#cmdFirstAid')))
        # Wait again between locating and clicking, as the original did --
        # presumably because a loading overlay can reappear over the link.
        _wait_for_page_idle(wait)
        faLink.click()
        _wait_for_page_idle(wait)

        iterate_this_muthafucka(driver, wait)
    except Exception:
        PrintException()
def _pair_questions_with_answers(text):
    """Split *text* on newlines and pair line 0 with 1, 2 with 3, and so on.

    A trailing unpaired line is dropped.
    """
    lines = text.split('\n')
    return list(zip(lines[0::2], lines[1::2]))


def iterate_this_muthafucka(driver, wait):
    """Walk every section, subsection and topic and turn each into cards.

    Args:
        driver: the Selenium WebDriver showing the flashfact UI.
        wait: a WebDriverWait bound to ``driver``.

    For each topic this downloads the topic image into ``img_dir``, opens the
    card list, reads it as alternating question / answer lines, and calls
    make_cards() once per pair. Cards are tagged with the hyphenated section,
    subsection and topic names. A failure inside the loop is reported through
    PrintException(), which exits the process.
    """
    def settle():
        # Block until both loading indicators are hidden.
        wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, '.divLoading')))
        wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, 'span#spnLoadingLabel')))

    wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, 'span#spnLoadingLabel')))
    # find_element(s)(By..., ...) is available in every Selenium release,
    # unlike the find_element(s)_by_* helpers.
    ul = driver.find_element(By.CSS_SELECTOR, 'ul#ulSection')
    sections = ul.find_elements(By.CSS_SELECTOR, 'li.liSection')

    try:
        # Section
        for section in sections:
            secClick = section.find_element(By.CSS_SELECTOR, 'span.spnSectionName')
            secClick.click()
            subSection_ul = section.find_element(By.CSS_SELECTOR, 'ul.ulSubsection')
            subSections = subSection_ul.find_elements(By.CSS_SELECTOR, 'li.liSubsection')

            # Subsection
            for subSection in subSections:
                subSection_click = subSection.find_element(By.CSS_SELECTOR, 'span.spnSubsectionName')
                subSection_click.click()
                topic_ul = subSection.find_element(By.CSS_SELECTOR, 'ul.ulTopic')
                topics = topic_ul.find_elements(By.CSS_SELECTOR, 'li.liTopic')

                # Topic
                for topic in topics:
                    settle()
                    clicker = topic.find_element(By.CSS_SELECTOR, 'span')
                    clicker.click()
                    # Let the topic finish loading before reading from it.
                    settle()

                    # Save the topic image locally.
                    img = driver.find_element(By.ID, 'imgFFImg')
                    url = img.get_attribute('src')
                    img_path = wget.download(url, img_dir)

                    # Open the card list and read its text.
                    driver.find_element(By.ID, 'spnViewCards').click()
                    settle()
                    card_text = driver.find_elements(By.CSS_SELECTOR, '#ulFactCardList')[0].text
                    qa_pairs = _pair_questions_with_answers(card_text)

                    # Tags: section keeps its case (make_cards uses the first
                    # tag as the deck name); the other two are lower-cased.
                    tags = [
                        hyphenate(secClick.text),
                        hyphenate(subSection_click.text.lower()),
                        hyphenate(clicker.text.lower()),
                    ]

                    # Show progress in the console.
                    print(tags)

                    # One note per question / answer pair.
                    for front, back in qa_pairs:
                        make_cards(front, back, img_path, tags)
    except Exception:
        PrintException()
# Script entry point: runs the login -> load_cards -> iterate pipeline as soon
# as this file is executed (or imported).
login()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment