Created
March 22, 2015 18:45
-
-
Save DrLulz/53e01e0e1f39e7f086c7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################################
# This script creates Anki decks from a web application.
# Designed specifically for usmle-rx.com flashfacts, but can be modified for any scenario.
#
# Automation provided by /u/SYMPATHETIC_GANG_LION
# Anki import provided by /u/DrLulz
############################################################################################
import os | |
import sys | |
import wget | |
import ntpath | |
import unicodedata | |
import codecs | |
from selenium import webdriver | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from operator import itemgetter | |
from bs4 import BeautifulSoup | |
from anki import Collection as aopen | |
### OS X
# coll_path: the Anki collection file that make_cards() opens.
# img_dir:   the folder that downloaded card images are written to.
coll_path = "/Users/your_name/Documents/Anki/User 1/collection.anki2"
img_dir = "/Users/your_name/Documents/tempImg/"

### WIN
#coll_path = os.path.abspath("C:\\Users\\your_name\\Documents\\Anki\\User 1\\collection.anki2")
#img_dir = os.path.abspath("C:/tempImg")

# Credentials typed into the login form by login(). They may be supplied
# through the USMLERX_USER / USMLERX_PASS environment variables so that they
# do not have to be written into this file; when the variables are unset the
# values stay empty strings, exactly as before.
userName = os.environ.get('USMLERX_USER', '')
password = os.environ.get('USMLERX_PASS', '')
def hyphenate(word):
    """Return *word* with every run of whitespace collapsed to one hyphen.

    Leading and trailing whitespace is discarded, so an empty or
    whitespace-only string yields an empty string.
    """
    pieces = word.split()
    return '-'.join(pieces)
def unishit(uni):
    """Return a one-element list holding *uni* as text.

    A byte string is decoded as ISO-8859-4; a value that is already text is
    passed through unchanged.

    The original body referenced an undefined name ``i`` instead of the
    ``uni`` parameter, so every call raised NameError.
    """
    if isinstance(uni, bytes):
        uni = uni.decode("iso-8859-4")
    return [uni]
def make_cards(card_front, card_back, img, rx_tags):
    """Add one 'Basic' note with an image to the collection at ``coll_path``.

    The note is filed in the deck ``USMLErx::<first tag>``.

    Args:
        card_front: Text stored in the note's 'Front' field.
        card_back: Text stored in the note's 'Back' field.
        img: Path of the image file to copy into the collection's media.
            A byte-string path is decoded as UTF-8.
        rx_tags: Sequence of tag strings; the first one names the sub-deck
            and the whole sequence becomes the note's tags.

    The collection is opened and closed on every call, and is closed even if
    adding the note fails.
    """
    # Decode once, and only when the path really is bytes. The original
    # decoded the basename a second time, which breaks on non-ASCII names.
    fpath = img.decode('utf-8') if isinstance(img, bytes) else img

    a_coll = aopen(coll_path)
    try:
        # Use the name Anki reports for the stored file so the <img> tag
        # matches even if the file was sanitised or renamed on import.
        fname = a_coll.media.addFile(fpath)

        deck_id = a_coll.decks.id('USMLErx::' + rx_tags[0])
        a_coll.decks.select(deck_id)

        # Point the note type at the target deck before creating the note.
        model = a_coll.models.byName('Basic')
        model['did'] = deck_id
        a_coll.models.save(model)
        a_coll.models.setCurrent(model)

        card = a_coll.newNote()
        card['Front'] = card_front
        card['Back'] = card_back
        # NOTE(review): requires the 'Basic' note type to have a
        # 'Back Image' field - confirm it exists in the target collection.
        card['Back Image'] = u'<img src="%s">' % fname
        card.tags = rx_tags

        a_coll.addNote(card)
        a_coll.save()
    finally:
        a_coll.close()
def login():
    """Open Firefox, sign in to usmle-rx.com and start the card scrape.

    Fills the login form with the module-level ``userName`` and ``password``,
    submits it, then hands the driver and a 60-second WebDriverWait to
    load_cards().

    The browser is shut down when the scrape finishes or fails, so no
    Firefox process is left behind.
    """
    driver = webdriver.Firefox()
    try:
        wait = WebDriverWait(driver, 60)
        driver.get("http://usmle-rx.com/dashboard")

        # find_element(By..., ...) is used instead of the find_element_by_*
        # helpers, which newer Selenium releases no longer provide.
        user_field = driver.find_element(
            By.XPATH, '//*[(@id = "edit-name-wrapper")]//*[(@id = "edit-name")]')
        pass_field = driver.find_element(
            By.XPATH, '//*[(@id = "edit-pass-wrapper")]//*[(@id = "edit-pass")]')
        user_field.send_keys(userName)
        pass_field.send_keys(password)

        submit_button = driver.find_element(
            By.XPATH, '//*[(@id = "content-inner-main")]//*[(@id = "edit-submit")]')
        submit_button.submit()

        load_cards(driver, wait)
    finally:
        driver.quit()
def load_cards(driver, wait):
    """Open the flashfact UI from the dashboard and switch to the First Aid view.

    Waits for the dashboard link and image to become clickable, clicks the
    image, waits for the loading indicators to disappear, clicks
    ``span#cmdFirstAid``, waits again, then calls iterate_this_muthafucka().

    Args:
        driver: The Selenium WebDriver, passed through to the iteration step.
        wait: The WebDriverWait used for every explicit wait here.
    """
    def until_clickable(by, selector):
        # Block until the element can be clicked, then return it.
        return wait.until(EC.element_to_be_clickable((by, selector)))

    def until_hidden(selector):
        # Block until the element matched by the CSS selector is not visible.
        wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, selector)))

    # click the image and load the flashfact ui
    until_clickable(By.XPATH, '//*[@id="content-inner-main"]/table[2]/tbody/tr[1]/td[1]/a')
    dashboard_image = until_clickable(
        By.CSS_SELECTOR,
        'tr.dashboard-row:nth-child(1) > td:nth-child(1) > a:nth-child(1) > img:nth-child(1)')
    dashboard_image.click()
    until_hidden('span#spnLoadingLabel')
    until_hidden('.divLoading')

    first_aid_link = until_clickable(By.CSS_SELECTOR, 'span#cmdFirstAid')
    until_hidden('.divLoading')
    until_hidden('span#spnLoadingLabel')
    first_aid_link.click()
    until_hidden('.divLoading')
    until_hidden('span#spnLoadingLabel')

    iterate_this_muthafucka(driver, wait)
def iterate_this_muthafucka(driver, wait):
    """Visit every section > subsection > topic and create cards for each topic.

    For each topic this:
      1. clicks the topic and waits for the loading indicators to disappear;
      2. downloads the image shown in ``#imgFFImg`` into ``img_dir``;
      3. clicks ``#spnViewCards`` and reads the text of ``#ulFactCardList``;
      4. pairs alternating lines of that text as (front, back);
      5. calls make_cards() once per pair, tagged with the hyphenated
         section, subsection and topic names.

    Args:
        driver: The Selenium WebDriver showing the flashfact UI.
        wait: The WebDriverWait used for every explicit wait here.

    A trailing unpaired line in the card list text is ignored.
    """
    loading_overlay = (By.CSS_SELECTOR, '.divLoading')
    loading_label = (By.CSS_SELECTOR, 'span#spnLoadingLabel')

    def wait_until_loaded():
        # Block until both loading indicators are no longer visible.
        wait.until(EC.invisibility_of_element_located(loading_overlay))
        wait.until(EC.invisibility_of_element_located(loading_label))

    wait.until(EC.invisibility_of_element_located(loading_label))
    section_list = driver.find_element(By.CSS_SELECTOR, 'ul#ulSection')
    sections = section_list.find_elements(By.CSS_SELECTOR, 'li.liSection')

    # Section
    for section in sections:
        section_name = section.find_element(By.CSS_SELECTOR, 'span.spnSectionName')
        section_name.click()
        subsection_list = section.find_element(By.CSS_SELECTOR, 'ul.ulSubsection')
        subsections = subsection_list.find_elements(By.CSS_SELECTOR, 'li.liSubsection')

        # Subsection
        for subsection in subsections:
            subsection_name = subsection.find_element(
                By.CSS_SELECTOR, 'span.spnSubsectionName')
            subsection_name.click()
            topic_list = subsection.find_element(By.CSS_SELECTOR, 'ul.ulTopic')
            topics = topic_list.find_elements(By.CSS_SELECTOR, 'li.liTopic')

            # Topic
            for topic in topics:
                wait_until_loaded()
                topic_name = topic.find_element(By.CSS_SELECTOR, 'span')
                topic_name.click()
                wait_until_loaded()

                # save image
                image = driver.find_element(By.ID, 'imgFFImg')
                image_path = wget.download(image.get_attribute('src'), img_dir)

                # define card content: alternating lines are front, back
                driver.find_element(By.ID, 'spnViewCards').click()
                wait_until_loaded()
                card_text = driver.find_elements(By.CSS_SELECTOR, '#ulFactCardList')[0].text
                card_lines = card_text.split('\n')
                card_pairs = zip(card_lines[0::2], card_lines[1::2])

                # define card tags; the section name keeps its case because
                # make_cards() uses the first tag as the sub-deck name
                tags = [
                    hyphenate(section_name.text),
                    hyphenate(subsection_name.text.lower()),
                    hyphenate(topic_name.text.lower()),
                ]

                # show progression in console
                print(tags)

                # make cards
                for front, back in card_pairs:
                    make_cards(front, back, image_path, tags)
# Start the scrape only when this file is run as a script, so that importing
# the module does not launch a browser.
if __name__ == "__main__":
    login()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment