Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save mtholder/c0efd7410ff2983b73803e14dde07846 to your computer and use it in GitHub Desktop.

Select an option

Save mtholder/c0efd7410ff2983b73803e14dde07846 to your computer and use it in GitHub Desktop.
brittle scrape of download archive from rb tutorial
#!/usr/bin/env python
from __future__ import print_function
import sys
try:
from selenium import webdriver
except:
sys.exit('Need to run:\npip install selenium\n')
import tempfile
import time
import os
td = os.path.abspath(tempfile.mkdtemp())
print('tempdir = {}'.format(td))
options = webdriver.ChromeOptions()
prefs = {'download.default_directory' : td}
options.add_experimental_option('prefs', prefs)
browser = webdriver.Chrome(chrome_options=options)
url = 'https://willpett.github.io/revbayes_tutorials/tutorials/ctmc/'
browser.get(url)
df_el = browser.find_element_by_class_name("download_files")
df_el.click()
time.sleep(1)
while True:
x = os.listdir(td)
still_working = False
for el in x:
if '.crdownload' in el:
still_working = True
break
if still_working:
time.sleep(1)
else:
break
print('Downloaded: "{}"'.format('", "'.join([os.path.join(td, i) for i in x])))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment