Skip to content

Instantly share code, notes, and snippets.

@mattvonrocketstein
Created January 7, 2015 09:28
Show Gist options
  • Select an option

  • Save mattvonrocketstein/15a3c62a48171e671fa1 to your computer and use it in GitHub Desktop.

Select an option

Save mattvonrocketstein/15a3c62a48171e671fa1 to your computer and use it in GitHub Desktop.
This code downloads all of the Gray's Anatomy plate images on Wikimedia Commons into ./grays_anatomy_plates
#
# This code downloads all of the Gray's Anatomy plate images on Wikimedia Commons into ./grays_anatomy_plates
#
import time
import os, urllib2
from BeautifulSoup import BeautifulSoup
import subprocess
from urlparse import urljoin

# Scrape the "Gray's Anatomy plates" gallery on Wikimedia Commons, visit each
# file page it links to, and download the full-size image with wget into
# ``save_dir``.  Files are saved as ``gray<N><ext>`` when the remote name
# already starts with ``gray<N>``, otherwise as ``gray<N>_<remote name>``,
# where N is the 1-based position of the plate in the gallery.

base = 'https://commons.wikimedia.org'
# Built from ``base`` so the index and the file pages use the same https origin.
plates_url = base + '/wiki/Gray%27s_Anatomy_plates'
save_dir = 'grays_anatomy_plates'

# An ``assert`` is stripped when Python runs with -O, so make sure the
# target directory is there explicitly (creating it when missing).
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)

print('getting index..')
index_page = BeautifulSoup(urllib2.urlopen(plates_url).read())

# Collect the file-page link of every gallery entry, ignoring entries that
# carry no usable anchor instead of crashing on them.
links = []
for box in index_page.findAll('li', attrs={'class': 'gallerybox'}):
    anchor = box.find('a')
    if anchor is not None and anchor.get('href'):
        links.append(anchor.get('href'))

for count, link in enumerate(links, 1):
    page_url = urljoin(base, link)
    print(page_url)
    file_page = BeautifulSoup(urllib2.urlopen(page_url).read())

    # The "fullMedia" div wraps the link to the original-resolution file.
    media = file_page.find('div', attrs={'class': 'fullMedia'})
    anchor = media.find('a') if media is not None else None
    href = anchor.get('href') if anchor is not None else None
    if not href:
        print('no full-size image link found, skipping')
        print('')
        time.sleep(2)
        continue

    # urljoin resolves absolute, protocol-relative ("//host/...") and
    # root-relative ("/path") hrefs alike; stripping slashes and prepending
    # "http://" by hand produced broken URLs for the absolute and
    # root-relative forms.
    img_url = urljoin(page_url, href)
    print(img_url)

    fname = img_url.split('/')[-1].lower()
    stem = 'gray{0}'.format(count)
    if fname.startswith(stem):
        # Remote name already carries the plate number: keep the extension only.
        suffix = os.path.splitext(fname)[-1]
    else:
        suffix = '_' + fname
    newf = stem + suffix

    # Pass an argument list (no shell) so characters in the remotely supplied
    # URL or file name can never be interpreted as shell syntax.  The exit
    # status is ignored, as before, so one failed download does not stop the
    # run; a missing wget binary raises OSError instead of failing silently.
    subprocess.call(['wget', '-4', img_url, '-O', newf], cwd=save_dir)
    print('')
    # Pause between plates to avoid hammering the server.
    time.sleep(2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment