Skip to content

Instantly share code, notes, and snippets.

@splitline
Last active January 10, 2017 17:33
Show Gist options
  • Save splitline/07fca7c8d0b92bbf10bb5d3a9a474b75 to your computer and use it in GitHub Desktop.
Save splitline/07fca7c8d0b92bbf10bb5d3a9a474b75 to your computer and use it in GitHub Desktop.
# coding=utf-8
import requests
from bs4 import BeautifulSoup
import json
import urllib2
import os
# Scrape wikiart.org: walk the alphabetical artist index (a-z) and, for every
# artist whose 'total-text' value is at least 100, download each painting
# listed by the artist's JSON feed into <dirName>/<artist name>/<title>.jpg.
dirName = 'wikiArts'
if not os.path.exists(dirName):
    os.makedirs(dirName)
artists_url = "https://www.wikiart.org/en/alphabet/"
for char in range(ord('a'), ord('z') + 1):
    html = requests.get(artists_url + chr(char))
    soup = BeautifulSoup(html.text, "lxml")
    lists = soup.find(class_='artists-list')
    if lists is None:
        # No 'artists-list' element on this page, so there is nothing to
        # iterate for this letter.
        continue
    for li in lists.find_all_next('li', class_=''):
        num = li.find('s', class_='total-link')
        if num is None:
            continue
        # Threshold on the 'total-text' number (presumably the artist's
        # artwork count -- confirm against the page markup).
        if int(num.find('span', class_='total-text').text) < 100:
            continue
        name = li.find('li', class_='title').text
        print("=====" + name + "=====")
        # One directory per artist, rooted at dirName rather than a second
        # hard-coded copy of the path.
        artistDir = os.path.join(dirName, name.strip())
        if not os.path.exists(artistDir):
            os.makedirs(artistDir)
        # The link target does not change between pages; look it up once.
        listUrl = 'https://www.wikiart.org' + num.find('a').get('href')
        nowNum = 0
        page = 1
        count = 1
        while True:
            url = listUrl + '?json=2&page=' + str(page)
            print(url)
            artJson = json.loads(requests.get(url).text)
            paintings = artJson.get('Paintings') or []
            if not paintings:
                # An empty or missing page would never advance nowNum, so
                # stop here instead of requesting pages forever.
                break
            nowNum += len(paintings)
            for img in paintings:
                # Path separators in a title would make open() target a
                # non-existent subdirectory; neutralise them.
                safeTitle = img['title'].replace('/', '_').replace('\\', '_')
                filename = safeTitle + ".jpg"
                req = urllib2.Request(url=img['image'])
                response = urllib2.urlopen(req)
                try:
                    # Fetch the image once and reuse the bytes for the write.
                    result = response.read()
                finally:
                    response.close()
                with open(os.path.join(artistDir, filename), "wb") as picf:
                    picf.write(result)
                print("[" + str(count) + "/" + str(artJson['AllPaintingsCount']) + "] : " + filename)
                count += 1
            # Done once as many entries have been seen as the feed reports.
            if nowNum >= int(artJson['AllPaintingsCount']):
                break
            page += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment