Created
November 1, 2015 15:16
-
-
Save devpruthvi/c07e486c9be72494b9fa to your computer and use it in GitHub Desktop.
A Python script to download all images returned by a search on wall.alphacoders.com, reworked :)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### A program to download wallpapers from alphacoders.com
### Author: N.V.Pruthvi Raj, Anantapur, India.
### Date: 23/12/2014
### Modified on 1/11/2015 to match the site's new thumbnail system
import requests,urllib2 | |
import shutil | |
import re | |
import os | |
from bs4 import BeautifulSoup | |
# Search-results URL for the first page; the page number must be the last
# thing in the URL so that it can be swapped out below.
url = 'http://wall.alphacoders.com/search.php?search=digital+art&name=Naruto&page=1'
# Everything up to and including "page=", ready for a page number to be appended.
baseurl = url[0:url.find('page=') + 5]
print(baseurl)
r = requests.get(url)
# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed, so the resulting tree can differ between machines.
soup = BeautifulSoup(r.content, 'html.parser')
# Full-size image URLs collected by getlink().
downloadable = []
# Highest page number seen so far; -1 means "not determined yet".
noofpages = -1
# Every anchor on the first results page (scanned for pager links).
alla = soup.find_all("a")
def getnopages(noofpages, links=None):
    """Return the highest page number referenced by a set of links.

    Each link's ``href`` is inspected; when it contains the text ``page``,
    the last run of digits in it is taken as a page number.

    Args:
        noofpages: Starting value; it is returned unchanged when no link
            carries a larger page number.
        links: Iterable of objects exposing ``.get('href')``.  Defaults to
            the module-level ``alla`` list, which is what the original
            single-argument call relied on.

    Returns:
        The largest of ``noofpages`` and every page number found.
    """
    if links is None:
        links = alla
    for link in links:
        href = str(link.get('href'))
        if 'page' not in href:
            continue
        numbers = re.findall(r'\d+', href)
        # An href such as "/pages/about" mentions "page" but has no number;
        # indexing [-1] on the empty result would raise IndexError.
        if not numbers:
            continue
        noofpages = max(noofpages, int(numbers[-1]))
    return noofpages
# Work out how many result pages exist from the links on the first page.
noofpages = getnopages(noofpages)
def getlink(url):
    """Collect full-size image URLs from one search-results page.

    The page at ``url`` is fetched and every ``<img>`` whose ``src`` starts
    with ``http://images`` is examined.  The thumbnail part of the path
    ("thumb...<digits>.<ext>") is reduced to just "<digits>.<ext>", and the
    resulting URL is appended to the module-level ``downloadable`` list.

    Args:
        url: Address of the results page to scan.

    Returns:
        None; results are accumulated in ``downloadable``.
    """
    # Fetch with requests (as the first-page fetch does) and name the parser
    # so the parse result does not depend on which parsers are installed.
    page = BeautifulSoup(requests.get(url).content, 'html.parser')
    for img in page.find_all('img'):
        src = str(img.get('src'))
        if not src.startswith('http://images'):
            continue
        # group 1: prefix ending in "/<digits>/"; group 3: "<digits>.<ext>"
        match = re.search(r'(.*\/\d*\/)(thumb.*?(\d+\.\w+))', src)
        # Images hosted on the image server that are not thumbnails do not
        # match; skip them instead of failing on None.groups().
        if match is None:
            continue
        parts = match.groups()
        downloadable.append(parts[0] + parts[-1])
print(noofpages)
# noofpages is still -1 when the first page carried no pager links (for
# example a search with a single page of results).  Page 1 is the page the
# start URL itself points at, so always visit at least that one.
for each in range(1, max(noofpages, 1) + 1):
    getlink(baseurl + str(each))
# Number of image URLs gathered across all visited pages.
print(len(downloadable))
def downloadfiles(downloadable, dest='I:/emma/', timeout=30):
    """Download every URL in ``downloadable`` into ``dest``.

    Files are named ``img<N>.jpg`` where N is the URL's position in the
    list, so a skipped download never shifts the names of the others.
    The URL and its index are printed before each download.

    Args:
        downloadable: Sequence of image URLs.
        dest: Directory to write into; created when missing.  Defaults to
            the directory the script originally hard-coded.
        timeout: Seconds to wait on the server before giving up on a URL.

    Returns:
        None.

    NOTE(review): every file gets a ``.jpg`` suffix regardless of the
    extension in its URL; kept as-is to preserve existing file names.
    """
    if not os.path.isdir(dest):
        os.makedirs(dest)
    for no, url in enumerate(downloadable):
        print(url)
        print(no)
        target = os.path.join(dest, 'img' + str(no) + '.jpg')
        try:
            response = requests.get(url, stream=True, timeout=timeout)
        except requests.RequestException as exc:
            print('skipped ' + url + ': ' + str(exc))
            continue
        try:
            # Refuse to save an HTTP error page under an image name.
            response.raise_for_status()
            with open(target, 'wb') as out_file:
                # iter_content applies any Content-Encoding (gzip/deflate),
                # which a plain copy of response.raw would not.
                for chunk in response.iter_content(chunk_size=8192):
                    out_file.write(chunk)
        except requests.RequestException as exc:
            print('skipped ' + url + ': ' + str(exc))
        finally:
            # Release the streamed connection back to the pool.
            response.close()
# Fetch every collected image URL to disk.
downloadfiles(downloadable)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment