A Python script to download all images returned by a search on wall.alphacoders.com, reworked :)
### A program to download wallpapers from alphacoders.com
### Author: N.V.Pruthvi Raj, Anantapur, India.
### Date: 23/12/2014
### Modified on 1/11/2015 for the site's new thumbnail system
import requests
import shutil
import re
import os
from bs4 import BeautifulSoup
url = 'http://wall.alphacoders.com/search.php?search=digital+art&name=Naruto&page=1'
# Everything up to and including 'page=' is reused below to walk the result pages.
baseurl = url[0:url.find('page=') + 5]
print(baseurl)
r = requests.get(url)
soup = BeautifulSoup(r.content, 'html.parser')
downloadable = []  # full-size image URLs collected from every result page
noofpages = -1
alla = soup.find_all("a")
def getnopages(noofpages):
    # The pagination links are the anchors whose href contains 'page'; the
    # largest number among them is the total number of result pages.
    for link in alla:
        href = str(link.get('href'))
        if href.find('page') >= 0:
            currpages = int(re.findall(r'\d+', href)[-1])
            if currpages > noofpages:
                noofpages = currpages
    return noofpages

noofpages = getnopages(noofpages)
def getlink(url):
    # Collect the full-size image URL behind every thumbnail on one result page.
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')
    for link in soup.find_all("img"):
        href = str(link.get('src'))
        if href.startswith('http://images'):
            # Thumbnail URLs look like .../<dir>/thumb-...-<id>.<ext>; dropping
            # the 'thumb' part of the filename yields the original image URL.
            match = re.search(r'(.*/\d*/)(thumb.*?(\d+\.\w+))', href)
            if match:
                parts = match.groups()
                downloadable.append(parts[0] + parts[-1])
print(noofpages)
for each in range(1, noofpages + 1):
    getlink(baseurl + str(each))
print(len(downloadable))
def downloadfiles(downloadable):
    no = 0
    os.chdir('I:/emma/')  # output directory; change to suit your system
    for url in downloadable:
        print(url)
        print(no)
        # Stream each image straight to disk instead of buffering it in memory.
        response = requests.get(url, stream=True)
        response.raw.decode_content = True
        with open('img' + str(no) + '.jpg', 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
        no += 1

downloadfiles(downloadable)
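
# Usage note: to point the script at a different search, it is easiest to let
# requests build the query string rather than hand-editing `url` above. A
# minimal sketch (the parameters simply mirror the example query at the top;
# requests URL-encodes them, so spaces and '&' in search terms are safe):
#
#   r = requests.get('http://wall.alphacoders.com/search.php',
#                    params={'search': 'digital art', 'name': 'Naruto', 'page': 1})
#   print(r.url)  # the encoded URL you could assign to `url`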