tlehman · December 14, 2011 00:04 · tlehman · Dec 14, 2011
diff --git a/imgscr.py b/imgscr.py
 # a simple image scraper by tlehman
 # this code is too basic for me to care what you do with it, so have at it.
 # 
 from urllib import urlopen
 from BeautifulSoup import BeautifulSoup

 # usage: getimg(url, filetype)
 #    returns a list of the src attributes of the img tags in the page
 #    referred to by url whose src ends with filetype
 def getimg(url, filetype):
     """Return the distinct ``src`` values of ``<img>`` tags on a page.

     The page at *url* is downloaded, parsed with BeautifulSoup, and every
     ``<img>`` tag whose ``src`` ends with *filetype* is collected.

     Args:
         url: Address of the HTML page to scan.
         filetype: Suffix the ``src`` must end with, e.g. ``'png'`` or
             ``'jpeg'``.  The comparison is case-sensitive.

     Returns:
         A list of the matching ``src`` strings without duplicates, in no
         particular order (they are gathered in a set).
     """
     # Fetch the raw HTML, closing the connection even if the read fails.
     response = urlopen(url)
     try:
         text = response.read()
     finally:
         response.close()

     # parse the html source using BeautifulSoup
     soup = BeautifulSoup(text)

     # set of image urls to be returned
     imgurls = set()
     for img in soup.findAll('img'):
         # <img> tags without a src attribute are skipped instead of
         # raising KeyError.
         src = img.get('src')
         if src is None:
             continue
         s = str(src)
         # endswith() handles extensions of any length, not only those
         # that are exactly three characters long.
         if s.endswith(filetype):
             imgurls.add(s)

     return list(imgurls)
	# a simple image scraper by tlehman
	# this code is too basic for me to care what you do with it, so have at it.
	#
	from urllib import urlopen
	from BeautifulSoup import BeautifulSoup

	# usage: getimg(url, filetype)
	# returns a list of the src attributes of the img tags in the page
	# referred to by url whose src ends with filetype
	def getimg(url, filetype):
	    """Return the distinct ``src`` values of ``<img>`` tags on a page.

	    The page at *url* is downloaded, parsed with BeautifulSoup, and every
	    ``<img>`` tag whose ``src`` ends with *filetype* is collected.

	    Args:
	        url: Address of the HTML page to scan.
	        filetype: Suffix the ``src`` must end with, e.g. ``'png'`` or
	            ``'jpeg'``.  The comparison is case-sensitive.

	    Returns:
	        A list of the matching ``src`` strings without duplicates, in no
	        particular order (they are gathered in a set).
	    """
	    # Fetch the raw HTML, closing the connection even if the read fails.
	    response = urlopen(url)
	    try:
	        text = response.read()
	    finally:
	        response.close()

	    # parse the html source using BeautifulSoup
	    soup = BeautifulSoup(text)

	    # set of image urls to be returned
	    imgurls = set()
	    for img in soup.findAll('img'):
	        # <img> tags without a src attribute are skipped instead of
	        # raising KeyError.
	        src = img.get('src')
	        if src is None:
	            continue
	        s = str(src)
	        # endswith() handles extensions of any length, not only those
	        # that are exactly three characters long.
	        if s.endswith(filetype):
	            imgurls.add(s)

	    return list(imgurls)