Skip to content

Instantly share code, notes, and snippets.

@alice1017
Created December 4, 2010 06:15
Show Gist options
  • Save alice1017/727974 to your computer and use it in GitHub Desktop.
Save alice1017/727974 to your computer and use it in GitHub Desktop.
aタグのhrefが画像拡張子のURLの場合、それをダウンロードするもの。
#!/usr/bin/python
#coding:utf-8
"""
This module downloads the images linked from a web page.
Copyright Alice. All Rights Reserved.
Made on 2010.12.01
Usage:: python getimg.py [url] [directory]
Options::
    [url] -- URL of the HTTP site to scan.
        ex. http://www.google.com/
    [directory] -- name of the download directory.
WARNING!!
1. If the 'BeautifulSoup' module is not installed, this module exits immediately.
2. If you pass a directory that does not exist, this module exits immediately.
Thank you for using this module. Bye.
"""
import os
import subprocess
import sys
import urllib
# Stop right away when the BeautifulSoup module cannot be imported.
try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    print("ImportError::")
    raise SystemExit(1)
# The script needs exactly two command-line arguments: url and directory.
if len(sys.argv) != 3:
    print("ArgumentsError::")
    raise SystemExit(1)
# Fetch the page named by the first command-line argument.
url = sys.argv[1]
opened = urllib.urlopen(url)
try:
    # Read the whole response as a list of lines, exactly what iterating
    # over the response object produced before.
    source = opened.readlines()
finally:
    # Always release the connection, even if reading fails part-way.
    opened.close()
# Parse the downloaded markup.
soup = BeautifulSoup(''.join(source))
# Collect every <a> tag and turn each tag's attributes into a plain dict.
a_tags = soup.findAll('a')
a_dict = [dict(tag.attrs) for tag in a_tags]
# Search 'href'
# <a> tags can carry attributes other than href (name, etc.), so remember
# only the positions of the tags that actually have an href.
href_numbers = [pos for pos, attrs in enumerate(a_dict) if u'href' in attrs]
href = [a_dict[pos] for pos in href_numbers]
# Finally pull out the href values themselves.
hrefurl = [attrs[u'href'] for attrs in href]
# Search image url
# A link is kept when its text contains one of these markers anywhere
# (not only at the end), which is the same test the str.index() calls did.
image_markers = ('.jpg', '.png', '.bmp')
imgurl = []
for link in hrefurl:
    try:
        text = str(link)
    except ValueError:
        # On Python 2, str() of a non-ASCII unicode href raises
        # UnicodeEncodeError, a ValueError subclass. Such links were
        # silently skipped before; keep skipping them, but explicitly.
        continue
    if any(marker in text for marker in image_markers):
        imgurl.append(link)
# Move Directory
# Switch into the download directory given as the second argument; give up
# if it does not exist.
dirname = sys.argv[2]
if not os.path.isdir(dirname):
    print("DirectoryError::not found")
    raise SystemExit(1)
os.chdir(dirname)
# Download image
# Each link is handed to wget as its own argument instead of being pasted
# into a shell command line: the links come from an arbitrary web page, so
# characters such as '&', ';' or spaces must not be interpreted by a shell.
print("Now Downloading.....")
for link in imgurl:
    try:
        # "--" ends wget's option parsing, so a link that starts with '-'
        # is still treated as a URL. The exit status is ignored, as before.
        subprocess.call(["wget", "-q", "--", str(link)])
    except OSError:
        # wget itself could not be launched (e.g. it is not installed).
        print("DownloadError::wget could not be started")
        sys.exit(1)
print("Finished.")
# If you tell me a GUI downloader would be faster than this thing, I'll cry.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment