Created
December 4, 2010 06:15
-
-
Save alice1017/727974 to your computer and use it in GitHub Desktop.
aタグのhrefが画像拡張子のURLの場合、それをダウンロードするもの。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
#coding:utf-8
"""
This module downloads images.
Copyright Alice. All rights reserved.
Made on 2010.12.01.
Usage:: python getimg.py [url] [directory]
Option::
    [url] -- URL of the HTTP site.
        ex. http://www.google.com/
    [directory] -- name of the download directory.
WARNING!!
    1. If the 'BeautifulSoup' module is not installed, this module exits immediately.
    2. If you pass a directory that does not exist, this module exits immediately.
Thank you for using this module. Bye.
"""
import os
import subprocess
import sys
import urllib
import urlparse
# The third-party BeautifulSoup package is required; stop right away when it
# cannot be imported.
try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    print("ImportError::")
    raise SystemExit(1)

# Exactly two arguments (url and directory) must follow the script name.
if len(sys.argv) != 3:
    print("ArgumentsError::")
    raise SystemExit(1)
# Fetch the page named by the first command-line argument and parse it.
url = str(sys.argv[1])
response = urllib.urlopen(url)
try:
    # One read() yields the same text as joining the response line by line.
    html = response.read()
finally:
    # Always release the connection, even when reading the body fails.
    response.close()

# Parse the downloaded markup so the anchor tags can be searched.
soup = BeautifulSoup(html)
# Gather the href value of every <a> tag in the parsed page, in document order.
# Anchors may carry other attributes instead (such as name), so tags without
# an href attribute are left out.
hrefurl = []
for tag in soup.findAll('a'):
    attributes = dict(tag.attrs)
    if u'href' in attributes:
        hrefurl.append(attributes[u'href'])
# Keep only the links that look like images: any href containing one of these
# markers anywhere in its text is selected.
IMAGE_MARKERS = ('.jpg', '.png', '.bmp')

imgurl = []
for link in hrefurl:
    try:
        text = str(link)
    except ValueError:
        # A link whose str() conversion raises ValueError (presumably a
        # unicode href with non-ASCII characters) is skipped, as before.
        continue
    if any(marker in text for marker in IMAGE_MARKERS):
        imgurl.append(link)
# Switch into the download directory given as the second argument; the
# directory must already exist.
dirname = str(sys.argv[2])
if not os.path.isdir(dirname):
    print("DirectoryError::not found")
    raise SystemExit(1)
os.chdir(dirname)
# Download every collected image into the current directory with wget.
print("Now Downloading.....")
for link in imgurl:
    # An href may be relative to the page it came from; resolve it against
    # the page URL so wget receives an absolute address. Absolute hrefs are
    # returned unchanged by urljoin.
    target = urlparse.urljoin(url, str(link))
    try:
        # Pass an argument list and never go through a shell: the link text
        # comes from a remote page and must not be interpreted as shell
        # syntax. "--" ends option parsing so the target cannot be read as a
        # wget option.
        subprocess.call(['wget', '-q', '--', target])
    except OSError:
        # Raised when the wget executable itself cannot be launched.
        print("DownloadError::wget could not be started")
        sys.exit(1)
print("Finished.")
# (Author's note: please don't tell me a GUI downloader would be faster than this.)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment