Created
February 1, 2011 17:03
-
-
Save ymotongpoo/806160 to your computer and use it in GitHub Desktop.
Download script for specific files in mod_uploader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Unpack every zip archive found under the download directory, then replace
# each successfully unpacked archive with an empty file of the same name.
download_dir=./download

# Stop here if the directory is missing; otherwise the rm below would run
# against whatever directory the script was started from.
cd "$download_dir" || exit 1

# Collect the archive list up front (NUL-delimited, so names containing
# spaces or newlines survive) before anything is extracted or removed.
# -size +0 skips the empty placeholder files created by the touch below.
archives=()
while IFS= read -r -d '' file; do
    archives+=("$file")
done < <(find . -type f -name "*.zip" -size +0 -print0)

for file in "${archives[@]}"; do
    # Only discard the archive once it has been extracted successfully.
    if unzip "$file"; then
        rm "$file"
        # Leave an empty file with the archive's name in its place.
        touch "$file"
    fi
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import re | |
import string | |
import urllib | |
import urllib2 | |
import cookielib | |
import os.path | |
# Base URL and character encoding of the target mod_uploader site.
# Alternative site: http://upload0.dyndns.org/up/2/_/
mod_uploader = u"http://up02.ayame.jp/up/"
encoding = "euc-jp"

# Form fields posted with every download request.
form_dict = {u'download_pass': u"junk",  # hard-coded download password
             # NOTE(review): presumably lets the server detect the form
             # encoding -- confirm against the mod_uploader form.
             u'code_pat': u"京",
             u'submit': u"ダウンロード"}

# Regex template for one download link on an index page. The "%s" slot is
# filled with a file-name regex from ``files``. The base URL is passed
# through re.escape() so that its dots only match literal dots.
link_pattern = (r'<a\ href="(?P<url>' + re.escape(mod_uploader)
                + r'jump/\d+\.zip/attatch)">(?P<name>%s)</a>')

# Number of index pages scanned by default.
max_page = 32

# File names to download; each entry is a regular expression.
files = [u".*馬鹿力.*cut.*\\.zip",
         u".*爆笑\\ ?cut.*\\.zip",
         u".*バナナ.*\\.zip"]

# Number of bytes requested per read while downloading.
seek_size = 1024 * 512

# Boundary token used in the multipart/form-data request body.
boundary = u"--------python"

# Directory the downloaded files are written to.
download_dir = "./download"
def build_opener(): | |
""" | |
ヘッダを偽装したopenerを作成 | |
""" | |
jar = cookielib.CookieJar() | |
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar), | |
urllib2.HTTPRedirectHandler()) | |
opener.addheaders = [("User-Agent", "Mozilla/5.0 (compatible; python)"), | |
("Connection", "keep-alive"), | |
("Accept-Encoding", "gzip,deflate,sdch")] | |
return opener | |
def multipart_formdata(form_dict):
    """
    Build the body of a multipart/form-data request.

    Each key/value pair of ``form_dict`` becomes one form-data part
    delimited by the module-level ``boundary``; the parts are joined with
    CRLF and the result is encoded with the module-level ``encoding``.

    Args:
        form_dict: mapping of field name to field value (unicode strings).

    Returns:
        The encoded request body as a byte string.
    """
    header_template = u'Content-Disposition: form-data; name="%s"'
    delimiter = u'--' + boundary
    parts = []
    for field, content in form_dict.items():
        # delimiter, part header, blank separator line, then the value
        parts.extend([delimiter, header_template % field, u'', content])
    # closing delimiter followed by a trailing CRLF
    parts.extend([delimiter + u'--', u''])
    return u"\r\n".join(parts).encode(encoding)
def extract_file_url(opener, files, page=max_page):
    """
    Scan index pages 1 through ``page`` and collect matching download links.

    Args:
        opener: object whose ``open(url)`` returns a file-like response.
        files: list of file-name regular expressions; each one is inserted
            into ``link_pattern`` to form the link regex.
        page: number of the last index page to visit (inclusive).

    Returns:
        A list of dicts with the keys 'url' and 'name', one per matched
        link, in page order.
    """
    # Compile every link regex once, instead of once per page.
    link_regexes = [re.compile(link_pattern % f, re.UNICODE) for f in files]
    patterns = []
    for i in range(1, page + 1):
        url = mod_uploader + "index/" + str(i)
        print("===> parsing %s" % url)
        fp = opener.open(url)
        try:
            # 'replace' keeps one malformed byte sequence on a page from
            # aborting the whole crawl with UnicodeDecodeError.
            data = fp.read().decode(encoding, 'replace')
        finally:
            # Close the response even when reading or decoding fails.
            fp.close()
        for regex in link_regexes:
            patterns.extend(m.groupdict() for m in regex.finditer(data))
    return patterns
def download_file(opener, patterns):
    """
    Download every file described in ``patterns`` into ``download_dir``.

    For each entry the 'jump' URL is rewritten to the 'download' URL and
    requested with the multipart form built from ``form_dict``. An entry
    is skipped when the response reports less than 5 MB (or no usable
    Content-Length), or when a local file of at least the reported size
    already exists. A smaller local file is overwritten from the start;
    no partial resume is attempted.

    Args:
        opener: object whose ``open(request, data)`` returns a file-like
            response providing ``info()``, ``read()`` and ``close()``.
        patterns: iterable of dicts with the keys 'url' and 'name'.
    """
    min_size = 1024 * 1024 * 5

    # Make sure the target directory exists before the first open().
    if not os.path.isdir(download_dir):
        os.makedirs(download_dir)

    for p in patterns:
        # Build the request
        url = p['url'].replace(u'jump', u'download')
        req = urllib2.Request(url)
        # Add the basic headers
        req.add_header("Referer", p['url'])
        req.add_header("Content-Type",
                       "multipart/form-data; boundary=%s" % boundary)
        body = multipart_formdata(form_dict)
        conn = opener.open(req, body)
        try:
            # A missing or malformed Content-Length is treated as size 0,
            # which falls into the "too small" branch below.
            try:
                length = int(conn.info().get('Content-Length'))
            except (TypeError, ValueError):
                length = 0

            # Anything below 5 MB is treated as a failed download
            if length < min_size:
                print("### %s : size too small !!! -> %d byte"
                      % (p['name'], length))
                continue

            # The name comes from scraped HTML: keep only its final path
            # component so it cannot point outside download_dir.
            save_path = os.path.join(download_dir,
                                     os.path.basename(p['name']))

            # Skip complete files; an incomplete one is fetched again
            if os.path.exists(save_path):
                if os.path.getsize(save_path) >= length:
                    print("### %s is already downloaded" % p['name'])
                    continue
                print("### '%s' : re-download" % p['name'])

            print("### '%s' : start download !!!" % p['name'])
            remaining = length
            with open(save_path, 'wb') as out:
                while True:
                    print(" === downloading '%s' : %d bytes left"
                          % (p['name'], remaining))
                    chunk = conn.read(seek_size)
                    if not chunk:
                        break
                    out.write(chunk)
                    # Count the bytes actually received, not the request size
                    remaining = max(remaining - len(chunk), 0)
        finally:
            # Release the connection on every path (skip, success, error).
            conn.close()
def main():
    """Collect the matching download links and fetch each of them."""
    session = build_opener()
    download_file(session, extract_file_url(session, files))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment