Created
April 27, 2014 23:52
-
-
Save dodola/11358336 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = 'dodola' | |
#encoding: utf-8 | |
import time | |
import urllib.request | |
import threading | |
import contextlib | |
import queue | |
import string | |
import shutil | |
import os | |
from urllib.parse import ( | |
urlparse, urlsplit, urljoin, unwrap, quote, unquote, | |
splittype, splithost, splitport, splituser, splitpasswd, | |
splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse) | |
def myurlretrieve(url, filename=None, reporthook=None, data=None):
    """Fetch *url* into a local file and return ``(filename, headers)``.

    The structure closely follows ``urllib.request.urlretrieve`` (it appears
    to be adapted from it), except that the request is built explicitly via
    ``urllib.request.Request``.

    Args:
        url: URL string to fetch.
        filename: Destination path.  When omitted, a ``file:`` URL is not
            copied at all (its normalized local path is returned) and any
            other URL is written to a new temporary file.  That temporary
            file is not registered for automatic cleanup; the caller is
            responsible for deleting it.
        reporthook: Optional callable ``reporthook(blocknum, nbytes, size)``.
            It is called once before the first read with ``nbytes == 0`` and
            then after every block with the length of that block.  ``size``
            is the Content-Length value, or ``-1`` when the header is absent.
        data: Optional request body handed to ``urllib.request.Request``.

    Returns:
        A ``(path, headers)`` tuple, where ``headers`` is whatever
        ``info()`` of the opened response returns.

    Raises:
        urllib.error.ContentTooShortError: Fewer bytes were received than
            the Content-Length header announced.
        Any exception raised by ``urllib.request.urlopen`` or ``open`` is
        propagated unchanged.
    """
    # Imported here because neither name is imported at the top of this file.
    import tempfile
    from urllib.error import ContentTooShortError

    # Split off the scheme by hand; this mirrors the deprecated
    # urllib.parse.splittype(): a non-empty run of characters containing no
    # "/" followed by ":" is the scheme (lower-cased), the rest is the path.
    scheme, colon, remainder = url.partition(":")
    if colon and scheme and "/" not in scheme:
        url_type, path = scheme.lower(), remainder
    else:
        url_type, path = None, url

    req = urllib.request.Request(url, data)
    with contextlib.closing(urllib.request.urlopen(req)) as fp:
        headers = fp.info()

        # A local file with no explicit destination needs no copy at all.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the output file: the caller's path, or a fresh temp file.
        if filename:
            tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name

        with tfp:
            result = filename, headers
            bs = 1024 * 8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            if reporthook:
                reporthook(blocknum, 0, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, len(block), size)

    # Checked after both files are closed so the partial data is flushed
    # before the error is reported.
    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
# Example: myurlretrieve("http://www.wallcoo.com//animal/penguin//wallpapers/1600x1200/%5Bwallcoo_com%5D_penguin_wallpapers_34.jpg", r"d:\lll.jpg")
def downloadFromTxt(rootdir="g:/wallcoo/wallcoo/"):
    """Download every URL listed in the per-directory list files under *rootdir*.

    For each directory ``<dir>`` reached by walking *rootdir*, the file
    ``<dir>/<dir>.txt`` (if present) is read as a list of URLs, one per line.
    Each URL is saved into that same directory under the text following its
    last ``/``.  Targets that already exist are skipped, and a failed
    download is printed and does not stop the run.

    Args:
        rootdir: Directory tree to scan.  Defaults to the location that was
            previously hard-coded.
    """
    for parent, dirnames, _filenames in os.walk(rootdir):
        for dirname in dirnames:
            print(dirname)

            # Build paths from the directory os.walk is currently visiting,
            # so nested directories resolve to where they really are.
            folder = os.path.join(parent, dirname)
            list_path = os.path.join(folder, dirname + ".txt")
            if not os.path.exists(list_path):
                continue

            with open(list_path) as url_list:
                for line in url_list:
                    # Drop the newline and any stray whitespace so it ends
                    # up in neither the URL nor the file name.
                    url = line.strip()
                    if not url:
                        continue

                    fname = url[url.rfind("/") + 1:]
                    if not fname:
                        # URL ends in "/": there is no file name to save as.
                        continue

                    saveName = os.path.join(folder, fname)
                    if os.path.exists(saveName):
                        continue

                    try:
                        myurlretrieve(url, saveName)
                    except Exception as ex:
                        # Best-effort batch: report and move on to the next URL.
                        print(ex)
                        # saveName did not exist before this attempt, so
                        # anything there now is a partial download; remove
                        # it so the next run retries instead of skipping.
                        try:
                            os.remove(saveName)
                        except OSError:
                            pass
# Script entry: start the batch download as soon as this module is executed.
downloadFromTxt()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment