Skip to content

Instantly share code, notes, and snippets.

@lowstz
Last active December 14, 2015 17:49
Show Gist options
  • Save lowstz/5125124 to your computer and use it in GitHub Desktop.
Save lowstz/5125124 to your computer and use it in GitHub Desktop.
Download all photos from a Twitter user

Usage

python tw-pic.py username
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import twitter
import re
import urllib2
import os
import sys
import threading
import Queue
from shutil import copyfileobj
def _download_tweet_image(page_url, page_html):
    """Save the first image referenced by ``page_html`` to the current directory.

    The image URL is the first ``img_regex`` match in ``page_html``.  The
    output name is ``SCREEN_NAME``, a dash, and the element at index 4 of the
    image URL split on ``'/'``.  Nothing is downloaded when a file with that
    name already exists.

    Args:
        page_url: Address the page was loaded from (used in messages only).
        page_html: Body of that page.
    """
    match = img_regex.search(page_html)
    if match is None:
        # Not every page that mentions the screen name carries a photo.
        print("No image found at %s" % page_url)
        return

    img_url = match.group(1)
    parts = img_url.split('/')
    if len(parts) < 5 or not parts[4]:
        print("Unexpected image URL %s" % img_url)
        return

    img_outname = SCREEN_NAME + '-' + parts[4]
    if os.path.exists(img_outname):
        print("%s already downloaded" % img_outname)
        return

    resp = urllib2.urlopen(img_url)
    try:
        # Images are binary data: text mode ('w') would corrupt them on
        # platforms that translate line endings.
        with open(img_outname, 'wb') as outfile:
            copyfileobj(resp, outfile)
    except Exception:
        # Do not leave a truncated file behind; the existence check above
        # would otherwise skip this image on every later run.
        if os.path.exists(img_outname):
            os.remove(img_outname)
        raise
    finally:
        resp.close()
    print("Downloaded %s to %s" % (img_url, img_outname))


def url_extender(url, queue):
    """Resolve a link taken from a tweet and download the photo behind it.

    ``url`` is opened so that redirects are followed.  If the final address
    contains ``SCREEN_NAME`` the page body is searched for an image, which is
    then saved via ``_download_tweet_image``.  The final address is put on
    ``queue`` afterwards.

    Args:
        url: Link extracted from a tweet.
        queue: Object with a ``put`` method that receives the resolved URL.
    """
    response = urllib2.urlopen(url)
    try:
        extender_url = response.url
        # The response already is the page at the resolved address, so its
        # body is reused instead of requesting the same page a second time.
        if SCREEN_NAME in extender_url:
            page_html = response.read()
        else:
            page_html = None
    finally:
        response.close()

    if page_html is not None:
        _download_tweet_image(extender_url, page_html)

    # NOTE(review): indentation was lost in the published source; reporting
    # every resolved URL (not only photo pages) is assumed here -- confirm.
    queue.put(extender_url)
def fetch_paraller(urls=None, max_workers=16, handler=None):
    """Process URLs concurrently on a bounded set of threads.

    Each URL is passed to ``handler(url, result_queue)``.  A handler that
    raises is reported on stderr and does not stop the remaining URLs from
    being processed.

    Args:
        urls: Iterable of URLs to process.  Defaults to the module-level
            ``urls_to_load``.
        max_workers: Maximum number of threads running at the same time.
        handler: Callable taking ``(url, queue)``.  Defaults to the
            module-level ``url_extender``.

    Returns:
        The ``Queue.Queue`` instance that was handed to every handler call.

    Raises:
        ValueError: If ``max_workers`` is smaller than 1.
    """
    if max_workers < 1:
        raise ValueError("max_workers must be at least 1")
    if urls is None:
        urls = urls_to_load
    if handler is None:
        handler = url_extender

    result = Queue.Queue()
    pending = Queue.Queue()
    for url in urls:
        pending.put(url)

    def drain():
        # Each worker keeps pulling URLs until none are left, so the number
        # of live threads never exceeds max_workers however long the list is.
        while True:
            try:
                url = pending.get_nowait()
            except Queue.Empty:
                return
            try:
                handler(url, result)
            except Exception as exc:
                # One bad URL must not take the worker (and the URLs it
                # would still have processed) down with it.
                sys.stderr.write("Failed to process %s: %s\n" % (url, exc))

    worker_count = min(max_workers, pending.qsize())
    threads = [threading.Thread(target=drain) for _ in range(worker_count)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return result
# Screen name of the account whose photos are fetched (first CLI argument).
if len(sys.argv) < 2:
    sys.stderr.write("Usage: python tw-pic.py username\n")
    sys.exit(1)
SCREEN_NAME = sys.argv[1]

# Placeholders: fill in the credentials of your own Twitter application.
CONSUMER_KEY = 'YOUR_CONSUMER_KEY'
CONSUMER_SECRET = 'YOUR_CONSUMER_SECRET'
ACCESS_TOKEN_KEY = 'YOUR_ACCESS_TOKEN_KEY'
ACCESS_TOKEN_SECRET = 'YOUR_ACCESS_TOKEN_SECRET'

api = twitter.Api(
    consumer_key=CONSUMER_KEY,
    consumer_secret=CONSUMER_SECRET,
    access_token_key=ACCESS_TOKEN_KEY,
    access_token_secret=ACCESS_TOKEN_SECRET,
)

# Only the API call sits inside the try block.  The exits happen outside it,
# so a failed verification is no longer caught and reported a second time as
# 'oops', and Ctrl-C is not swallowed.
try:
    verified = api.VerifyCredentials()
except Exception:
    print('oops')
    sys.exit(1)
if not verified:
    print('Verify Failed')
    sys.exit(1)
print('done')

user_info = api.GetUser(SCREEN_NAME)
statuses_count = user_info.GetStatusesCount()
# NOTE(review): the Twitter API is believed to cap ``count`` per request, so
# this probably returns only the most recent tweets rather than the full
# history -- confirm, and page with ``max_id`` if everything is needed.
statuses = api.GetUserTimeline(screen_name=SCREEN_NAME, count=statuses_count)

# Compiled once instead of once per tweet.  Raw string: same pattern, without
# relying on '\(' being passed through as an unknown escape.
url_regex = re.compile(r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")

# Only the first link of each tweet is followed.
urls_to_load = []
for s in statuses:
    first_link = url_regex.search(s.text)
    if first_link:
        urls_to_load.append(first_link.group(0))

# Captures the image address up to (not including) the ':large' suffix.
# The Python 2-only ``ur`` prefix is dropped: the pattern is plain ASCII.
img_regex = re.compile(r'class="large media-slideshow-image" alt="" src="(.+?):large')

fetch_paraller()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment