Skip to content

Instantly share code, notes, and snippets.

@vuori
Created December 22, 2019 11:30
Show Gist options
  • Save vuori/67ed39b85785e273ff051a69e26874d8 to your computer and use it in GitHub Desktop.
Save vuori/67ed39b85785e273ff051a69e26874d8 to your computer and use it in GitHub Desktop.
Download Twitter favorite images
import json
import sys
import os
import re
import urllib.request
import time
import datetime
# Install twint: https://github.com/twintproject/twint
# Grab favorites: twint --username <myuser> --images --favorites --json -o tweets.json
# Fetch images: getimg.py tweets.json
# Anything outside this whitelist is stripped from usernames before they are
# used as part of a file name.
UNSAFE_RE = re.compile(r'[^-_a-zA-Z0-9]')
# Directory (relative to the working directory) that receives the images.
IMGDIR = 'images'

# First command-line argument: file with one JSON object per line.
infile = sys.argv[1]
os.makedirs(IMGDIR, exist_ok=True)

# One (file name, source URL, tweet timestamp) tuple per photo in the input.
images = []
# Open explicitly as UTF-8 (the JSON interchange encoding) and close the
# handle deterministically instead of leaking it.
with open(infile, encoding='utf-8') as tweets:
    for line in tweets:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline) rather than
            # crashing in json.loads.
            continue
        entry = json.loads(line)
        username = UNSAFE_RE.sub('', entry['username'])
        entry_id = entry['id']
        tstamp_str = f"{entry['date']}T{entry['time']}"
        tstamp_dt = datetime.datetime.strptime(tstamp_str, '%Y-%m-%dT%H:%M:%S')
        # tstamp_dt is naive, so timestamp() interprets it as local time.
        tstamp = tstamp_dt.timestamp()
        for idx, p_url in enumerate(entry['photos']):
            # Everything that is not explicitly a PNG is saved as .jpg.
            suffix = 'png' if p_url.endswith('png') else 'jpg'
            # Photos are numbered from 1 within each tweet.
            fname = f'{username}_{entry_id}_{idx+1}.{suffix}'
            images.append((fname, p_url, tstamp))
print(f'found {len(images)} images')
def geturl(dpath, url):
    """Download ``url`` and store the response body at ``dpath``.

    The whole body is read into memory before the destination file is
    opened, so a failed request neither creates nor truncates ``dpath``.

    Args:
        dpath: Path of the file to write (opened in binary write mode).
        url: URL handed to ``urllib.request.urlopen``.

    Returns:
        The number of bytes written on success, or ``False`` when any
        exception is raised while fetching or writing; the error is printed
        rather than re-raised. An empty body yields ``0``, which is falsy
        as well.
    """
    try:
        # The context manager closes the response even if read() raises.
        with urllib.request.urlopen(url) as resp:
            data = resp.read()
        with open(dpath, 'wb') as fp:
            fp.write(data)
    except Exception as exc:
        # Deliberately best-effort: report the failure and signal it through
        # the return value instead of propagating.
        print(f'failed to download {url}: {exc}')
        return False
    return len(data)
# Access time applied to every processed file; captured once, when this line
# runs, so all files share the same value.
atime = time.time()


def settime(dpath, mtime):
    """Set the access time of ``dpath`` to ``atime`` and its modification time to ``mtime``."""
    stamps = (atime, mtime)
    os.utime(dpath, times=stamps)
# Fetch every collected image that is not on disk yet, and stamp each file
# (pre-existing or freshly downloaded) with the tweet's timestamp.
for fname, url, tstamp in images:
    target = os.path.join(IMGDIR, fname)
    if os.path.exists(target):
        print(f'image {fname} exists, skipping')
    else:
        dlen = geturl(target, url)
        if not dlen:
            # Falsy result (failure or empty body): leave timestamps alone.
            continue
        print(f'image {fname} downloaded ({dlen} bytes)')
    settime(target, tstamp)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment