Skip to content

Instantly share code, notes, and snippets.

@skypenguins
Created July 31, 2022 12:11
Show Gist options
  • Save skypenguins/020bec509325cb5692fcc7c09b597db5 to your computer and use it in GitHub Desktop.
Save skypenguins/020bec509325cb5692fcc7c09b597db5 to your computer and use it in GitHub Desktop.
# Download media from tweets that an authorized user has retweeted (RTed).
using Twitter
using JSON, OAuth
using Dates
using TimeZones
using Downloads
using ProgressBars
# Enable debug-level logging for the Twitter package.
ENV["JULIA_DEBUG"] = Twitter
# Authenticate with the four OAuth credentials read from the environment.
# The original note here pointed at ~/.julia/config/startup.jl — presumably the
# place where these environment variables are set; confirm for your setup.
twitterauth(
    ENV["CONSUMER_KEY"],
    ENV["CONSUMER_SECRET"],
    ENV["ACCESS_TOKEN"],
    ENV["ACCESS_TOKEN_SECRET"],
)
@info "Loaded creds"
"""
    convert_to_jst(origin_dt)

Parse the timestamp string `origin_dt` using the format `e u d H:M:S zzzz y`
(abbreviated weekday, abbreviated month, day, time, UTC offset, year) and return
the same instant as a zone-less `DateTime` expressed in Asia/Tokyo local time.

Presumably the input is a Twitter `created_at` value such as
`"Wed Oct 10 20:19:24 +0000 2018"` — confirm against the API payload.
"""
function convert_to_jst(origin_dt)
    parsed = ZonedDateTime(origin_dt, dateformat"e u d H:M:S zzzz y")
    # Shift to Japan time first, then drop the zone information.
    return DateTime(astimezone(parsed, tz"Asia/Tokyo"))
end
"""
    get_rt_list(; screen_name="zyuer", count=400)

Fetch the user timeline of `screen_name` (retweets included) and return a vector
containing only the statuses that are retweets, in the order the API returned
them.

# Keywords
- `screen_name`: account whose timeline is fetched.
- `count`: number of statuses requested from the API.
  NOTE(review): the v1.1 `statuses/user_timeline` endpoint documents a maximum
  of 200 per request — confirm whether values above that are honoured.

# Returns
A `Vector` of the statuses whose `retweeted_status` field is set. It is empty
when the fetched timeline contains no retweets.
"""
function get_rt_list(; screen_name="zyuer", count=400)
    rts = []
    @info "starting to get retweets..."
    tl = get_user_timeline(screen_name=screen_name, count=count, include_rts=true)
    for status in tl
        # Only statuses carrying a `retweeted_status` payload are retweets;
        # the wrapping "RT" status itself is what gets collected.
        isnothing(status.retweeted_status) || push!(rts, status)
    end
    @info "RTs: $(length(rts))"
    if isempty(rts)
        # Guard: indexing `rts[end]` on an empty list would throw a BoundsError.
        @warn "no retweets found in the fetched timeline"
    else
        # Assumes the timeline is ordered newest-first, so the last element is
        # the oldest retweet — TODO confirm.
        @info "oldest RT date: $(convert_to_jst(rts[end].created_at))"
    end
    @info "getting RTs finished"
    return rts
end
# Return the URL(s) of the highest-bitrate variant(s) listed under
# `media["video_info"]["variants"]`. Variants without a "bitrate" key are
# ignored. An empty list is returned when there is no "video_info" or no
# variant with a bitrate, instead of failing on `maximum` of an empty collection.
function _best_video_urls(media)
    haskey(media, "video_info") || return String[]
    rated = [v for v in media["video_info"]["variants"] if haskey(v, "bitrate")]
    isempty(rated) && return String[]
    best_bitrate = maximum(Int(v["bitrate"]) for v in rated)
    return String[v["url"] for v in rated if v["bitrate"] == best_bitrate]
end

"""
    get_media_urls_from_tl(retweets)

Collect the media URLs attached to the original tweets behind `retweets`.

Each element of `retweets` must expose `created_at` and a `retweeted_status`
dictionary. For every retweet a short summary (retweet time, original time,
author, text) is printed through the progress bar. From
`retweeted_status["extended_entities"]["media"]`:

- entries of type `"photo"` contribute their `"media_url_https"`;
- any other entry contributes the URL of its highest-bitrate video variant, if
  it has one.

# Returns
A `Vector{String}` of all collected URLs, in encounter order.
"""
function get_media_urls_from_tl(retweets)
    urls = String[]
    iter = ProgressBar(retweets)
    for status in iter
        rted_status = status.retweeted_status
        rted_dt = convert_to_jst(status.created_at)
        orig_dt = convert_to_jst(rted_status["created_at"])
        orig_user = Users(rted_status["user"])
        # Print via the progress bar so the message does not garble the bar.
        println(
            iter,
            " RTed: $(rted_dt), orig: $(orig_dt)\n" *
            "$(orig_user.name) @$(orig_user.screen_name)\n" *
            "$(rted_status["text"])",
        )
        if !haskey(rted_status, "extended_entities")
            println(iter, "no media")
            continue
        end
        for media in rted_status["extended_entities"]["media"]
            if media["type"] == "photo"
                # A tweet may carry several images; each one is its own entry.
                push!(urls, media["media_url_https"])
                println(iter, "image url: $(media["media_url_https"])")
            else
                # Non-photo entries: keep only the best-quality variant.
                for video_url in _best_video_urls(media)
                    push!(urls, video_url)
                    println(iter, "video url: $(video_url)")
                end
            end
        end
    end
    @info "extracted media: $(length(urls))"
    return urls
end
"""
    dl_media(url_list; dest_dir=joinpath(pwd(), "dl_RTed_media", "media2"))

Download every URL in `url_list` into `dest_dir`, reusing one downloader for all
requests.

The local file name is the URL's base name with any query string removed. URLs
whose file name does not contain `".mp4"` get `"?name=large"` appended
(presumably to request the large image rendition — confirm against the media
host). A failed download is logged and counted, and the loop continues with the
next URL.

# Keywords
- `dest_dir`: target directory; it is created (with parents) if missing.

# Returns
`nothing`. Progress, elapsed time and the number of failures are logged.
"""
function dl_media(url_list; dest_dir=joinpath(pwd(), "dl_RTed_media", "media2"))
    # Create the target directory up front; otherwise every download fails when
    # it does not exist yet.
    mkpath(dest_dir)
    downloader = Downloads.Downloader()
    # The headers never change, so build them once instead of per URL.
    headers = Dict(
        "User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    )
    n_files = length(url_list)
    fails = 0
    elapsed = @elapsed for (idx, url) in enumerate(url_list)
        # Strip a trailing query string so it does not end up in the file name.
        file_name = replace(basename(url), r"\?.*" => "")
        if !occursin(".mp4", file_name)
            url *= "?name=large"
        end
        try
            Downloads.download(
                url,
                joinpath(dest_dir, file_name);
                headers=headers,
                timeout=Inf,
                downloader=downloader,
            )
            println("downloaded $file_name ... ($idx / $n_files)")
        catch e
            # Best effort: record the failure and keep going with the rest.
            fails += 1
            @error "止まるんじゃねぇぞ… $file_name ($idx)\n$(e)"
        end
    end
    @info "elapsed time: $elapsed (s) / $n_files file(s)\nfailed download(s): $(fails) "
    return nothing
end
# Pipeline: fetch retweets -> extract their media URLs -> download everything.
get_rt_list() |> get_media_urls_from_tl |> dl_media
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment