Last active
July 3, 2019 09:04
-
-
Save askareija/6c777df9600c7d52ec61bd0f73a31b46 to your computer and use it in GitHub Desktop.
Instagram Scraper Metadata Script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'json'
require 'net/http'
require 'uri'
require 'fileutils'

# Instagram metadata scraper.
#
# Prompts for one or more comma-separated usernames and, for each one:
#   1. requests https://www.instagram.com/<user>/?__a=1 and stores the response
#      body in <user>/<user>_profile.json;
#   2. pages through the user's timeline media via the graphql/query endpoint,
#      storing each page's body in <user>/<user>_posts_page_<n>.json.
#
# NOTE(review): the code expects both endpoints to answer with JSON shaped as
# read below ("graphql" => "user" and "data" => "user"); confirm the endpoints
# still behave that way before relying on the output.

# Query hash sent to the graphql/query endpoint when paging timeline media.
QUERY_HASH = "f2405b236d85e8296cf30347c9f08c2a"
# Number of posts requested per page (the "first" query variable).
POSTS_PER_PAGE = 50
# Pause between page requests, and before retrying a failed page.
PAGE_DELAY_SECONDS = 20
# Upper bound on attempts for a single page, so a persistent failure cannot
# turn into an endless retry loop.
MAX_PAGE_ATTEMPTS = 5
# The username becomes a directory name and a URL path segment, so only
# letters, digits, dots and underscores are accepted; names made solely of
# dots ("." / "..") are rejected because they would escape the output folder.
USERNAME_PATTERN = /\A(?!\.+\z)[A-Za-z0-9._]+\z/

# Splits the raw console input into usernames.
#
# @param input [String, nil] the line read from the console (nil at EOF)
# @return [Array<String>] usernames with all whitespace removed, blanks dropped
def parse_usernames(input)
  input.to_s.split(",").map { |name| name.gsub(/\s+/, "") }.reject(&:empty?)
end

# @param user [String]
# @return [Boolean] whether the name is safe to use in a path and a URL
def valid_username?(user)
  USERNAME_PATTERN.match?(user)
end

# Requests the profile endpoint for +user+.
#
# @param user [String] validated username
# @return [String, nil] the response body, or nil when the response is not a success
def fetch_profile_body(user)
  res = Net::HTTP.get_response(URI("https://www.instagram.com/#{user}/?__a=1"))
  res.body if res.is_a?(Net::HTTPSuccess)
end

# Extracts the "graphql" => "user" hash from a profile response body.
#
# @param body [String] raw profile response body
# @return [Hash, nil] the user hash, or nil when the body is not JSON or lacks
#   the timeline page_info this script needs
def profile_user_from(body)
  user = JSON.parse(body).dig("graphql", "user")
  return nil unless user.is_a?(Hash)

  info = user.dig("edge_owner_to_timeline_media", "page_info")
  info.is_a?(Hash) ? user : nil
rescue JSON::ParserError, TypeError, NoMethodError
  nil
end

# Requests one page of timeline media.
#
# @param user_id [Integer] numeric id taken from the profile response
# @param cursor [String, nil] end cursor of the previous page
# @return [String] the response body
def fetch_posts_body(user_id, cursor)
  uri = URI("https://www.instagram.com/graphql/query/")
  variables = { id: user_id, first: POSTS_PER_PAGE, after: cursor }
  uri.query = URI.encode_www_form(query_hash: QUERY_HASH, variables: variables.to_json)

  request = Net::HTTP::Get.new(uri.request_uri)
  request["Cookie"] = "ig_pr=1"

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  http.request(request).body
end

# Fetches and parses one posts page, retrying a bounded number of times.
#
# @param user_id [Integer]
# @param cursor [String, nil]
# @param page [Integer] page number, used only in the progress message
# @return [Array(String, Hash), nil] the raw body and its page_info hash, or
#   nil once MAX_PAGE_ATTEMPTS attempts have failed
def fetch_posts_page_with_retry(user_id, cursor, page)
  attempts = 0
  begin
    attempts += 1
    body = fetch_posts_body(user_id, cursor)
    info = JSON.parse(body).dig("data", "user", "edge_owner_to_timeline_media", "page_info")
    raise "response has no page_info" unless info.is_a?(Hash)

    [body, info]
  rescue StandardError
    return nil if attempts >= MAX_PAGE_ATTEMPTS

    puts "Scraping page #{page} failed, retrying.."
    # Wait before retrying instead of immediately re-sending the request.
    sleep(PAGE_DELAY_SECONDS)
    retry
  end
end

# Writes +body+ followed by a newline (if missing) to +path+; the block form
# guarantees the handle is closed even when the write raises.
def write_body(path, body)
  File.open(path, "w") { |file| file.puts body }
end

# Pages through a user's timeline media, one file per page.
#
# A page file is written only after the response was fetched and parsed, so a
# failed attempt never leaves a partial file behind.
#
# @param user [String] validated username (also the output directory)
# @param user_id [Integer]
# @param page_info [Hash] page_info from the profile response
# @return [Boolean] true when every page was scraped, false when a page was given up on
def scrape_posts(user, user_id, page_info)
  page = 1
  while page_info["has_next_page"]
    puts "Scraping posts page #{page}"
    body, next_info = fetch_posts_page_with_retry(user_id, page_info["end_cursor"], page)
    if body.nil?
      puts "Giving up on page #{page} after #{MAX_PAGE_ATTEMPTS} attempts."
      return false
    end

    write_body(File.join(user, "#{user}_posts_page_#{page}.json"), body)
    page_info = next_info
    page += 1
    puts "Hold request (#{PAGE_DELAY_SECONDS}s)"
    sleep(PAGE_DELAY_SECONDS)
  end
  true
end

# Scrapes the profile and all post pages for a single user.
#
# @param user [String] validated username
# @return [void]
def scrape_user(user)
  puts "Scraping profile metadata : #{user}"
  puts ""

  body = fetch_profile_body(user)
  if body.nil?
    puts "Account not found"
    return
  end

  profile_user = profile_user_from(body)
  if profile_user.nil?
    puts "Profile response for #{user} was not the expected JSON, skipping."
    return
  end

  # Create directory by IG username and store the raw profile body.
  FileUtils.mkdir_p(user)
  write_body(File.join(user, "#{user}_profile.json"), body)
  puts "Scraping profile completed."

  puts "Scraping All #{user} posts metadata"
  puts ""
  user_id = profile_user["id"].to_i
  page_info = profile_user["edge_owner_to_timeline_media"]["page_info"]

  if scrape_posts(user, user_id, page_info)
    puts "Scraping for user : #{user} has been finished."
  else
    puts "Scraping for user : #{user} stopped before all pages were fetched."
  end
  puts ""
  puts ""
end

puts "============= Instagram Scraper ============="
puts "================== v1.0 ==================="
puts "============== Megumi Aliya ==============="
3.times { puts "" }
puts "input instagram username: "
puts "* separated with comma if more than 1"

parse_usernames(gets).each do |user|
  if valid_username?(user)
    scrape_user(user)
  else
    puts "Skipping invalid username: #{user}"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment