Created
August 17, 2018 14:22
-
-
Save altbdoor/19e5ee7a053ad9362b9b3da50d279dd4 to your computer and use it in GitHub Desktop.
Basic tag analysis, generates CSV of tags over time
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import datetime
import json
import sys
import time
import urllib.parse
import urllib.request
# Hashtag to analyse, and the JSON endpoint for the first page of its media.
tag = 'shutteredco'
base_url = 'https://www.instagram.com/explore/tags/{}/?__a=1'.format(tag)
def get_next_url(json_data):
    """Return the URL of the next results page, or None when there is none.

    Args:
        json_data: Decoded response; read at
            ``['graphql']['hashtag']['edge_hashtag_to_media']['page_info']``.

    Returns:
        ``base_url`` with a ``max_id`` query parameter set to the page's
        ``end_cursor``, or ``None`` if ``has_next_page`` is false or the
        cursor is missing/empty.

    Raises:
        KeyError: If the expected keys are absent from ``json_data``.
    """
    media_edge = json_data['graphql']['hashtag']['edge_hashtag_to_media']
    page_info = media_edge['page_info']

    if not page_info['has_next_page']:
        return None

    end_cursor = page_info['end_cursor']
    if not end_cursor:
        # Without a cursor there is nothing to paginate with; stop instead
        # of failing on the string concatenation below.
        return None

    # Percent-encode the cursor so characters such as '=', '+' or '&' cannot
    # corrupt the query string.
    return base_url + '&max_id=' + urllib.parse.quote(str(end_cursor), safe='')
def populate_json_data(json_data):
    """Append every post timestamp in ``json_data`` to ``final_json_list``.

    Args:
        json_data: Decoded response; each item under
            ``['graphql']['hashtag']['edge_hashtag_to_media']['edges']``
            must provide ``['node']['taken_at_timestamp']``.

    Raises:
        KeyError: If the expected keys are absent from ``json_data``.
    """
    edges = json_data['graphql']['hashtag']['edge_hashtag_to_media']['edges']
    # final_json_list is the module-level accumulator shared across pages.
    final_json_list.extend(e['node']['taken_at_timestamp'] for e in edges)
# Walk every page of the hashtag feed, collect the post timestamps, then
# write them to disk so the tally below works from the saved file.
json_file_path = 'data.json'
final_json_list = []

counter = 0
next_url = base_url

while next_url:
    print('counter', counter + 1)

    # Close the HTTP response as soon as its payload has been parsed.
    with urllib.request.urlopen(next_url) as response:
        json_data = json.loads(response.read())

    populate_json_data(json_data)
    next_url = get_next_url(json_data)

    sys.stdout.flush()
    counter += 1

    # Pause between requests (presumably to stay under rate limits); there
    # is nothing to wait for once the last page has been fetched.
    if next_url:
        time.sleep(15)

with open(json_file_path, 'w') as fp:
    json.dump(final_json_list, fp)
# =====
# Reload the saved timestamps and print a "year-month, count" line for every
# month of every year that has at least one post.
with open(json_file_path, 'r') as fp:
    timestamps = json.load(fp)

# time_data[year][month] -> number of posts taken in that month.
time_data = {}

for t in timestamps:
    # fromtimestamp() without a tz argument uses the machine's local timezone.
    dt = datetime.datetime.fromtimestamp(t)
    month_counts = time_data.setdefault(dt.year, {})
    month_counts[dt.month] = month_counts.get(dt.month, 0) + 1

year_list = sorted(time_data)

print('=====')

for y in year_list:
    for m in range(1, 13):
        # Months without posts are stored and reported as zero so every
        # year prints a full twelve rows.
        count = time_data[y].setdefault(m, 0)
        print('{}-{}, {}'.format(y, m, count))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment