altbdoor · August 17, 2018 14:22
diff --git a/insta_tag_analysis.py b/insta_tag_analysis.py
 #!/usr/bin/env python

 import datetime
 import json
 import sys
 import time
 import urllib.request

 tag = 'shutteredco'
 base_url = 'https://www.instagram.com/explore/tags/' + tag + '/?__a=1'


 def get_next_url(json_data):
    """Return the URL of the next result page, or None when there is none.

    Reads ``page_info`` under ``graphql.hashtag.edge_hashtag_to_media`` of
    the decoded response and appends its ``end_cursor`` to the module-level
    ``base_url`` as the ``max_id`` query parameter.

    Args:
        json_data: decoded JSON response for one page of the tag listing.

    Returns:
        The URL string of the following page, or None when the response
        reports no further page or carries no usable cursor.
    """
    media_edge = json_data['graphql']['hashtag']['edge_hashtag_to_media']
    page_info = media_edge['page_info']

    if not page_info['has_next_page']:
        return None

    # A missing, null or empty cursor cannot address a next page: None would
    # make the concatenation below raise TypeError, and an empty string would
    # yield a URL with a blank max_id. Treat both as the end of pagination.
    end_cursor = page_info.get('end_cursor')
    if not end_cursor:
        return None

    return base_url + '&max_id=' + end_cursor


 def populate_json_data(json_data):
    """Record the timestamp of every media node on one result page.

    Each edge's ``node.taken_at_timestamp`` is appended, in response order,
    to the module-level ``final_json_list``.

    Args:
        json_data: decoded JSON response for one page of the tag listing.
    """
    page_edges = json_data['graphql']['hashtag']['edge_hashtag_to_media']['edges']
    final_json_list.extend(
        edge['node']['taken_at_timestamp'] for edge in page_edges
    )


 # Scrape phase: walk every result page of the tag listing, collect the post
 # timestamps into final_json_list, then save them to json_file_path.
 json_file_path = 'data.json'
 final_json_list = []
 counter = 0
 next_url = base_url

 while next_url:
    # Progress marker, flushed right away so it is visible before the
    # request and the pause that follow.
    print('counter', counter + 1)
    sys.stdout.flush()

    # Close the HTTP response as soon as its body has been read instead of
    # leaving the connection open until it is garbage collected.
    with urllib.request.urlopen(next_url) as response:
        json_data = json.loads(response.read())

    populate_json_data(json_data)
    next_url = get_next_url(json_data)
    counter += 1

    # Pause between requests only; there is nothing to wait for once the
    # last page has been fetched.
    if next_url:
        time.sleep(15)

 with open(json_file_path, 'w') as fp:
    json.dump(final_json_list, fp)

 # =====

 # Analysis phase: reload the saved timestamps and print a per-month count.
 with open(json_file_path, 'r') as fp:
    timestamps = json.load(fp)

 # time_data maps year -> {month -> number of timestamps in that month},
 # using the local-time calendar fields of each timestamp.
 time_data = {}
 for t in timestamps:
    dt = datetime.datetime.fromtimestamp(t)
    month_counts = time_data.setdefault(dt.year, {})
    month_counts[dt.month] = month_counts.get(dt.month, 0) + 1

 year_list = sorted(time_data)

 print('=====')

 # Every year gets all twelve rows; months without entries are stored and
 # printed as 0.
 for y in year_list:
    month_counts = time_data[y]
    for m in range(1, 13):
        print('{}-{}, {}'.format(y, m, month_counts.setdefault(m, 0)))
	#!/usr/bin/env python

	import datetime
	import json
	import sys
	import time
	import urllib.request

	tag = 'shutteredco'
	base_url = 'https://www.instagram.com/explore/tags/' + tag + '/?__a=1'


	def get_next_url(json_data):
	    """Return the URL of the next result page, or None when there is none.

	    Reads ``page_info`` under ``graphql.hashtag.edge_hashtag_to_media`` of
	    the decoded response and appends its ``end_cursor`` to the module-level
	    ``base_url`` as the ``max_id`` query parameter.

	    Args:
	        json_data: decoded JSON response for one page of the tag listing.

	    Returns:
	        The URL string of the following page, or None when the response
	        reports no further page or carries no usable cursor.
	    """
	    media_edge = json_data['graphql']['hashtag']['edge_hashtag_to_media']
	    page_info = media_edge['page_info']

	    if not page_info['has_next_page']:
	        return None

	    # A missing, null or empty cursor cannot address a next page: None would
	    # make the concatenation below raise TypeError, and an empty string would
	    # yield a URL with a blank max_id. Treat both as the end of pagination.
	    end_cursor = page_info.get('end_cursor')
	    if not end_cursor:
	        return None

	    return base_url + '&max_id=' + end_cursor


	def populate_json_data(json_data):
	    """Record the timestamp of every media node on one result page.

	    Each edge's ``node.taken_at_timestamp`` is appended, in response order,
	    to the module-level ``final_json_list``.

	    Args:
	        json_data: decoded JSON response for one page of the tag listing.
	    """
	    edges = json_data['graphql']['hashtag']['edge_hashtag_to_media']['edges']
	    for e in edges:
	        unix_time = e['node']['taken_at_timestamp']
	        final_json_list.append(unix_time)


	# Scrape phase: walk every result page of the tag listing, collect the post
	# timestamps into final_json_list, then save them to json_file_path.
	json_file_path = 'data.json'
	final_json_list = []
	counter = 0
	next_url = base_url

	while next_url:
	    # Progress marker, flushed right away so it is visible before the
	    # request and the pause that follow.
	    print('counter', counter + 1)
	    sys.stdout.flush()

	    # Close the HTTP response as soon as its body has been read instead of
	    # leaving the connection open until it is garbage collected.
	    with urllib.request.urlopen(next_url) as response:
	        json_data = json.loads(response.read())

	    populate_json_data(json_data)
	    next_url = get_next_url(json_data)
	    counter += 1

	    # Pause between requests only; there is nothing to wait for once the
	    # last page has been fetched.
	    if next_url:
	        time.sleep(15)

	with open(json_file_path, 'w') as fp:
	    json.dump(final_json_list, fp)

	# =====

	# Analysis phase: reload the saved timestamps and print a per-month count.
	with open(json_file_path, 'r') as fp:
	    timestamps = json.load(fp)
	    # time_data maps year -> {month -> number of timestamps in that month},
	    # using the local-time calendar fields of each timestamp.
	    time_data = {}

	    for t in timestamps:
	        dt = datetime.datetime.fromtimestamp(t)

	        dt_year = dt.year
	        dt_month = dt.month

	        if dt_year not in time_data:
	            time_data[dt_year] = {}

	        if dt_month not in time_data[dt_year]:
	            time_data[dt_year][dt_month] = 0

	        time_data[dt_year][dt_month] += 1

	    year_list = list(time_data.keys())
	    year_list.sort()

	    print('=====')

	    # Every year gets all twelve rows; months without entries are stored
	    # and printed as 0.
	    for y in year_list:
	        for m in range(1, 13):
	            if m not in time_data[y]:
	                time_data[y][m] = 0

	            print('{}-{}, {}'.format(y, m, time_data[y][m]))