mistermichaelll · August 24, 2023 16:46
diff --git a/hinge_cleanup.py b/hinge_cleanup.py
 # This script is a first pass at creating a clean Hinge dataset that one can use to 
 # visualize using Python or R. 
 # 
 # You can request your Hinge data from the app, and the file in question that we're
 # using here is "matches.json".
 #
 # Author: Michael Johnson
 # Last Updated: June 2, 2021
 # 

 # ==================================
 # Project Setup
 # ==================================

 # libraries utilized
 import os
 import json
 import pandas as pd
 import datetime as dt

 # Open the Hinge matches JSON file.
 # NOTE(review): `cd` looks like a placeholder -- point it at the folder that
 # holds your own Hinge export before running.
 cd = "users/name/Hinge Export Folder/"
 os.chdir(cd)
 # Pass the encoding explicitly: JSON is UTF-8, but open() otherwise falls back
 # to the platform's locale encoding, which can mis-decode or reject non-ASCII
 # text in the export.
 with open("matches.json", encoding="utf-8") as m:
     matches = json.load(m)

 # Normalize the parsed JSON into a flat DataFrame using pandas.
 data = pd.json_normalize(matches)

 # ==================================
 # Gathering the Data 
 # ==================================

 # We are going to pull apart each type of "like" you can receive on the app from the 
 # messy JSON file that Hinge sends us.
 # Basically, the logic I'm using is as follows: 
 # 
 # - If "like" is not null, and "match" is not null, then this is where we sent a like and got a match.
 # - If "like" is not null, and "match" is null, then this is where we sent a like and got no match.
 # - If "match" is not null, and "like" is null, then this is where we received a like and got a match.
 # - If "match" is null, and "like" is null, then this is where we received a like but did not match.

 # Boolean masks: does a record carry a "like" / "match" entry at all?
 has_like = data["like"].notna()
 has_match = data["match"].notna()

 # outgoing likes, match -> "like" and "match" both present
 outgoing_matches = data.loc[has_like & has_match].reset_index()

 # outgoing likes, no match -> "like" present, "match" missing
 outgoing_no_matches = data.loc[has_like & ~has_match].reset_index()

 # incoming likes, match -> "match" present, "like" missing
 incoming_match = data.loc[has_match & ~has_like].reset_index()

 # incoming likes, no match -> neither "like" nor "match" present
 incoming_no_match = data.loc[~has_like & ~has_match].reset_index()

 # Here's a quick sanity check for if we're categorizing things correctly. 
 # The sum of the length of the 4 categories should be equal to the sum of 
 # the length of the original data.
 # ------------------------------------------------------------------------
 # num_items_categorized = len(outgoing_matches) + len(outgoing_no_matches) + len(incoming_match) + len(incoming_no_match)
 # num_items_og_data = len(data)
 # 
 # print("Num. Categorized Items: ", num_items_categorized)
 # print("Num. Items OG Data: ", num_items_og_data)
 # 
 # if num_items_categorized == num_items_og_data:
 #     print("The number of items is the same.")
 # else: 
 #     print("The number of items is not the same. Check your work")

 # ==================================
 # Quick Stats
 # -----------
 # This prints out some quick stats
 # from the data we're working with.
 # ==================================
 def _match_pct(matched, total):
     """Return matched/total as a whole-number percentage, or 0 when total is 0."""
     # Guard the empty case: with zero likes in a direction the plain division
     # raises ZeroDivisionError and stops the script before the export step.
     return round(matched / total * 100) if total else 0


 # Likes we sent: how many there were and how many turned into a match.
 likes_sent = len(outgoing_matches) + len(outgoing_no_matches)
 print("Total Likes Sent:", likes_sent)
 print("Total Matches from Likes Sent:", len(outgoing_matches))
 print("Match % from Likes Sent:", _match_pct(len(outgoing_matches), likes_sent), "%")

 # Likes we received: how many there were and how many turned into a match.
 likes_received = len(incoming_match) + len(incoming_no_match)
 print("Total Likes Received:", likes_received)
 print("Total Matches from Likes Received:", len(incoming_match))
 print("Match % from Likes Received:", _match_pct(len(incoming_match), likes_received), "%")

 # ==================================
 # Creating a Clean Dataframe 
 # --------------------------
 # Really, all I want is a clean
 # dataframe that has the timestamp, 
 # the date without the time, and 
 # the type of like we're dealing with.
 # ==================================

 # Create lists of timestamps from our data

 # For each category, walk the relevant column and keep the "timestamp" of
 # element 0 of every record's entry.

 # sent likes, match
 # -----------------
 sent_like_timestamps = [
     entry[0]["timestamp"] for entry in outgoing_matches["like"]
 ]

 # sent like, no match
 # --------------------
 sent_like_no_match_timestamps = [
     entry[0]["timestamp"] for entry in outgoing_no_matches["like"]
 ]

 # received like, no match (timestamp is read from the "block" column)
 rec_no_match_timestamps = [
     entry[0]["timestamp"] for entry in incoming_no_match["block"]
 ]

 # received like, match
 rec_match_timestamps = [
     entry[0]["timestamp"] for entry in incoming_match["match"]
 ]

 def _labelled_frame(timestamps, like_type):
     """Build a frame holding the timestamps plus a constant "Type" label."""
     return pd.DataFrame({"Timestamp": timestamps, "Type": like_type})


 # one frame per category, each tagged with its "like type"
 sent_match = _labelled_frame(sent_like_timestamps, "Sent Like, Match")
 sent_no_match = _labelled_frame(sent_like_no_match_timestamps, "Sent Like, No Match")
 rec_match = _labelled_frame(rec_match_timestamps, "Received Like, Match")
 rec_no_match = _labelled_frame(rec_no_match_timestamps, "Received Like, No Match")

 dfs = [sent_match, sent_no_match, rec_match, rec_no_match]

 # create clean dataset: stack the frames and renumber the rows from 0
 clean_data = pd.concat(dfs).reset_index(drop=True)
 # parse the raw timestamps, then derive a date-only column from them
 clean_data["Timestamp"] = pd.to_datetime(clean_data["Timestamp"])
 clean_data["Date"] = clean_data["Timestamp"].dt.date

 # export this data so we can use it elsewhere
 clean_data.to_csv("~/Downloads/Hinge Export/clean_data.csv")
	# This script is a first pass at creating a clean Hinge dataset that one can use to
	# visualize using Python or R.
	#
	# You can request your Hinge data from the app, and the file in question that we're
	# using here is "matches.json".
	#
	# Author: Michael Johnson
	# Last Updated: June 2, 2021
	#

	# ==================================
	# Project Setup
	# ==================================

	# libraries utilized
	import os
	import json
	import pandas as pd
	import datetime as dt

	# Open the Hinge matches JSON file.
	# NOTE(review): `cd` looks like a placeholder -- point it at the folder that
	# holds your own Hinge export before running.
	cd = "users/name/Hinge Export Folder/"
	os.chdir(cd)
	# The body of the `with` must be indented under it. The encoding is passed
	# explicitly because JSON is UTF-8, while open() otherwise falls back to the
	# platform's locale encoding.
	with open("matches.json", encoding="utf-8") as m:
	    matches = json.load(m)

	# Normalize the parsed JSON into a flat DataFrame using pandas.
	data = pd.json_normalize(matches)

	# ==================================
	# Gathering the Data
	# ==================================

	# We are going to pull apart each type of "like" you can receive on the app from the
	# messy JSON file that Hinge sends us.
	# Basically, the logic I'm using is as follows:
	#
	# - If "like" is not null, and "match" is not null, then this is where we sent a like and got a match.
	# - If "like" is not null, and "match" is null, then this is where we sent a like and got no match.
	# - If "match" is not null, and "like" is null, then this is where we received a like and got a match.
	# - If "match" is null, and "like" is null, then this is where we received a like but did not match.

	# Boolean masks: does a record carry a "like" / "match" entry at all?
	has_like = data["like"].notna()
	has_match = data["match"].notna()

	# outgoing likes, match -> "like" and "match" both present
	outgoing_matches = data.loc[has_like & has_match].reset_index()

	# outgoing likes, no match -> "like" present, "match" missing
	outgoing_no_matches = data.loc[has_like & ~has_match].reset_index()

	# incoming likes, match -> "match" present, "like" missing
	incoming_match = data.loc[has_match & ~has_like].reset_index()

	# incoming likes, no match -> neither "like" nor "match" present
	incoming_no_match = data.loc[~has_like & ~has_match].reset_index()

	# Here's a quick sanity check for if we're categorizing things correctly.
	# The sum of the length of the 4 categories should be equal to the sum of
	# the length of the original data.
	# ------------------------------------------------------------------------
	# num_items_categorized = len(outgoing_matches) + len(outgoing_no_matches) + len(incoming_match) + len(incoming_no_match)
	# num_items_og_data = len(data)
	#
	# print("Num. Categorized Items: ", num_items_categorized)
	# print("Num. Items OG Data: ", num_items_og_data)
	#
	# if num_items_categorized == num_items_og_data:
	#     print("The number of items is the same.")
	# else:
	#     print("The number of items is not the same. Check your work")

	# ==================================
	# Quick Stats
	# -----------
	# This prints out some quick stats
	# from the data we're working with.
	# ==================================
	def _match_pct(matched, total):
	    """Return matched/total as a whole-number percentage, or 0 when total is 0."""
	    # Guard the empty case: with zero likes in a direction the plain division
	    # raises ZeroDivisionError and stops the script before the export step.
	    return round(matched / total * 100) if total else 0


	# Likes we sent: how many there were and how many turned into a match.
	likes_sent = len(outgoing_matches) + len(outgoing_no_matches)
	print("Total Likes Sent:", likes_sent)
	print("Total Matches from Likes Sent:", len(outgoing_matches))
	print("Match % from Likes Sent:", _match_pct(len(outgoing_matches), likes_sent), "%")

	# Likes we received: how many there were and how many turned into a match.
	likes_received = len(incoming_match) + len(incoming_no_match)
	print("Total Likes Received:", likes_received)
	print("Total Matches from Likes Received:", len(incoming_match))
	print("Match % from Likes Received:", _match_pct(len(incoming_match), likes_received), "%")

	# ==================================
	# Creating a Clean Dataframe
	# --------------------------
	# Really, all I want is a clean
	# dataframe that has the timestamp,
	# the date without the time, and
	# the type of like we're dealing with.
	# ==================================

	# Create lists of timestamps from our data

	# For each category, walk the relevant column and keep the "timestamp" of
	# element 0 of every record's entry. Written as comprehensions; the loop
	# bodies here had lost their indentation, which is a syntax error.

	# sent likes, match
	# -----------------
	sent_like_timestamps = [
	    entry[0]["timestamp"] for entry in outgoing_matches["like"]
	]

	# sent like, no match
	# --------------------
	sent_like_no_match_timestamps = [
	    entry[0]["timestamp"] for entry in outgoing_no_matches["like"]
	]

	# received like, no match (timestamp is read from the "block" column)
	rec_no_match_timestamps = [
	    entry[0]["timestamp"] for entry in incoming_no_match["block"]
	]

	# received like, match
	rec_match_timestamps = [
	    entry[0]["timestamp"] for entry in incoming_match["match"]
	]

	def _labelled_frame(timestamps, like_type):
	    """Build a frame holding the timestamps plus a constant "Type" label."""
	    return pd.DataFrame({"Timestamp": timestamps, "Type": like_type})


	# one frame per category, each tagged with its "like type"
	sent_match = _labelled_frame(sent_like_timestamps, "Sent Like, Match")
	sent_no_match = _labelled_frame(sent_like_no_match_timestamps, "Sent Like, No Match")
	rec_match = _labelled_frame(rec_match_timestamps, "Received Like, Match")
	rec_no_match = _labelled_frame(rec_no_match_timestamps, "Received Like, No Match")

	dfs = [sent_match, sent_no_match, rec_match, rec_no_match]

	# create clean dataset: stack the frames and renumber the rows from 0
	clean_data = pd.concat(dfs).reset_index(drop=True)
	# parse the raw timestamps, then derive a date-only column from them
	clean_data["Timestamp"] = pd.to_datetime(clean_data["Timestamp"])
	clean_data["Date"] = clean_data["Timestamp"].dt.date

	# export this data so we can use it elsewhere
	clean_data.to_csv("~/Downloads/Hinge Export/clean_data.csv")