Last active
August 24, 2023 16:46
-
-
Save mistermichaelll/3afcc33188f29576915243a5a2ea2a72 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This script is a first pass at creating a clean Hinge dataset that one can | |
| # visualize using Python or R. | |
| # | |
| # You can request your Hinge data from the app, and the file in question that we're | |
| # using here is "matches.json". | |
| # | |
| # Author: Michael Johnson | |
| # Last Updated: June 2, 2021 | |
| # | |
| # ================================== | |
| # Project Setup | |
| # ================================== | |
| # libraries utilized | |
| import os | |
| import json | |
| import pandas as pd | |
| import datetime as dt | |
# Open the Hinge matches JSON file.
# NOTE: `cd` is a placeholder path; point it at the folder that holds your
# Hinge export before running the script.
cd = "users/name/Hinge Export Folder/"
os.chdir(cd)

# Pass the encoding explicitly: without it, open() decodes the file with the
# platform's locale encoding, which can corrupt or reject non-ASCII text in a
# UTF-8 JSON export.
with open("matches.json", encoding="utf-8") as m:
    matches = json.load(m)

# Normalize the parsed JSON into a flat pandas DataFrame.
data = pd.json_normalize(matches)
| # ================================== | |
| # Gathering the Data | |
| # ================================== | |
| # We are going to pull apart each type of "like" you can receive on the app from the | |
| # messy JSON file that Hinge sends us. | |
| # Basically, the logic I'm using is as follows: | |
| # | |
| # - If "like" is not null, and "match" is not null, then this is where we sent a like and got a match. | |
| # - If "like" is not null, and "match" is null, then this is where we sent a like and got no match. | |
| # - If "match" is not null, and "like" is null, then this is where we received a like and got a match. | |
| # - If "match" is null, and "like" is null, then this is where we received a like but did not match. | |
# Row-wise flags: does the record carry a "like" entry / a "match" entry?
has_like = data["like"].notna()
has_match = data["match"].notna()
no_like = data["like"].isna()
no_match = data["match"].isna()

# Each frame gets a fresh 0..n-1 index via reset_index(); the previous row
# labels are kept in an "index" column.

# outgoing likes, match
outgoing_matches = data[has_like & has_match].reset_index()

# outgoing likes, no match
outgoing_no_matches = data[has_like & no_match].reset_index()

# incoming likes, match
incoming_match = data[has_match & no_like].reset_index()

# incoming likes, no match
incoming_no_match = data[no_like & no_match].reset_index()
| # Here's a quick sanity check for if we're categorizing things correctly. | |
| # The sum of the length of the 4 categories should be equal to the sum of | |
| # the length of the original data. | |
| # ------------------------------------------------------------------------ | |
| # num_items_categorized = len(outgoing_matches) + len(outgoing_no_matches) + len(incoming_match) + len(incoming_no_match) | |
| # num_items_og_data = len(data) | |
| # | |
| # print("Num. Categorized Items: ", num_items_categorized) | |
| # print("Num. Items OG Data: ", num_items_og_data) | |
| # | |
| # if num_items_categorized == num_items_og_data: | |
| # print("The number of items is the same.") | |
| # else: | |
| # print("The number of items is not the same. Check your work") | |
| # ================================== | |
| # Quick Stats | |
| # ----------- | |
| # This prints out some quick stats | |
| # from the data we're working with. | |
| # ================================== | |
def _match_pct(matched, total):
    """Return `matched` as a whole-number percentage of `total`.

    Returns 0 when `total` is 0, so an export with no likes in a given
    direction prints a 0 % rate instead of raising ZeroDivisionError.
    """
    if not total:
        return 0
    return round(matched / total * 100)


# Totals per direction: every like either ended in a match or it did not.
likes_sent = len(outgoing_matches) + len(outgoing_no_matches)
likes_received = len(incoming_match) + len(incoming_no_match)

print("Total Likes Sent:", likes_sent)
print("Total Matches from Likes Sent:", len(outgoing_matches))
print("Match % from Likes Sent:", _match_pct(len(outgoing_matches), likes_sent), "%")
print("Total Likes Received:", likes_received)
print("Total Matches from Likes Received:", len(incoming_match))
print("Match % from Likes Received:", _match_pct(len(incoming_match), likes_received), "%")
| # ================================== | |
| # Creating a Clean Dataframe | |
| # -------------------------- | |
| # Really, all I want is a clean | |
| # dataframe that has the timestamp, | |
| # the date without the time, and | |
| # the type of like we're dealing with. | |
| # ================================== | |
# Create lists of timestamps from our data
def _first_timestamps(frame, column):
    """Collect the "timestamp" of the first entry in `column` for every row.

    Rows are looked up by the labels 0..len(frame)-1, which is what the frames
    have after reset_index(). The column is only touched when there is at
    least one row.
    """
    return [frame[column][row][0]["timestamp"] for row in range(len(frame))]


# sent likes, match
sent_like_timestamps = _first_timestamps(outgoing_matches, "like")

# sent like, no match
sent_like_no_match_timestamps = _first_timestamps(outgoing_no_matches, "like")

# received like, no match (timestamp is read from the "block" entry)
rec_no_match_timestamps = _first_timestamps(incoming_no_match, "block")

# received like, match
rec_match_timestamps = _first_timestamps(incoming_match, "match")
def _labelled(timestamps, like_type):
    """Build a two-column frame: the timestamps plus a constant "Type" label."""
    return pd.DataFrame({"Timestamp": timestamps, "Type": like_type})


# One labelled frame per like category.
sent_match = _labelled(sent_like_timestamps, "Sent Like, Match")
sent_no_match = _labelled(sent_like_no_match_timestamps, "Sent Like, No Match")
rec_match = _labelled(rec_match_timestamps, "Received Like, Match")
rec_no_match = _labelled(rec_no_match_timestamps, "Received Like, No Match")

dfs = [sent_match, sent_no_match, rec_match, rec_no_match]

# Stack the four frames and renumber the rows 0..n-1.
clean_data = pd.concat(dfs).reset_index(drop=True)

# Parse the timestamps, then derive a date-only column from them.
parsed_timestamps = pd.to_datetime(clean_data["Timestamp"])
clean_data["Timestamp"] = parsed_timestamps
clean_data["Date"] = clean_data["Timestamp"].dt.date

# Export this data so we can use it elsewhere.
clean_data.to_csv("~/Downloads/Hinge Export/clean_data.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment