Created
July 5, 2023 14:32
-
-
Save guy4261/c55cccaac4c816d287349bfaf4d76e4b to your computer and use it in GitHub Desktop.
Turn your LinkedIn messages from a data export into a CSV (and help RIF'd friends)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
import os | |
import webbrowser | |
import zipfile | |
from datetime import datetime | |
from glob import glob | |
from io import BytesIO | |
import pandas as pd | |
# Find your downloaded LinkedIn data.
# The URLs say where to request the export and where to upload the result;
# the glob is where this script looks for the downloaded archive(s).
linkedin_data_export_url = "https://www.linkedin.com/mypreferences/d/download-my-data"
google_sheet_url = "https://drive.google.com/"
default_path_glob = "~/Downloads/Basic_LinkedInDataExport_*.zip"

print(f"Looking for your export from {linkedin_data_export_url}")
print(f"under {default_path_glob}")

# glob() does not expand "~" itself, so resolve the home directory first.
paths = glob(os.path.expanduser(default_path_glob))

# Explicit check instead of `assert`: assertions are stripped when Python runs
# with -O, which would let an empty list slip through to the code below.
if not paths:
    raise SystemExit("No files found!")
# If you made multiple exports, get the latest
def _export_timestamp(export_path):
    """Return a sortable timestamp for one export archive.

    The date embedded in the file name is preferred. A name that does not
    parse (for example a browser-deduplicated "... (1).zip", which the
    "Basic_LinkedInDataExport_*.zip" glob also matches) falls back to the
    file's modification time instead of aborting the run with a ValueError.
    """
    # NOTE(review): the day-month-year order is kept from the original script;
    # confirm it matches the file names LinkedIn actually generates.
    try:
        return datetime.strptime(
            os.path.basename(export_path), "Basic_LinkedInDataExport_%d-%m-%Y.zip"
        )
    except ValueError:
        return datetime.fromtimestamp(os.path.getmtime(export_path))


# max() over a single path simply returns that path, so no special case is
# needed for the one-export situation.
path = max(paths, key=_export_timestamp)
# Ensure your chosen export has your messages.csv in it, then load that CSV
# with pandas. The archive is opened in a `with` block so its file handle is
# released as soon as the CSV bytes have been read.
with zipfile.ZipFile(path) as zf:
    try:
        # getinfo() raises KeyError when the archive has no such member.
        messages_zipinfo = zf.getinfo("messages.csv")
    except KeyError:
        # Explicit exit instead of `assert`, which is stripped under -O.
        raise SystemExit(
            f"Did not find messages.csv in your data export! Re-run {linkedin_data_export_url}"
        ) from None
    # A freshly built BytesIO is already positioned at offset 0, so it can be
    # handed to the parser without seeking.
    buf = BytesIO(zf.read(messages_zipinfo))

df = pd.read_csv(buf)
# Get only incoming messages sent to me.
# The most frequent value in the TO column is taken to be the account owner;
# an empty column or a tie between values leaves that ambiguous.
whoami = df["TO"].mode().tolist()
# Explicit check instead of `assert`, which is stripped under -O.
if len(whoami) != 1:
    raise SystemExit(f"Not sure who you are: {whoami}")
whoami = whoami[0]
df = df[(df["TO"] == whoami) & (df["FOLDER"] == "INBOX")]

# Get the earliest message sent by each recruiter to ignore nagging.
# merge() joins on the columns both frames share (FROM and DATE), so only the
# rows carrying each sender's minimum DATE survive.
# NOTE(review): this runs before the RIF cutoff below, so a sender whose first
# message predates the cutoff is dropped entirely - confirm that is intended.
df = df.groupby("FROM")["DATE"].min().reset_index().merge(df)

# Get only messages sent after the RIF.
# NOTE(review): DATE is compared against rif_date as-is; this presumably relies
# on the export writing timestamps as "YYYY-MM-DD HH:MM:SS UTC" text so that
# string order equals chronological order - confirm against a real export.
rif_date = "2023-06-01 00:00:00 UTC"
df = df[df["DATE"] >= rif_date]
# Order and save: keep only the columns of interest, in this order, and write
# them to a CSV named after the account owner in the current directory.
output_columns = [
    "DATE",
    "TO",
    "FROM",
    "SENDER PROFILE URL",
    "SUBJECT",
    "CONTENT",
]
outfile = f"{whoami}_job_offers.csv"
df = df[output_columns]
df.to_csv(outfile, index=False)

# Now go upload the data! Print the absolute location of the file and open
# the upload destination in the default browser.
saved_to = os.path.realpath(outfile)
print(f"Now upload {saved_to} to {google_sheet_url}")
webbrowser.open(google_sheet_url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment