This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pandas import DataFrame | |
def main(path: str = 'input/mail.mbox', output: str = 'output.csv') -> None:
    """Write a CSV report of the unsubscribe data found in a mailbox.

    The mails are read from ``path`` with ``get_mails`` and handed to
    ``add_unsubscribe``; the mapping it returns is turned into a table,
    ordered by its ``count`` column (largest first), and saved to
    ``output``.

    Args:
        path: Location of the .mbox file to read. Defaults to the
            previously hard-coded 'input/mail.mbox'.
        output: Destination CSV file. Defaults to the previously
            hard-coded 'output.csv'.
    """
    mails = get_mails(path)
    unsub_links = add_unsubscribe(mails)
    # Transposed so that the top-level keys of the mapping become rows
    # (presumably one per sender -- confirm against add_unsubscribe).
    df = DataFrame(unsub_links).T
    # An empty mapping yields a frame with no 'count' column, and
    # sort_values would raise KeyError; write the empty report instead.
    if 'count' in df.columns:
        df = df.sort_values(by='count', ascending=False)
    df.to_csv(output)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
def add_unsubscribe(mails: List, unsub_links: dict = {}) -> dict: | |
for mail in mails: | |
#getting sender | |
sender_line = mail.split('\n')[0] #first line of the mail | |
sender_words = sender_line.split(' ')[1:] #removes the 'From: ' part | |
sender = ' '.join(sender_words) #recreates the name string | |
if 'Q?' in sender: | |
sender = re.findall('.Q.?(.*?)\?', sender)[0] #removing some char coding |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List | |
def get_mails(path: str, limit: int = None) -> List[str]: | |
'''Extract emails from an .mbox file.''' | |
mails = [] | |
c = -1 | |
mail = str() | |
with open(path, 'r', encoding = 'UTF-8') as file: | |
for line in file: #read every line | |
if c == limit: break |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Show the access token; sample (truncated) output is kept as a comment.
print(token)
# >> T2_wi8XsUpJrbD0BHQH4vX0BrSuYV0D88sVrQ_wMgApU...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "endTime" : "2019-01-26 07:49",
  "artistName" : "Tom Waits",
  "trackName" : "Heartattack And Vine",
  "msPlayed" : 8850
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Tabulate the collected ``with_features`` records and save them as CSV.
df = pd.DataFrame(with_features)
df.to_csv('streaming_history.csv')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the streaming history and reduce it to the distinct track names so
# that each track is looked up only once.
streamings = get_streamings()
unique_tracks = list({streaming['trackName'] for streaming in streamings})

# Map each track name to its audio features; tracks for which
# get_features returns a falsy value (e.g. None) are left out.
all_features = {}
for track in unique_tracks:
    track_id = get_id(track, token)
    features = get_features(track_id, token)
    if features:
        all_features[track] = features
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch and show the audio features for the track identified by lucy_id;
# sample (abridged) output is kept as a comment.
lucy_features = get_features(lucy_id, token)
print(lucy_features)
# >> {'danceability': 0.311,
#     'energy': 0.325,
#     'key': 2,
#     'loudness': -9.042,
#     'mode': 1,
#     'speechiness': 0.0283
#     ...}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_features(track_id: str, token: str) -> dict:
    """Fetch the audio features of a single track via spotipy.

    Args:
        track_id: Identifier of the track, sent to ``audio_features`` as
            a one-element list.
        token: Access token passed to ``spotipy.Spotify`` as ``auth``.

    Returns:
        The first element of the ``audio_features`` response, or ``None``
        when the request or the indexing of its result raises.
    """
    sp = spotipy.Spotify(auth=token)
    try:
        features = sp.audio_features([track_id])
        return features[0]
    except Exception:
        # Best-effort lookup: a failed track is reported as None so the
        # caller can skip it. Catching Exception instead of using a bare
        # ``except`` lets KeyboardInterrupt and SystemExit propagate, so a
        # long batch of lookups can still be interrupted.
        return None
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Look up the track ID for 'Lucy' by The Beatles; sample output is kept
# as a comment.
lucy_id = get_id('Lucy', token, artist='The Beatles')
print(lucy_id)
# >> '25yQPHgC35WNnnOUqFhgVR'