Last active
December 11, 2023 02:41
-
-
Save MartinWeiss12/d7bb3ff9d0e1879b7c1ea5529c1a1d15 to your computer and use it in GitHub Desktop.
JSON ETL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load every Spotify "endsong" JSON export found in `path` and keep only the
# streams that count as real plays (>= 30 seconds, with a track URI).
import os

import pandas as pd

# path to your folder with all endsong json files
path = ''
# path where excel files will be saved to
output_path = ''

# Full paths of every regular file directly inside `path` whose name ends
# in '.json' (sub-directories are not searched).
files = [
    full_path
    for name, full_path in (
        (entry, os.path.join(path, entry)) for entry in os.listdir(path)
    )
    if os.path.isfile(full_path) and name.endswith('.json')
]

# Parse each file exactly once; the parsed frames feed both results below.
_raw_frames = [pd.read_json(file) for file in files]

# Per-file copies in which every bool column has been cast to float64.
data_frames = [
    frame.astype(
        {col: 'float64' for col in frame.select_dtypes(include='bool').columns}
    )
    for frame in _raw_frames
]

# NOTE(review): `data` is concatenated from the frames as parsed, not from
# `data_frames`, so the bool -> float64 cast does not reach `data`. This keeps
# the existing result; confirm whether `pd.concat(data_frames)` was intended.
data = pd.concat(_raw_frames)

# filter out streams less than 30 seconds
filtered_data = data[data['ms_played'] >= 30000]
# drop rows that do not have a URI
filtered_data = filtered_data[filtered_data['spotify_track_uri'].notnull()]
# Renumber only after *both* filters so the final index is a gap-free 0..n-1
# (the concatenated frames otherwise carry repeated per-file indices).
filtered_data = filtered_data.reset_index(drop=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment