Last active
October 24, 2020 06:11
-
-
Save debboutr/7e24da9b35f871079ee33a48d9c6228e to your computer and use it in GitHub Desktop.
pandas operations for stacking data -- jon launspach
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
import os | |
import pandas as pd | |
# Name of the CSV column used as the unique row ID; every input file is
# indexed on this column before being stacked.
uid = "GEO_ID"
# Directory that is scanned for input CSV files ("." = current working directory).
directory = "."
def is_csv(x):
    """Return True when the file name *x* ends with a ``.csv`` extension.

    The comparison is case-insensitive, so ``DATA.CSV`` is accepted too.
    A name without a dot (for example a file literally called ``csv``) is
    not treated as a CSV file.
    """
    return x.lower().endswith(".csv")
# Stack every CSV found in `directory` into one table keyed on `uid`:
# rows are the union of all ids seen, columns are the union of all columns,
# and a later file overwrites the cells it shares with earlier ones.
output = "bologna.csv"
final = pd.DataFrame()
# Sort the listing so the result does not depend on os.listdir() order.
for f in sorted(filter(is_csv, os.listdir(directory))):
    path = os.path.join(directory, f)
    # Do not feed the result of a previous run back in as an input.
    if os.path.abspath(path) == os.path.abspath(output):
        continue
    tbl = pd.read_csv(path).set_index(uid)
    if final.empty:
        # First usable table becomes the starting point.
        final = tbl
        continue
    # Ids present in this table but not yet in the accumulated result.
    new_ids = tbl.index[~tbl.index.isin(final.index)]
    if not new_ids.empty:
        # Add all-NaN rows for the new ids (replacement for the removed
        # DataFrame.append) so the label-based assignment below finds them.
        final = final.reindex(final.index.append(new_ids))
    # Aligned assignment: writes this table's values into the matching
    # rows/columns, creating any columns that did not exist yet.
    final.loc[tbl.index, tbl.columns] = tbl
final.to_csv(output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment