Created
August 31, 2017 22:09
-
-
Save brevans/38b18b376a429c2cd933559f147f5170 to your computer and use it in GitHub Desktop.
saving some time summarizing individual excel files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from glob import glob | |
import pandas as pd | |
import re | |
#keep a running list of results | |
results_list = [] | |
#for each excel file, which we will call xlsx | |
for xlsx in glob("*.xlsx"): | |
#see if we can extract the ID and isolation number with a regular expression | |
#regular expression: http://regexr.com/3glnf | |
match = re.match(r'(\d+)_isolation(\d+)\.xlsx', xlsx) | |
if match: | |
#trick to convert the strings into numbers called "list comprehension" | |
sample_id, isolation = [int(x) for x in match.groups()] | |
else: | |
print("File {} doesn't seem to be named like I thought it should be.".format(xlsx)) | |
continue | |
#read in the data from excel into a pandas data frame | |
df = pd.read_excel(xlsx) | |
#select all rows (:) and columns from mult_steps to noise_dist ('mult_steps':'noise_dist') | |
# sum them, then divide by the number of rows, which is the first number in df.shape | |
summary = df.loc[:,'mult_steps':'noise_dist'].sum() / df.shape[0] | |
#add the sample ID and isolation number into the result | |
summary.name = sample_id | |
summary['isolation'] = isolation | |
#add our results to the list | |
results_list.append(summary) | |
#finally, concatenate our results and save to 'sumary.csv' | |
results = pd.concat(results_list, axis=1) | |
results.to_csv('summary.csv') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment