Skip to content

Instantly share code, notes, and snippets.

@smutch
Created May 17, 2016 06:28
Show Gist options
  • Save smutch/9d8e5f8b106315279b2a5f82390cc69c to your computer and use it in GitHub Desktop.
Save smutch/9d8e5f8b106315279b2a5f82390cc69c to your computer and use it in GitHub Desktop.
py: pandas grouping and aggregating to plot relations with confidence intervals (CIs)
#!/usr/bin/env python
import numpy as np
import matplotlib.pyplot as plt
from dragons import meraxes
import pandas as pd
__author__ = "Simon Mutch"
__date__ = "2016-05-17"
# I am reading in Meraxes galaxies here, but once you have your SAGE galaxies
# read in, the process will be the same.
fname = 'YOUR FILE NAME HERE'
meraxes.set_little_h(fname)
gals = meraxes.read_gals(fname, snapshot=32)
# convert your ndarray to a pandas datatframe
# N.B. you can't have any vectors in here, so no 'Vel', 'Pos' etc.
gals = pd.DataFrame(gals[['StellarMass', 'Sfr']])
# select only galaxies in stellar mass range that we want
g = gals.query('StellarMass > 1e-4 and StellarMass < 100')
nbins = 15 # number of bins
relation = g.groupby(pd.cut(np.log10(g.StellarMass * 1e10), nbins) # group the galaxies into nbins of stellar mass
).aggregate({ # for each group, calculate the following aggregated statistics
'Sfr': ['median', # median
['q025', lambda v: v.quantile(0.025)], # 2.5 percentile
['q975', lambda v: v.quantile(0.975)]], # 97.5 percentile
'StellarMass': [['median', lambda v: np.median(np.log10(1e10 * v))], # median in log space
['count', lambda v: np.count_nonzero(v)] # the number in each group
]})
# get rid of bins with less than 30 galaxies
relation = relation.loc[relation.StellarMass['count'] > 30, :]
# plot!
fig, ax = plt.subplots(1, 1)
l, = ax.plot(np.log10(relation.StellarMass['median']) + 10, relation.Sfr['q025'],
ls='--', label='_none_')
ax.plot(np.log10(relation.StellarMass['median']) + 10, relation.Sfr['q975'],
ls='--', color=l.get_color(), label='_none_')
ax.plot(np.log10(relation.StellarMass['median']) + 10, relation.Sfr['median'],
ls='-', color=l.get_color(), label='Meraxes z~11')
ax.set_yscale('log')
ax.legend(loc=2)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment