Skip to content

Instantly share code, notes, and snippets.

@DustinAlandzes
Created September 1, 2018 08:08
Show Gist options
  • Save DustinAlandzes/3cbfc7c0da83898848670fbb5d55dad3 to your computer and use it in GitHub Desktop.
Save DustinAlandzes/3cbfc7c0da83898848670fbb5d55dad3 to your computer and use it in GitHub Desktop.
import pandas as pd
df = pd.read_excel("https://uofi.box.com/shared/static/d27425539c9d662a7041.xls")
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
order ctryname year aclpcode cowcode cowcode2 qogctycode qogctylett qogctyyear ccdcodelet ... regime tt ttd tta flagc flagdem flagreg agedem agereg stra
9154 9155 Zimbabwe 2004 48 552.0 552 716 ZWE 7162004 ZWE ... 3.0 0.0 0.0 0.0 0.0 0.0 0.0 40.0 40.0 0.0
9155 9156 Zimbabwe 2005 48 552.0 552 716 ZWE 7162005 ZWE ... 3.0 0.0 0.0 0.0 0.0 0.0 0.0 41.0 41.0 0.0
9156 9157 Zimbabwe 2006 48 552.0 552 716 ZWE 7162006 ZWE ... 3.0 0.0 0.0 0.0 0.0 0.0 0.0 42.0 42.0 0.0
9157 9158 Zimbabwe 2007 48 552.0 552 716 ZWE 7162007 ZWE ... 3.0 0.0 0.0 0.0 0.0 0.0 0.0 43.0 43.0 0.0
9158 9159 Zimbabwe 2008 48 552.0 552 716 ZWE 7162008 ZWE ... 3.0 0.0 0.0 0.0 0.0 0.0 0.0 44.0 44.0 0.0

5 rows × 81 columns

df.dtypes
order               int64
ctryname           object
year                int64
aclpcode            int64
cowcode           float64
cowcode2            int64
qogctycode          int64
qogctylett         object
qogctyyear          int64
ccdcodelet         object
ccdcodenum          int64
aclpyear            int64
cowcode2year        int64
cowcodeyear         int64
chgterr             int64
ychgterr            int64
flagc_cowcode2      int64
flage_cowcode2      int64
entryy              int64
exity               int64
cid               float64
wdicode            object
imf_code          float64
politycode        float64
bankscode         float64
dpicode            object
uncode            float64
un_region         float64
un_region_name     object
un_continent      float64
                   ...   
nmil              float64
nhead              object
npost              object
ndate              object
eheads            float64
ageeh             float64
emil              float64
royal             float64
headdiff          float64
ehead              object
epost              object
edate              object
tenure08          float64
comm              float64
ecens08           float64
edeath            float64
flageh            float64
democracy         float64
assconfid         float64
poppreselec       float64
regime            float64
tt                float64
ttd               float64
tta               float64
flagc             float64
flagdem           float64
flagreg           float64
agedem            float64
agereg            float64
stra              float64
Length: 81, dtype: object
from lifelines import KaplanMeierFitter

kmf = KaplanMeierFitter()

T = df['durations']
C = df['obvserved']

kmf.fit(T, C)
---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)

~/anaconda/envs/machine_learning_cookbook/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3077             try:
-> 3078                 return self._engine.get_loc(key)
   3079             except KeyError:


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


KeyError: 'durations'


During handling of the above exception, another exception occurred:


KeyError                                  Traceback (most recent call last)

<ipython-input-8-8af6bb3d7bb5> in <module>()
      3 kmf = KaplanMeierFitter()
      4 
----> 5 T = df['durations']
      6 C = df['obvserved']
      7 


~/anaconda/envs/machine_learning_cookbook/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2686             return self._getitem_multilevel(key)
   2687         else:
-> 2688             return self._getitem_column(key)
   2689 
   2690     def _getitem_column(self, key):


~/anaconda/envs/machine_learning_cookbook/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2693         # get column
   2694         if self.columns.is_unique:
-> 2695             return self._get_item_cache(key)
   2696 
   2697         # duplicate columns & possible reduce dimensionality


~/anaconda/envs/machine_learning_cookbook/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   2487         res = cache.get(item)
   2488         if res is None:
-> 2489             values = self._data.get(item)
   2490             res = self._box_item_values(item, values)
   2491             cache[item] = res


~/anaconda/envs/machine_learning_cookbook/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   4113 
   4114             if not isna(item):
-> 4115                 loc = self.items.get_loc(item)
   4116             else:
   4117                 indexer = np.arange(len(self.items))[isna(self.items)]


~/anaconda/envs/machine_learning_cookbook/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3078                 return self._engine.get_loc(key)
   3079             except KeyError:
-> 3080                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   3081 
   3082         indexer = self.get_indexer([key], method=method, tolerance=tolerance)


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


KeyError: 'durations'
%matplotlib inline
kmf.plot()
df['regime'].unique()
array([ 5.,  3.,  4.,  0.,  2.,  1., nan])
ax = subplot(111)

for r in df['regime'].unique():
    ix = df['regime'] == r
    kmf.fit(T.ix[ix], C.ix[ix])
    km.plot(ax=ax)
  File "<ipython-input-10-ff3cc25f6b31>", line 1
    for r in df['regime'].unique():
                                   ^
SyntaxError: unexpected EOF while parsing
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment