import pandas as pd
# Address of the Excel workbook that the rest of the notebook works on.
DATA_URL = "https://uofi.box.com/shared/static/d27425539c9d662a7041.xls"
df = pd.read_excel(DATA_URL)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
order | ctryname | year | aclpcode | cowcode | cowcode2 | qogctycode | qogctylett | qogctyyear | ccdcodelet | ... | regime | tt | ttd | tta | flagc | flagdem | flagreg | agedem | agereg | stra | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
9154 | 9155 | Zimbabwe | 2004 | 48 | 552.0 | 552 | 716 | ZWE | 7162004 | ZWE | ... | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 40.0 | 40.0 | 0.0 |
9155 | 9156 | Zimbabwe | 2005 | 48 | 552.0 | 552 | 716 | ZWE | 7162005 | ZWE | ... | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 41.0 | 41.0 | 0.0 |
9156 | 9157 | Zimbabwe | 2006 | 48 | 552.0 | 552 | 716 | ZWE | 7162006 | ZWE | ... | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 42.0 | 42.0 | 0.0 |
9157 | 9158 | Zimbabwe | 2007 | 48 | 552.0 | 552 | 716 | ZWE | 7162007 | ZWE | ... | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 43.0 | 43.0 | 0.0 |
9158 | 9159 | Zimbabwe | 2008 | 48 | 552.0 | 552 | 716 | ZWE | 7162008 | ZWE | ... | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 44.0 | 44.0 | 0.0 |
5 rows × 81 columns
# Display the dtype pandas inferred for each column of df.
df.dtypes
order int64
ctryname object
year int64
aclpcode int64
cowcode float64
cowcode2 int64
qogctycode int64
qogctylett object
qogctyyear int64
ccdcodelet object
ccdcodenum int64
aclpyear int64
cowcode2year int64
cowcodeyear int64
chgterr int64
ychgterr int64
flagc_cowcode2 int64
flage_cowcode2 int64
entryy int64
exity int64
cid float64
wdicode object
imf_code float64
politycode float64
bankscode float64
dpicode object
uncode float64
un_region float64
un_region_name object
un_continent float64
...
nmil float64
nhead object
npost object
ndate object
eheads float64
ageeh float64
emil float64
royal float64
headdiff float64
ehead object
epost object
edate object
tenure08 float64
comm float64
ecens08 float64
edeath float64
flageh float64
democracy float64
assconfid float64
poppreselec float64
regime float64
tt float64
ttd float64
tta float64
flagc float64
flagdem float64
flagreg float64
agedem float64
agereg float64
stra float64
Length: 81, dtype: object
# Fit a Kaplan-Meier estimator; T and C are handed to fit() as its first and
# second positional arguments.
from lifelines import KaplanMeierFitter
kmf = KaplanMeierFitter()
# NOTE(review): this lookup raises the KeyError: 'durations' recorded in the
# traceback that follows, so df has no such column. TODO confirm which column
# of this dataset holds the duration to be modelled.
T = df['durations']
# NOTE(review): 'obvserved' looks like a typo for 'observed'; neither spelling
# appears in the visible part of the dtypes listing. TODO confirm which column
# is the event-observed (censoring) indicator.
C = df['obvserved']
kmf.fit(T, C)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/anaconda/envs/machine_learning_cookbook/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3077 try:
-> 3078 return self._engine.get_loc(key)
3079 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'durations'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-8-8af6bb3d7bb5> in <module>()
3 kmf = KaplanMeierFitter()
4
----> 5 T = df['durations']
6 C = df['obvserved']
7
~/anaconda/envs/machine_learning_cookbook/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2686 return self._getitem_multilevel(key)
2687 else:
-> 2688 return self._getitem_column(key)
2689
2690 def _getitem_column(self, key):
~/anaconda/envs/machine_learning_cookbook/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2693 # get column
2694 if self.columns.is_unique:
-> 2695 return self._get_item_cache(key)
2696
2697 # duplicate columns & possible reduce dimensionality
~/anaconda/envs/machine_learning_cookbook/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
2487 res = cache.get(item)
2488 if res is None:
-> 2489 values = self._data.get(item)
2490 res = self._box_item_values(item, values)
2491 cache[item] = res
~/anaconda/envs/machine_learning_cookbook/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
4113
4114 if not isna(item):
-> 4115 loc = self.items.get_loc(item)
4116 else:
4117 indexer = np.arange(len(self.items))[isna(self.items)]
~/anaconda/envs/machine_learning_cookbook/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3078 return self._engine.get_loc(key)
3079 except KeyError:
-> 3080 return self._engine.get_loc(self._maybe_cast_indexer(key))
3081
3082 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'durations'
# IPython magic selecting matplotlib's inline backend for this notebook.
%matplotlib inline
# Plot the estimate held by kmf.
# NOTE(review): the preceding fit cell failed with a KeyError before fit() ran,
# so kmf is presumably still unfitted at this point -- confirm after fixing it.
kmf.plot()
# List the distinct values of the 'regime' column (the recorded output shows
# codes 0-5 plus NaN).
df['regime'].unique()
array([ 5., 3., 4., 0., 2., 1., nan])
# Overlay one Kaplan-Meier curve per regime code on a single set of axes.
#
# Relies on df, T, C and kmf created by earlier cells; T and C must share
# df's index so that the boolean mask below lines up with them.
ax = None
# NaN is dropped before looping: `df['regime'] == nan` matches no rows, so that
# iteration would hand empty inputs to fit().
for r in df['regime'].dropna().unique():
    ix = df['regime'] == r
    # Plain boolean-mask indexing replaces `.ix`, which pandas deprecated in
    # 0.20 and removed in 1.0. The label gives each curve its own legend entry.
    kmf.fit(T[ix], event_observed=C[ix], label=str(r))
    # The first call lets lifelines create the axes and every later call reuses
    # them, so no separate `subplot` call is needed (the original used that name
    # without importing it, and plotted through the undefined name `km`).
    ax = kmf.plot() if ax is None else kmf.plot(ax=ax)
File "<ipython-input-10-ff3cc25f6b31>", line 1
for r in df['regime'].unique():
^
SyntaxError: unexpected EOF while parsing