Skip to content

Instantly share code, notes, and snippets.

@tcotav
Last active August 29, 2015 14:16
Show Gist options
  • Save tcotav/e7d63fe3785d811592c9 to your computer and use it in GitHub Desktop.
Save tcotav/e7d63fe3785d811592c9 to your computer and use it in GitHub Desktop.
pandas timeseries dataframe from source
import pandas as pd
import datetime as dt
# Raw sample readings per host, as (timestamp, value) tuples taken on
# 2015-03-05 during the 12:00 hour. host1 has a reading for every minute
# 12:00-12:04; host2 has no reading at 12:01.
testdata = {
    "host1": [
        (dt.datetime(2015, 3, 5, 12, minute), 100) for minute in range(5)
    ],
    "host2": [
        (dt.datetime(2015, 3, 5, 12, minute), 300) for minute in (0, 2, 3, 4)
    ],
}
# One Series per host, each indexed by the timestamps at which that host
# actually reported. host2 has no sample at 12:01, so the two indexes differ.
host1 = pd.Series(
    [100, 101, 102, 103, 104],
    index=[dt.datetime(2015, 3, 5, 12, minute) for minute in range(5)],
)
host2 = pd.Series(
    [300, 302, 303, 304],
    index=[dt.datetime(2015, 3, 5, 12, minute) for minute in (0, 2, 3, 4)],
)

# Column name -> Series; each entry becomes one DataFrame column.
d = {"host1": host1, "host2": host2}

# Row index for the frame: every minute from 12:00 through 12:05. 12:05 is
# in neither Series, and 12:01 is missing from host2, so those cells are NaN.
idx = [dt.datetime(2015, 3, 5, 12, minute) for minute in range(6)]

# Align both Series onto the common index.
df = pd.DataFrame(d, index=idx)
"""
>>> df
                     host1  host2
2015-03-05 12:00:00    100    300
2015-03-05 12:01:00    101    NaN
2015-03-05 12:02:00    102    302
2015-03-05 12:03:00    103    303
2015-03-05 12:04:00    104    304
2015-03-05 12:05:00    NaN    NaN
>>> df.index
<class 'pandas.tseries.index.DatetimeIndex'>
[2015-03-05 12:00:00, ..., 2015-03-05 12:05:00]
Length: 6, Freq: None, Timezone: None
>>> df.columns
Index([u'host1', u'host2'], dtype='object')
# select a column
>>> df['host1']
2015-03-05 12:00:00 100
2015-03-05 12:01:00 101
2015-03-05 12:02:00 102
2015-03-05 12:03:00 103
2015-03-05 12:04:00 104
2015-03-05 12:05:00 NaN
Name: host1, dtype: float64
"""
# Replace NaN gaps in each column with values interpolated from the
# neighbouring rows ("linear" is the pandas default, spelled out here).
df = df.interpolate(method="linear")
"""
# drop a column
df=df.drop('_span', axis=1)
# set a column as the index
df= df.set_index("_time")
"""
@tcotav
Copy link
Author

tcotav commented Mar 6, 2015

Used to parse and clean up dumped Zabbix data.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment