Created
March 17, 2013 02:03
-
-
Save eloraburns/5179224 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from cStringIO import StringIO | |
# In-memory stand-in for the real log file: one CSV header row followed by a
# single data row whose first column is a POSIX timestamp.
log_file = StringIO(
    'posix_timestamp,elapsed,sys,user,queries,query_time,rows,'
    'accountid,userid,contactid,level,silo,method\n'
    '1343103150,0.062353,0,4,6,0.01690,3,'
    '12345,1,-1,3,invoice_InvoiceResource,search\n'
)
import pandas as pd | |
from datetime import datetime | |
import gc | |
def posix_string_to_datetime(posix_string):
    """Convert a POSIX timestamp given as text into a naive UTC datetime.

    ``posix_string`` is passed through ``int()``, so it must be a whole
    number of seconds since the epoch (e.g. ``'1343103150'``); anything
    ``int()`` rejects raises ``ValueError``.
    """
    return datetime.utcfromtimestamp(int(posix_string))
# Reproduction case: this call works on pandas 0.9.0, but not on 0.10.1 or
# github master. The arguments are intentionally left exactly as reported.
df = pd.io.parsers.read_csv(
    log_file,
    # index_col is the first column, our posix_timestamp
    index_col=0,
    # Interpret the index column as a date
    parse_dates=0,
    # Custom converter supplied for the date column
    date_parser=posix_string_to_datetime)
# The crash looks like this (on master) | |
--------------------------------------------------------------------------- | |
AttributeError Traceback (most recent call last) | |
<ipython-input-2-c3fa4840399b> in <module>() | |
17 # Interpret the index column as a date | |
18 parse_dates=0, | |
---> 19 date_parser=posix_string_to_datetime) | |
20 gc.enable() | |
/Users/taavi/src/pandas/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, nrows, iterator, chunksize, verbose, encoding, squeeze) | |
398 buffer_lines=buffer_lines) | |
399 | |
--> 400 return _read(filepath_or_buffer, kwds) | |
401 | |
402 parser_f.__name__ = name | |
/Users/taavi/src/pandas/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds) | |
213 return parser | |
214 | |
--> 215 return parser.read() | |
216 | |
217 _parser_defaults = { | |
/Users/taavi/src/pandas/pandas/io/parsers.pyc in read(self, nrows) | |
630 # self._engine.set_error_bad_lines(False) | |
631 | |
--> 632 ret = self._engine.read(nrows) | |
633 | |
634 if self.options.get('as_recarray'): | |
/Users/taavi/src/pandas/pandas/io/parsers.pyc in read(self, nrows) | |
1006 | |
1007 names, data = self._do_date_conversions(names, data) | |
-> 1008 index = self._make_index(data, alldata, names) | |
1009 | |
1010 return index, names, data | |
/Users/taavi/src/pandas/pandas/io/parsers.pyc in _make_index(self, data, alldata, columns) | |
706 elif not self._has_complex_date_col: | |
707 index = self._get_simple_index(alldata, columns) | |
--> 708 index = self._agg_index(index) | |
709 | |
710 elif self._has_complex_date_col: | |
/Users/taavi/src/pandas/pandas/io/parsers.pyc in _agg_index(self, index, try_parse_dates) | |
789 self.na_values) | |
790 | |
--> 791 arr, _ = self._convert_types(arr, col_na_values) | |
792 arrays.append(arr) | |
793 | |
/Users/taavi/src/pandas/pandas/io/parsers.pyc in _convert_types(self, values, na_values, try_num_bool) | |
815 def _convert_types(self, values, na_values, try_num_bool=True): | |
816 na_count = 0 | |
--> 817 if issubclass(values.dtype.type, (np.number, np.bool_)): | |
818 mask = lib.ismember(values, na_values) | |
819 na_count = mask.sum() | |
AttributeError: 'datetime.datetime' object has no attribute 'dtype' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment