Created
January 21, 2014 02:05
-
-
Save danbirken/8533199 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Timing to_datetime():
Datetime format: %m-%d-%Y
---------------
Without infer_format: cad7e6333f1cd2ebe272d7fc7553cd27 - 3.082s
With infer_format: cad7e6333f1cd2ebe272d7fc7553cd27 - 0.268s (11.51x baseline)
Passing the format: cad7e6333f1cd2ebe272d7fc7553cd27 - 0.267s (11.54x baseline)
Datetime format: %m/%d/%Y %H:%M:%S.%f
---------------
Without infer_format: ec2ef27adb2e95e2c5386d8d5d8513a1 - 5.162s
With infer_format: ec2ef27adb2e95e2c5386d8d5d8513a1 - 0.501s (10.31x baseline)
Passing the format: ec2ef27adb2e95e2c5386d8d5d8513a1 - 0.509s (10.14x baseline)
Datetime format: %Y-%m-%dT%H:%M:%S.%f
---------------
Without infer_format: ec2ef27adb2e95e2c5386d8d5d8513a1 - 0.013s
With infer_format: ec2ef27adb2e95e2c5386d8d5d8513a1 - 0.012s (1.06x baseline)
Passing the format: ec2ef27adb2e95e2c5386d8d5d8513a1 - 0.491s (0.03x baseline)
Testing reading CSV:
Datetime format: %m-%d-%Y
---------------
Without infer_format: e8bde3ae42c769a7509cb94e9f36ca5e - 3.085s
With infer_format: e8bde3ae42c769a7509cb94e9f36ca5e - 0.298s (10.37x baseline)
With strptime date_parser: e8bde3ae42c769a7509cb94e9f36ca5e - 0.906s (3.41x baseline)
Datetime format: %m/%d/%Y %H:%M:%S.%f
---------------
Without infer_format: 2f7019b7f7795146b4de4e83fbc0ebb5 - 5.230s
With infer_format: 2f7019b7f7795146b4de4e83fbc0ebb5 - 0.555s (9.42x baseline)
With strptime date_parser: 2f7019b7f7795146b4de4e83fbc0ebb5 - 1.245s (4.20x baseline)
Datetime format: %Y-%m-%dT%H:%M:%S.%f
---------------
Without infer_format: f0028f10cf5d2fc66900e1a57d5bbf9a - 0.073s
With infer_format: f0028f10cf5d2fc66900e1a57d5bbf9a - 0.062s (1.19x baseline)
With strptime date_parser: f0028f10cf5d2fc66900e1a57d5bbf9a - 1.249s (0.06x baseline)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
import hashlib | |
import time | |
import pandas as pd | |
def time_with_hash(desc, f, baseline=None):
    """Run ``f`` once and print its elapsed time plus an MD5 of its result.

    Args:
        desc: Label placed at the start of the report line (the format
            string right-aligns it to 30 characters).
        f: Zero-argument callable. Its return value must provide a
            ``to_json()`` method; the JSON output is hashed so that runs
            producing the same serialized data print the same digest.
        baseline: Optional duration, in seconds, of a reference run. When
            given, the report line also shows ``baseline / timing``.

    Returns:
        The elapsed time of the single ``f()`` call in seconds, measured
        with ``time.time()``. Hashing and printing are not included.
    """
    start = time.time()
    output = f()
    timing = time.time() - start

    # hashlib digests bytes only. to_json() can return text, so encode it
    # first; a value that is already bytes is hashed unchanged.
    payload = output.to_json()
    if not isinstance(payload, bytes):
        payload = payload.encode('utf-8')
    digest = hashlib.md5(payload).hexdigest()

    line = '%30s: %s - %.3fs' % (desc, digest, timing)
    if baseline is not None:
        # A very fast call can measure as exactly 0.0 on a coarse clock;
        # report an infinite speed-up instead of raising ZeroDivisionError.
        speedup = baseline / timing if timing > 0 else float('inf')
        line += ' (%.2fx baseline)' % speedup

    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function call (Python 3).
    print(line)
    return timing
# strftime/strptime patterns exercised by the benchmarks in this script.
test_formats = [
    '%m-%d-%Y',
    '%m/%d/%Y %H:%M:%S.%f',
    '%Y-%m-%dT%H:%M:%S.%f',
]

print('Timing to_datetime():')
for test_format in test_formats:
    # 50,000 hourly timestamps starting 2000-01-01, rendered as strings in
    # the format under test; this is the input every variant below parses.
    s = (
        pd
        .Series(pd.date_range('20000101', periods=50000, freq='H'))
        .apply(lambda x: x.strftime(test_format))
    )
    print('%s %s' % ('Datetime format:', test_format))
    print('---------------')
    # Reference run: no format hint at all.
    baseline = time_with_hash('Without infer_format', lambda: pd.to_datetime(s))
    time_with_hash(
        'With infer_format',
        # The keyword is ``infer_datetime_format`` (the same name read_csv
        # takes); ``infer_format`` is rejected as an unexpected argument.
        lambda: pd.to_datetime(s, infer_datetime_format=True),
        baseline=baseline
    )
    time_with_hash(
        'Passing the format',
        lambda: pd.to_datetime(s, format=test_format),
        baseline=baseline
    )
print('Testing reading CSV:')
# Scratch file that each iteration overwrites and then reads back.
csv_path = '/tmp/test.csv'


def _read_dates(**options):
    """Load the scratch CSV, asking pandas to parse column 1 as dates."""
    return pd.read_csv(csv_path, parse_dates=[1], **options)


for test_format in test_formats:
    # Same data as the to_datetime benchmark: 50,000 hourly timestamps
    # rendered as strings in the format under test, written out as CSV.
    stamps = pd.Series(pd.date_range('20000101', periods=50000, freq='H'))
    formatted = stamps.apply(lambda ts: ts.strftime(test_format))
    formatted.to_csv(csv_path)

    def date_parser(text):
        """Parse ``text`` with strptime using the format under test."""
        return datetime.datetime.strptime(text, test_format)

    print('%s %s' % ('Datetime format:', test_format))
    print('---------------')

    # Reference run: default date parsing, no inference and no custom parser.
    baseline = time_with_hash('Without infer_format', _read_dates)
    time_with_hash(
        'With infer_format',
        lambda: _read_dates(infer_datetime_format=True),
        baseline=baseline,
    )
    time_with_hash(
        'With strptime date_parser',
        lambda: _read_dates(date_parser=date_parser),
        baseline=baseline,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment