Created
March 9, 2016 00:28
-
-
Save kokes/51a164426137f6fc387b to your computer and use it in GitHub Desktop.
Testing pandas issue #2697
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from StringIO import StringIO\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Test 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"True\n", | |
"True\n" | |
] | |
} | |
], | |
"source": [ | |
"f=StringIO(\"\"\"id, text\n", | |
"135217135789158401, 'testing lost precision from csv'\n", | |
"1352171357E+5, 'any item scientific format loses the precision on all other entries'\"\"\")\n", | |
"\n", | |
"test = pd.read_csv(f)\n", | |
"print test['id'][0] == 135217135789158401\n", | |
"print test['id'][1] == 1352171357E+5" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"f2=StringIO(\"\"\"id,A,B,C\n", | |
"1, 99999999999, 'a', 99999999999\n", | |
"2, 123456789012345, 'b', 123456789012345\n", | |
"3, 1234E+0, 'c', 1234\"\"\")\n", | |
"df = pd.read_csv(f2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Test 2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>1.000000e+11</td>\n", | |
" <td>'a'</td>\n", | |
" <td>99999999999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>1.234568e+14</td>\n", | |
" <td>'b'</td>\n", | |
" <td>123456789012345</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>1.234000e+03</td>\n", | |
" <td>'c'</td>\n", | |
" <td>1234</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id A B C\n", | |
"0 1 1.000000e+11 'a' 99999999999\n", | |
"1 2 1.234568e+14 'b' 123456789012345\n", | |
"2 3 1.234000e+03 'c' 1234" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 True\n", | |
"1 True\n", | |
"2 True\n", | |
"dtype: bool" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df['A'] == df['C']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Test 3\n", | |
"> Well the problem here is that if you have an integer above 2^53 and some other number in the column causes the entire column to be interpreted as float64, say, you are going to lose precision. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.log(1234567890123333345)/np.log(2) > 53" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>1.000000e+11</td>\n", | |
" <td>'a'</td>\n", | |
" <td>99999999999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>1.234568e+18</td>\n", | |
" <td>'b'</td>\n", | |
" <td>1234567890123333345</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>1.234000e+03</td>\n", | |
" <td>'c'</td>\n", | |
" <td>1234</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id A B C\n", | |
"0 1 1.000000e+11 'a' 99999999999\n", | |
"1 2 1.234568e+18 'b' 1234567890123333345\n", | |
"2 3 1.234000e+03 'c' 1234" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f2=StringIO(\"\"\"id,A,B,C\n", | |
"1, 99999999999, 'a', 99999999999\n", | |
"2, 1234567890123333345, 'b', 1234567890123333345\n", | |
"3, 1234E+0, 'c', 1234\"\"\")\n", | |
"df = pd.read_csv(f2)\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 True\n", | |
"1 False\n", | |
"2 True\n", | |
"dtype: bool" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df['A'] == df['C']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment