kokes · March 9, 2016 00:28
diff --git a/Issue_2697.ipynb b/Issue_2697.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from StringIO import StringIO\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Test 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "f=StringIO(\"\"\"id, text\n",
    "135217135789158401, 'testing lost precision from csv'\n",
    "1352171357E+5, 'any item scientific format loses the precision on all other entries'\"\"\")\n",
    "\n",
    "test = pd.read_csv(f)\n",
    "print test['id'][0] == 135217135789158401\n",
    "print test['id'][1] == 1352171357E+5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "f2=StringIO(\"\"\"id,A,B,C\n",
    "1, 99999999999, 'a', 99999999999\n",
    "2, 123456789012345, 'b', 123456789012345\n",
    "3, 1234E+0, 'c', 1234\"\"\")\n",
    "df = pd.read_csv(f2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Test 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1.000000e+11</td>\n",
       "      <td>'a'</td>\n",
       "      <td>99999999999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1.234568e+14</td>\n",
       "      <td>'b'</td>\n",
       "      <td>123456789012345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1.234000e+03</td>\n",
       "      <td>'c'</td>\n",
       "      <td>1234</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id             A     B                C\n",
       "0   1  1.000000e+11   'a'      99999999999\n",
       "1   2  1.234568e+14   'b'  123456789012345\n",
       "2   3  1.234000e+03   'c'             1234"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    True\n",
       "1    True\n",
       "2    True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['A'] == df['C']"
   ]
  },
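  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Test 3\n",
    "> Well, the problem here is that if you have an integer above 2^53 and some other number in the column causes the entire column to be interpreted as float64, say, you are going to lose precision.\n",
    "\n",
    "A float64 has a 53-bit significand, so not every integer above 2^53 can be represented exactly; the cells below use such a value to show the precision loss."
   ]
  },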
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.log(1234567890123333345)/np.log(2) > 53"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1.000000e+11</td>\n",
       "      <td>'a'</td>\n",
       "      <td>99999999999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1.234568e+18</td>\n",
       "      <td>'b'</td>\n",
       "      <td>1234567890123333345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1.234000e+03</td>\n",
       "      <td>'c'</td>\n",
       "      <td>1234</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id             A     B                    C\n",
       "0   1  1.000000e+11   'a'          99999999999\n",
       "1   2  1.234568e+18   'b'  1234567890123333345\n",
       "2   3  1.234000e+03   'c'                 1234"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f2=StringIO(\"\"\"id,A,B,C\n",
    "1, 99999999999, 'a', 99999999999\n",
    "2, 1234567890123333345, 'b', 1234567890123333345\n",
    "3, 1234E+0, 'c', 1234\"\"\")\n",
    "df = pd.read_csv(f2)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     True\n",
       "1    False\n",
       "2     True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['A'] == df['C']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"from StringIO import StringIO\n",
	"import pandas as pd"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Test 1"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"True\n",
	"True\n"
	]
	}
	],
	"source": [
	"f=StringIO(\"\"\"id, text\n",
	"135217135789158401, 'testing lost precision from csv'\n",
	"1352171357E+5, 'any item scientific format loses the precision on all other entries'\"\"\")\n",
	"\n",
	"test = pd.read_csv(f)\n",
	"print test['id'][0] == 135217135789158401\n",
	"print test['id'][1] == 1352171357E+5"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"f2=StringIO(\"\"\"id,A,B,C\n",
	"1, 99999999999, 'a', 99999999999\n",
	"2, 123456789012345, 'b', 123456789012345\n",
	"3, 1234E+0, 'c', 1234\"\"\")\n",
	"df = pd.read_csv(f2)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Test 2"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id</th>\n",
	" <th>A</th>\n",
	" <th>B</th>\n",
	" <th>C</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>1.000000e+11</td>\n",
	" <td>'a'</td>\n",
	" <td>99999999999</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>2</td>\n",
	" <td>1.234568e+14</td>\n",
	" <td>'b'</td>\n",
	" <td>123456789012345</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>3</td>\n",
	" <td>1.234000e+03</td>\n",
	" <td>'c'</td>\n",
	" <td>1234</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id A B C\n",
	"0 1 1.000000e+11 'a' 99999999999\n",
	"1 2 1.234568e+14 'b' 123456789012345\n",
	"2 3 1.234000e+03 'c' 1234"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0 True\n",
	"1 True\n",
	"2 True\n",
	"dtype: bool"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df['A'] == df['C']"
	]
	},
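	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Test 3\n",
	"> Well, the problem here is that if you have an integer above 2^53 and some other number in the column causes the entire column to be interpreted as float64, say, you are going to lose precision.\n",
	"\n",
	"A float64 has a 53-bit significand, so not every integer above 2^53 can be represented exactly; the cells below use such a value to show the precision loss."
	]
	},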
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"np.log(1234567890123333345)/np.log(2) > 53"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id</th>\n",
	" <th>A</th>\n",
	" <th>B</th>\n",
	" <th>C</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>1.000000e+11</td>\n",
	" <td>'a'</td>\n",
	" <td>99999999999</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>2</td>\n",
	" <td>1.234568e+18</td>\n",
	" <td>'b'</td>\n",
	" <td>1234567890123333345</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>3</td>\n",
	" <td>1.234000e+03</td>\n",
	" <td>'c'</td>\n",
	" <td>1234</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id A B C\n",
	"0 1 1.000000e+11 'a' 99999999999\n",
	"1 2 1.234568e+18 'b' 1234567890123333345\n",
	"2 3 1.234000e+03 'c' 1234"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"f2=StringIO(\"\"\"id,A,B,C\n",
	"1, 99999999999, 'a', 99999999999\n",
	"2, 1234567890123333345, 'b', 1234567890123333345\n",
	"3, 1234E+0, 'c', 1234\"\"\")\n",
	"df = pd.read_csv(f2)\n",
	"df"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0 True\n",
	"1 False\n",
	"2 True\n",
	"dtype: bool"
	]
	},
	"execution_count": 9,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df['A'] == df['C']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.10"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}