Created
December 9, 2012 12:31
-
-
Save jankatins/4244669 to your computer and use it in GitHub Desktop.
Umlaut problem with pandas and ipython
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "Unicode tests" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pandas\n", | |
"\n", | |
"data = [\"a\",\"b\",\"c\",\"d\"]\n", | |
"df = pandas.DataFrame({\"a\":data})\n", | |
"df" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td><strong>0</strong></td>\n", | |
" <td> a</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>1</strong></td>\n", | |
" <td> b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>2</strong></td>\n", | |
" <td> c</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>3</strong></td>\n", | |
" <td> d</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
" a\n", | |
"0 a\n", | |
"1 b\n", | |
"2 c\n", | |
"3 d" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data2 = [u\"test\", u\"\u00df\", u\"\u00e4\", u\"\u00e1\"]\n", | |
"df2 = pandas.DataFrame({\"a\":data2})\n", | |
"print(df2[\"a\"][1])\n", | |
"df2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\u00df\n" | |
] | |
}, | |
{ | |
"ename": "UnicodeDecodeError", | |
"evalue": "'utf8' codec can't decode byte 0xdf in position 22: invalid continuation byte", | |
"output_type": "pyerr", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mUnicodeDecodeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-5-0bb7d6f7d515>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mdf2\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpandas\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m{\u001b[0m\u001b[1;34m\"a\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[0mdata2\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;32mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf2\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"a\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mdf2\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;32mC:\\portabel\\Python27\\lib\\site-packages\\IPython\\core\\displayhook.pyc\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, result)\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate_user_ns\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 245\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlog_output\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mformat_dict\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 246\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfinish_displayhook\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 247\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 248\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mflush\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;32mC:\\portabel\\Python27\\lib\\site-packages\\IPython\\zmq\\displayhook.pyc\u001b[0m in \u001b[0;36mfinish_displayhook\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 59\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstdout\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mflush\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 60\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstderr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mflush\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 61\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpub_socket\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmsg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mident\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtopic\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 62\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmsg\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 63\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;32mC:\\portabel\\Python27\\lib\\site-packages\\IPython\\zmq\\session.pyc\u001b[0m in \u001b[0;36msend\u001b[1;34m(self, stream, msg_or_type, content, parent, ident, buffers, track, header, metadata)\u001b[0m\n\u001b[0;32m 576\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 577\u001b[0m \u001b[0mbuffers\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mbuffers\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mNone\u001b[0m \u001b[1;32melse\u001b[0m \u001b[0mbuffers\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 578\u001b[1;33m \u001b[0mto_send\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mserialize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mident\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 579\u001b[0m \u001b[0mto_send\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbuffers\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 580\u001b[0m \u001b[0mlongest\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmax\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0ms\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mto_send\u001b[0m \u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;32mC:\\portabel\\Python27\\lib\\site-packages\\IPython\\zmq\\session.pyc\u001b[0m in \u001b[0;36mserialize\u001b[1;34m(self, msg, ident)\u001b[0m\n\u001b[0;32m 484\u001b[0m \u001b[0mcontent\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnone\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 485\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcontent\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 486\u001b[1;33m \u001b[0mcontent\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpack\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcontent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 487\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcontent\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbytes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 488\u001b[0m \u001b[1;31m# content is already packed, as in a relayed message\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;32mC:\\portabel\\Python27\\lib\\site-packages\\IPython\\zmq\\session.pyc\u001b[0m in \u001b[0;36m<lambda>\u001b[1;34m(obj)\u001b[0m\n\u001b[0;32m 78\u001b[0m \u001b[0m_version_info_list\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mIPython\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mversion_info\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 79\u001b[0m \u001b[1;31m# ISO8601-ify datetime objects\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 80\u001b[1;33m \u001b[0mjson_packer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mlambda\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mjsonapi\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdate_default\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 81\u001b[0m \u001b[0mjson_unpacker\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mlambda\u001b[0m \u001b[0ms\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mextract_dates\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mjsonapi\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 82\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;32mC:\\portabel\\Python27\\lib\\site-packages\\zmq\\utils\\jsonapi.pyc\u001b[0m in \u001b[0;36mdumps\u001b[1;34m(o, **kwargs)\u001b[0m\n\u001b[0;32m 70\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'separators'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m','\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m':'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 72\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_squash_unicode\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mjsonmod\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mo\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 73\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mloads\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;32mC:\\portabel\\Python27\\lib\\site-packages\\simplejson\\__init__.pyc\u001b[0m in \u001b[0;36mdumps\u001b[1;34m(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, encoding, default, use_decimal, namedtuple_as_object, tuple_as_array, bigint_as_string, sort_keys, item_sort_key, **kw)\u001b[0m\n\u001b[0;32m 332\u001b[0m \u001b[0msort_keys\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msort_keys\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 333\u001b[0m \u001b[0mitem_sort_key\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mitem_sort_key\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 334\u001b[1;33m **kw).encode(obj)\n\u001b[0m\u001b[0;32m 335\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 336\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;32mC:\\portabel\\Python27\\lib\\site-packages\\simplejson\\encoder.pyc\u001b[0m in \u001b[0;36mencode\u001b[1;34m(self, o)\u001b[0m\n\u001b[0;32m 235\u001b[0m \u001b[1;31m# exceptions aren't as detailed. The list call should be roughly\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 236\u001b[0m \u001b[1;31m# equivalent to the PySequence_Fast that ''.join() would do.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 237\u001b[1;33m \u001b[0mchunks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miterencode\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mo\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_one_shot\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 238\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mchunks\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 239\u001b[0m \u001b[0mchunks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mchunks\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;32mC:\\portabel\\Python27\\lib\\site-packages\\simplejson\\encoder.pyc\u001b[0m in \u001b[0;36miterencode\u001b[1;34m(self, o, _one_shot)\u001b[0m\n\u001b[0;32m 309\u001b[0m Decimal=Decimal)\n\u001b[0;32m 310\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 311\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_iterencode\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mo\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 312\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 313\u001b[0m \u001b[0mkey_memo\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclear\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;31mUnicodeDecodeError\u001b[0m: 'utf8' codec can't decode byte 0xdf in position 22: invalid continuation byte" | |
] | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment