Last active
April 22, 2016 15:37
-
-
Save manugarri/4c0d68f9c2c8434654402e6053620b68 to your computer and use it in GitHub Desktop.
Support for Python object CSV serialization in Python 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'3.5.1 |Continuum Analytics, Inc.| (default, Dec 7 2015, 11:16:01) \\n[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]'" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import sys\n", | |
"sys.version" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import json" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"INSTALLED VERSIONS\n", | |
"------------------\n", | |
"commit: None\n", | |
"python: 3.5.1.final.0\n", | |
"python-bits: 64\n", | |
"OS: Linux\n", | |
"OS-release: 3.19.0-58-generic\n", | |
"machine: x86_64\n", | |
"processor: x86_64\n", | |
"byteorder: little\n", | |
"LC_ALL: None\n", | |
"LANG: en_US.UTF-8\n", | |
"\n", | |
"pandas: 0.18.0\n", | |
"nose: 1.3.7\n", | |
"pip: 8.1.1\n", | |
"setuptools: 20.2.2\n", | |
"Cython: 0.23.4\n", | |
"numpy: 1.11.0\n", | |
"scipy: 0.17.0\n", | |
"statsmodels: None\n", | |
"xarray: None\n", | |
"IPython: 4.1.1\n", | |
"sphinx: None\n", | |
"patsy: None\n", | |
"dateutil: 2.5.2\n", | |
"pytz: 2016.3\n", | |
"blosc: None\n", | |
"bottleneck: None\n", | |
"tables: None\n", | |
"numexpr: 2.4.6\n", | |
"matplotlib: 1.5.1\n", | |
"openpyxl: None\n", | |
"xlrd: 0.9.4\n", | |
"xlwt: None\n", | |
"xlsxwriter: None\n", | |
"lxml: None\n", | |
"bs4: None\n", | |
"html5lib: None\n", | |
"httplib2: None\n", | |
"apiclient: None\n", | |
"sqlalchemy: None\n", | |
"pymysql: None\n", | |
"psycopg2: 2.6.1 (dt dec pq3 ext)\n", | |
"jinja2: 2.8\n", | |
"boto: 2.39.0\n" | |
] | |
} | |
], | |
"source": [ | |
"pd.show_versions()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Unicode JSON Lines input file (test.jl)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{\"first_name\":\"Manuel\", \"last_name\":\"Garrido Peña\",\"experience\":[{\"name\":\"Peña Industries\", \"start\":\"May 2014\"}]}\r\n", | |
"{\"first_name\":\"Ana\", \"last_name\":\"Cañón\", \"experience\":[{\"name\":\"Cañón technologies\",\"start\":\"April 2016\"}]}\r\n" | |
] | |
} | |
], | |
"source": [ | |
"cat test.jl" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We load the JSON Lines file (test.jl) into a pandas DataFrame, then save it to CSV" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>experience</th>\n", | |
" <th>first_name</th>\n", | |
" <th>last_name</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>[{'name': 'Peña Industries', 'start': 'May 201...</td>\n", | |
" <td>Manuel</td>\n", | |
" <td>Garrido Peña</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>[{'name': 'Cañón technologies', 'start': 'Apri...</td>\n", | |
" <td>Ana</td>\n", | |
" <td>Cañón</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" experience first_name last_name\n", | |
"0 [{'name': 'Peña Industries', 'start': 'May 201... Manuel Garrido Peña\n", | |
"1 [{'name': 'Cañón technologies', 'start': 'Apri... Ana Cañón" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"with open('test.jl') as fname:\n", | |
" lines = fname.readlines()\n", | |
" df = pd.DataFrame([json.loads(l) for l in lines])\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df.to_csv('test_p3c.csv',index=False, encoding='utf8')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"experience,first_name,last_name\r\n", | |
"\"[{'name': 'Peña Industries', 'start': 'May 2014'}]\",Manuel,Garrido Peña\r\n", | |
"\"[{'name': 'Cañón technologies', 'start': 'April 2016'}]\",Ana,Cañón\r\n" | |
] | |
} | |
], | |
"source": [ | |
"cat test_p3c.csv" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/manuel/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: 'engine' keyword is deprecated and will be removed in a future version\n", | |
" if __name__ == '__main__':\n" | |
] | |
} | |
], | |
"source": [ | |
"df.to_csv('test_p3.csv',index=False, encoding='utf8',engine='python')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"experience,first_name,last_name\r\n", | |
"\"[{'name': 'Peña Industries', 'start': 'May 2014'}]\",Manuel,Garrido Peña\r\n", | |
"\"[{'name': 'Cañón technologies', 'start': 'April 2016'}]\",Ana,Cañón\r\n" | |
] | |
} | |
], | |
"source": [ | |
"cat test_p3.csv" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"first_name":"Manuel", "last_name":"Garrido Peña","experience":[{"name":"Peña Industries", "start":"May 2014"}]} | |
{"first_name":"Ana", "last_name":"Cañón", "experience":[{"name":"Cañón technologies","start":"April 2016"}]} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
experience | first_name | last_name | |
---|---|---|---|
[{u'start': u'May 2014', u'name': u'Peña Industries'}] | Manuel | Garrido Peña | |
[{u'start': u'April 2016', u'name': u'Cañón technologies'}] | Ana | Cañón |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
experience | first_name | last_name | |
---|---|---|---|
[{u'start': u'May 2014', u'name': u'Peña Industries'}] | Manuel | Garrido Peña | |
[{u'start': u'April 2016', u'name': u'Cañón technologies'}] | Ana | Cañón |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
experience | first_name | last_name | |
---|---|---|---|
[{'name': 'Peña Industries', 'start': 'May 2014'}] | Manuel | Garrido Peña | |
[{'name': 'Cañón technologies', 'start': 'April 2016'}] | Ana | Cañón |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
experience | first_name | last_name | |
---|---|---|---|
[{'name': 'Peña Industries', 'start': 'May 2014'}] | Manuel | Garrido Peña | |
[{'name': 'Cañón technologies', 'start': 'April 2016'}] | Ana | Cañón |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment