Last active
April 22, 2016 15:37
-
-
Save manugarri/4c0d68f9c2c8434654402e6053620b68 to your computer and use it in GitHub Desktop.
Support for Python object CSV serialization in Python 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'2.7.6 |Continuum Analytics, Inc.| (default, May 27 2014, 14:50:58) \\n[GCC 4.1.2 20080704 (Red Hat 4.1.2-54)]'" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import sys\n", | |
"sys.version" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import json" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"INSTALLED VERSIONS\n", | |
"------------------\n", | |
"commit: None\n", | |
"python: 2.7.6.final.0\n", | |
"python-bits: 64\n", | |
"OS: Linux\n", | |
"OS-release: 3.19.0-58-generic\n", | |
"machine: x86_64\n", | |
"processor: x86_64\n", | |
"byteorder: little\n", | |
"LC_ALL: None\n", | |
"LANG: en_US.UTF-8\n", | |
"\n", | |
"pandas: 0.17.1\n", | |
"nose: 1.3.7\n", | |
"pip: 8.0.3\n", | |
"setuptools: 20.1.1\n", | |
"Cython: 0.23.4\n", | |
"numpy: 1.10.4\n", | |
"scipy: 0.17.0\n", | |
"statsmodels: None\n", | |
"IPython: 4.0.2\n", | |
"sphinx: None\n", | |
"patsy: None\n", | |
"dateutil: 2.4.2\n", | |
"pytz: 2015.7\n", | |
"blosc: 1.2.8\n", | |
"bottleneck: None\n", | |
"tables: 3.2.2\n", | |
"numexpr: 2.5.2\n", | |
"matplotlib: 1.5.1\n", | |
"openpyxl: None\n", | |
"xlrd: 0.9.4\n", | |
"xlwt: None\n", | |
"xlsxwriter: None\n", | |
"lxml: None\n", | |
"bs4: 4.4.1\n", | |
"html5lib: None\n", | |
"httplib2: None\n", | |
"apiclient: None\n", | |
"sqlalchemy: 1.0.5\n", | |
"pymysql: None\n", | |
"psycopg2: 2.6.1 (dt dec pq3 ext)\n", | |
"Jinja2: None\n" | |
] | |
} | |
], | |
"source": [ | |
"pd.show_versions()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Sample JSON Lines input containing non-ASCII (Unicode) characters" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{\"first_name\":\"Manuel\", \"last_name\":\"Garrido Peña\",\"experience\":[{\"name\":\"Peña Industries\", \"start\":\"May 2014\"}]}\r\n", | |
"{\"first_name\":\"Ana\", \"last_name\":\"Cañón\", \"experience\":[{\"name\":\"Cañón technologies\",\"start\":\"April 2016\"}]}\r\n" | |
] | |
} | |
], | |
"source": [ | |
"cat test.jl" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We load the JSON Lines file into a pandas DataFrame, then save it to CSV" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>experience</th>\n", | |
" <th>first_name</th>\n", | |
" <th>last_name</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>[{u'start': u'May 2014', u'name': u'Peña Indus...</td>\n", | |
" <td>Manuel</td>\n", | |
" <td>Garrido Peña</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>[{u'start': u'April 2016', u'name': u'Cañón te...</td>\n", | |
" <td>Ana</td>\n", | |
" <td>Cañón</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" experience first_name last_name\n", | |
"0 [{u'start': u'May 2014', u'name': u'Peña Indus... Manuel Garrido Peña\n", | |
"1 [{u'start': u'April 2016', u'name': u'Cañón te... Ana Cañón" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"with open('test.jl') as fname:\n", | |
" lines = fname.readlines()\n", | |
" df = pd.DataFrame([json.loads(l) for l in lines])\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 [{u'start': u'May 2014', u'name': u'Peña Indus...\n", | |
"1 [{u'start': u'April 2016', u'name': u'Cañón te...\n", | |
"Name: experience, dtype: object" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.experience" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df.to_csv('test_p2c.csv',index=False, encoding='utf8')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"experience,first_name,last_name\r\n", | |
"\"[{u'start': u'May 2014', u'name': u'Peña Industries'}]\",Manuel,Garrido Peña\r\n", | |
"\"[{u'start': u'April 2016', u'name': u'Cañón technologies'}]\",Ana,Cañón\r\n" | |
] | |
} | |
], | |
"source": [ | |
"cat test_p2c.csv" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/manuel/anaconda3/envs/exporters/lib/python2.7/site-packages/ipykernel/__main__.py:1: FutureWarning: 'engine' keyword is deprecated and will be removed in a future version\n", | |
" if __name__ == '__main__':\n" | |
] | |
} | |
], | |
"source": [ | |
"df.to_csv('test_p2p.csv',index=False, encoding='utf8',engine='python')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"experience,first_name,last_name\r\n", | |
"\"[{u'start': u'May 2014', u'name': u'Peña Industries'}]\",Manuel,Garrido Peña\r\n", | |
"\"[{u'start': u'April 2016', u'name': u'Cañón technologies'}]\",Ana,Cañón\r\n" | |
] | |
} | |
], | |
"source": [ | |
"cat test_p2p.csv" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"first_name":"Manuel", "last_name":"Garrido Peña","experience":[{"name":"Peña Industries", "start":"May 2014"}]} | |
{"first_name":"Ana", "last_name":"Cañón", "experience":[{"name":"Cañón technologies","start":"April 2016"}]} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
experience | first_name | last_name | |
---|---|---|---|
[{u'start': u'May 2014', u'name': u'Peña Industries'}] | Manuel | Garrido Peña | |
[{u'start': u'April 2016', u'name': u'Cañón technologies'}] | Ana | Cañón |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[{u'start': u'May 2014', u'name': u'Peña Industries'}] | Manuel | Garrido Peña | |
---|---|---|---|
[{u'start': u'April 2016', u'name': u'Cañón technologies'}] | Ana | Cañón |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
experience | first_name | last_name | |
---|---|---|---|
[{'name': 'Peña Industries', 'start': 'May 2014'}] | Manuel | Garrido Peña | |
[{'name': 'Cañón technologies', 'start': 'April 2016'}] | Ana | Cañón |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[{'name': 'Peña Industries', 'start': 'May 2014'}] | Manuel | Garrido Peña | |
---|---|---|---|
[{'name': 'Cañón technologies', 'start': 'April 2016'}] | Ana | Cañón |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment