Skip to content

Instantly share code, notes, and snippets.

@manugarri
Last active April 22, 2016 15:37
Show Gist options
  • Save manugarri/4c0d68f9c2c8434654402e6053620b68 to your computer and use it in GitHub Desktop.
Save manugarri/4c0d68f9c2c8434654402e6053620b68 to your computer and use it in GitHub Desktop.
Support for Python object CSV serialization in Python 2
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'2.7.6 |Continuum Analytics, Inc.| (default, May 27 2014, 14:50:58) \\n[GCC 4.1.2 20080704 (Red Hat 4.1.2-54)]'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sys\n",
"sys.version"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"INSTALLED VERSIONS\n",
"------------------\n",
"commit: None\n",
"python: 2.7.6.final.0\n",
"python-bits: 64\n",
"OS: Linux\n",
"OS-release: 3.19.0-58-generic\n",
"machine: x86_64\n",
"processor: x86_64\n",
"byteorder: little\n",
"LC_ALL: None\n",
"LANG: en_US.UTF-8\n",
"\n",
"pandas: 0.17.1\n",
"nose: 1.3.7\n",
"pip: 8.0.3\n",
"setuptools: 20.1.1\n",
"Cython: 0.23.4\n",
"numpy: 1.10.4\n",
"scipy: 0.17.0\n",
"statsmodels: None\n",
"IPython: 4.0.2\n",
"sphinx: None\n",
"patsy: None\n",
"dateutil: 2.4.2\n",
"pytz: 2015.7\n",
"blosc: 1.2.8\n",
"bottleneck: None\n",
"tables: 3.2.2\n",
"numexpr: 2.5.2\n",
"matplotlib: 1.5.1\n",
"openpyxl: None\n",
"xlrd: 0.9.4\n",
"xlwt: None\n",
"xlsxwriter: None\n",
"lxml: None\n",
"bs4: 4.4.1\n",
"html5lib: None\n",
"httplib2: None\n",
"apiclient: None\n",
"sqlalchemy: 1.0.5\n",
"pymysql: None\n",
"psycopg2: 2.6.1 (dt dec pq3 ext)\n",
"Jinja2: None\n"
]
}
],
"source": [
"pd.show_versions()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Unicode json lines"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"first_name\":\"Manuel\", \"last_name\":\"Garrido Peña\",\"experience\":[{\"name\":\"Peña Industries\", \"start\":\"May 2014\"}]}\r\n",
"{\"first_name\":\"Ana\", \"last_name\":\"Cañón\", \"experience\":[{\"name\":\"Cañón technologies\",\"start\":\"April 2016\"}]}\r\n"
]
}
],
"source": [
"cat test.jl"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We load the jl with pandas, then save to csv"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>experience</th>\n",
" <th>first_name</th>\n",
" <th>last_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>[{u'start': u'May 2014', u'name': u'Peña Indus...</td>\n",
" <td>Manuel</td>\n",
" <td>Garrido Peña</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>[{u'start': u'April 2016', u'name': u'Cañón te...</td>\n",
" <td>Ana</td>\n",
" <td>Cañón</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" experience first_name last_name\n",
"0 [{u'start': u'May 2014', u'name': u'Peña Indus... Manuel Garrido Peña\n",
"1 [{u'start': u'April 2016', u'name': u'Cañón te... Ana Cañón"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open('test.jl') as fname:\n",
" lines = fname.readlines()\n",
" df = pd.DataFrame([json.loads(l) for l in lines])\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 [{u'start': u'May 2014', u'name': u'Peña Indus...\n",
"1 [{u'start': u'April 2016', u'name': u'Cañón te...\n",
"Name: experience, dtype: object"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.experience"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df.to_csv('test_p2c.csv',index=False, encoding='utf8')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"experience,first_name,last_name\r\n",
"\"[{u'start': u'May 2014', u'name': u'Peña Industries'}]\",Manuel,Garrido Peña\r\n",
"\"[{u'start': u'April 2016', u'name': u'Cañón technologies'}]\",Ana,Cañón\r\n"
]
}
],
"source": [
"cat test_p2c.csv"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/manuel/anaconda3/envs/exporters/lib/python2.7/site-packages/ipykernel/__main__.py:1: FutureWarning: 'engine' keyword is deprecated and will be removed in a future version\n",
" if __name__ == '__main__':\n"
]
}
],
"source": [
"df.to_csv('test_p2p.csv',index=False, encoding='utf8',engine='python')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"experience,first_name,last_name\r\n",
"\"[{u'start': u'May 2014', u'name': u'Peña Industries'}]\",Manuel,Garrido Peña\r\n",
"\"[{u'start': u'April 2016', u'name': u'Cañón technologies'}]\",Ana,Cañón\r\n"
]
}
],
"source": [
"cat test_p2.csv"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
{"first_name":"Manuel", "last_name":"Garrido Peña","experience":[{"name":"Peña Industries", "start":"May 2014"}]}
{"first_name":"Ana", "last_name":"Cañón", "experience":[{"name":"Cañón technologies","start":"April 2016"}]}
experience first_name last_name
[{u'start': u'May 2014', u'name': u'Peña Industries'}] Manuel Garrido Peña
[{u'start': u'April 2016', u'name': u'Cañón technologies'}] Ana Cañón
[{u'start': u'May 2014', u'name': u'Peña Industries'}] Manuel Garrido Peña
[{u'start': u'April 2016', u'name': u'Cañón technologies'}] Ana Cañón
experience first_name last_name
[{'name': 'Peña Industries', 'start': 'May 2014'}] Manuel Garrido Peña
[{'name': 'Cañón technologies', 'start': 'April 2016'}] Ana Cañón
[{'name': 'Peña Industries', 'start': 'May 2014'}] Manuel Garrido Peña
[{'name': 'Cañón technologies', 'start': 'April 2016'}] Ana Cañón
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment