Skip to content

Instantly share code, notes, and snippets.

@tdhopper
Created March 20, 2013 14:17
Show Gist options
  • Select an option

  • Save tdhopper/5204994 to your computer and use it in GitHub Desktop.

Select an option

Save tdhopper/5204994 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": "JSON Data"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import simplejson\n",
"import pandas as pd"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import re\n",
"\n",
"# Read the dump inside a with-block so the file handle is always closed.\n",
"with open(\"Match_extracted_times_e-cigs1-NGL.json\", \"r\") as f:\n",
"    jfile = f.readlines()\n",
"\n",
"# Only the first line is used. It is expected to hold JSON objects written\n",
"# back to back with nothing between them, so a throwaway marker is inserted\n",
"# at every object boundary and the line is split on that marker.\n",
"# NOTE(review): a literal brace pair inside a string value would also be\n",
"# treated as a boundary -- confirm the data never contains one.\n",
"jfixed = re.sub(r\"\\}\\{\", \"}#@#@#@#{\", jfile[0]).split(\"#@#@#@#\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def parse(j):\n",
"    \"\"\"Flatten one decoded record dict in place and return it.\n",
"\n",
"    Adds top-level longitude and latitude keys taken from the nested\n",
"    coordinates entry (both None when that lookup fails), copies five\n",
"    fields of the nested user entry to top-level keys, and removes the\n",
"    nested coordinates, entities, user, place and geo entries together\n",
"    with a few optional keys.\n",
"\n",
"    Args:\n",
"        j: dict for a single record; it is mutated.\n",
"\n",
"    Returns:\n",
"        The same dict object, flattened.\n",
"\n",
"    Raises:\n",
"        KeyError: if entities, user, place or geo is missing, or if the\n",
"            user entry lacks one of the copied fields.\n",
"    \"\"\"\n",
"    try:\n",
"        point = j[\"coordinates\"][\"coordinates\"]\n",
"        # Both values are read before either key is written, so a failure\n",
"        # never leaves just one of them set.\n",
"        j[\"longitude\"], j[\"latitude\"] = point[0], point[1]\n",
"    except (KeyError, TypeError, IndexError):\n",
"        # Entry missing, None, or too short: record no position.\n",
"        j[\"longitude\"] = None\n",
"        j[\"latitude\"] = None\n",
"    # pop() instead of del: a record without the key was just handled above\n",
"    # and must not fail here.\n",
"    j.pop(\"coordinates\", None)\n",
"    del j[\"entities\"]\n",
"\n",
"    user = j[\"user\"]\n",
"    j[\"user_description\"] = user[\"description\"]\n",
"    j[\"friends_count\"] = user[\"friends_count\"]\n",
"    j[\"following\"] = user[\"following\"]\n",
"    j[\"screen_name\"] = user[\"screen_name\"]\n",
"    j[\"user_default_profile\"] = user[\"default_profile\"]\n",
"    del j[\"user\"]\n",
"    del j[\"place\"]\n",
"    del j[\"geo\"]\n",
"\n",
"    # These keys are not present on every record; drop them when they are.\n",
"    optional_keys = (\n",
"        \"retweeted_status\",\n",
"        \"possibly_sensitive\",\n",
"        \"lang\",\n",
"        \"possibly_sensitive_editable\",\n",
"    )\n",
"    for key in optional_keys:\n",
"        j.pop(key, None)\n",
"    return j"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Build one list per field (column), with one entry per record (row).\n",
"d = {}\n",
"for row, j in enumerate(jfixed):\n",
"    jdict = parse(simplejson.loads(j))\n",
"    # Open a column for every key seen for the first time, back-filled with\n",
"    # None for the earlier records so that all columns keep the same length.\n",
"    for key in jdict:\n",
"        if key not in d:\n",
"            d[key] = [None] * row\n",
"    # Append to every known column; a key absent from this record gets None.\n",
"    for key, column in d.items():\n",
"        value = jdict.get(key)\n",
"        try:\n",
"            # Drop non-ASCII characters from text, then decode again so the\n",
"            # column holds text rather than bytes on Python 3.\n",
"            value = value.encode(\"ascii\", \"ignore\").decode(\"ascii\")\n",
"        except (AttributeError, UnicodeError):\n",
"            # Not encodable text (number, bool, None, ...): keep it as is.\n",
"            pass\n",
"        column.append(value)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Turn the column lists into a table: one column per key, one row per record.\n",
"df = pd.DataFrame.from_dict(d)\n",
"# Pass the path rather than an open() handle so that pandas opens and\n",
"# closes the file itself; the handle was previously never closed.\n",
"df.to_csv(\"json_to_csv.csv\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment