Last active
November 16, 2017 21:47
-
-
Save christopherkullenberg/a6cda0fa7deaa39a86b8e0a43e23c103 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"import pandas as pd\n", | |
"import networkx as nx\n", | |
"import re" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"If the JSON file fails to parse, wrap the load in a `try`/`except` like this:\n", | |
"```python\n", | |
"try:\n", | |
" jsonobject = json.load(jsonfile)\n", | |
"except json.JSONDecodeError:\n", | |
" print(\"error\") # there is just one error in the beginning of file\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true, | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"sourcefile = \"fotografiska.json\"\n", | |
"# Use a context manager so the file handle is closed even if parsing fails.\n", | |
"# Read explicitly as UTF-8 instead of relying on the platform default encoding.\n", | |
"with open(sourcefile, encoding=\"utf-8\") as jsonfile:\n", | |
"    jsonobject = json.load(jsonfile)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def commentstodataframe(jsonobject):\n", | |
"    '''Collect the comments of every post into a dataframe.\n", | |
"\n", | |
"    jsonobject is an iterable of post dicts. Each post must have a\n", | |
"    'shortcode'; its comments are read from post['comments']['data'],\n", | |
"    where every comment has an 'owner' dict with a 'username' and a\n", | |
"    'text'. Posts without a usable comment list get an empty list\n", | |
"    instead of raising.\n", | |
"\n", | |
"    Returns a dataframe with the columns 'id' (the shortcode), 'url'\n", | |
"    (link to the post) and 'comments' (list of (username, text)\n", | |
"    tuples). The total number of comments is printed as a side effect.\n", | |
"    '''\n", | |
"    commentcounter = 0\n", | |
"    thedata = []\n", | |
"    for post in jsonobject:\n", | |
"        postid = post['shortcode']\n", | |
"        # 'comments' (or its 'data' list) may be missing or null.\n", | |
"        rawcomments = (post.get('comments') or {}).get('data') or []\n", | |
"        commentlist = [(c['owner']['username'], c['text'])\n", | |
"                       for c in rawcomments]\n", | |
"        commentcounter += len(commentlist)\n", | |
"        thedata.append({\n", | |
"            'id': postid,\n", | |
"            'url': 'https://www.instagram.com/p/' + postid,\n", | |
"            'comments': commentlist,\n", | |
"        })\n", | |
"    print(\"Number of comments added to dataframe: \" + str(commentcounter))\n", | |
"    return pd.DataFrame(thedata, columns=['id', 'url', 'comments'])\n", | |
" \n", | |
"# Build the comments dataframe from the JSON object parsed earlier in the notebook.\n", | |
"df = commentstodataframe(jsonobject)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Print each post's URL followed by its (username, text) comment pairs.\n", | |
"# Columns are accessed by label: integer keys on a label-indexed Series\n", | |
"# are deprecated in pandas and will stop meaning 'position'.\n", | |
"for _, post in df.iterrows():\n", | |
"    print(\"\\n\")\n", | |
"    print(post[\"url\"])\n", | |
"    for username, text in post[\"comments\"]:\n", | |
"        print(username, text)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def makepostdirectednetwork(df, outfile=None):\n", | |
"    '''Make a directed network with edges from users to posts.\n", | |
"\n", | |
"    Every comment adds an edge from the commenting user to the post\n", | |
"    it was written on. Can be visualised as an in- or outdegree\n", | |
"    network depending on your question.\n", | |
"\n", | |
"    df needs an 'id' column (post id) and a 'comments' column holding\n", | |
"    a list of (username, text) tuples per post. The graph is written\n", | |
"    as GEXF to outfile, which defaults to the global sourcefile with\n", | |
"    'postdirected.gexf' appended. Summary counts are printed and\n", | |
"    nothing is returned.\n", | |
"    '''\n", | |
"    if outfile is None:\n", | |
"        outfile = sourcefile + \"postdirected.gexf\"\n", | |
"    G = nx.DiGraph()\n", | |
"    postcounter = 0\n", | |
"    interactionscounter = 0\n", | |
"    users = set()\n", | |
"    # Access columns by label; integer keys on a label-indexed Series\n", | |
"    # are deprecated in pandas.\n", | |
"    for _, row in df.iterrows():\n", | |
"        postid = row['id']\n", | |
"        postcounter += 1\n", | |
"        for username, _text in row['comments']:\n", | |
"            interactionscounter += 1\n", | |
"            users.add(username)\n", | |
"            # A DiGraph keeps one edge per (user, post) pair, so repeated\n", | |
"            # comments are counted above but do not add parallel edges.\n", | |
"            G.add_edge(username, postid)  # direction of graph, from user to post\n", | |
"    nx.write_gexf(G, outfile)\n", | |
"    print(\"Posts: \" + str(postcounter))\n", | |
"    print(\"Interactions (incl. multiple interactions with same post): \"\n", | |
"          + str(interactionscounter))\n", | |
"    print(\"Unique users: \" + str(len(users)))\n", | |
"\n", | |
"\n", | |
"makepostdirectednetwork(df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def makeuserdirectednetwork(df):\n", | |
"    '''Make a directed network of users pinging each other with @.\n", | |
"\n", | |
"    For every comment, each @mention found in the text adds an edge\n", | |
"    from the comment author to the mentioned name. A MultiDiGraph is\n", | |
"    used, so repeated mentions become parallel edges.\n", | |
"\n", | |
"    df needs a 'comments' column holding a list of (username, text)\n", | |
"    tuples per post. The graph is written as GEXF to the global\n", | |
"    sourcefile with 'userdirected.gexf' appended. Summary counts are\n", | |
"    printed and nothing is returned.\n", | |
"    '''\n", | |
"    # Assumes Instagram's username alphabet: letters, digits, '_' and '.'.\n", | |
"    # A mention may not end on '.', so sentence punctuation is not\n", | |
"    # captured, and no trailing whitespace is required, so a mention at\n", | |
"    # the very end of a comment is found. The lookbehind skips '@'\n", | |
"    # inside words such as e-mail addresses.\n", | |
"    mentionpattern = re.compile(\n", | |
"        r\"(?<!\\w)@([A-Za-z0-9_](?:[A-Za-z0-9_.]*[A-Za-z0-9_])?)\")\n", | |
"    G = nx.MultiDiGraph()\n", | |
"    interactionscounter = 0\n", | |
"    users = set()\n", | |
"    # Access the column by label; integer keys on a label-indexed Series\n", | |
"    # are deprecated in pandas.\n", | |
"    for _, row in df.iterrows():\n", | |
"        for source, text in row['comments']:\n", | |
"            for target in mentionpattern.findall(text):\n", | |
"                interactionscounter += 1\n", | |
"                G.add_edge(source, target)\n", | |
"                users.update((source, target))\n", | |
"\n", | |
"    nx.write_gexf(G, sourcefile + \"userdirected.gexf\")\n", | |
"    print(\"Number of interactions: \" + str(interactionscounter))\n", | |
"    print(\"Number of unique users: \" + str(len(users)))\n", | |
"    # nx.info() was removed in NetworkX 3.0; report the key figures with\n", | |
"    # graph methods instead.\n", | |
"    print(\"Type: \" + type(G).__name__)\n", | |
"    print(\"Number of nodes: \" + str(G.number_of_nodes()))\n", | |
"    print(\"Number of edges: \" + str(G.number_of_edges()))\n", | |
"\n", | |
"\n", | |
"makeuserdirectednetwork(df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [default]", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment