Created
April 26, 2015 23:44
-
-
Save aguestuser/854245d7221545a0e9ae to your computer and use it in GitHub Desktop.
Matthew's Script for Calculating Centrality among LS entities
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import MySQLdb as my | |
import networkx as nx | |
import csv | |
from IPython import embed | |
db = my.connect(host='localhost', user='littlesis', passwd='midm681_beet', db='littlesis') | |
cur = db.cursor(my.cursors.DictCursor) | |
# all positions | |
# sql = "SELECT l.entity1_id, l.entity2_id, l.category_id, r.is_current, e1.name AS name1, e2.name AS name2 FROM link l LEFT JOIN relationship r ON (r.id = l.relationship_id) LEFT JOIN entity e1 on (e1.id = l.entity1_id) LEFT JOIN entity e2 ON (e2.id = l.entity2_id) WHERE l.category_id = 1 AND l.is_reverse = 0" | |
# all board memberships | |
sql = "SELECT l.entity1_id, l.entity2_id, l.category_id, r.is_current, e1.name AS name1, e2.name AS name2, e1.primary_ext AS primary_ext1, e2.primary_ext AS primary_ext2 FROM link l LEFT JOIN relationship r ON (r.id = l.relationship_id) LEFT JOIN position p ON (p.relationship_id = l.relationship_id) LEFT JOIN entity e1 on (e1.id = l.entity1_id) LEFT JOIN entity e2 ON (e2.id = l.entity2_id) WHERE l.category_id = 1 AND l.is_reverse = 0 AND p.is_board = 1" | |
# all positions of people with positions in fortune 1000 companies | |
# first get fortune 1000 companies | |
# sql = "SELECT DISTINCT(le.entity_id) FROM ls_list_entity le WHERE le.list_id = 110 AND le.is_deleted = 0" | |
# cur.execute(sql) | |
# company_ids = [str(row['entity_id']) for row in cur.fetchall()] | |
# # then get all people with positions in fortune 1000 companies | |
# sql = "SELECT DISTINCT(r.entity1_id) FROM relationship r WHERE r.entity2_id IN (" + ",".join(company_ids) + ") AND r.category_id = 1 AND r.is_deleted = 0" | |
# cur.execute(sql) | |
# entity_ids = [str(row['entity1_id']) for row in cur.fetchall()] | |
# # finally get all positions of those people | |
# sql = "SELECT l.entity1_id, l.entity2_id, l.category_id, r.is_current, e1.name AS name1, e2.name AS name2 FROM link l LEFT JOIN relationship r ON (r.id = l.relationship_id) LEFT JOIN entity e1 on (e1.id = l.entity1_id) LEFT JOIN entity e2 ON (e2.id = l.entity2_id) WHERE l.entity1_id IN (" + ",".join(entity_ids) + ") AND l.category_id = 1 AND l.is_reverse = 0 AND r.is_deleted = 0" | |
cur.execute(sql) | |
links = cur.fetchall() | |
names = {} | |
exts = {} | |
g = nx.Graph() | |
# category_weights = { | |
# '1': 1, | |
# '2': 0.8, | |
# '3': 0.5, | |
# '4': 1, | |
# '5': 0.8, | |
# '6': 1, | |
# '7': 1` | |
# } | |
for i in range(len(links)): | |
link = links[i] | |
id1 = str(link['entity1_id']) | |
id2 = str(link['entity2_id']) | |
name1 = str(link['name1']) | |
name2 = str(link['name2']) | |
names[id1] = name1 | |
names[id2] = name2 | |
ext1 = str(link['primary_ext1']) | |
ext2 = str(link['primary_ext2']) | |
exts[id1] = ext1 | |
exts[id2] = ext2 | |
g.add_node(id1, name=name1) | |
g.add_node(id2, name=name2) | |
weight = (1 if link['is_current'] == '1' else 0.5) | |
g.add_edge(id1, id2, weight=weight) | |
nodes = nx.betweenness_centrality(g, weight='weight') | |
ids = sorted(nodes, key=nodes.get, reverse=True) | |
with open(sys.argv[1], 'wb') as csvfile: | |
writer = csv.writer(csvfile) | |
for id in ids: | |
writer.writerow([id, names[id], exts[id], nodes[id]]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment