Forked from psychemedia/googPlusFrFo-preliminarySketch.py
Created October 20, 2011 17:02
First doodling around a Google Plus friends/followers network grapher
# ABOUT:
# A script that grabs a list of the friends or followers of one or more folk on Google+,
# grabs a sample of their friends, and generates the resulting social graph
# USAGE:
# Requirements: Python 2 (the script uses print statements, urllib2 and xrange)
#               and networkx (see DEPENDENCIES)
# Configuration: see CONFIGURATION
# Output: files will be saved to the reports directory
# To run the script:
# 1) Download this file to a directory somewhere as e.g. googPlusFrFo-preliminarySketch.py
# 2) cd to the directory
# 3) *The first time*, run the following from the command line: mkdir reports; mkdir cache
#    (the script will also create these directories itself if they are missing)
# 4) Call the script by running the following from the command line:
#    python googPlusFrFo-preliminarySketch.py
# DEPENDENCIES
# The script makes use of the networkx library; you should only need to install it once.
# To install networkx, from the command line type: easy_install networkx
# If that doesn't work, follow the instructions on http://networkx.lanl.gov/install.html
# In short: a) download and unzip http://networkx.lanl.gov/download/networkx/networkx-1.5.zip
#           b) cd to the networkx-1.5 directory, c) type: python setup.py install
# END DEPENDENCIES
import networkx as nx
#--- the following should already be available (Python 2 standard library)
import urllib2,re
try: import simplejson as json
except ImportError: import json
try: from hashlib import md5        # the md5 module is deprecated; prefer hashlib where available
except ImportError: from md5 import new as md5
import urllib,os,sys,tempfile,time
import random
import datetime
# CONFIGURATION
#gPlusIDs - a comma separated list of Google+ IDs. I'm just doing one below...
gPlusIDs=['100095426689697101649']
#name - is the slug in the filename the graph data will be saved to
name='tonyHirst'
# cache time in seconds; if a file is cached and not older than cachetime, that data will be used
defCache=36000
# Some folk have a lot of friends. The Google Social API only seems to let you grab 15 names at a time,
# so to limit API calls - and the time the script takes to run - I only grab sampleSize random
# friends or followers of the target account(s) to construct the graph
sampleSize=90
# Do we want to map the social connections between the friends of the friends ('fr') of the
# target account(s) or the friends of the followers ('fo') of the target account(s)?
typ='fo'
# END CONFIGURATION
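# For example, a hypothetical multi-account run might look like this (the second
# ID below is the one that appears in the getuserName() example comment later on):
#gPlusIDs=['100095426689697101649','104253436939071070140']
#name='multiAccountRun'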
#----
# Do some checks...
def checkDir(dirpath):
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)
checkDir('reports')
checkDir('cache')
#---
DG=nx.DiGraph()
#friends
#https://plus.google.com/u/0/_/socialgraph/lookup/visible/?o=%5Bnull%2Cnull%2C%22GOOGLEPLUSUSERID%22%5D&rt=j
#followers
#https://plus.google.com/u/0/_/socialgraph/lookup/incoming/?o=%5Bnull%2Cnull%2C%22GOOGLEPLUSUSERID%22%5D&n=1000&rt=j
#----------------------------------------------------------------
#Yield successive n-sized chunks from l
def chunks(l, n):
    for i in xrange(0, len(l), n):
        yield l[i:i+n]
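# e.g. a quick sanity check of chunks():
# >>> list(chunks(['a','b','c','d','e'], 2))
# [['a', 'b'], ['c', 'd'], ['e']]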
def report(m, verbose=False):
    if verbose is True:
        print m

class DiskCacheFetcherfname:
    def __init__(self, cache_dir=None):
        # If no cache directory specified, use system temp directory
        if cache_dir is None:
            cache_dir = tempfile.gettempdir()
        self.cache_dir = cache_dir
    def fetch(self, url, max_age=0):
        # Use MD5 hash of the URL as the filename
        filename = md5(url).hexdigest()
        filepath = os.path.join(self.cache_dir, filename)
        if os.path.exists(filepath):
            if int(time.time()) - os.path.getmtime(filepath) < max_age:
                #return open(filepath).read()
                report("using "+filename+", cached copy of fetched url: "+url)
                return filepath
        report("fetching fresh copy of url: "+url)
        # Retrieve over HTTP and cache, using rename to avoid collisions
        data = urllib.urlopen(url).read()
        fd, temppath = tempfile.mkstemp()
        fp = os.fdopen(fd, 'w')
        fp.write(data)
        fp.close()
        os.rename(temppath, filepath)
        return filepath
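# A minimal usage sketch for the fetcher (the URL is illustrative): the cache
# filename is the MD5 hex digest of the URL, so a repeat fetch of the same URL
# within max_age seconds is served from disk rather than over HTTP.
# fetcher = DiskCacheFetcherfname('cache')
# path = fetcher.fetch('http://example.com/', max_age=3600)
# print open(path).read()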
def getGenericCachedData(url, cachetime=36000):
    fetcher=DiskCacheFetcherfname('cache')
    fn=fetcher.fetch(url, cachetime)
    f=open(fn)
    data=f.read()
    f.close()
    #print 'data----',data
    #jdata=json.loads(data)
    return data #jdata

def getuserName(oid):
    #http://socialgraph.apis.google.com/lookup?q=https%3A%2F%2Fplus.google.com%2F104253436939071070140%2F&pretty=1&callback=
    url='http://socialgraph.apis.google.com/lookup?q=https%3A%2F%2Fplus.google.com%2F'+oid+'%2F&callback='
    xdata=getGenericCachedData(url)
    data=json.loads(xdata)
    #data=json.load(urllib2.urlopen(url))
    uid='http://profiles.google.com/'+str(oid)
    if uid in data['nodes']:
        name=data['nodes'][uid]['attributes']['fn']
    else: name=''
    return name
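# Illustrative (not verbatim) shape of the Social Graph API response parsed above;
# the live payload carries more fields, but the lookup path used is
# nodes -> profile URL -> attributes -> fn:
# {"nodes": {"http://profiles.google.com/104253436939071070140":
#            {"attributes": {"fn": "Display Name"}}}}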
def getUserNames(oids,namelookup={}):
    oidlookup=[]
    for oid in oids:
        if oid not in namelookup and oid not in oidlookup: oidlookup.append(oid)
    oidblocks=chunks(oidlookup,15)
    for oidblock in oidblocks:
        encoids='%2Chttps%3A%2F%2Fplus.google.com%2F'.join(oidblock)
        #print encoids
        url='http://socialgraph.apis.google.com/lookup?q=https%3A%2F%2Fplus.google.com%2F'+encoids+'%2F&callback='
        #data=json.load(urllib2.urlopen(url))
        xdata=getGenericCachedData(url)
        try:
            data=json.loads(xdata)
        except ValueError:
            # Response wasn't parseable as JSON - fall back to an empty node set
            print '********SOME ERROR******'
            data={}
            data['nodes']=[]
        for oid in oidblock:
            uid='http://profiles.google.com/'+str(oid)
            if uid in data['nodes']:
                try:
                    namelookup[oid]=data['nodes'][uid]['attributes']['fn']
                except KeyError:
                    namelookup[oid]=''
            else: namelookup[oid]=''
    return namelookup
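# e.g. getUserNames(['100095426689697101649']) should return something like
# {'100095426689697101649': 'Tony Hirst'} if the lookup succeeds; IDs that
# can't be resolved map to the empty string.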
#---
#based on http://html5example.net/entry/tutorial/simple-python-google-plus-api
def getoids(oid,typ='fr'):
    oids = []
    if typ=='fr':
        url='https://plus.google.com/u/0/_/socialgraph/lookup/visible/?o=%5Bnull%2Cnull%2C%22'+oid+'%22%5D&rt=j'
    elif typ=='fo':
        url='https://plus.google.com/u/0/_/socialgraph/lookup/incoming/?o=%5Bnull%2Cnull%2C%22'+oid+'%22%5D&n=1000&rt=j'
    else:
        sys.exit(-1)
    #req = urllib2.Request(url)
    #response = urllib2.urlopen(req)
    #data = response.read()
    print 'Fetching',url
    data=getGenericCachedData(url,defCache)
    #print data
    reobj = re.compile(r'[0-9]{21}')
    oids = reobj.findall(data)
    oids = list(set(oids))
    return oids
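# Note: rather than parsing the JSON-ish payload properly, getoids() just regexes
# out every 21-digit number (Google+ profile IDs are 21 digits long) and dedupes
# via set(), e.g.:
# >>> re.findall(r'[0-9]{21}', '..."100095426689697101649"...')
# ['100095426689697101649']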
#---
def addDirectedEdges(DG,fromNode,toSet):
    for toNode in toSet:
        DG.add_edge(fromNode,toNode)
    return DG

def labelNodes(G,names):
    for nodeID in G.node:
        # Use a lookup with a default so unlabelled nodes don't raise a KeyError
        G.node[nodeID]['label']=names.get(nodeID,'')
    return G
oidNames={}
for id in gPlusIDs:
    print 'Top level run: getting',typ,id,getuserName(id)
    oidNames[id]=getuserName(id)
    oids=getoids(id, typ)
    #if len(oids)>sampleSize:
    #    oidsSample=random.sample(oids, sampleSize)
    addDirectedEdges(DG, id, oids)
    oidNames=getUserNames(oids,oidNames)
    count=1
    fsize=len(oids)
    for oid in oids:
        print '\tSub-level run: getting fr',oid,oidNames[oid],count,'of',fsize
        foids=getoids(oid)
        todo=len(foids)
        if todo>sampleSize:
            print oidNames[oid],'has too many fr','so using a sample of',sampleSize,'instead'
            foids=random.sample(foids, sampleSize)
        addDirectedEdges(DG, oid, foids)
        oidNames=getUserNames(foids,oidNames)
        count=count+1

DG=labelNodes(DG,oidNames)
print nx.info(DG)
now = datetime.datetime.now()
ts = now.strftime("_%Y-%m-%d-%H-%M-%S")
nx.write_graphml(DG, '/'.join(['reports',name+'_google'+typ+'Friends_'+ts+".graphml"]))
nx.write_edgelist(DG, '/'.join(['reports',name+'_google'+typ+'Friends_'+ts+".txt"]),data=False)
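# A minimal sketch for pulling a saved graph back into networkx later (the
# filename is illustrative - use whatever timestamped file your run produced;
# the .graphml file can also be opened directly in a tool such as Gephi):
#
# import networkx as nx
# G = nx.read_graphml('reports/tonyHirst_googlefoFriends__2011-10-20-17-02-00.graphml')
# print nx.info(G)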