Skip to content

Instantly share code, notes, and snippets.

@cbare
Created January 31, 2013 21:33
Show Gist options
  • Select an option

  • Save cbare/4686667 to your computer and use it in GitHub Desktop.

Select an option

Save cbare/4686667 to your computer and use it in GitHub Desktop.
Gather statistics on public data sets in Sage Synapse
import synapseclient
import re, sys
import requests
# Create the Synapse client and log in; the session token obtained here is
# reused below for the raw REST calls made through `requests`.
syn = synapseclient.Synapse()
# NOTE(review): credentials are hard-coded. The address looks like a redaction
# placeholder rather than a real login -- confirm and supply real credentials.
syn.login('[email protected]', 'secret')
# Base URL prepended to every REST path requested by the helpers below.
repoEndPoint = 'https://repo-prod.prod.sagebase.org/repo/v1'
# Headers sent with each REST request: ask for JSON and pass the session token.
headers = {'Accept': 'application/json', 'sessionToken': syn.sessionToken}
# Display names that the inventory loop at the bottom of the script skips
# (presumably Sage staff accounts -- confirm).
sage_users = ["earthlingzephyr", "isjang", "xschildwachter",
"bennett.k.ng", "bruce_hoff", "mikerkellen",
"cbare", "metteptrs", "matthew.furia", "laramangravite",
"nicole.deflaux.guest", "Nicole Deflaux gmail"]
def get_all_users():
    """Fetch the user listing from the repository service.

    Issues one GET for ``/user`` with ``offset=0`` and ``limit=10000`` and
    returns the ``results`` member of the decoded JSON reply.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    listing_url = repoEndPoint + '/user?offset=0&limit=10000'
    reply = requests.get(listing_url, headers=headers)
    reply.raise_for_status()
    payload = reply.json()
    return payload['results']
def getUserProfile(ownerId):
    """Return the decoded JSON user profile for ``ownerId``.

    Args:
        ownerId: identifier placed (via ``str``) at the end of the
            ``/userProfile/`` REST path.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    profile_url = '/'.join((repoEndPoint, 'userProfile', str(ownerId)))
    reply = requests.get(profile_url, headers=headers)
    reply.raise_for_status()
    return reply.json()
def get_user_entities(user, entity_type):
    """Return the entities of ``entity_type`` created by ``user``.

    Runs a Synapse query (capped at 10000 rows) selecting ``id``, ``name`` and
    ``parentId`` for entities whose ``createdByPrincipalId`` equals
    ``user['ownerId']``. Result keys that carry an ``"<entity_type>."``
    prefix (for example ``data.id``) are renamed to the bare field name; any
    other key is kept unchanged.

    Args:
        user: mapping with an ``'ownerId'`` entry.
        entity_type: entity type name used both in the query's ``from``
            clause and as the key prefix to strip.

    Returns:
        A list of dicts, one per query result row, with un-prefixed keys.
    """
    query = ('select id, name, parentId from %s '
             'where createdByPrincipalId == %s limit 10000'
             % (entity_type, user['ownerId']))
    results = syn.query(query)

    # Strip the prefix literally instead of building a regex from
    # entity_type for every key: no escaping issues, no repeated compilation.
    prefix = entity_type + '.'
    fixed_results = []
    for row in results['results']:
        cleaned = {}
        # .items() works on Python 2 and 3; .iteritems() is Python 2 only.
        for key, value in row.items():
            if key.startswith(prefix):
                cleaned[key[len(prefix):]] = value
            else:
                cleaned[key] = value
        fixed_results.append(cleaned)
    return fixed_results
def get_acl(entity):
    """Return the ACL document that applies to ``entity``.

    Two REST calls are made: ``/entity/<id>/benefactor`` to find the
    benefactor's id, then ``/entity/<benefactor>/acl`` to fetch that
    benefactor's ACL (presumably the entity the ACL is inherited from --
    confirm against the Synapse REST docs).

    Args:
        entity: either a mapping with an ``'id'`` entry or a bare entity id
            (string, int, ...), which is converted with ``str``.

    Returns:
        The decoded JSON ACL.

    Raises:
        requests.HTTPError: if either request returns an error status.
    """
    # Subscript first and fall back on failure. The previous
    # ``'id' in entity`` test raised TypeError for int ids and performed a
    # substring test for string ids, so the fallback was not reliable.
    try:
        entity_id = entity['id']
    except (TypeError, KeyError):
        entity_id = str(entity)

    url = '%s/entity/%s/benefactor' % (repoEndPoint, entity_id)
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    benefactor = response.json()['id']

    url = '%s/entity/%s/acl' % (repoEndPoint, benefactor)
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()
def classify_sage_user(user):
    """Tell whether the profile e-mail of ``user`` ends with ``sagebase.org``.

    The profile is fetched with ``getUserProfile(user['ownerId'])``. A profile
    without an ``'email'`` entry yields ``False``.
    """
    profile = getUserProfile(user['ownerId'])
    if 'email' not in profile:
        return False
    return profile['email'].endswith('sagebase.org')
def classify_by_access(acl):
    """Classify an ACL as ``'public'`` or ``'auth_users'``.

    Args:
        acl: mapping whose ``'resourceAccess'`` entry is an iterable of
            grants, each with ``'principalId'`` and ``'accessType'`` entries.

    Returns:
        ``'public'`` when at least one grant for principal 273948 or 273949
        includes ``'READ'``; ``'auth_users'`` otherwise (including when there
        are no grants at all).

    Raises:
        KeyError: if ``acl`` or an inspected grant lacks an expected entry.
    """
    # NOTE(review): presumably Synapse's built-in group principals -- confirm
    # which id is which before relying on the returned label.
    open_principals = (273948, 273949)
    is_open = any(
        grant['principalId'] in open_principals
        and 'READ' in grant['accessType']
        for grant in acl['resourceAccess']
    )
    return 'public' if is_open else 'auth_users'
def quote(string):
    """Return ``string`` wrapped in double quotes for use as a CSV field.

    Embedded double quotes are escaped by doubling them, the convention CSV
    readers expect. The previous implementation produced shell-style
    concatenation (``"a"\\""b"`` for ``a"b``), which CSV parsers cannot read
    back as a single field.

    Args:
        string: the text to quote.

    Returns:
        The quoted text; an empty input yields ``""``.
    """
    return '"' + string.replace('"', '""') + '"'
# Build the inventory: for every user that is not filtered out as Sage staff,
# list the 'data' entities they created together with an access
# classification. Each row is echoed to stdout and written to
# synapse_public_inventory.csv.
all_users = get_all_users()
lines = []
user_entities = {}
# `with` closes the file even if the loop is interrupted part-way through.
with open('synapse_public_inventory.csv', 'w') as f:
    for user in all_users:
        # Step 1: skip Sage accounts and attach an e-mail to the user record.
        # Any failure here is best-effort: the user is kept with an empty
        # e-mail. `except Exception` (not a bare except) lets Ctrl-C and
        # SystemExit through.
        try:
            if user['displayName'] in sage_users:
                continue
            profile = getUserProfile(user['ownerId'])
            email = profile.get('email', '')
            if email.endswith('sagebase.org'):
                continue
            user['email'] = email
        except Exception:
            user['email'] = ''

        # Step 2: list the user's entities and classify each one's ACL.
        try:
            entities = get_user_entities(user, 'data')
            user_entities[user['ownerId']] = entities
            for entity in entities:
                try:
                    entity['is-open'] = classify_by_access(get_acl(entity))
                except Exception:
                    # Keep the row, but mark the access level as unknown.
                    entity['is-open'] = '??'
                    sys.stderr.write('couldn\'t get acl for entity %s\n' % (str(entity['id']),))
                line = ', '.join([str(a) for a in (
                    user['ownerId'],
                    quote(user['displayName']),
                    user['email'],
                    entity['id'],
                    entity['parentId'],
                    quote(entity['name']),
                    entity['is-open'],
                )])
                lines.append(line)
                # Same output as the Python 2 `print line`, but also valid
                # on Python 3.
                sys.stdout.write(line + '\n')
                f.write(line)
                f.write('\n')
        except Exception:
            sys.stderr.write('couldn\'t get entities for user %s\n' % (str(user),))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment