Created
January 31, 2013 21:33
-
-
Save cbare/4686667 to your computer and use it in GitHub Desktop.
Gather statistics on public data sets in Sage Synapse
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import synapseclient | |
| import re, sys | |
| import requests | |
# Synapse client session; its session token is reused below for the REST
# calls that are made directly through `requests`.
# NOTE(review): the login credentials are hard-coded in this file --
# consider loading them from the environment or a config file instead.
syn = synapseclient.Synapse()
syn.login('[email protected]', 'secret')

# Base URL that every direct REST request in this script is built from.
repoEndPoint = 'https://repo-prod.prod.sagebase.org/repo/v1'

# Headers sent with every direct REST request.
headers = {
    'Accept': 'application/json',
    'sessionToken': syn.sessionToken,
}

# Display names of the accounts that the inventory loop skips.
sage_users = [
    "earthlingzephyr",
    "isjang",
    "xschildwachter",
    "bennett.k.ng",
    "bruce_hoff",
    "mikerkellen",
    "cbare",
    "metteptrs",
    "matthew.furia",
    "laramangravite",
    "nicole.deflaux.guest",
    "Nicole Deflaux gmail",
]
def get_all_users():
    """Fetch the user list from the repository REST service.

    Issues one GET for ``/user`` with ``offset=0`` and ``limit=10000`` and
    returns the ``results`` entry of the decoded JSON body.

    Raises:
        Whatever ``raise_for_status()`` raises for a non-success response.
    """
    url = repoEndPoint + '/user?offset=0&limit=10000'
    reply = requests.get(url, headers=headers)
    reply.raise_for_status()
    payload = reply.json()
    return payload['results']
def getUserProfile(ownerId):
    """Return the decoded JSON user profile for *ownerId*.

    Args:
        ownerId: identifier placed (via ``str()``) at the end of the
            ``/userProfile/`` URL.

    Raises:
        Whatever ``raise_for_status()`` raises for a non-success response.
    """
    profile_url = '/'.join([repoEndPoint, 'userProfile', str(ownerId)])
    reply = requests.get(profile_url, headers=headers)
    reply.raise_for_status()
    return reply.json()
def get_user_entities(user, entity_type):
    """Query the entities of one type that were created by *user*.

    Args:
        user: mapping with an ``'ownerId'`` entry identifying the creator.
        entity_type: entity type name used in the ``from`` clause of the
            query (the script passes ``'data'``).

    Returns:
        A list of dicts, one per query result row (at most 10000). Any key
        of the form ``'<entity_type>.<field>'`` is renamed to ``'<field>'``;
        all other keys are kept unchanged.
    """
    query = 'select id, name, parentId from %s where createdByPrincipalId == %s limit 10000' % (entity_type, user['ownerId'])
    results = syn.query(query)

    # A literal prefix test replaces the old per-key regex, which spliced
    # entity_type into the pattern unescaped. `.items()` (rather than
    # `.iteritems()`) keeps this working on both Python 2 and Python 3.
    prefix = entity_type + '.'
    prefix_len = len(prefix)

    fixed_results = []
    for row in results['results']:
        fixed_results.append({
            (key[prefix_len:] if key.startswith(prefix) else key): value
            for key, value in row.items()
        })
    return fixed_results
def get_acl(entity):
    """Return the decoded JSON ACL that governs *entity*.

    Two requests are made: the first resolves the entity's benefactor, the
    second fetches that benefactor's ACL.

    Args:
        entity: either a mapping with an ``'id'`` entry, or a bare entity id
            (string or number), which is converted with ``str()``.

    Raises:
        Whatever ``raise_for_status()`` raises for a non-success response.
    """
    # The previous `'id' in entity` test raised TypeError for integer ids
    # and did a substring search on string ids; attempting the lookup and
    # falling back handles mappings and bare ids uniformly.
    try:
        entity_id = entity['id']
    except (TypeError, KeyError, IndexError):
        entity_id = str(entity)

    url = '%s/entity/%s/benefactor' % (repoEndPoint, entity_id,)
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    benefactor = response.json()['id']

    url = '%s/entity/%s/acl' % (repoEndPoint, benefactor,)
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()
def classify_sage_user(user):
    """Tell whether the profile email of *user* ends with ``sagebase.org``.

    The profile is looked up through ``getUserProfile(user['ownerId'])``.
    Returns ``False`` when the profile carries no ``'email'`` entry.
    """
    profile = getUserProfile(user['ownerId'])
    if 'email' not in profile:
        return False
    return profile['email'].endswith('sagebase.org')
def classify_by_access(acl, open_principal_ids=(273948, 273949)):
    """Classify an ACL as ``'public'`` or ``'auth_users'``.

    Args:
        acl: mapping whose ``'resourceAccess'`` entry is an iterable of
            grants, each with ``'principalId'`` and ``'accessType'`` entries.
        open_principal_ids: principal ids whose READ grant makes the ACL
            count as ``'public'``. The defaults are the two ids that used to
            be hard-coded here (presumably Synapse's built-in groups --
            TODO confirm which group each id denotes).

    Returns:
        ``'public'`` if any grant to one of *open_principal_ids* includes
        ``'READ'``; otherwise ``'auth_users'`` (also for an empty grant list).

    Raises:
        KeyError: if *acl* or one of its grants lacks an expected entry.
    """
    for access in acl['resourceAccess']:
        if access['principalId'] in open_principal_ids and 'READ' in access['accessType']:
            return 'public'
    return 'auth_users'
def quote(string):
    """Return *string* wrapped in double quotes for use as a CSV field.

    Embedded double-quote characters are escaped by doubling them, which is
    the convention standard CSV readers expect. The previous implementation
    closed and reopened the quotes around a backslash-escaped quote, which
    is shell-style concatenation and does not parse as a single CSV field.

    Args:
        string: the text to quote; must be a string.

    Returns:
        The quoted field, including the surrounding double quotes.
    """
    # e.g.   say "hi"   ->   "say ""hi"""
    return '"' + string.replace('"', '""') + '"'
# Build the inventory: for every user that is not a listed Sage account and
# has no sagebase.org email, write one CSV row per 'data' entity they created,
# together with its access classification.
all_users = get_all_users()
lines = []
user_entities = {}

# `with` closes the CSV even if the loop is interrupted part-way through.
with open('synapse_public_inventory.csv', 'w') as f:
    for user in all_users:
        # Best effort: any failure while resolving the profile leaves the
        # user in the inventory with an empty email. `except Exception`
        # (not a bare except) lets Ctrl-C / SystemExit stop the script.
        try:
            if user['displayName'] in sage_users:
                continue
            profile = getUserProfile(user['ownerId'])
            email = profile.get('email', '')
            if email.endswith('sagebase.org'):
                continue
            user['email'] = email
        except Exception:
            user['email'] = ''

        try:
            entities = get_user_entities(user, 'data')
            user_entities[user['ownerId']] = entities
            for entity in entities:
                try:
                    entity['is-open'] = classify_by_access(get_acl(entity))
                except Exception:
                    # Keep the row, but mark the classification as unknown.
                    entity['is-open'] = '??'
                    sys.stderr.write('couldn\'t get acl for entity %s\n' % (str(entity['id']),))
                line = ', '.join([str(a) for a in (
                    user['ownerId'],
                    quote(user['displayName']),
                    user['email'],
                    entity['id'],
                    entity['parentId'],
                    quote(entity['name']),
                    entity['is-open'],
                )])
                lines.append(line)
                # Single-argument call form prints identically on Python 2 and 3.
                print(line)
                f.write(line)
                f.write('\n')
        except Exception:
            # Skip this user but keep going with the rest.
            sys.stderr.write('couldn\'t get entities for user %s\n' % (str(user),))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment