Created
March 5, 2013 21:33
-
-
Save cbare/5094486 to your computer and use it in GitHub Desktop.
A script to add permission on a synapse entity for a whole bunch of emails
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ## | |
| ## A script to add permission on a synapse entity | |
| ## for a whole bunch of emails | |
| ############################################################ | |
| import synapseclient | |
| import re, sys | |
| import requests | |
| import json | |
## read in emails from a two-column file
def read_emails(filename):
    """
    Read email addresses from the first column of a whitespace-delimited file.

    Each line is split on its first run of whitespace; the first field is
    taken as the email address and the remainder of the line is ignored.
    Addresses are uniquified case insensitively (the first spelling seen
    wins) and returned in the order they appear in the file. Blank lines
    are skipped.

    filename -- path of the text file to read
    returns  -- list of unique email address strings
    """
    emails = []
    seen_lowercased = set()
    with open(filename, 'r') as f:
        for line in f:
            ## str.split with no separator ignores leading whitespace and
            ## yields no fields for a blank line, so neither a blank line
            ## nor an indented one can put an empty string in the result
            fields = line.split(None, 1)
            if not fields:
                continue
            email = fields[0]
            ## add email to list of emails, uniquifying case insensitively as we go
            email_lowercased = email.lower()
            if email_lowercased not in seen_lowercased:
                seen_lowercased.add(email_lowercased)
                emails.append(email)
    return emails
def get_all_users():
    """
    Fetch the list of users from the synapse repo service.

    Sends one GET to <repoEndpoint>/user using the module-level `headers`
    and returns the 'results' entry of the JSON reply.
    raise_for_status() is called on the response, so an HTTP error status
    raises instead of returning.
    """
    ## NOTE(review): only one page (offset=0, limit=10000) is requested;
    ## presumably users past the first 10000 would be missed -- confirm
    url = repoEndpoint + '/user?offset=0&limit=10000'
    reply = requests.get(url, headers=headers)
    reply.raise_for_status()
    payload = reply.json()
    return payload['results']
def findOwnerIds(emails, all_users=None, verbose=False):
    """
    Given a list of emails, return a mapping from emails to zero or
    more synapse owner ids whose obfuscated email is a match for the
    email in the key. Ambiguous matches might have to be resolved by
    eye.

    emails    -- a single email address or an iterable of addresses
    all_users -- user records, each with an 'ownerId' key; when None they
                 are fetched with get_all_users(). An explicitly empty
                 list is respected and yields no matches.
    verbose   -- when true, write a progress dot per user and print each
                 match to stdout
    returns   -- dict mapping every given email to a (possibly empty)
                 list of owner ids

    Profiles are looked up through the module-level `syn` client.
    """
    ## in case we're given one email address
    try:
        string_types = basestring   ## Python 2
    except NameError:
        string_types = str          ## Python 3
    if isinstance(emails, string_types):
        emails = [emails]
    else:
        ## the emails are scanned once per user, so a one-shot iterator
        ## has to be materialized first
        emails = list(emails)

    if all_users is None:
        ## get all users in synapse
        all_users = get_all_users()

    ## initialize mapping from email to an initially empty list of matching owner_ids
    email_to_owner_ids = {}
    for email in emails:
        email_to_owner_ids[email] = []

    ## For each user in synapse, if it matches an email in the list,
    ## add its owner_id to the mapping.
    ## The reason why this is implemented a little backwards (looping
    ## over all synapse users rather than all emails in the given list)
    ## is that the user's profile contains an obfuscated email address,
    ## which is then used as a pattern to search the list of emails.
    for user in all_users:
        if verbose:
            sys.stdout.write(".")
            sys.stdout.flush()
        profile = syn.getUserProfile(user['ownerId'])
        if 'email' not in profile:
            continue

        ## Only the '...' placeholder is a wildcard. Everything else in
        ## the obfuscated address is literal text, so it is escaped to
        ## keep characters like '.' and '+' from acting as regex syntax.
        ## The trailing \Z forces the whole address to match; a bare
        ## prefix match would credit the wrong email (and its matched
        ## text would not be a key of the mapping).
        literal_parts = profile['email'].split('...')
        email_regex = re.compile(
            '.*'.join(re.escape(part) for part in literal_parts) + r'\Z',
            re.IGNORECASE)

        matches = [email for email in emails if email_regex.match(email)]
        for match in matches:
            email_to_owner_ids[match].append(user['ownerId'])
        if matches and verbose:
            print("\n" + str(matches) + " => " + user['ownerId'])
    return email_to_owner_ids
def storeACL(entity, acl):
    """
    Store an access control list for an entity and return the server's reply.

    entity -- either an object that can be indexed with 'id' (its 'id'
              value is used) or a bare entity id, which is converted
              with str()
    acl    -- the ACL to store; it is serialized with json.dumps

    The entity's benefactor is looked up first. (An entity gets its ACL
    from its benefactor.) If the entity is its own benefactor the ACL is
    sent with PUT, otherwise with POST. Both requests use the
    module-level `repoEndpoint` and `headers`, and raise_for_status() is
    called on each response.

    returns -- the decoded JSON body of the PUT/POST response
    """
    ## Ask for the 'id' item rather than testing `'id' in entity`: on a
    ## plain string id that test is a substring search, and on an id that
    ## is not a container (e.g. an int) it raises TypeError.
    try:
        entity_id = entity['id']
    except (TypeError, KeyError, IndexError):
        entity_id = str(entity)

    ## get benefactor. (An entity gets its ACL from its benefactor.)
    url = '%s/entity/%s/benefactor' % (repoEndpoint, entity_id,)
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    benefactor = response.json()

    ## update or create new ACL
    url = '%s/entity/%s/acl' % (repoEndpoint, entity_id,)
    if benefactor['id'] == entity_id:
        response = requests.put(url, data=json.dumps(acl), headers=headers)
    else:
        response = requests.post(url, data=json.dumps(acl), headers=headers)
    response.raise_for_status()
    return response.json()
## log in and set up the globals used by the functions above
syn = synapseclient.Synapse()
## NOTE(review): placeholder credentials -- replace before running and
## keep real ones out of version control
syn.login('[email protected]', 'secret')
repoEndpoint = 'https://repo-prod.prod.sagebase.org/repo/v1'
headers = {'Accept': 'application/json', 'Content-type':'application/json', 'sessionToken': syn.sessionToken}

filename = 'tcga.emails.txt'
emails = read_emails(filename)

## get all synapse users
all_users = get_all_users()
email_to_owner_ids = findOwnerIds(emails, all_users=all_users, verbose=True)

## report emails with no matching account or with more than one
no_mapping = 0
ambiguous_mapping = 0
for (k, v) in email_to_owner_ids.items():
    if len(v) == 0:
        print("no mapping for: " + k)
        no_mapping += 1
    elif len(v) > 1:
        print("ambiguous: " + k + " => " + str(v))
        ## count the surplus accounts beyond the first match
        ambiguous_mapping += len(v) - 1
found_emails = sum(len(ownerIds) for ownerIds in email_to_owner_ids.values())
print("Found %d matching emails, of which %d were ambiguous (matched >1 synapse account)." % (found_emails, ambiguous_mapping,))
print("No synapse account was found for %d emails." % (no_mapping,))

## add users to ACL for entity
entity_id = 'synXXXXXXXX'

## get the existing access rights, keyed by principal id
acl = syn._getACL(entity_id)
rights = {}
for access in acl['resourceAccess']:
    rights[access['principalId']] = access

## update rights (not destructively) with new permissions
## NOTE(review): every owner id matched to an email is granted access,
## including all candidates of an ambiguous match -- resolve those by
## eye first if that is not intended.
## NOTE(review): this assumes principalId values in the ACL have the same
## type as ownerId values from the user list; if they differ, existing
## entries are never found here -- confirm against the service.
for email, ownerIds in email_to_owner_ids.items():
    for ownerId in ownerIds:
        if ownerId in rights:
            ## union with the existing access types so nothing already
            ## granted is taken away
            existing = set(rights[ownerId]['accessType'])
            rights[ownerId]['accessType'] = list(existing | {u'UPDATE', u'CREATE', u'READ'})
        else:
            rights[ownerId] = {u'accessType': [u'UPDATE', u'CREATE', u'READ'], u'principalId': ownerId}
acl['resourceAccess'] = list(rights.values())
storeACL(entity_id, acl)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment