Created
September 29, 2017 20:37
-
-
Save csjx/e488d104c62c214f9da7d9ab046043b2 to your computer and use it in GitHub Desktop.
Updates system metadata via hazelcast
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * Command-line utility that updates DataONE system metadata via a Hazelcast client. | |
| */ | |
| package org.dataone.tests; | |
| import java.io.ByteArrayOutputStream; | |
| import java.io.File; | |
| import java.io.FileNotFoundException; | |
| import java.io.FileReader; | |
| import java.io.IOException; | |
| import java.lang.reflect.InvocationTargetException; | |
| import java.text.ParseException; | |
| import java.text.SimpleDateFormat; | |
| import java.util.Date; | |
| import java.util.HashSet; | |
| import java.util.List; | |
| import java.util.Set; | |
| import org.dataone.service.types.v1.AccessPolicy; | |
| import org.dataone.service.types.v1.AccessRule; | |
| import org.dataone.service.types.v1.Checksum; | |
| import org.dataone.service.types.v1.Identifier; | |
| import org.dataone.service.types.v1.Permission; | |
| import org.dataone.service.types.v1.Subject; | |
| import org.dataone.service.types.v2.SystemMetadata; | |
| import org.dataone.service.types.v2.TypeFactory; | |
| import org.dataone.service.util.TypeMarshaller; | |
| import org.jibx.runtime.JiBXException; | |
| import com.hazelcast.client.ClientConfig; | |
| import com.hazelcast.client.HazelcastClient; | |
| import com.hazelcast.config.GroupConfig; | |
| import com.hazelcast.core.IMap; | |
| import com.hazelcast.core.ISet; | |
| import com.opencsv.CSVReader; | |
| /** | |
| * @author cjones | |
| * | |
| */ | |
| public class HzSysMetadataUpdater { | |
| // private static String groupName = "metacat"; | |
| private static String groupName = ""; | |
| private static String groupPassword = ""; | |
| private static String address = "127.0.0.1:5701"; | |
| private static IMap<Identifier, SystemMetadata> hzSystemMetadata = null; | |
| private static HazelcastClient hzClient = null; | |
| private static SystemMetadata sysmeta = null; | |
| private static ISet<Identifier> hzIdentifiers = null; | |
| /** | |
| * @param args | |
| */ | |
| public static void main(String[] args) { | |
| long startTime = System.currentTimeMillis(); | |
| try { | |
| ClientConfig config = new ClientConfig(); | |
| config.addAddress(address); | |
| GroupConfig groupConfig = new GroupConfig(); | |
| groupConfig.setName(groupName); | |
| groupConfig.setPassword(groupPassword); | |
| config.setGroupConfig(groupConfig); | |
| hzClient = HazelcastClient.newHazelcastClient(config); | |
| hzSystemMetadata = hzClient.getMap("hzSystemMetadata"); | |
| hzIdentifiers = hzClient.getSet("hzIdentifiers"); | |
| System.out.println("hzIdentifiers count: " + hzIdentifiers.size()); | |
| // updateAccessPolicies(hzIdentifiers); | |
| // Update checksums | |
| // String pidChecksumFile = "/Users/cjones/2016-11-17-pids_and_new_checksums_to_update.txt"; | |
| // updateChecksums(pidChecksumFile); | |
| // Update upload dates | |
| // updateUploadDates("/Users/cjones/object_dates_initialdump.csv"); | |
| // updateArchivedAndObsoletesChain("urn:uuid:15b0247a-b681-464f-bed2-9229cc686af8"); | |
| // Evict pids from hzSystemMetadata | |
| //evictSystemMetadata(); | |
| // Evict all pids, then get() them | |
| // evictAndGetAll(); | |
| // Print system metadata to stdout | |
| //getSystemMetadata("urn:uuid:f22d78a2-719a-4704-856b-cc02fa803290"); | |
| // Add system metadata to hazelcast | |
| // String sysmetaFile = "/Users/cjones/d1-test/arctic/jeanette/obsoletes-fixes/urn-uuid-bbca6070-1ce7-49d0-9a56-04a324efc83c.sysmeta.xml"; | |
| // addSystemMetadata(sysmetaFile); | |
| // Evict pids individually | |
| // evictAndGetSystemMetadata("urn:uuid:bbca6070-1ce7-49d0-9a56-04a324efc83c"); | |
| // evictAndGetSystemMetadata("urn:uuid:feed9eee-21cf-4ba8-bb9b-7d036920f452"); | |
| Set<String> ids = new HashSet<String>(); | |
| ids.add("df35b.297.1"); | |
| ids.add("df35b.296.1"); | |
| ids.add("resourceMap_df35b.297.1"); | |
| Set<Identifier> pids = new HashSet<Identifier>(); | |
| for ( String id : ids ) { | |
| Identifier pid = new Identifier(); | |
| pid.setValue(id); | |
| pids.add(pid); | |
| } | |
| updateAccessPolicy(pids); | |
| System.exit(0); | |
| } catch (Exception e) { | |
| e.printStackTrace(); | |
| } | |
| long endTime = System.currentTimeMillis(); | |
| long duration = endTime - startTime; | |
| System.out.println(duration/1000); | |
| } | |
| private static void getSystemMetadata(String pidStr) { | |
| Identifier pid = new Identifier(); | |
| pid.setValue(pidStr); | |
| SystemMetadata sysmeta = hzSystemMetadata.get(pid); | |
| ByteArrayOutputStream baos = new ByteArrayOutputStream(); | |
| try { | |
| TypeMarshaller.marshalTypeToOutputStream(sysmeta, baos); | |
| System.out.println(baos.toString("UTF-8")); | |
| } catch (JiBXException e) { | |
| e.printStackTrace(); | |
| } catch (IOException e) { | |
| e.printStackTrace(); | |
| } | |
| } | |
| private static void updateArchivedAndObsoletesChain(String pidStr) { | |
| Identifier pid = new Identifier(); | |
| pid.setValue(pidStr); | |
| SystemMetadata sysmeta = hzSystemMetadata.get(pid); | |
| // first set archived to false | |
| sysmeta.setArchived(false); | |
| // Get the obsoletes for reference | |
| Identifier obsoletesPid = sysmeta.getObsoletes(); | |
| // Then remove it | |
| sysmeta = removeObsoletes(sysmeta); | |
| // Now get the sysmeta for the object it obsoletes | |
| SystemMetadata obsoletesSysmeta = hzSystemMetadata.get(obsoletesPid); | |
| obsoletesSysmeta = removeObsoletedBy(obsoletesSysmeta); | |
| ByteArrayOutputStream baos = new ByteArrayOutputStream(); | |
| try { | |
| TypeMarshaller.marshalTypeToOutputStream(sysmeta, baos); | |
| System.out.println(baos.toString()); | |
| baos.close(); | |
| baos = new ByteArrayOutputStream(); | |
| TypeMarshaller.marshalTypeToOutputStream(obsoletesSysmeta, baos); | |
| System.out.println(baos.toString()); | |
| Date dateModified = new Date(); | |
| sysmeta.setDateSysMetadataModified(dateModified); | |
| obsoletesSysmeta.setDateSysMetadataModified(dateModified); | |
| hzSystemMetadata.put(pid, sysmeta); | |
| hzSystemMetadata.put(obsoletesPid, obsoletesSysmeta); | |
| System.out.println("Set archived to false for " + pid.getValue()); | |
| System.out.println("Removed the revision chain between " + | |
| pid.getValue() + " and " + obsoletesPid.getValue()); | |
| } catch (JiBXException e) { | |
| e.printStackTrace(); | |
| } catch (IOException e) { | |
| e.printStackTrace(); | |
| } | |
| } | |
| private static SystemMetadata removeObsoletes(SystemMetadata sysmeta) { | |
| // effectively zeros out the entry | |
| sysmeta.setObsoletes(null); | |
| return sysmeta; | |
| } | |
| private static SystemMetadata removeObsoletedBy(SystemMetadata sysmeta) { | |
| // effectively zeros out the entry | |
| sysmeta.setObsoletedBy(null); | |
| return sysmeta; | |
| } | |
| /* | |
| * Updates the dateUploaded system metadata field from the given file | |
| */ | |
| private static void updateUploadDates(String pidsAndDatesFile) { | |
| try { | |
| File uploadedFile = new File(pidsAndDatesFile); | |
| CSVReader csvReader = new CSVReader(new FileReader(uploadedFile)); | |
| String[] lineParts; | |
| String pidStr = ""; | |
| String dateUploadedStr = ""; | |
| String dateUpdatedStr = ""; | |
| // 2015-06-18 05:55:01.688869-08 | |
| SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSSSSX"); | |
| Date dateUploaded = null; | |
| SystemMetadata sysmeta = null; | |
| Identifier pid = new Identifier(); | |
| int count = 0; | |
| while ( (lineParts = csvReader.readNext()) != null ) { | |
| pidStr = lineParts[0]; | |
| dateUpdatedStr = lineParts[1]; | |
| dateUploadedStr = lineParts[2]; | |
| pid.setValue(pidStr); | |
| count++; | |
| try { | |
| dateUploaded = formatter.parse(dateUploadedStr); | |
| //System.out.println(dateUploadedStr); | |
| //System.out.println(dateUploaded); | |
| // Get the sysmeta | |
| sysmeta = hzSystemMetadata.get(pid); | |
| if ( sysmeta instanceof org.dataone.service.types.v1.SystemMetadata ) { | |
| sysmeta = TypeFactory.convertTypeFromType(sysmeta, SystemMetadata.class); | |
| } | |
| sysmeta.setDateUploaded(dateUploaded); | |
| sysmeta.setDateSysMetadataModified(new Date()); | |
| // update the hz map | |
| hzSystemMetadata.put(pid, sysmeta); | |
| hzSystemMetadata.evict(pid); | |
| hzSystemMetadata.get(pid); | |
| System.out.println(count + "\t" + pidStr); | |
| } catch (InstantiationException e) { | |
| e.printStackTrace(); | |
| continue; | |
| } catch (IllegalAccessException e) { | |
| e.printStackTrace(); | |
| continue; | |
| } catch (InvocationTargetException e) { | |
| e.printStackTrace(); | |
| continue; | |
| } catch (NoSuchMethodException e) { | |
| e.printStackTrace(); | |
| continue; | |
| } catch (ParseException e) { | |
| e.printStackTrace(); | |
| continue; | |
| } | |
| } | |
| } catch (FileNotFoundException e) { | |
| e.printStackTrace(); | |
| } catch (IOException e) { | |
| e.printStackTrace(); | |
| } | |
| } | |
| /* | |
| * Updates the checksums of each object with an SHA256 checksum found | |
| * in the provided file | |
| */ | |
| private static void updateChecksums(String pidChecksumFile) | |
| throws InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException { | |
| File file = new File(pidChecksumFile); | |
| try { | |
| CSVReader csvReader = new CSVReader(new FileReader(file), '|'); | |
| String[] lineParts; | |
| String pidStr; | |
| String checksumStr; | |
| String path; | |
| String algorithm; | |
| SystemMetadata sysmeta = null; | |
| Identifier pid = new Identifier(); | |
| Checksum checksum = new Checksum(); | |
| int count = 1; | |
| while ((lineParts = csvReader.readNext()) != null) { | |
| pidStr = lineParts[0]; | |
| checksumStr = lineParts[1]; | |
| path = lineParts[2]; | |
| algorithm = lineParts[3]; | |
| pid.setValue(pidStr); | |
| checksum.setAlgorithm(algorithm); | |
| checksum.setValue(checksumStr); | |
| // Get the sysmeta | |
| sysmeta = hzSystemMetadata.get(pid); | |
| if ( sysmeta instanceof org.dataone.service.types.v1.SystemMetadata ) { | |
| sysmeta = TypeFactory.convertTypeFromType(sysmeta, SystemMetadata.class); | |
| } | |
| sysmeta.setChecksum(checksum); | |
| sysmeta.setDateSysMetadataModified(new Date()); | |
| // Update the hz map | |
| try { | |
| hzSystemMetadata.put(pid, sysmeta); | |
| hzSystemMetadata.evict(pid); | |
| } catch (RuntimeException e) { | |
| e.printStackTrace(); | |
| continue; | |
| } | |
| System.out.println(count + "\tUpdated " + pid.getValue()); | |
| count++; | |
| } | |
| } catch (FileNotFoundException e) { | |
| e.printStackTrace(); | |
| } catch (IOException e) { | |
| e.printStackTrace(); | |
| } | |
| } | |
| /* | |
| * Updates the access policies of each system metadata entry, adding in | |
| * an admin read/write/changePermission ACL if it doesn't exist already | |
| */ | |
| private static void updateAccessPolicies(Set<Identifier> hzIdentifiers) { | |
| String adminDN = "CN=arctic-data-admins,DC=dataone,DC=org"; | |
| Subject adminSubject = new Subject(); | |
| adminSubject.setValue(adminDN); | |
| int count = 0; | |
| pidloop: | |
| for (Identifier pid : hzIdentifiers) { | |
| count++; | |
| sysmeta = hzSystemMetadata.get(pid); | |
| if ( sysmeta == null ) { | |
| continue; | |
| } | |
| AccessPolicy newPolicy = new AccessPolicy(); | |
| AccessPolicy oldPolicy = sysmeta.getAccessPolicy(); | |
| List<AccessRule> oldRules = oldPolicy.getAllowList(); | |
| boolean hasAdminRule = false; | |
| // loop through the rules, add existing rules to the new policy | |
| for (AccessRule rule : oldRules) { | |
| String subject = rule.getSubject(0).getValue(); | |
| if ( ! subject.equals(adminDN) ) { | |
| newPolicy.addAllow(rule); | |
| } else { | |
| List<Permission> permissions = rule.getPermissionList(); | |
| for (Permission permission : permissions) { | |
| if ( permission.equals(Permission.CHANGE_PERMISSION)) { | |
| newPolicy.addAllow(rule); | |
| hasAdminRule = true; | |
| continue pidloop; | |
| } | |
| } | |
| } | |
| } | |
| // if we don't encounter an admin group rule, add it | |
| if ( ! hasAdminRule ) { | |
| AccessRule adminRule = new AccessRule(); | |
| adminRule.addSubject(adminSubject); | |
| adminRule.addPermission(Permission.READ); | |
| adminRule.addPermission(Permission.WRITE); | |
| adminRule.addPermission(Permission.CHANGE_PERMISSION); | |
| newPolicy.addAllow(adminRule); | |
| } | |
| sysmeta.setAccessPolicy(newPolicy); | |
| sysmeta.setDateSysMetadataModified(new Date()); | |
| //ByteArrayOutputStream baos = new ByteArrayOutputStream(); | |
| //TypeMarshaller.marshalTypeToOutputStream(sysmeta, baos); | |
| hzSystemMetadata.put(pid, sysmeta); | |
| hzSystemMetadata.evict(pid); | |
| //System.out.println(count + "\t Evicted " + pid.getValue()); | |
| // SystemMetadata newSysmeta = hzSystemMetadata.get(id); | |
| if ( ! hasAdminRule ) { | |
| System.out.println(count + "\t" + pid.getValue()); | |
| } else { | |
| System.out.println(count + "\t" + pid.getValue() +"\t updated."); | |
| } | |
| } | |
| } | |
| /* | |
| * Evict a single identifier and induce a put() back into the map | |
| */ | |
| private static void evictAndGetSystemMetadata(String identifier) { | |
| Identifier pid = new Identifier(); | |
| pid.setValue(identifier); | |
| hzSystemMetadata.evict(pid); | |
| System.out.println("\tEvicted\t" + pid.getValue()); | |
| hzSystemMetadata.get(pid); | |
| } | |
| /* | |
| * Add system metadata to hazelcast from a file | |
| */ | |
| private static void addSystemMetadata(String file) { | |
| try { | |
| org.dataone.service.types.v1.SystemMetadata sysmeta = TypeMarshaller.unmarshalTypeFromFile(SystemMetadata.class, file); | |
| SystemMetadata sysmetaV2 = TypeFactory.convertTypeFromType(sysmeta, SystemMetadata.class); | |
| Identifier pid = sysmetaV2.getIdentifier(); | |
| hzSystemMetadata.put(pid, sysmetaV2); | |
| System.out.println("Added: " + pid.getValue()); | |
| } catch (InvocationTargetException e) { | |
| e.printStackTrace(); | |
| } catch (NoSuchMethodException e) { | |
| e.printStackTrace(); | |
| } catch (InstantiationException e) { | |
| e.printStackTrace(); | |
| } catch (IllegalAccessException e) { | |
| e.printStackTrace(); | |
| } catch (IOException e) { | |
| e.printStackTrace(); | |
| } catch (JiBXException e) { | |
| e.printStackTrace(); | |
| } | |
| } | |
| /* | |
| * Evict all identifiers currently in hzSystemMetadata | |
| */ | |
| private static void evictSystemMetadata() { | |
| // Set<Identifier> pids = hzSystemMetadata.keySet(); | |
| Set<String> pids = new HashSet<String>(); | |
| pids.add("knb.314.1"); | |
| pids.add("urn:uuid:86bc13a7-d03d-47b4-b9bc-57cc5d79d3f3"); | |
| pids.add("resourceMap_knb.314.2"); | |
| pids.add("knb.314.2"); | |
| pids.add("doi:10.5063/F1HQ3WT0"); | |
| pids.add("knb.313.1"); | |
| int count = 1; | |
| for ( String pid : pids ) { | |
| // evict the pid | |
| Identifier identifier = new Identifier(); | |
| identifier.setValue(pid); | |
| hzSystemMetadata.evict(identifier); | |
| //hzSystemMetadata.get(identifier); | |
| System.out.println(count + "\tEvicted\t" + identifier.getValue()); | |
| count++; | |
| } | |
| } | |
| /* | |
| * Evict all identifiers in hzIdentifiers, then get() them again | |
| * to induce a put(), causing the index to be updated | |
| */ | |
| private static void evictAndGetAll() { | |
| int count = 1; | |
| for ( Identifier pid : hzIdentifiers ) { | |
| // evict the pid | |
| hzSystemMetadata.evict(pid); | |
| hzSystemMetadata.get(pid); | |
| System.out.println(count + "\tEvicted and got\t" + pid.getValue()); | |
| count++; | |
| } | |
| } | |
| private static void showChecksums(String pidFilePath) { | |
| File pidFile = new File(pidFilePath); | |
| } | |
| /* | |
| * Updates the access policies of each system metadata entry, adding in | |
| * an admin read/write/changePermission ACL if it doesn't exist already | |
| */ | |
| private static void updateAccessPolicy(Set<Identifier> identifiers) { | |
| String dn = "uid=jclark,o=unaffiliated,dc=ecoinformatics,dc=org"; | |
| Subject userSubject = new Subject(); | |
| userSubject.setValue(dn); | |
| int count = 0; | |
| pidloop: | |
| for (Identifier pid : identifiers) { | |
| count++; | |
| sysmeta = hzSystemMetadata.get(pid); | |
| if ( sysmeta == null ) { | |
| continue; | |
| } | |
| AccessPolicy newPolicy = new AccessPolicy(); | |
| AccessPolicy oldPolicy = sysmeta.getAccessPolicy(); | |
| List<AccessRule> oldRules = oldPolicy.getAllowList(); | |
| boolean hasRule = false; | |
| // loop through the rules, add existing rules to the new policy | |
| for (AccessRule rule : oldRules) { | |
| String subject = rule.getSubject(0).getValue(); | |
| if ( ! subject.equals(dn) ) { | |
| newPolicy.addAllow(rule); | |
| } else { | |
| List<Permission> permissions = rule.getPermissionList(); | |
| for (Permission permission : permissions) { | |
| if ( permission.equals(Permission.CHANGE_PERMISSION)) { | |
| newPolicy.addAllow(rule); | |
| hasRule = true; | |
| continue pidloop; | |
| } | |
| } | |
| } | |
| } | |
| // if we don't encounter an user rule, add it | |
| if ( ! hasRule ) { | |
| AccessRule userRule = new AccessRule(); | |
| userRule.addSubject(userSubject); | |
| userRule.addPermission(Permission.READ); | |
| userRule.addPermission(Permission.WRITE); | |
| userRule.addPermission(Permission.CHANGE_PERMISSION); | |
| newPolicy.addAllow(userRule); | |
| } | |
| sysmeta.setAccessPolicy(newPolicy); | |
| sysmeta.setDateSysMetadataModified(new Date()); | |
| ByteArrayOutputStream baos = new ByteArrayOutputStream(); | |
| try { | |
| TypeMarshaller.marshalTypeToOutputStream(sysmeta, baos); | |
| } catch (JiBXException e) { | |
| e.printStackTrace(); | |
| } catch (IOException e) { | |
| e.printStackTrace(); | |
| } | |
| //hzSystemMetadata.put(pid, sysmeta); | |
| //hzSystemMetadata.evict(pid); | |
| //System.out.println(count + "\t Evicted " + pid.getValue()); | |
| // SystemMetadata newSysmeta = hzSystemMetadata.get(id); | |
| if ( ! hasRule ) { | |
| System.out.println(count + "\t" + pid.getValue()); | |
| } else { | |
| System.out.println(count + "\t" + pid.getValue() +"\t updated."); | |
| } | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment