Skip to content

Instantly share code, notes, and snippets.

@mdoering
Last active May 24, 2017 12:41
Show Gist options
  • Save mdoering/8587ad0126bc7c73f6a7f14137875a9c to your computer and use it in GitHub Desktop.
Save mdoering/8587ad0126bc7c73f6a7f14137875a9c to your computer and use it in GitHub Desktop.
sciname regex
/**
 * Plain holder for the subset of JSON properties that testRegex reads from a
 * species API response. Instances are created by Jackson's ObjectMapper; the
 * mapper there is configured to ignore any JSON properties not declared here.
 * Fields are public so they can be bound and read directly.
 */
static class Name {
// Not read by the visible code. NOTE(review): presumably the record id — confirm.
public int key;
// Not read by the visible code. NOTE(review): presumably the id of the name itself — confirm.
public int nameKey;
// Not read by the visible code.
public String rank;
// Printed next to each name and in the UNPARSABLE message.
public String nameType;
// Compared case-insensitively against the regex match in testRegex.
public String canonicalName;
// Not read by the visible code.
public String authorship;
// The full name string the regex is applied to.
public String scientificName;
}
/**
 * Exploratory check of a simple canonical-name regex against randomly chosen
 * records from the GBIF species API.
 *
 * <p>For every record fetched, the name type and scientific name are printed.
 * If the regex matches but the match differs (ignoring case) from the record's
 * canonicalName, the match is printed; if the regex does not match at all, the
 * record is reported as UNPARSABLE.
 *
 * <p>The loop has no exit condition: this method runs until it is interrupted
 * or an exception other than IOException is thrown.
 *
 * @throws Exception declared for unexpected failures; IOException from a single
 *     lookup is skipped so sampling can continue
 */
public void testRegex() throws Exception {
  // Capitalised first word (3+ chars), then an optional lowercase second word,
  // then an optional third word that may be preceded by a 1-5 letter marker with
  // an optional trailing dot. Inner groups are non-capturing "(?:"; the original
  // "(:?" was a typo that made them capturing groups accepting a literal colon.
  final Pattern regex = Pattern.compile("^([A-Z][a-z][a-z-]+(?: [a-z][a-z]+)?(?: (?:[a-z]{1,5}\\.? )?[a-z][a-z-]+)?)\\b");
  final String base = "http://api.gbif.org/v1/species/";
  final Random rnd = new Random();
  final ObjectMapper mapper = new ObjectMapper();
  // Name declares only a few properties; ignore everything else in the response.
  mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

  while (true) {
    // Random id in [100000000, 120000000).
    final int id = 100000000 + rnd.nextInt(20000000);
    try {
      final URL url = new URL(base + id);
      final Name name = mapper.readValue(url, Name.class);
      if (name == null || name.scientificName == null) {
        // Nothing to match against; Pattern.matcher(null) would throw and end the loop.
        continue;
      }
      System.out.printf("%12s %s%n", name.nameType, name.scientificName);

      final Matcher m = regex.matcher(name.scientificName);
      if (m.find()) {
        final String canonical = m.group();
        // Only report matches that disagree with the API's own canonical name.
        if (!canonical.equalsIgnoreCase(name.canonicalName)) {
          System.out.println(" -> " + canonical);
        }
      } else {
        System.out.println(" -> UNPARSABLE " + name.nameType);
      }
    } catch (IOException ignored) {
      // Deliberately skipped: a failed lookup (presumably an id that does not
      // exist, or a transient network error) just means we try another id.
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment