Skip to content

Instantly share code, notes, and snippets.

@damianoporta
Last active December 3, 2015 17:21
Show Gist options
  • Save damianoporta/9a32e4a86baef268f9aa to your computer and use it in GitHub Desktop.
Save damianoporta/9a32e4a86baef268f9aa to your computer and use it in GitHub Desktop.
package model;
import java.util.Collection;
import java.util.regex.Pattern;
import opennlp.tools.util.Span;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import opennlp.namefind.RegexNameFinder;
import parsercv.Setup;
import util.Search;
/**
 * Locates date-like token spans in a parsed document and keeps only those
 * accepted by {@link Search#proximity} for the "birthdates" labels.
 *
 * <p>Dates are recognised purely by regular expression (numeric dates, and
 * dates with English or Italian month names); no calendar validation is done.
 */
public class Birthdate {

    /** Separator between date parts: 1 to 3 of slash, dash, dot, comma or space. */
    private static final String SEP = "[\\/\\-\\.\\,\\ ]{1,3}";

    /** One or two digits; used for both day and numeric month. */
    private static final String DAY_OR_MONTH = "\\d{1,2}";

    /** Two to four digits. */
    private static final String YEAR = "\\d{2,4}";

    /** English month names, abbreviated or in full. */
    private static final String MONTH_EN =
            "(([J]an(uary)?|Feb(ruary)?|Mar(ch)?|[A]pr(il)?|May|June?|July?|Aug(ust)?"
            + "|Sep(t(ember)?)?|Oct(ober)?|Nov(ember)?|Dec(ember)?))";

    /** Italian month names, abbreviated or in full. */
    private static final String MONTH_IT =
            "((gen(naio)?|feb(braio)?|mar(zo)?|apr(ile)?|mag(gio)?|giu(gno)?|lug(lio)?|ago(sto)?"
            + "|set(t(embre)?)?|ott(obre)?|nov(embre)?|dic(embre)?))";

    /** Type assigned to every span produced by the regex finder. */
    private static final String SPAN_TYPE = "date";

    /** Key looked up in {@code setup.labels} and handed to the proximity search. */
    private static final String LABELS_KEY = "birthdates";

    /**
     * All supported date shapes, compiled once at class-load time.
     * {@link Pattern} instances are immutable, so sharing them is safe.
     */
    private static final Pattern[] DATE_PATTERNS = {
        // E.g. 2015 12 03 - 2015.12.03 - 2015/12/03
        compile(YEAR + SEP + DAY_OR_MONTH + SEP + DAY_OR_MONTH),
        // E.g. 03 12 2015 - 03.12.2015 - 03/12/2015
        compile(DAY_OR_MONTH + SEP + DAY_OR_MONTH + SEP + YEAR),
        // E.g. 03 dec(ember) 2015 (English)
        compile(DAY_OR_MONTH + SEP + MONTH_EN + SEP + YEAR),
        // E.g. 2015 dec(ember) 03 (English)
        compile(YEAR + SEP + MONTH_EN + SEP + DAY_OR_MONTH),
        // E.g. 03 dic(embre) 2015 (Italian)
        compile(DAY_OR_MONTH + SEP + MONTH_IT + SEP + YEAR),
        // E.g. 2015 dic(embre) 03 (Italian)
        compile(YEAR + SEP + MONTH_IT + SEP + DAY_OR_MONTH)
    };

    private final Setup setup;

    /**
     * @param setup parsing context; {@link #process()} reads its {@code tokens}
     *              and {@code labels} members
     */
    public Birthdate(Setup setup) {
        this.setup = setup;
    }

    /**
     * Runs the date patterns over {@code setup.tokens} and filters the matches
     * through {@link Search#proximity}.
     *
     * @return the accepted spans in the order the finder reported them; an
     *         empty array when nothing is accepted
     */
    public Span[] process() {
        Map<String, Pattern[]> regexMap = new HashMap<>();
        // Hand the finder its own array so the shared constant can never be altered.
        regexMap.put(SPAN_TYPE, DATE_PATTERNS.clone());
        RegexNameFinder finder = new RegexNameFinder(regexMap);

        // Run the search.
        Span[] candidates = finder.find(setup.tokens);

        Collection<Span> annotations = new LinkedList<>();
        Search search = new Search();
        for (Span candidate : candidates) {
            // NOTE(review): presumably true when the span lies close to one of the
            // "birthdates" labels - confirm against util.Search.
            if (search.proximity(setup.tokens, candidate, setup.labels.get(LABELS_KEY))) {
                annotations.add(
                        new Span(candidate.getStart(), candidate.getEnd(), candidate.getType()));
            }
        }
        return annotations.toArray(new Span[annotations.size()]);
    }

    /** Compiles {@code regex} case-insensitively, as every date pattern requires. */
    private static Pattern compile(String regex) {
        return Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment