Last active
December 3, 2015 17:21
-
-
Save damianoporta/9a32e4a86baef268f9aa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package model; | |
import java.util.Collection; | |
import java.util.regex.Pattern; | |
import opennlp.tools.util.Span; | |
import java.util.HashMap; | |
import java.util.LinkedList; | |
import java.util.Map; | |
import opennlp.namefind.RegexNameFinder; | |
import parsercv.Setup; | |
import util.Search; | |
public class Birthdate { | |
private Setup setup; | |
public Birthdate(Setup setup) { | |
this.setup = setup; | |
} | |
public Span[] process() { | |
// Es. 2015 12 03 - 2015.12.03 - 2015/12/03 | |
String regex1 = "\\d{2,4}[\\/\\-\\.\\,\\ ]{1,3}\\d{1,2}[\\/\\-\\.\\,\\ ]{1,3}\\d{1,2}"; | |
Pattern pattern1 = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE); | |
// Es. 03 12 2015 - 03.12.2015 - 03/12/2015 | |
String regex2 = "\\d{1,2}[\\/\\-\\.\\,\\ ]{1,3}\\d{1,2}[\\/\\-\\.\\,\\ ]{1,3}\\d{2,4}"; | |
Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE); | |
// Es. 03 dec(ember) 2015 (inglese) | |
String regex3 = "\\d{1,2}[\\/\\-\\.\\,\\ ]{1,3}(([J]an(uary)?|Feb(ruary)?|Mar(ch)?|[A]pr(il)?|May|June?|July?|Aug(ust)?|Sep(t(ember)?)?|Oct(ober)?|Nov(ember)?|Dec(ember)?))[\\/\\-\\.\\,\\ ]{1,3}\\d{2,4}"; | |
Pattern pattern3 = Pattern.compile(regex3, Pattern.CASE_INSENSITIVE); | |
// Es. 2015 dec(ember) 03 (inglese) | |
String regex4 = "\\d{2,4}[\\/\\-\\.\\,\\ ]{1,3}(([J]an(uary)?|Feb(ruary)?|Mar(ch)?|[A]pr(il)?|May|June?|July?|Aug(ust)?|Sep(t(ember)?)?|Oct(ober)?|Nov(ember)?|Dec(ember)?))[\\/\\-\\.\\,\\ ]{1,3}\\d{1,2}"; | |
Pattern pattern4 = Pattern.compile(regex4, Pattern.CASE_INSENSITIVE); | |
// Es. 03 dic(embre) 2015 (italiano) | |
String regex5 = "\\d{1,2}[\\/\\-\\.\\,\\ ]{1,3}((gen(naio)?|feb(braio)?|mar(zo)?|apr(ile)?|mag(gio)?|giu(gno)?|lug(lio)?|ago(sto)?|set(t(embre)?)?|ott(obre)?|nov(embre)?|dic(embre)?))[\\/\\-\\.\\,\\ ]{1,3}\\d{2,4}"; | |
Pattern pattern5 = Pattern.compile(regex5, Pattern.CASE_INSENSITIVE); | |
// Es. 2015 dic(embre) 03 (italiano) | |
String regex6 = "\\d{2,4}[\\/\\-\\.\\,\\ ]{1,3}((gen(naio)?|feb(braio)?|mar(zo)?|apr(ile)?|mag(gio)?|giu(gno)?|lug(lio)?|ago(sto)?|set(t(embre)?)?|ott(obre)?|nov(embre)?|dic(embre)?))[\\/\\-\\.\\,\\ ]{1,3}\\d{1,2}"; | |
Pattern pattern6 = Pattern.compile(regex6, Pattern.CASE_INSENSITIVE); | |
Pattern[] patterns = new Pattern[]{pattern1, pattern2, pattern3, pattern4, pattern5, pattern6}; | |
Map<String, Pattern[]> regexMap = new HashMap<>(); | |
String type = "date"; | |
regexMap.put(type, patterns); | |
RegexNameFinder finder = new RegexNameFinder(regexMap); | |
// Effetto la ricerca | |
Span[] results = finder.find(this.setup.tokens); | |
Collection<Span> annotations = new LinkedList<>(); | |
Search search = new Search(); | |
for(Span result: results) { | |
boolean r = search.proximity(setup.tokens, result, setup.labels.get("birthdates")); | |
if (r == true) { | |
Span annotation = new Span(result.getStart(), result.getEnd(), result.getType()); | |
annotations.add(annotation); | |
} | |
} | |
return annotations.toArray(new Span[annotations.size()]); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment