Last active
October 4, 2024 10:22
-
-
Save pagetronic/39b5cf1b794b79c69846cae4afa176d1 to your computer and use it in GitHub Desktop.
Parse complex wikiSyntax date
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package live.page.utils; | |
import com.drew.imaging.ImageMetadataReader;
import com.drew.metadata.Metadata;
import com.drew.metadata.exif.ExifSubIFDDirectory;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import live.page.hubd.blobs.Blob;
import live.page.hubd.system.json.Json;
public class WikiCommonDateParser { | |
private static final List<SimpleDateFormat> formats = List.of( | |
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"), | |
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"), | |
new SimpleDateFormat("yyyy-MM-dd HH:mm"), | |
new SimpleDateFormat("yyyy-MM-dd"), | |
new SimpleDateFormat("yyyy-MM"), | |
new SimpleDateFormat("MMMM d, yyyy"), | |
new SimpleDateFormat("MMMM yyyy"), | |
new SimpleDateFormat("yyyy") | |
); | |
public static Date parse(String date) { | |
for (SimpleDateFormat format : formats) { | |
try { | |
return format.parse(date); | |
} catch (Exception ignore) { | |
} | |
} | |
date = date.trim(); | |
for (int i = 1; i < date.length() - 4; i++) { | |
for (SimpleDateFormat format : formats) { | |
try { | |
return format.parse(date.substring(0, date.length() - (1 + i))); | |
} catch (Exception ignore) { | |
} | |
try { | |
return format.parse(date.substring(i)); | |
} catch (Exception ignore) { | |
} | |
} | |
} | |
return null; | |
} | |
public static Date cleanDate(Json page, Blob blob) { | |
Json infos = null; | |
try { | |
infos = page.getJson("data").getJson("information"); | |
} catch (Exception ignore) { | |
} | |
String dateStr = null; | |
if (infos != null && infos.get("date") != null) { | |
dateStr = infos.getString("date"); | |
if (dateStr == null) { | |
Json date = infos.getJson("date"); | |
if (date != null) { | |
String jsonDate = date.toString(true); | |
if (date.getString("isostring") != null) { | |
dateStr = date.getString("isostring"); | |
} else if (date.containsKey("isodate")) { | |
dateStr = date.getString("isodate"); | |
} else if (date.getJson("taken on") != null) { | |
dateStr = date.getJson("taken on").getString("1"); | |
} else if (date.getJson("taken in") != null) { | |
dateStr = date.getJson("taken in").getString("1"); | |
} else if (date.getString("taken on") != null) { | |
dateStr = date.getString("taken on"); | |
} else if (date.getString("taken o") != null) { | |
dateStr = date.getString("taken o").replace("n", ""); | |
} else if (date.getString("taken in") != null) { | |
dateStr = date.getString("taken in"); | |
} else if (date.getString("original upload date") != null) { | |
dateStr = date.getString("original upload date"); | |
} else if (date.getString("original upload date") != null) { | |
dateStr = date.getString("original upload date"); | |
} else if (date.getString("dtz") != null) { | |
dateStr = date.getString("dtz"); | |
} else if (date.getJson("exif date") != null) { | |
dateStr = date.getJson("exif date").getString("1"); | |
} else if (date.getJson("according to exif data") != null) { | |
dateStr = date.getJson("according to exif data").getString("1"); | |
} else if (date.getJson("accordingtoexif") != null) { | |
dateStr = date.getJson("accordingtoexif").getString("1"); | |
} else if (date.getString("exif date") != null) { | |
dateStr = date.getString("exif date"); | |
} else if (date.getString("according to exif data") != null) { | |
dateStr = date.getString("according to exif data"); | |
} else if (date.getString("accordingtoexif") != null) { | |
dateStr = date.getString("accordingtoexif"); | |
} else if (date.getString("according to exif") != null) { | |
dateStr = date.getString("according to exif"); | |
} else if (date.getList("other date", String.class) != null) { | |
dateStr = date.getList("other date", String.class).get(1); | |
} else if (date.getString("other date") != null) { | |
dateStr = date.getString("other date"); | |
} else if (date.getString("circa") != null) { | |
dateStr = date.getString("circa"); | |
} else if (date.getString("upload date") != null) { | |
dateStr = date.getString("upload date"); | |
} else if (date.getList("takenestormiz", String.class) != null) { | |
dateStr = date.getList("takenestormiz", String.class).get(0); | |
} | |
if (dateStr != null && dateStr.equals("?")) { | |
dateStr = null; | |
} | |
} | |
} | |
} | |
if (dateStr == null) { | |
dateStr = parseTextDate(page.getText("text", "")); | |
} | |
if (dateStr != null) { | |
Date date = parse(dateStr); | |
if (date != null) { | |
return date; | |
} else { | |
System.out.print("."); | |
} | |
} else { | |
System.out.print("."); | |
} | |
if (blob != null) { | |
try { | |
Metadata metadata = ImageMetadataReader.readMetadata(blob.file); | |
ExifSubIFDDirectory directory | |
= metadata.getFirstDirectoryOfType(ExifSubIFDDirectory.class); | |
Date date = directory.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL); | |
if (date == null) { | |
date = directory.getDate(ExifSubIFDDirectory.TAG_DATETIME); | |
} | |
return date; | |
} catch (Exception ignore) { | |
} | |
} | |
return null; | |
} | |
private static String parseTextDate(String text) { | |
for (Matcher matcher : List.of(Pattern.compile("date ?+= ?+([a-z{|]+)([0-9 \\-:]+)", Pattern.CASE_INSENSITIVE).matcher(text))) { | |
if (matcher.find()) { | |
return matcher.group(1); | |
} | |
} | |
return null; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment