Skip to content

Instantly share code, notes, and snippets.

@pagetronic
Last active October 4, 2024 10:22
Show Gist options
  • Save pagetronic/39b5cf1b794b79c69846cae4afa176d1 to your computer and use it in GitHub Desktop.
Save pagetronic/39b5cf1b794b79c69846cae4afa176d1 to your computer and use it in GitHub Desktop.
Parse complex wikiSyntax date
package live.page.utils;
import com.drew.imaging.ImageMetadataReader;
import com.drew.metadata.Metadata;
import com.drew.metadata.exif.ExifSubIFDDirectory;
import live.page.hubd.blobs.Blob;
import live.page.hubd.system.json.Json;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Best-effort extraction of a date from wiki-style page data.
 *
 * <p>{@link #cleanDate(Json, Blob)} looks, in order, at the page's
 * {@code data.information.date} entry, at a {@code date = ...} assignment in the page text,
 * and finally at the EXIF metadata of the attached blob. {@link #parse(String)} turns a
 * loosely formatted date string into a {@link Date}.
 *
 * <p>All parsing uses freshly created {@link SimpleDateFormat} instances, so the static
 * methods can be called from several threads ({@code SimpleDateFormat} itself is not
 * thread-safe). Dates are interpreted in the JVM default time zone.
 */
public class WikiCommonDateParser {

    /**
     * Patterns tried in order, most specific first. {@link SimpleDateFormat} only needs to
     * match a prefix of the text, so the short patterns at the end act as catch-alls.
     */
    private static final List<String> DATE_PATTERNS = List.of(
            "yyyy-MM-dd'T'HH:mm:ss",
            "yyyy-MM-dd HH:mm:ss",
            "yyyy-MM-dd HH:mm",
            "yyyy-MM-dd",
            "yyyy-MM",
            "MMMM d, yyyy",
            "MMMM yyyy",
            "yyyy"
    );

    /**
     * Matches {@code date = <prefix><digits>} in page text: group 1 is a run of letters,
     * '{' and '|' (for example a template opener), group 2 is the following run of digits,
     * spaces, '-' and ':'.
     */
    private static final Pattern TEXT_DATE_PATTERN =
            Pattern.compile("date ?+= ?+([a-z{|]+)([0-9 \\-:]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Parses a loosely formatted date string.
     *
     * <p>The string is first tried as-is against every known pattern. If none matches, the
     * trimmed string is retried with progressively more characters removed from its end and
     * from its start, so a date surrounded by other text can still be found.
     *
     * @param date the text to parse; may be {@code null}
     * @return the first date that could be parsed, or {@code null} when {@code date} is
     *         {@code null} or no pattern matches
     */
    public static Date parse(String date) {
        if (date == null) {
            return null;
        }
        SimpleDateFormat[] formats = newFormats();

        // First pass: the untouched input.
        for (SimpleDateFormat format : formats) {
            Date parsed = tryParse(format, date);
            if (parsed != null) {
                return parsed;
            }
        }

        // Second pass: strip surrounding noise, always leaving a few characters to parse.
        String trimmed = date.trim();
        for (int i = 1; i < trimmed.length() - 4; i++) {
            // NOTE(review): this drops i + 1 trailing characters but only i leading ones.
            // Parsing already ignores trailing text, so the tail-stripped attempt is likely
            // redundant; it is kept to preserve the original attempt order.
            String withoutTail = trimmed.substring(0, trimmed.length() - (1 + i));
            String withoutHead = trimmed.substring(i);
            for (SimpleDateFormat format : formats) {
                Date parsed = tryParse(format, withoutTail);
                if (parsed == null) {
                    parsed = tryParse(format, withoutHead);
                }
                if (parsed != null) {
                    return parsed;
                }
            }
        }
        return null;
    }

    /**
     * Determines the date of a page.
     *
     * <p>Sources are consulted in this order: the {@code data.information.date} entry of
     * {@code page}, a {@code date = ...} assignment in the page's {@code text}, and finally
     * the EXIF original/modification timestamp of {@code blob}. A {@code "."} is printed to
     * standard output whenever the textual sources did not yield a parseable date.
     *
     * @param page the page document to inspect
     * @param blob the associated file used for the EXIF fallback; may be {@code null}
     * @return the resolved date, or {@code null} if no source provided one
     */
    public static Date cleanDate(Json page, Blob blob) {
        String dateStr = readInformationDate(page);
        if (dateStr == null) {
            dateStr = parseTextDate(page.getText("text", ""));
        }
        if (dateStr != null) {
            Date parsed = parse(dateStr);
            if (parsed != null) {
                return parsed;
            }
        }
        // Progress marker: textual sources failed, falling back to EXIF.
        System.out.print(".");
        return readExifDate(blob);
    }

    /** Creates one formatter per pattern; month names are always matched in English. */
    private static SimpleDateFormat[] newFormats() {
        SimpleDateFormat[] formats = new SimpleDateFormat[DATE_PATTERNS.size()];
        for (int i = 0; i < formats.length; i++) {
            formats[i] = new SimpleDateFormat(DATE_PATTERNS.get(i), Locale.ENGLISH);
        }
        return formats;
    }

    /** Parses a prefix of {@code text}; returns {@code null} instead of throwing on failure. */
    private static Date tryParse(SimpleDateFormat format, String text) {
        return format.parse(text, new ParsePosition(0));
    }

    /** Reads the raw date string from {@code data.information.date}, or {@code null}. */
    private static String readInformationDate(Json page) {
        Json infos = null;
        try {
            infos = page.getJson("data").getJson("information");
        } catch (Exception ignored) {
            // Best effort: a page without a usable "data" object has no information block.
        }
        if (infos == null || infos.get("date") == null) {
            return null;
        }
        String plain = infos.getString("date");
        if (plain != null) {
            return plain;
        }
        Json date = infos.getJson("date");
        if (date == null) {
            return null;
        }
        String templated = readTemplateDate(date);
        // "?" marks an unknown date.
        return "?".equals(templated) ? null : templated;
    }

    /**
     * Picks the date value out of a structured date object. Keys are checked in a fixed
     * priority order and the first key that is present decides the result, even if its value
     * turns out to be {@code null}.
     */
    private static String readTemplateDate(Json date) {
        String value = date.getString("isostring");
        if (value != null) {
            return value;
        }
        if (date.containsKey("isodate")) {
            return date.getString("isodate");
        }
        Json nested = firstJson(date, "taken on", "taken in");
        if (nested != null) {
            return nested.getString("1");
        }
        value = date.getString("taken on");
        if (value != null) {
            return value;
        }
        value = date.getString("taken o");
        if (value != null) {
            // NOTE(review): removes every 'n' from the value, not only a leading one;
            // presumably compensates for a mis-split "taken on" key. Confirm against data.
            return value.replace("n", "");
        }
        value = firstString(date, "taken in", "original upload date", "dtz");
        if (value != null) {
            return value;
        }
        nested = firstJson(date, "exif date", "according to exif data", "accordingtoexif");
        if (nested != null) {
            return nested.getString("1");
        }
        value = firstString(date,
                "exif date", "according to exif data", "accordingtoexif", "according to exif");
        if (value != null) {
            return value;
        }
        if (date.getList("other date", String.class) != null) {
            // NOTE(review): assumes the list holds at least two entries; verify.
            return date.getList("other date", String.class).get(1);
        }
        value = firstString(date, "other date", "circa", "upload date");
        if (value != null) {
            return value;
        }
        if (date.getList("takenestormiz", String.class) != null) {
            return date.getList("takenestormiz", String.class).get(0);
        }
        return null;
    }

    /** Returns the first non-null string stored under any of {@code keys}, or {@code null}. */
    private static String firstString(Json source, String... keys) {
        for (String key : keys) {
            String value = source.getString(key);
            if (value != null) {
                return value;
            }
        }
        return null;
    }

    /** Returns the first non-null nested object stored under any of {@code keys}, or {@code null}. */
    private static Json firstJson(Json source, String... keys) {
        for (String key : keys) {
            Json value = source.getJson(key);
            if (value != null) {
                return value;
            }
        }
        return null;
    }

    /** Reads the EXIF original (or, failing that, modification) timestamp of {@code blob}. */
    private static Date readExifDate(Blob blob) {
        if (blob == null) {
            return null;
        }
        try {
            Metadata metadata = ImageMetadataReader.readMetadata(blob.file);
            ExifSubIFDDirectory directory
                    = metadata.getFirstDirectoryOfType(ExifSubIFDDirectory.class);
            if (directory == null) {
                return null;
            }
            Date original = directory.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL);
            return original != null
                    ? original
                    : directory.getDate(ExifSubIFDDirectory.TAG_DATETIME);
        } catch (Exception ignored) {
            // Best effort: an unreadable or unsupported file simply has no EXIF date.
            return null;
        }
    }

    /**
     * Extracts the digits of the first {@code date = ...} assignment found in {@code text}.
     *
     * @return the digit run (group 2 of the pattern) of the first match that contains at
     *         least one digit, trimmed; {@code null} if there is none
     */
    private static String parseTextDate(String text) {
        if (text == null) {
            return null;
        }
        Matcher matcher = TEXT_DATE_PATTERN.matcher(text);
        while (matcher.find()) {
            // Group 1 is only the template/letter prefix; the date itself is group 2.
            String candidate = matcher.group(2).trim();
            if (candidate.chars().anyMatch(Character::isDigit)) {
                return candidate;
            }
        }
        return null;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment