Skip to content

Instantly share code, notes, and snippets.

@MichaelChirico
Created July 6, 2020 09:57
Show Gist options
  • Select an option

  • Save MichaelChirico/4568f11c8062b9bc7207f24aaffb6ce2 to your computer and use it in GitHub Desktop.

Select an option

Save MichaelChirico/4568f11c8062b9bc7207f24aaffb6ce2 to your computer and use it in GitHub Desktop.
Translate between Java SimpleDateFormat and POSIX time format
# Lookup table used to translate between POSIX-style '%' time formats and
# Java SimpleDateFormat patterns.
#
# fread(), .N and %chin% used throughout this script come from data.table, so
# it has to be attached before anything else runs.
library(data.table)

# sources:
# https://prestodb.io/docs/current/functions/datetime.html
# https://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html
# NB: the 'posix' column uses the specifiers as listed in the Presto reference
# above (e.g. %i maps to Java minutes 'mm' and %M to the month name 'MMMM')
# NB: when multiple matches are possible (e.g. %h=%I), the higher (earlier) row
# is "preferred", because lookups take the first matching row
# NB: %r/%T are basically blocked out from being returned Java->POSIX by putting
# them at the bottom (the last two rows are skipped when the Java pattern
# letters are collected), as a way of preferring the more verbose "full" form
# NB: the following supported formats don't have an exact equivalent: %w, %x, %v
time_fmt_mapping = fread('posix,java
%Y,yyyy
%y,yy
%M,MMMM
%b,MMM
%m,MM
%d,dd
%H,HH
%k,H
%i,mm
%s,ss
%S,ss
%e,d
%W,EEEE
%a,E
%c,M
%f,SSS
%h,hh
%I,hh
%l,h
%j,DDD
%p,a
%v,ww
%w,u
%x,Y
%T,HH:mm:ss
%r,hh:mm:ss a')
# Distinct characters that can appear in a single-token Java pattern. The last
# two rows of the mapping (the composite %T / %r forms) are left out.
java_chars = sort(unique(unlist(
  strsplit(head(time_fmt_mapping[['java']], -2L), NULL)
)))

# Inputs of the script:
#   date_fmts: vector of date formats, e.g. '%Y-%m-%d' or 'yyyyMMdd'
#   to:        either to = 'java' or to = 'posix'
date_fmts = c('%Y-%m-%d', '%H:%i:%s')
to = 'java'

# A plain row-by-row find-and-replace over the mapping cannot work for
# java->posix. Take 'yyyy': it would be turned into %Y right away, but the
# later Java pattern Y would then match inside that result and be replaced
# again. No ordering of the rows avoids this, so every format is instead split
# into single characters and run-length encoded; each run of a repeated
# character can then be translated exactly once.
date_fmts_ptn = lapply(
  strsplit(date_fmts, NULL, fixed = TRUE),
  function(chars) rle(chars)
)
# Translate every format in date_fmts; the direction is chosen by `to`.
#
# Each element of date_fmts_ptn is the run-length encoding of one format
# string: ptn$values holds the characters and ptn$lengths how often each one
# is repeated consecutively. The result, date_fmts_out, is a character vector
# with one translated format per input format.
#
# Errors are raised for components without a known counterpart, for a format
# ending in a lone '%', and for an unknown `to`. A warning is raised for the
# components that only have an approximate counterpart.
if (to == 'java') {
  date_fmts_out = vapply(date_fmts_ptn, function(ptn) {
    n_comp = length(ptn$lengths)
    out = character(n_comp)
    # use while to skip around more naturally
    jj = 1L
    while (jj <= n_comp) {
      if (ptn$values[jj] == '%') {
        n_pct = ptn$lengths[jj]
        # every pair '%%' stands for one literal percent sign
        literal_pct = strrep('%', n_pct %/% 2L)
        if (n_pct %% 2L == 0L) {
          out[jj] = literal_pct
          jj = jj + 1L
        } else {
          # the unpaired '%' starts a format component and needs a letter after it
          if (jj == n_comp) stop(domain = NA, call. = FALSE,
            "Found a trailing '%' that is not followed by a format character"
          )
          posix_fmt = paste0('%', ptn$values[jj + 1L])
          java_fmt = time_fmt_mapping[posix == posix_fmt, java[1L]]
          if (is.na(java_fmt)) stop(domain = NA, call. = FALSE, gettextf(
            "Recognized POSIX component '%s', but don't know any corresponding SimpleDateTime format",
            posix_fmt
          ))
          if (posix_fmt %chin% c('%w', '%x', '%v')) warning(domain=NA, call. = FALSE, gettextf(
            "Detected time format '%s', which doesn't have an exact SimpleDateFormat equivalent; closest guess is '%s', but please inspect manually",
            posix_fmt, java_fmt
          ))
          out[jj] = paste0(literal_pct, java_fmt)
          if (ptn$lengths[jj + 1L] > 1L) {
            # e.g. '%dd': only the first 'd' belongs to the component; keep the
            # remaining copies so the next iteration treats them as literal text
            ptn$lengths[jj + 1L] = ptn$lengths[jj + 1L] - 1L
            jj = jj + 1L
          } else {
            jj = jj + 2L
          }
        }
      } else if (grepl("[a-zA-Z']", ptn$values[jj])) {
        # literal letters must be quoted in SimpleDateFormat, and a literal
        # single quote must be doubled; collect the whole adjacent run first
        JJ = jj
        while (JJ < n_comp && grepl("[a-zA-Z']", ptn$values[JJ + 1L])) JJ = JJ + 1L
        run = jj:JJ
        literal = paste(strrep(ptn$values[run], ptn$lengths[run]), collapse = '')
        escaped = gsub("'", "''", literal, fixed = TRUE)
        # a run made only of quotes is already complete once doubled; wrapping
        # it in another pair of quotes would add spurious literal quotes
        out[jj] = if (grepl('[a-zA-Z]', literal)) paste0("'", escaped, "'") else escaped
        jj = JJ + 1L
      } else {
        # any other character is copied through unchanged
        out[jj] = strrep(ptn$values[jj], ptn$lengths[jj])
        jj = jj + 1L
      }
    }
    paste(out, collapse = '')
  }, character(1L))
} else if (to == 'posix') {
  date_fmts_out = vapply(date_fmts_ptn, function(ptn) {
    n_comp = length(ptn$lengths)
    out = character(n_comp)
    in_escape = FALSE
    for (jj in seq_len(n_comp)) {
      chr = ptn$values[jj]
      n_rep = ptn$lengths[jj]
      # "unescape" escaped patterns like yyyy-MM-dd'T'HH:mm:ss
      if (chr == "'") {
        # each pair '' is one literal quote; only an unpaired quote opens or
        # closes a quoted section
        out[jj] = strrep("'", n_rep %/% 2L)
        if (n_rep %% 2L == 1L) in_escape = !in_escape
        next
      }
      # NOTE(review): unquoted letters that are not in java_chars are copied
      # through as literal text here -- confirm whether they should be rejected
      if (in_escape || !(chr %chin% java_chars)) {
        # literal text: keep every repetition; '%' must be doubled in the output
        out[jj] = strrep(if (chr == '%') '%%' else chr, n_rep)
        next
      }
      java_fmt = strrep(chr, n_rep)
      posix_fmt = time_fmt_mapping[java == java_fmt, posix[1L]]
      if (is.na(posix_fmt)) stop(domain = NA, call. = FALSE, gettextf(
        "Recognized SimpleDateFormat component '%s', but don't know any corresponding POSIX format",
        java_fmt
      ))
      if (java_fmt %chin% c('ww', 'u', 'Y')) warning(domain=NA, call. = FALSE, gettextf(
        "Detected time format '%s', which doesn't have an exact POSIX equivalent; closest guess is '%s', but please inspect manually",
        java_fmt, posix_fmt
      ))
      out[jj] = posix_fmt
    }
    paste(out, collapse = '')
  }, character(1L))
} else {
  # previously any other value silently fell through to the java->posix branch
  stop(domain = NA, call. = FALSE, gettextf(
    "Unrecognized value to = '%s'; expected 'java' or 'posix'",
    to
  ))
}
print(date_fmts_out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment