Created
July 6, 2020 09:57
-
-
Save MichaelChirico/4568f11c8062b9bc7207f24aaffb6ce2 to your computer and use it in GitHub Desktop.
Translate between Java SimpleDateFormat and POSIX time format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # sources: | |
| # https://prestodb.io/docs/current/functions/datetime.html | |
| # https://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html | |
| # NB: when multiple matches are possible (e.g. %h=%I), the higher one is "preferred" | |
| # NB: %r/%T are basically blocked out from being returned Java->Presto by putting | |
| # them at the bottom, as a way of preferring the more verbose "full" form | |
| # NB: the following supported formats don't have an exact equivalent: %w, %x, %v | |
# data.table supplies fread(), the .N symbol, %chin% and the DT[i, j] lookups
# used throughout this script; attach it explicitly so the script also runs in a
# fresh session.
library(data.table)

# Two-column lookup table: 'posix' holds a %-prefixed specifier, 'java' holds the
# SimpleDateFormat pattern it corresponds to. Row order matters: lookups take the
# first matching row, so when several rows share a value (e.g. %h and %I both map
# to 'hh') the higher row wins. The two composite rows (%T, %r) are kept last.
time_fmt_mapping = fread('posix,java
%Y,yyyy
%y,yy
%M,MMMM
%b,MMM
%m,MM
%d,dd
%H,HH
%k,H
%i,mm
%s,ss
%S,ss
%e,d
%W,EEEE
%a,E
%c,M
%f,SSS
%h,hh
%I,hh
%l,h
%j,DDD
%p,a
%v,ww
%w,u
%x,Y
%T,HH:mm:ss
%r,hh:mm:ss a')

# Distinct single characters appearing in the java patterns. The last two rows
# are excluded because they are composite patterns whose separators (':' and ' ')
# are not pattern letters.
java_chars = sort(unique(unlist(strsplit(time_fmt_mapping[1:(.N-2L)]$java, NULL))))

# Inputs to the conversion.
date_fmts = c('%Y-%m-%d', '%H:%i:%s') # vector of date formats, e.g. '%Y-%m-%d' or 'yyyyMMdd'
to = 'java' # either to = 'java' or to = 'posix'
| # need a fundamentally different approach for java->posix. take 'yyyy' for example. | |
| # the posix approach would find yyyy and match it to %Y right away. but then later | |
| # we get to the java format Y, and this gets replaced again --> %%x. oops. i don't think | |
| # there's some magical ordering of the rows that would solve this issue, either. | |
# Split every format into single characters and run-length encode them, so that a
# repeated letter such as 'yyyy' becomes one component (value 'y', length 4) that
# can be matched against the mapping table as a whole.
date_fmts_ptn = lapply(strsplit(date_fmts, NULL, fixed=TRUE), rle)

# Convert one run-length-encoded %-style format into a SimpleDateFormat pattern.
#   ptn: an rle() of the format's characters.
# Returns a length-1 character vector. Stops when a specifier has no mapping;
# warns for specifiers whose Java equivalent is only approximate.
posix_to_java = function(ptn) {
  n_comp = length(ptn$lengths)
  out = character(n_comp)
  # Letters are reserved in SimpleDateFormat and a single quote opens quoted
  # text, so literal runs of either must be emitted inside '...'.
  needs_quote = grepl("[a-zA-Z']", ptn$values)
  # use while to skip around more naturally
  jj = 1L
  while (jj <= n_comp) {
    if (ptn$values[jj] == '%') {
      n_pct = ptn$lengths[jj]
      # each '%%' pair is one literal percent sign
      literal_pct = strrep('%', n_pct %/% 2L)
      if (n_pct %% 2L == 0L) {
        out[jj] = literal_pct
        jj = jj + 1L
        next
      }
      # odd run: the final '%' introduces a specifier and needs a following character
      if (jj == n_comp) stop(domain = NA, call. = FALSE, gettextf(
        "Format ends with an unescaped '%s' that is not followed by a specifier character",
        '%'
      ))
      posix_fmt = paste0('%', ptn$values[jj + 1L])
      java_fmt = time_fmt_mapping[posix == posix_fmt, java[1L]]
      if (is.na(java_fmt)) stop(domain = NA, call. = FALSE, gettextf(
        "Recognized POSIX component '%s', but don't know any corresponding SimpleDateTime format",
        posix_fmt
      ))
      if (posix_fmt %chin% c('%w', '%x', '%v')) warning(domain = NA, call. = FALSE, gettextf(
        "Detected time format '%s', which doesn't have an exact SimpleDateFormat equivalent; closest guess is '%s', but please inspect manually",
        posix_fmt, java_fmt
      ))
      out[jj] = paste0(literal_pct, java_fmt)
      if (ptn$lengths[jj + 1L] > 1L) {
        # Only the first character of the run belongs to the specifier ('%dd' is
        # %d followed by a literal 'd'); leave the rest to be handled as literal text.
        ptn$lengths[jj + 1L] = ptn$lengths[jj + 1L] - 1L
        jj = jj + 1L
      } else {
        jj = jj + 2L
      }
    } else if (needs_quote[jj]) {
      # Gather the whole stretch of adjacent literal letters/quotes into one quoted
      # section. The look-ahead must advance with JJ, otherwise it never terminates.
      JJ = jj
      while (JJ < n_comp && needs_quote[JJ + 1L]) JJ = JJ + 1L
      literal = paste(strrep(ptn$values[jj:JJ], ptn$lengths[jj:JJ]), collapse = '')
      # inside quoted text a single quote is written as ''
      out[jj] = paste0("'", gsub("'", "''", literal, fixed = TRUE), "'")
      jj = JJ + 1L
    } else {
      out[jj] = strrep(ptn$values[jj], ptn$lengths[jj])
      jj = jj + 1L
    }
  }
  paste(out, collapse = '')
}

# Convert one run-length-encoded SimpleDateFormat pattern into a %-style format.
#   ptn: an rle() of the pattern's characters.
# Returns a length-1 character vector. Stops when a recognized pattern letter is
# repeated a number of times that has no mapping; warns for approximate matches.
#
# need a fundamentally different approach than plain find-and-replace here: each
# run of a repeated letter is looked up exactly once, so output such as %Y is
# never re-scanned and replaced a second time.
java_to_posix = function(ptn) {
  n_comp = length(ptn$lengths)
  out = character(n_comp)
  in_escape = FALSE
  for (jj in seq_len(n_comp)) {
    value = ptn$values[jj]
    n_rep = ptn$lengths[jj]
    # "unescape" escaped patterns like yyyy-MM-dd'T'HH:mm:ss
    if (value == "'") {
      # every '' pair is a literal quote; a leftover single quote toggles quoting
      out[jj] = strrep("'", n_rep %/% 2L)
      if (n_rep %% 2L == 1L) in_escape = !in_escape
      next
    }
    if (in_escape || !value %chin% java_chars) {
      # literal text: keep the full run, and escape '%' for the %-style output
      out[jj] = strrep(if (value == '%') '%%' else value, n_rep)
      next
    }
    java_fmt = strrep(value, n_rep)
    posix_fmt = time_fmt_mapping[java == java_fmt, posix[1L]]
    if (is.na(posix_fmt)) stop(domain = NA, call. = FALSE, gettextf(
      "Recognized SimpleDateFormat component '%s', but don't know any corresponding POSIX format",
      java_fmt
    ))
    if (java_fmt %chin% c('ww', 'u', 'Y')) warning(domain = NA, call. = FALSE, gettextf(
      "Detected time format '%s', which doesn't have an exact POSIX equivalent; closest guess is '%s', but please inspect manually",
      java_fmt, posix_fmt
    ))
    out[jj] = posix_fmt
  }
  paste(out, collapse = '')
}

# Any value of `to` other than 'java' selects the Java -> POSIX direction.
# vapply() guarantees a character vector even when date_fmts is empty.
if (to == 'java') {
  date_fmts_out = vapply(date_fmts_ptn, posix_to_java, character(1L))
} else {
  date_fmts_out = vapply(date_fmts_ptn, java_to_posix, character(1L))
}
print(date_fmts_out)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment