Last active
July 16, 2021 08:42
-
-
Save alekrutkowski/a0d53d05e5cb890ef385bcc6f3fc55ca to your computer and use it in GitHub Desktop.
Parse and visualise Oracle metadata SQL file obtained via "Quick DDL > Save to File..." in R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Parse Oracle metadata SQL file obtained via "Quick DDL > Save to File..." | |
| # (see https://stackoverflow.com/a/14262027) | |
| # in Oracle SQL Developer (tested with version 19.2.1.247.2212) | |
| # into R code which builds a list of data.tables with 0 length columns | |
| # which can be later visualised via autoschema::schema -- see | |
| # the example: | |
| # r_code <- parseQDDL('exported.sql') ## the top-level function defined below | |
| # library(autoschema) ## it can be obtained from https://github.com/alekrutkowski/autoschema | |
| # schema(eval(parse(text=r_code))) ## see the graph or... | |
| # gv_code <- schema(eval(parse(text=r_code)), output_type='gv') ## ...produce GraphViz code which can be further edited | |
| # cat(gv_code, file='exported.gv') ## e.g. in https://dreampuf.github.io/GraphvizOnline and rendered with different engines | |
| parseQDDL <- function(filename) { | |
| code_lines <- | |
| readLines(filename) | |
| r_code <- | |
| Reduce(parseLine | |
| ,code_lines | |
| ,list(is_table=FALSE | |
| ,is_first_col=FALSE | |
| ,r_code=""))$r_code | |
| paste0('list(',sub(',$',"",r_code),')') # returns a string (character vector of length 1) | |
| } | |
| # Helpers: | |
| parseLine <- function(previous_code_list, code_line) { | |
| code_list <- { | |
| if (isTblStart(code_line)) | |
| list(is_table=TRUE | |
| ,is_first_col=TRUE | |
| ,r_code=parseTblStart(code_line)) | |
| else if (previous_code_list$is_table && code_line!='(') | |
| `if`(isTblEnd(code_line) | |
| ,list(is_table=FALSE | |
| ,is_first_col=TRUE | |
| ,r_code='),') | |
| ,# not table end: | |
| list(is_table=TRUE | |
| ,is_first_col=FALSE | |
| ,r_code=paste0(`if`(previous_code_list$is_first_col,"",',') | |
| ,parseCol(code_line)))) | |
| else list(is_table=previous_code_list$is_table | |
| ,is_first_col=previous_code_list$is_first_col | |
| ,r_code="") | |
| } | |
| code_list$r_code <- | |
| paste0(previous_code_list$r_code | |
| ,code_list$r_code) | |
| code_list | |
| } | |
| isTblStart <- function(code_line) | |
| grepl('NOPARALLELCREATE .+\\.',code_line) | |
| parseTblStart <- function(code_line) | |
| paste0(trimws(sub('NOPARALLELCREATE .+\\.(.*)','\\1',code_line)) | |
| ,'=data.table(') | |
| isTblEnd <- function(code_line) | |
| code_line==') ' | |
| parseCol <- function(code_line) { | |
| colname <- | |
| sub('..([^ ]+).*','\\1',code_line) | |
| sql_type <- | |
| sub('..([^ ]+) (VARCHAR|DATE|NUMBER\\(.+\\)).*' | |
| ,'\\2' | |
| ,code_line) | |
| NUMBER <- function(x,y) | |
| if (y<=0) 'integer(0)' else 'numeric(0)' | |
| VARCHAR <- | |
| 'character(0)' | |
| DATE <- | |
| 'as.Date(numeric(0),origin="1899-12-30")' | |
| paste0(colname,'=' | |
| ,eval(parse(text=sql_type))) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment