Skip to content

Instantly share code, notes, and snippets.

@tomsing1
Created February 7, 2022 02:28
Show Gist options
  • Save tomsing1/da0181b966e56e5fe429ccdddb3a6ef9 to your computer and use it in GitHub Desktop.
Save tomsing1/da0181b966e56e5fe429ccdddb3a6ef9 to your computer and use it in GitHub Desktop.
Reading and writing a SQLite database in R with the dm R package
library(DiagrammeR) # must be v 1.0.6.1 https://github.com/cynkra/dm/issues/823
library(dm)
library(RSQLite)
# download and decompress the chinook example SQLite database
zip_file <- tempfile(fileext = ".zip")
download.file(
"https://www.sqlitetutorial.net/wp-content/uploads/2018/03/chinook.zip",
destfile = zip_file)
chinook <- unzip(zip_file, exdir = tempdir())
# Connect to the chinook SQLite database and enable extended types to
# automatically map e.g. dates to the correct R types
con <- dbConnect(SQLite(), chinook, extended_types = TRUE)
# take a peek at the invoices tables, which contains a date column
tbl(con, "invoices")
# keys can only be imported from Postgres and SQL server databases
dm <- dm_from_src(con, learn_keys = FALSE)
# define primary and foreign keys
dm <- dm %>%
dm_add_pk(tracks, TrackId) %>%
dm_add_pk(media_types, MediaTypeId) %>%
dm_add_pk(genres, GenreId) %>%
dm_add_pk(playlists, PlaylistId) %>%
dm_add_pk(playlist_track, c(TrackId, PlaylistId)) %>%
dm_add_pk(artists, ArtistId) %>%
dm_add_pk(invoices, InvoiceId) %>%
dm_add_pk(invoice_items, InvoiceLineId) %>%
dm_add_pk(albums, AlbumId) %>%
dm_add_pk(customers, CustomerId) %>%
dm_add_pk(employees, EmployeeId) %>%
dm_add_fk(tracks, MediaTypeId, media_types) %>%
dm_add_fk(tracks, AlbumId, albums) %>%
dm_add_fk(tracks, GenreId, genres) %>%
dm_add_fk(playlist_track, PlaylistId, playlists) %>%
dm_add_fk(playlist_track, TrackId, tracks) %>%
dm_add_fk(albums, ArtistId, artists) %>%
dm_add_fk(invoice_items, InvoiceId, invoices) %>%
dm_add_fk(invoices, CustomerId, customers) %>%
dm_add_fk(invoice_items, TrackId, tracks, ref_columns = "TrackId") %>%
dm_add_fk(customers, SupportRepId, employees, ref_columns = "EmployeeId") %>%
dm_add_fk(employees, ReportsTo, employees, ref_columns = "EmployeeId") %>%
dm_select_tbl(-sqlite_sequence, -sqlite_stat1)
# draw the entity relationship diagram
dm_draw(dm)
# export the data model to a new SQLite database
temp_file <- tempfile(fileext = ".db")
destination_db <- DBI::dbConnect(RSQLite::SQLite(), temp_file)
copy_dm_to(destination_db, dm, temporary = FALSE)
# disconnect from the old and new databases
dbDisconnect(con)
dbDisconnect(destination_db)
# reconnect to the new database
con <- dbConnect(
SQLite(),
temp_file,
extended_types = TRUE)
# take a peek at the invoices table (which contains dates)
tbl(con, "invoices")
# create a data model from the new database - again, keys are lost
dm2 <- dm_from_src(con, learn_keys = FALSE)
# add the keys back and draw the diagram
dm2 %>%
dm_add_pk(tracks, TrackId) %>%
dm_add_pk(media_types, MediaTypeId) %>%
dm_add_pk(genres, GenreId) %>%
dm_add_pk(playlists, PlaylistId) %>%
dm_add_pk(playlist_track, c(TrackId, PlaylistId)) %>%
dm_add_pk(artists, ArtistId) %>%
dm_add_pk(invoices, InvoiceId) %>%
dm_add_pk(invoice_items, InvoiceLineId) %>%
dm_add_pk(albums, AlbumId) %>%
dm_add_pk(customers, CustomerId) %>%
dm_add_pk(employees, EmployeeId) %>%
dm_add_fk(tracks, MediaTypeId, media_types) %>%
dm_add_fk(tracks, AlbumId, albums) %>%
dm_add_fk(tracks, GenreId, genres) %>%
dm_add_fk(playlist_track, PlaylistId, playlists) %>%
dm_add_fk(playlist_track, TrackId, tracks) %>%
dm_add_fk(albums, ArtistId, artists) %>%
dm_add_fk(invoice_items, InvoiceId, invoices) %>%
dm_add_fk(invoices, CustomerId, customers) %>%
dm_add_fk(invoice_items, TrackId, tracks, ref_columns = "TrackId") %>%
dm_add_fk(customers, SupportRepId, employees, ref_columns = "EmployeeId") %>%
dm_add_fk(employees, ReportsTo, employees, ref_columns = "EmployeeId") %>%
dm_draw()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment