Created
May 9, 2018 19:28
-
-
Save muschellij2/d612245564bde92bc422b223e2b698b0 to your computer and use it in GitHub Desktop.
Consistency checks for CDS classes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(didactr) | |
library(pdftools) | |
library(dplyr) | |
library(httr) | |
library(googledrive) | |
library(broom) | |
library(tidyr) | |
# Count the pages of a PDF file.
#
# file: path to a PDF. May be empty (length 0) or NA, which is how the rest of
#       this script marks a lesson that has no PDF.
# Returns the page count reported by pdf_info(), or NA_integer_ when there is
# no file to inspect.
n_pdf_pages = function(file) {
  # NA is not a readable path, so it must never be handed to pdf_info()
  if (length(file) == 0 || anyNA(file)) {
    return(NA_integer_)
  }
  pdf_info(file)$pages
}
# Directory that holds the resources referenced by the manuscript
res_path = file.path("manuscript", "resources")

# get manuscript md files and derive the lesson names from them.
# The dot is escaped so only a literal ".md" suffix matches, the same pattern
# that is used below to strip the extension.
manuscript_files = list.files(
  path = "manuscript",
  pattern = "[.]md$",
  full.names = TRUE
)
man_stubs = sub("[.]md$", "", basename(manuscript_files))

# md file has highest precedence: one row per markdown file.
# tibble() replaces the deprecated data_frame().
df = tibble(lesson = man_stubs, md_file = manuscript_files)
# Extract the slide-deck ID linked from each markdown file.
#
# Lines containing "[Slides]" / "[slides]" are searched for an http link; the
# ID is the last path component of that URL once a trailing "/edit" is removed.
# The result is always exactly one string per file:
#   - NA_character_ when the file has no slides link,
#   - the first ID (with a warning) when several different IDs are found, so
#     that df$id stays a plain character column instead of becoming a list.
df$id = vapply(df$md_file, function(fname) {
  x = readLines(fname, warn = FALSE)
  x = grep(x, pattern = "\\[(S|s)lides\\]", value = TRUE)
  x = sub(".*\\((http.*)\\).*", "\\1", x)
  x = unlist(lapply(x, function(r) parse_url(r)$path))
  x = sub("/edit$", "", x)
  x = basename(x)
  x = unique(x)
  if (length(x) == 0) {
    return(NA_character_)
  }
  if (length(x) > 1) {
    warning(paste0("Multiple sheets identified! Please check ",
                   fname))
    # keep a single ID so every lesson maps to one value
    x = x[1]
  }
  x
}, FUN.VALUE = character(1))
# Warn when the same slide ID is referenced by more than one markdown file.
# NA marks a lesson without a slides link, so NAs are excluded: several
# lessons lacking a link are not duplicates of one another.
if (anyDuplicated(df$id[!is.na(df$id)]) > 0) {
  dup_df = df %>%
    filter(!is.na(id)) %>%
    group_by(id) %>%
    add_tally() %>%
    filter(n > 1) %>%
    ungroup()
  warning("Duplicated IDs are present! MD files are off")
  print(dup_df)
}
# Collect the image tags `![alt](images/...)` of every markdown file.
# Each file is read once; both pieces of the tag are then taken from the same
# set of matching lines.
image_tags = lapply(df$md_file, function(fname) {
  x = readLines(fname, warn = FALSE)
  grep(x, pattern = "!\\[.*\\]\\((images.*)\\)", value = TRUE)
})

# The pattern is anchored to the whole line so that any text surrounding the
# tag is dropped rather than left attached to the extracted value.
# image_links: the text between the square brackets of each tag
image_links = lapply(image_tags, function(x) {
  sub(x, pattern = "^.*!\\[(.*)\\]\\((images.*)\\).*$", replacement = "\\1")
})
# images: the path between the parentheses of each tag
images = lapply(image_tags, function(x) {
  sub(x, pattern = "^.*!\\[.*\\]\\((images.*)\\).*$", replacement = "\\1")
})
# TRUE for a lesson when every image path it references exists under res_path.
# A lesson that references no images yields TRUE, because all() of an empty
# vector is TRUE. vapply() guarantees a logical column even when df has no rows.
df$all_images_exist = vapply(images, function(x) {
  all(file.exists(file.path(res_path, x)))
}, FUN.VALUE = logical(1))
# Fetch the Drive metadata of every slide deck and attach it to df.
# NA (a lesson without a slides link) is not a valid file id, and repeated ids
# need only one lookup, so both are removed before calling drive_get().
slide_ids = unique(df$id[!is.na(df$id)])
drive_info = drive_get(id = slide_ids)
if (nrow(drive_info) > 0) {
  drive_info = drive_info %>%
    rename(gs_name = name)
  # modification time as recorded in the Drive resource; NA when absent so
  # the column is always character
  drive_info$mod_time = vapply(drive_info$drive_resource, function(x) {
    stamp = x$modifiedTime
    if (is.null(stamp)) NA_character_ else as.character(stamp)
  }, FUN.VALUE = character(1))
  drive_info = drive_info %>%
    select(-drive_resource)
  df = left_join(df, drive_info, by = "id")
  df = distinct(df)
}
# Folder holding the per-lesson image directories
path = file.path("manuscript", "resources", "images")
# Folder holding the scripts
script_path = "scripts"

# Create whichever of the two folders is not there yet
invisible(lapply(
  Filter(Negate(dir.exists), c(path, script_path)),
  dir.create,
  showWarnings = FALSE,
  recursive = TRUE
))
# Each lesson keeps its images in <path>/<lesson>
df = mutate(df, img_dir = file.path(path, lesson))

# naming conventions for the images folders: the image folders currently on
# disk, each named by its own path
img_dirs = list.dirs(path = path, full.names = TRUE, recursive = FALSE)
img_dirs = setNames(img_dirs, img_dirs)

# if img_dir doesn't exist, then create one
bad_img_dir = !(df$img_dir %in% img_dirs)
if (any(bad_img_dir)) {
  sapply(df$img_dir[bad_img_dir], function(new_dir) {
    dir.create(new_dir, recursive = TRUE, showWarnings = FALSE)
  })
}
# Image folders on disk that no markdown file accounts for are reported, one
# per line, after a warning
bad_img_dir = !is.element(img_dirs, df$img_dir)
if (sum(bad_img_dir) > 0) {
  warning("An image directory exists but doesn't correspond to a ",
          "lesson. Possible naming inconsistency. Possible:")
  cat(img_dirs[bad_img_dir], sep = "\n")
}
# Check if an image folder has a PDF.
# For every lesson folder this records the full path of its PDF, or
# NA_character_ when the folder holds none. When several PDFs are present only
# the first is kept and a warning names the folder concerned.
df$pdf = vapply(df$img_dir, function(x) {
  # anchored so only files whose name ends in ".pdf" are picked up
  pdfs = list.files(pattern = "[.]pdf$",
                    path = x,
                    full.names = TRUE)
  if (length(pdfs) == 0) {
    return(NA_character_)
  }
  if (length(pdfs) > 1) {
    # report the lesson folder (x), not the shared images root
    warning(paste0(x, " had more than one PDF! ",
                   "Only grabbing first"))
    pdfs = pdfs[1]
  }
  pdfs
}, FUN.VALUE = character(1))
# Check the number of pages of the pdf to cross-ref with the pngs.
# Lessons without a PDF carry NA in df$pdf; they get NA pages here instead of
# having the NA path passed on to the page counter.
df$pdf_pages = vapply(df$pdf, function(pdf_file) {
  if (is.na(pdf_file)) {
    return(NA_integer_)
  }
  as.integer(n_pdf_pages(pdf_file))
}, FUN.VALUE = integer(1))

# list out the pngs of the folder (file names only, anchored so that only
# names ending in ".png" count)
png_names = lapply(df$img_dir, function(x) {
  list.files(pattern = "[.]png$", path = x)
})
# number of pngs per lesson; lengths() always returns an integer vector
df$n_pngs = lengths(png_names)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment