Created
March 16, 2022 22:31
-
-
Save philippmuench/5a03cccfd472cb33b2a3a12058b21277 to your computer and use it in GitHub Desktop.
shiny CRISPR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## app.R ## | |
library(shinydashboard) | |
library(shiny) | |
library(keras) | |
library(deepG) | |
library(ggplot2) | |
library(dplyr) | |
library(DT) | |
library(hdf5r) | |
library(plotly) | |
library(zoo) | |
# crispr_gap: After what numbber of negative predictions to start new CRISPR region. | |
# conf_cutoff: Confidence decision threshold. | |
# pos_rate: What percentage of predictions must be above conf_cutoff. | |
# min_seq_len: Minimum sequence length. | |
filter_crispr <- function(states_df, | |
crispr_gap = 10, | |
conf_cutoff = 0.5, | |
pos_rate = 0.8, | |
min_seq_len = 120, | |
maxlen = 200) { | |
stopifnot(all(c("conf_CRISPR", "conf_non_CRISPR", "seq_end") %in% colnames(states_df))) | |
crispr_list <- list() | |
states_df <- states_df %>% dplyr::filter(conf_CRISPR > conf_cutoff) | |
states_df <- states_df[order(states_df$seq_end), ] | |
row_num <- nrow(states_df) | |
crispr_index <- 1 | |
crispr_start <- states_df$seq_end[1] | |
for (i in 1:(row_num-1)) { | |
l_name <- paste0("CRISPR_region_", crispr_index) | |
current_pos <- states_df$seq_end[i] | |
next_pos <- states_df$seq_end[i+1] | |
if ((abs(current_pos - next_pos) > crispr_gap) & (i != (row_num-1))) { | |
index <- (states_df$seq_end >= crispr_start) & (states_df$seq_end <= current_pos) | |
crispr_list[[l_name]] <- states_df[index, ] | |
crispr_start <- next_pos | |
crispr_index <- crispr_index + 1 | |
} | |
if (i == (row_num-1)) { | |
if (abs(current_pos - next_pos) <= crispr_gap) { | |
index <- states_df$seq_end >= crispr_start | |
crispr_list[[l_name]] <- states_df[index, ] | |
} else { | |
index <- (states_df$seq_end >= crispr_start) & (states_df$seq_end <= current_pos) | |
crispr_list[[l_name]] <- states_df[index, ] | |
# single sample at end | |
crispr_index <- crispr_index + 1 | |
l_name <- paste0("CRISPR_region_", crispr_index) | |
crispr_list[[l_name]] <- states_df[nrow(states_df), ] | |
} | |
} | |
} | |
# filter by positivity rate | |
for (i in names(crispr_list)) { | |
df <- crispr_list[[i]] | |
seq_len <- df$seq_end[nrow(df)] - df$seq_end[1] | |
cov_rate <- nrow(df)/seq_len | |
if (cov_rate < pos_rate) { | |
crispr_list[[i]] <- NULL | |
} | |
} | |
# filter by size | |
for (i in names(crispr_list)) { | |
df <- crispr_list[[i]] | |
seq_len <- df$seq_end[nrow(df)] - df$seq_end[1] + 1 | |
if (seq_len < min_seq_len) { | |
crispr_list[[i]] <- NULL | |
} | |
} | |
for (i in names(crispr_list)) { | |
df <- crispr_list[[i]] | |
df$seq_middle <- df$seq_end - (maxlen/2) | |
crispr_list[[i]] <- df | |
} | |
crispr_list | |
} | |
# https://f000.backblazeb2.com/file/bioinf/crispr/crispr_model.h5 | |
model <- keras::load_model_hdf5("~/github/philippmuench/GenomeNetFinder/crispr_model.h5", compile = FALSE) | |
num_layers <- length(model$get_config()$layers) | |
layer_name <- model$get_config()$layers[[num_layers]]$name | |
dummy <- readChar("~/github/philippmuench/GenomeNetFinder/dummy.txt", | |
file.info("~/github/philippmuench/GenomeNetFinder/dummy.txt")$size) | |
ui <- dashboardPage(#skin = "black", | |
dashboardHeader(title = "CRISPR prediction", titleWidth = 350, disable = T), | |
dashboardSidebar(width = 350, disable = F, | |
box(width = 12, solidHeader = TRUE, | |
p( | |
class = "text-muted", | |
paste("Deep Learning CRISPR Prediction Tool.", | |
"Input a sequence and the model confidence", | |
"will be displayed." | |
)), | |
tags$hr(), | |
uiOutput('resetable_input'), | |
actionButton("run", "Predict"), | |
actionButton("reset_input", "Reset inputs") | |
), | |
box(width = 12, solidHeader = TRUE, | |
p(class = "text-muted", | |
paste("Funded by Priority Programme 2141. Much more than defence: the multiple functions and facets of CRISPR-Cas") | |
)), | |
box(width = 12, solidHeader = TRUE, | |
p(class = "text-muted", | |
paste("Made with deepG")), | |
menuItem(uiOutput("png")) | |
) | |
), | |
dashboardBody( | |
tags$head(tags$style(HTML(' | |
/* logo */ | |
.skin-blue .main-header .logo { | |
background-color: #ffffff; | |
} | |
/* logo when hovered */ | |
.skin-blue .main-header .logo:hover { | |
background-color: #ffffff; | |
} | |
/* navbar (rest of the header) */ | |
.skin-blue .main-header .navbar { | |
background-color: #ffffff; | |
} | |
.box-title { | |
color:#000000; | |
font-family: "Source Sans Pro","Helvetica Neue","Helvetica","Arial","sans-serif"; | |
background:#e8e8e8 | |
} | |
.box.box-solid.box-primary>.box-header { | |
color:#000000; | |
background:#e8e8e8 | |
} | |
.box.box-solid.box-primary{ | |
border-bottom-color:#e8e8e8; | |
border-left-color:#e8e8e8; | |
border-right-color:#e8e8e8; | |
border-top-color:#e8e8e8; | |
} | |
/* main sidebar */ | |
.skin-blue .main-sidebar { | |
background-color: #e8e8e8; | |
} | |
/* control lable */ | |
.skin-blue .control-label { | |
background-color: #55555; | |
color:#000000; | |
} | |
/* active selected tab in the sidebarmenu */ | |
.skin-blue .main-sidebar .sidebar .sidebar-menu .active a{ | |
background-color: #ff0000; | |
} | |
/* other links in the sidebarmenu */ | |
.skin-blue .main-sidebar .sidebar .sidebar-menu a{ | |
background-color: #00ff00; | |
color: #000000; | |
} | |
/* other links in the sidebarmenu when hovered */ | |
.skin-blue .main-sidebar .sidebar .sidebar-menu a:hover{ | |
background-color: #ff69b4; | |
} | |
/* toggle button when hovered */ | |
.skin-blue .main-header .navbar .sidebar-toggle:hover{ | |
background-color: #ff69b4; | |
} | |
/* body */ | |
.content-wrapper, .right-side { | |
background-color: #ffffff; | |
} | |
'))), | |
fluidRow( | |
box( | |
title = "Model Confidence", width = 8, solidHeader = TRUE, status = "primary", | |
plotlyOutput("plot1", height = 250), | |
), | |
box( | |
title = "Density", width = 4, solidHeader = TRUE, status = "primary", | |
plotlyOutput("plot2", height = 250)), | |
# box(width = 12, status = "warning", | |
# p(class = "text-muted", | |
# paste("No predictions have been computed."))), | |
), | |
fluidRow( | |
box(title = "Configure Summary", width = 2, solidHeader = TRUE, status = "primary", | |
numericInput("threshold_conf", "Confidence:", 0.5, step = .1, min = 0, max = 1), | |
numericInput("threshold_pos_rate", "Positive rate:", 0.4, step = .1, min = 0, max = 0.99), | |
numericInput("threshold_min_seq_len", "Min length:", 2, step = 10, min = 1, max = 100), | |
numericInput("threshold_gap", "Gap:", 10, step = 10, min = 1, max = 1000)), | |
box(title = "Candidates", width = 6, solidHeader = TRUE, status = "primary", | |
dataTableOutput('table') | |
), | |
box( | |
title = "Detailed View", width = 4, solidHeader = TRUE, status = "primary", | |
plotlyOutput("plot3", height = 250), | |
), | |
) | |
) | |
) | |
server <- function(input, output) { | |
output$resetable_input <- renderUI({ | |
times <- input$reset_input | |
div(id=letters[(times %% length(letters)) + 1], | |
textAreaInput("text", "Sequence input", | |
height = 150, | |
width = 350, | |
value = dummy, | |
placeholder = NULL), | |
fileInput("fasta_path", width = 350, label = "or Input a FASTA file (will ignore text box)"), | |
numericInput("step", "Resolution (nt):", 1, step = 10, min = 1, max = 100), | |
) | |
}) | |
#https://f000.backblazeb2.com/file/bioinf/crispr/logo.png | |
output$png <- renderUI({ | |
tags$a(img(src = "https://f000.backblazeb2.com/file/bioinf/crispr/logo.png", width = "50px"), href = "http://deepg.de", target = "_blank") | |
}) | |
fastaData <- reactive({ | |
req(input$fasta_path$datapath) | |
fasta_file <- microseq::readFasta(input$fasta_path$datapath) #tibble | |
fasta_file$Sequence[1] | |
}) | |
dataSummary <- reactive({ | |
req(dataInput()) | |
crispr_list <- filter_crispr(states_df = dataInput(), | |
crispr_gap = input$threshold_gap, | |
conf_cutoff = input$threshold_conf, | |
pos_rate = input$threshold_pos_rate, | |
min_seq_len = input$threshold_min_seq_len) | |
placeholder <- rep(0, length(crispr_list)) | |
crispr_summary_df <- data.frame(seq_len = placeholder, cov_rate = placeholder, | |
first_pred = placeholder, last_pred = placeholder) | |
count <- 1 | |
for (i in names(crispr_list)) { | |
df <- crispr_list[[i]] | |
seq_len <- df$seq_end[nrow(df)] - df$seq_end[1] + 1 | |
crispr_summary_df$seq_len[count] <- seq_len | |
crispr_summary_df$cov_rate[count] <- nrow(df)/seq_len | |
crispr_summary_df$first_pred[count] <- min(df$seq_middle) | |
crispr_summary_df$last_pred[count] <- max(df$seq_middle) | |
count <- count + 1 | |
} | |
crispr_summary_df | |
}) | |
dataInput <- eventReactive(input$run, { | |
withProgress(value = 0, message = 'Running predictions', | |
detail = 'Please wait' ,{ | |
incProgress(amount = .25, message = "Loading sequence") | |
if(is.null(input$fasta_path$datapath)) { | |
input_sequence <- as.character(input$text) # use textarea data | |
} else { | |
input_sequence <- fastaData() | |
} | |
if (file.exists("tmp2.h5")) file.remove("tmp2.h5") | |
incProgress(amount = .25, message = "Inference") | |
deepG::writeStates(model = model, | |
layer_name = layer_name, | |
sequence = input_sequence, | |
round_digits = 4, | |
filename = "tmp2.h5", | |
batch.size = 500, | |
step = input$step) | |
incProgress(amount = .5, message = "Loading results") | |
states <- readRowsFromH5(h5_path = "tmp2.h5", complete = TRUE, getTargetPositions = TRUE) | |
incProgress(amount = 1, message = "Processing results") | |
pred <- states[[1]] | |
position <- states[[2]] - 1 | |
df <- cbind(pred, position) %>% as.data.frame() | |
colnames(df) <- c("conf_CRISPR", "conf_non_CRISPR", "seq_end") | |
df$seq_middle <- df$seq_end - 100 | |
df | |
}) | |
}) | |
output$plot1 <- renderPlotly({ | |
p <- ggplot(data = dataInput(), aes(seq_middle, conf_CRISPR)) + geom_point(size = .1) | |
p <- p + xlab("sequence position") + ylab("model confidence") | |
p <- p + geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") | |
p <- p + theme_classic() | |
p <- p + geom_line(aes(y=rollmean(conf_CRISPR, nrow(dataInput()) / 100, | |
na.pad = TRUE)), color = "grey50", alpha = .5, linewidth = 2) | |
if(!is.null(input$table_row_last_clicked)){ | |
start <- dataSummary()[input$table_row_last_clicked,]$first_pred - 10 | |
end <- dataSummary()[input$table_row_last_clicked,]$last_pred + 10 | |
p <- p + geom_vline(xintercept = start, color = "red", linetype = "dashed") | |
p <- p + geom_vline(xintercept = end, color = "red", linetype = "dashed") | |
} | |
ggplotly(p) | |
}) | |
output$plot2 <- renderPlotly({ | |
p <- ggplot(data = dataInput(), aes(conf_CRISPR)) | |
p <- p + geom_density() | |
p <- p + geom_vline(xintercept = 0.5, color = "black", linetype = "dashed") | |
p <- p + theme_classic() | |
ggplotly(p) | |
}) | |
output$table <- renderDataTable(dataSummary()) | |
output$plot3 <- renderPlotly({ | |
req(input$table_row_last_clicked) | |
req(dataSummary()) | |
req(dataInput()) | |
start <- dataSummary()[input$table_row_last_clicked,]$first_pred - 10 | |
end <- dataSummary()[input$table_row_last_clicked,]$last_pred + 10 | |
p <- ggplot(data = dataInput(), aes(seq_middle, conf_CRISPR)) | |
p <- p + xlab("sequence position") + ylab("model confidence") | |
p <- p + geom_vline(xintercept = start, color = "red", linetype = "dashed") | |
p <- p + geom_vline(xintercept = end, color = "red", linetype = "dashed") | |
p <- p + geom_point(size = .1) | |
p <- p + xlim(start, end) | |
p <- p + geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") | |
p <- p + theme_classic() | |
ggplotly(p) | |
}) | |
} | |
shinyApp(ui, server) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment