Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save philippmuench/5a03cccfd472cb33b2a3a12058b21277 to your computer and use it in GitHub Desktop.
Save philippmuench/5a03cccfd472cb33b2a3a12058b21277 to your computer and use it in GitHub Desktop.
shiny CRISPR
## app.R ##
library(shinydashboard)
library(shiny)
library(keras)
library(deepG)
library(ggplot2)
library(dplyr)
library(DT)
library(hdf5r)
library(plotly)
library(zoo)
# crispr_gap: After what numbber of negative predictions to start new CRISPR region.
# conf_cutoff: Confidence decision threshold.
# pos_rate: What percentage of predictions must be above conf_cutoff.
# min_seq_len: Minimum sequence length.
filter_crispr <- function(states_df,
crispr_gap = 10,
conf_cutoff = 0.5,
pos_rate = 0.8,
min_seq_len = 120,
maxlen = 200) {
stopifnot(all(c("conf_CRISPR", "conf_non_CRISPR", "seq_end") %in% colnames(states_df)))
crispr_list <- list()
states_df <- states_df %>% dplyr::filter(conf_CRISPR > conf_cutoff)
states_df <- states_df[order(states_df$seq_end), ]
row_num <- nrow(states_df)
crispr_index <- 1
crispr_start <- states_df$seq_end[1]
for (i in 1:(row_num-1)) {
l_name <- paste0("CRISPR_region_", crispr_index)
current_pos <- states_df$seq_end[i]
next_pos <- states_df$seq_end[i+1]
if ((abs(current_pos - next_pos) > crispr_gap) & (i != (row_num-1))) {
index <- (states_df$seq_end >= crispr_start) & (states_df$seq_end <= current_pos)
crispr_list[[l_name]] <- states_df[index, ]
crispr_start <- next_pos
crispr_index <- crispr_index + 1
}
if (i == (row_num-1)) {
if (abs(current_pos - next_pos) <= crispr_gap) {
index <- states_df$seq_end >= crispr_start
crispr_list[[l_name]] <- states_df[index, ]
} else {
index <- (states_df$seq_end >= crispr_start) & (states_df$seq_end <= current_pos)
crispr_list[[l_name]] <- states_df[index, ]
# single sample at end
crispr_index <- crispr_index + 1
l_name <- paste0("CRISPR_region_", crispr_index)
crispr_list[[l_name]] <- states_df[nrow(states_df), ]
}
}
}
# filter by positivity rate
for (i in names(crispr_list)) {
df <- crispr_list[[i]]
seq_len <- df$seq_end[nrow(df)] - df$seq_end[1]
cov_rate <- nrow(df)/seq_len
if (cov_rate < pos_rate) {
crispr_list[[i]] <- NULL
}
}
# filter by size
for (i in names(crispr_list)) {
df <- crispr_list[[i]]
seq_len <- df$seq_end[nrow(df)] - df$seq_end[1] + 1
if (seq_len < min_seq_len) {
crispr_list[[i]] <- NULL
}
}
for (i in names(crispr_list)) {
df <- crispr_list[[i]]
df$seq_middle <- df$seq_end - (maxlen/2)
crispr_list[[i]] <- df
}
crispr_list
}
# https://f000.backblazeb2.com/file/bioinf/crispr/crispr_model.h5
model <- keras::load_model_hdf5("~/github/philippmuench/GenomeNetFinder/crispr_model.h5", compile = FALSE)
num_layers <- length(model$get_config()$layers)
layer_name <- model$get_config()$layers[[num_layers]]$name
dummy <- readChar("~/github/philippmuench/GenomeNetFinder/dummy.txt",
file.info("~/github/philippmuench/GenomeNetFinder/dummy.txt")$size)
ui <- dashboardPage(#skin = "black",
dashboardHeader(title = "CRISPR prediction", titleWidth = 350, disable = T),
dashboardSidebar(width = 350, disable = F,
box(width = 12, solidHeader = TRUE,
p(
class = "text-muted",
paste("Deep Learning CRISPR Prediction Tool.",
"Input a sequence and the model confidence",
"will be displayed."
)),
tags$hr(),
uiOutput('resetable_input'),
actionButton("run", "Predict"),
actionButton("reset_input", "Reset inputs")
),
box(width = 12, solidHeader = TRUE,
p(class = "text-muted",
paste("Funded by Priority Programme 2141. Much more than defence: the multiple functions and facets of CRISPR-Cas")
)),
box(width = 12, solidHeader = TRUE,
p(class = "text-muted",
paste("Made with deepG")),
menuItem(uiOutput("png"))
)
),
dashboardBody(
tags$head(tags$style(HTML('
/* logo */
.skin-blue .main-header .logo {
background-color: #ffffff;
}
/* logo when hovered */
.skin-blue .main-header .logo:hover {
background-color: #ffffff;
}
/* navbar (rest of the header) */
.skin-blue .main-header .navbar {
background-color: #ffffff;
}
.box-title {
color:#000000;
font-family: "Source Sans Pro","Helvetica Neue","Helvetica","Arial","sans-serif";
background:#e8e8e8
}
.box.box-solid.box-primary>.box-header {
color:#000000;
background:#e8e8e8
}
.box.box-solid.box-primary{
border-bottom-color:#e8e8e8;
border-left-color:#e8e8e8;
border-right-color:#e8e8e8;
border-top-color:#e8e8e8;
}
/* main sidebar */
.skin-blue .main-sidebar {
background-color: #e8e8e8;
}
/* control lable */
.skin-blue .control-label {
background-color: #55555;
color:#000000;
}
/* active selected tab in the sidebarmenu */
.skin-blue .main-sidebar .sidebar .sidebar-menu .active a{
background-color: #ff0000;
}
/* other links in the sidebarmenu */
.skin-blue .main-sidebar .sidebar .sidebar-menu a{
background-color: #00ff00;
color: #000000;
}
/* other links in the sidebarmenu when hovered */
.skin-blue .main-sidebar .sidebar .sidebar-menu a:hover{
background-color: #ff69b4;
}
/* toggle button when hovered */
.skin-blue .main-header .navbar .sidebar-toggle:hover{
background-color: #ff69b4;
}
/* body */
.content-wrapper, .right-side {
background-color: #ffffff;
}
'))),
fluidRow(
box(
title = "Model Confidence", width = 8, solidHeader = TRUE, status = "primary",
plotlyOutput("plot1", height = 250),
),
box(
title = "Density", width = 4, solidHeader = TRUE, status = "primary",
plotlyOutput("plot2", height = 250)),
# box(width = 12, status = "warning",
# p(class = "text-muted",
# paste("No predictions have been computed."))),
),
fluidRow(
box(title = "Configure Summary", width = 2, solidHeader = TRUE, status = "primary",
numericInput("threshold_conf", "Confidence:", 0.5, step = .1, min = 0, max = 1),
numericInput("threshold_pos_rate", "Positive rate:", 0.4, step = .1, min = 0, max = 0.99),
numericInput("threshold_min_seq_len", "Min length:", 2, step = 10, min = 1, max = 100),
numericInput("threshold_gap", "Gap:", 10, step = 10, min = 1, max = 1000)),
box(title = "Candidates", width = 6, solidHeader = TRUE, status = "primary",
dataTableOutput('table')
),
box(
title = "Detailed View", width = 4, solidHeader = TRUE, status = "primary",
plotlyOutput("plot3", height = 250),
),
)
)
)
server <- function(input, output) {
output$resetable_input <- renderUI({
times <- input$reset_input
div(id=letters[(times %% length(letters)) + 1],
textAreaInput("text", "Sequence input",
height = 150,
width = 350,
value = dummy,
placeholder = NULL),
fileInput("fasta_path", width = 350, label = "or Input a FASTA file (will ignore text box)"),
numericInput("step", "Resolution (nt):", 1, step = 10, min = 1, max = 100),
)
})
#https://f000.backblazeb2.com/file/bioinf/crispr/logo.png
output$png <- renderUI({
tags$a(img(src = "https://f000.backblazeb2.com/file/bioinf/crispr/logo.png", width = "50px"), href = "http://deepg.de", target = "_blank")
})
fastaData <- reactive({
req(input$fasta_path$datapath)
fasta_file <- microseq::readFasta(input$fasta_path$datapath) #tibble
fasta_file$Sequence[1]
})
dataSummary <- reactive({
req(dataInput())
crispr_list <- filter_crispr(states_df = dataInput(),
crispr_gap = input$threshold_gap,
conf_cutoff = input$threshold_conf,
pos_rate = input$threshold_pos_rate,
min_seq_len = input$threshold_min_seq_len)
placeholder <- rep(0, length(crispr_list))
crispr_summary_df <- data.frame(seq_len = placeholder, cov_rate = placeholder,
first_pred = placeholder, last_pred = placeholder)
count <- 1
for (i in names(crispr_list)) {
df <- crispr_list[[i]]
seq_len <- df$seq_end[nrow(df)] - df$seq_end[1] + 1
crispr_summary_df$seq_len[count] <- seq_len
crispr_summary_df$cov_rate[count] <- nrow(df)/seq_len
crispr_summary_df$first_pred[count] <- min(df$seq_middle)
crispr_summary_df$last_pred[count] <- max(df$seq_middle)
count <- count + 1
}
crispr_summary_df
})
dataInput <- eventReactive(input$run, {
withProgress(value = 0, message = 'Running predictions',
detail = 'Please wait' ,{
incProgress(amount = .25, message = "Loading sequence")
if(is.null(input$fasta_path$datapath)) {
input_sequence <- as.character(input$text) # use textarea data
} else {
input_sequence <- fastaData()
}
if (file.exists("tmp2.h5")) file.remove("tmp2.h5")
incProgress(amount = .25, message = "Inference")
deepG::writeStates(model = model,
layer_name = layer_name,
sequence = input_sequence,
round_digits = 4,
filename = "tmp2.h5",
batch.size = 500,
step = input$step)
incProgress(amount = .5, message = "Loading results")
states <- readRowsFromH5(h5_path = "tmp2.h5", complete = TRUE, getTargetPositions = TRUE)
incProgress(amount = 1, message = "Processing results")
pred <- states[[1]]
position <- states[[2]] - 1
df <- cbind(pred, position) %>% as.data.frame()
colnames(df) <- c("conf_CRISPR", "conf_non_CRISPR", "seq_end")
df$seq_middle <- df$seq_end - 100
df
})
})
output$plot1 <- renderPlotly({
p <- ggplot(data = dataInput(), aes(seq_middle, conf_CRISPR)) + geom_point(size = .1)
p <- p + xlab("sequence position") + ylab("model confidence")
p <- p + geom_hline(yintercept = 0.5, linetype = "dashed", color = "black")
p <- p + theme_classic()
p <- p + geom_line(aes(y=rollmean(conf_CRISPR, nrow(dataInput()) / 100,
na.pad = TRUE)), color = "grey50", alpha = .5, linewidth = 2)
if(!is.null(input$table_row_last_clicked)){
start <- dataSummary()[input$table_row_last_clicked,]$first_pred - 10
end <- dataSummary()[input$table_row_last_clicked,]$last_pred + 10
p <- p + geom_vline(xintercept = start, color = "red", linetype = "dashed")
p <- p + geom_vline(xintercept = end, color = "red", linetype = "dashed")
}
ggplotly(p)
})
output$plot2 <- renderPlotly({
p <- ggplot(data = dataInput(), aes(conf_CRISPR))
p <- p + geom_density()
p <- p + geom_vline(xintercept = 0.5, color = "black", linetype = "dashed")
p <- p + theme_classic()
ggplotly(p)
})
output$table <- renderDataTable(dataSummary())
output$plot3 <- renderPlotly({
req(input$table_row_last_clicked)
req(dataSummary())
req(dataInput())
start <- dataSummary()[input$table_row_last_clicked,]$first_pred - 10
end <- dataSummary()[input$table_row_last_clicked,]$last_pred + 10
p <- ggplot(data = dataInput(), aes(seq_middle, conf_CRISPR))
p <- p + xlab("sequence position") + ylab("model confidence")
p <- p + geom_vline(xintercept = start, color = "red", linetype = "dashed")
p <- p + geom_vline(xintercept = end, color = "red", linetype = "dashed")
p <- p + geom_point(size = .1)
p <- p + xlim(start, end)
p <- p + geom_hline(yintercept = 0.5, linetype = "dashed", color = "black")
p <- p + theme_classic()
ggplotly(p)
})
}
shinyApp(ui, server)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment