This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(shiny) | |
# I like this color palette much more than the base options | |
library(viridis) | |
shinyServer(function(input, output) { | |
# this is the actual brownian motion simulation | |
# Reactive Brownian-motion simulation. Re-evaluates whenever input$seed.val,
# input$reps or input$gens changes. Each replicate is the cumulative sum of
# input$gens standard-normal steps; replicate() collects input$reps of them.
x <- reactive({
  # Ensure reproducibility: actually seed the RNG. The previous code assigned
  # to a local variable named `set.seed`, which never touched the RNG state.
  set.seed(input$seed.val)
  replicate(input$reps, cumsum(rnorm(input$gens)))
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## plotting simulated data | |
# Attach vcd with library() so a missing package stops the script here with a
# clear error; require() would only warn and return FALSE.
library(vcd)
# Label the simulation columns, then convert to a data frame so the columns
# can be addressed by name (e.g. simresult$Root).
colnames(simresult) <- c("Root", "XY", "XO", "Xyp")
simresult <- as.data.frame(simresult)
# Colour lookup vector; the plotting call indexes it by the numeric Root value.
colors <- c("black", "red", "green", "orange")
ternaryplot( #This is the actual plotting of our data | |
simresult[,2:4], #Here I provide the file and columns to plot | |
pch = 20, #This is choosing the shape of data points (simple circle here) | |
cex = .5, #This is the size of the data points | |
col = colors[as.numeric(simresult$Root)], #Telling it to color the points |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## non-phylogenetic approach to simulating data | |
# Result container: 200 rows by 4 columns, every cell starting at 1.
simresult <- matrix(data = 1, nrow = 200, ncol = 4)
# Fixed seed so the draws below are repeatable.
set.seed(4)
# Column 2: 200 distinct values drawn without replacement from 1..498.
simresult[, 2] <- sample(498, size = 200)
for(i in 1:200){ | |
simresult[i,3] <- sample(1:(499 - simresult[i,2]), size=1) | |
simresult[i,4] <- sample(1:(500 - (simresult[i,2] + simresult[i,3])), size=1) | |
simresult[i,1] <- sample(2:4, size=1, prob = c(simresult[i,2], | |
simresult[i,3], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## phylogenetic approach to simulating data | |
library(geiger) | |
# Simulate a birth-death tree (birth rate 1, death rate 0.4) and then remove
# the extinct lineages. TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned.
# NOTE(review): `stop` is passed both choices; presumably the first ("taxa")
# is the one used -- confirm against sim.bdtree().
tree <- sim.bdtree(b = 1, d = .4, stop = c("taxa", "time"), n = 200, seed = 0,
                   extinct = TRUE)
tree <- drop.extinct(tree)
# Three-state transition rate matrix wrapped in a list; each row sums to zero.
q <- list(rbind(c(-.06, .05, .01), c(.09, -.1, .01), c(.01, .01, -.02)))
# Result container: 200 rows by 4 columns, every cell starting at 1.
simresult <- matrix(1, 200, 4)
for(i in 1:200){ | |
sim.root <- sample(1:3, prob=c(.55,.35,.1), size=1) | |
phylosim <- sim.char(phy=tree, model="discrete", par=q, | |
root=sim.root) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## simple function to take a line of text and return a version of | |
## this text that has been encoded into DNA. This function makes | |
## use of a couple of codons that are normally stop codons but in | |
## certain organisms have been coopted to code for unusual amino | |
## acids. This is how we are able to expand the alphabet to include | |
## O and U | |
wordTOcodons <- function(x){ | |
table <- matrix(c("START", 'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Sliding-window mean
#'
#' Computes the mean of `data` over consecutive windows of `window` elements,
#' advancing the window start by `step` elements each time. Only complete
#' windows are used.
#'
#' @param data Vector of values to summarise.
#' @param window Window width in elements (a single number >= 1).
#' @param step Distance between successive window starts (a single number > 0).
#' @return A numeric vector with one mean per window; `numeric(0)` when `data`
#'   is shorter than `window`.
slideFunct <- function(data, window, step){
  stopifnot(
    is.numeric(window), length(window) == 1L, window >= 1,
    is.numeric(step), length(step) == 1L, step > 0
  )
  total <- length(data)
  # Without this guard seq() below fails with an unhelpful "wrong sign in
  # 'by' argument" error when no complete window fits.
  if (total < window) {
    return(numeric(0))
  }
  # Start index of every complete window.
  spots <- seq(from = 1, to = total - window + 1, by = step)
  vapply(
    spots,
    function(start) mean(data[start:(start + window - 1)]),
    numeric(1)
  )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example series: four uniform segments drawn one after another and joined
# end to end. Segment i has n[i] draws between min[i] and max[i].
data <- unlist(Map(
  runif,
  n   = c(100000, 100000, 10000, 100000),
  min = c(0, .05, .05, 0),
  max = c(.1, .1, 1, .2)
))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# To construct a BLAST database from the assembly in the file
# conf.scaf.fa, run this in the terminal.
# It creates a nucleotide BLAST database called confscaf.
makeblastdb -in conf.scaf.fa -dbtype nucl -out confscaf | |
# Next we blast our large exons against this database. The custom tabular
# format (outfmt 6) lists the columns to report; results go to the file `info`.
# NOTE(review): the database is created as `confscaf` in the current directory
# but queried as /blastDB/confscaf -- presumably it is moved there; confirm.
# NOTE(review): options are written as `-opt=value` here but `-opt value`
# above -- confirm blastn accepts the `=` form.
blastn -query=/outputs/300+bpExons.fa -db=/blastDB/confscaf -outfmt='6 qseqid qstart sseqid sstart qlen length pident' -max_target_seqs=2 -out=info | |
# This produces the lookup table that we can then use to analyze the degree of synteny conservation.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Directory holding the parsed exon table.
setwd("~/Desktop/retrogenes/data/")
# Parsed GFF3 file that contains the lines for exons. It has no header row and
# strings are kept as character. TRUE/FALSE are spelled out because `T`/`F`
# are ordinary variables that can be reassigned.
gff <- read.csv("exons.csv", header = FALSE, as.is = TRUE)
# Keep only the big exons: rows where column 3 minus column 2 exceeds 300.
# NOTE(review): presumably columns 2 and 3 are start and end -- confirm.
gff <- gff[gff[, 3] - gff[, 2] > 300, ]
# Directory that contains the chromosomes of the genome.
setwd("~/Desktop/retrogenes/data/chromosomes")
library(ape)
# Two empty lists, one for sequences and one for their names.
seq <- name <- list()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from Bio import SeqIO # to deal with the fast files | |
# Let's pull in our exon table: one list of comma-separated fields per line.
# The `with` block closes the file handle once the table has been read.
with open('exons.csv', 'r') as datafile:
    data = [row.strip().split(',') for row in datafile]
# now let's start the process of creating all of our exon-exon sequences | |
# by first creating a list that has the name we want to assign and the LG and |