coleoguy · January 4, 2016 09:28
diff --git a/retro-lgexonFinder.R b/retro-lgexonFinder.R
 setwd("~/Desktop/retrogenes/data/")
 gff <- read.csv("exons.csv", header = F, as.is=T) #This is a parsed GFF3 file that contains the lines for exons

 # lets get big ones >300 bp

 gff <- gff[gff[, 3] - gff[, 2] > 300, ]
 setwd("~/Desktop/retrogenes/data/chromosomes") #This is the directory that contains the chromosomes of the genome

 library(ape)
 seq <- name <- list()
 LGS <- totals <- vector()

 counter <- 0
 fileName <- "NONE"
 fileName <- paste(gff[1, 1], "-2.fa", sep="")
 chromosome <- read.dna(fileName, format = 'fasta', as.character = T)  #This opens the first chromosome

 for(i in 1:nrow(gff)){
    if(gff[i, 1] != substr(fileName, 1, stop=nchar(fileName) - 5)){
      LGS <- c(LGS, fileName)
      totals <- c(totals, counter)
      fileName <- paste(gff[i, 1], "-2.fa", sep="")
      chromosome <- read.dna(fileName, format = 'fasta', as.character = T)
      print(paste("Working on:", fileName))
      counter <- 0      
    }
    counter <- counter + 1
    print(paste("exon:", counter))
    seq[[i]] <- paste(chromosome[gff[i, 2]:(gff[i, 2]+300)], collapse="")
    name[[i]] <- paste(gff[i, 1], gff[i,2])
    fileName <- paste(gff[i, 1], "-2.fa", sep="")
 }
 LGS <- c(LGS, fileName)
 totals <- c(totals, counter)
 names(totals) <- LGS
 totals
 names(seq) <- name
 write.dna(seq, format = "fasta", file = "300+bpExons.fa", colw=300)   #This saves the final file
	setwd("~/Desktop/retrogenes/data/")
	gff <- read.csv("exons.csv", header = F, as.is=T) #This is a parsed GFF3 file that contains the lines for exons

	# lets get big ones >300 bp

	gff <- gff[gff[, 3] - gff[, 2] > 300, ]
	setwd("~/Desktop/retrogenes/data/chromosomes") #This is the directory that contains the chromosomes of the genome

	library(ape)
	seq <- name <- list()
	LGS <- totals <- vector()

	counter <- 0
	fileName <- "NONE"
	fileName <- paste(gff[1, 1], "-2.fa", sep="")
	chromosome <- read.dna(fileName, format = 'fasta', as.character = T) #This opens the first chromosome

	for(i in 1:nrow(gff)){
	if(gff[i, 1] != substr(fileName, 1, stop=nchar(fileName) - 5)){
	LGS <- c(LGS, fileName)
	totals <- c(totals, counter)
	fileName <- paste(gff[i, 1], "-2.fa", sep="")
	chromosome <- read.dna(fileName, format = 'fasta', as.character = T)
	print(paste("Working on:", fileName))
	counter <- 0
	}
	counter <- counter + 1
	print(paste("exon:", counter))
	seq[[i]] <- paste(chromosome[gff[i, 2]:(gff[i, 2]+300)], collapse="")
	name[[i]] <- paste(gff[i, 1], gff[i,2])
	fileName <- paste(gff[i, 1], "-2.fa", sep="")
	}
	LGS <- c(LGS, fileName)
	totals <- c(totals, counter)
	names(totals) <- LGS
	totals
	names(seq) <- name
	write.dna(seq, format = "fasta", file = "300+bpExons.fa", colw=300) #This saves the final file