Wouter De Coster wdecoster

🐗

Fork me, and then just push me, until I get your, contribution

Bioinformatics postdoc using short and long read sequencing in neurodegenerative disorders at Rademakers Lab

wdecoster / PCAonModules.R

Last active July 29, 2016 11:44

	library(ggfortify)
	GenesAndModules <- as.data.frame(t(rbind(datExpr, module=moduleColors)))
	modules = unique(GenesAndModules[,c("module")])
	for (mod in modules) {
	moduledata <- subset(GenesAndModules, module == mod)
	moduledata <- t(as.data.frame(lapply(moduledata[, !names(moduledata) %in% c("symbol", "module")], as.numeric)))

	Traits$threegroups <- as.factor(Traits$threegroups)
	levels(Traits$threegroups) <- c("CON", "uPAT", "kPAT")
	pca <- autoplot(prcomp(moduledata), data = Traits, colour='threegroups', frame = TRUE, frame.type = 'norm') + ggtitle(mod)

wdecoster / genevenn.py

Created August 9, 2016 15:46

	#Make venn diagram out of two or three lists
	#wdecoster

	import sys, os, collections, matplotlib, subprocess
	matplotlib.use('Agg')
	from matplotlib_venn import venn3, venn2
	import matplotlib.pyplot as plt

	if '-h=1' in sys.argv:
	header=True

wdecoster / addrefnucl.py

Created August 23, 2016 14:15

	#wdecoster

	import sys, os
	from Bio import SeqIO

	def createDict(tsvfile):
	'''Input is a tsv file and returned is a dictionary with key=chromosome and value=list of tuples with positions'''
	if os.path.isfile(tsvfile):
	chrdict = { 'chr1' : [], 'chr2' : [], 'chr3' : [], 'chr4' : [], 'chr5' : [], 'chr6' : [],
	'chr7' : [], 'chr8' : [], 'chr9' : [], 'chr10' : [],'chr11' : [], 'chr12' : [],

wdecoster / getGeneSummary.py

Last active August 29, 2016 21:29

	from Bio import Entrez
	Entrez.email = "youremail@example.com" # Always tell NCBI who you are
	import sys

	with open(sys.argv[1]) as genefile:
	genes = [line.strip() for line in genefile.readlines() if not line == ""]

	for gene in genes:
	handle = Entrez.esearch(db='gene', term=gene + '[sym] AND "Homo sapiens"[porgn]')
	result = Entrez.read(handle)

wdecoster / gettingInFrameStopcodons.py

Last active October 13, 2016 15:00

	import sys
	from Bio import SeqIO

	def findstart(sequence):
	"""
	Takes a sequence, locates the start codon, returns the rest of the sequence split by codons
	"""
	try: #We are not sure a start codon is present, therefore I use the try-except construct to catch potential errors without crashing the script
	start = sequence.index("ATG") #the string method .index() returns the first position of "ATG" if any is present, if not it gives a ValueError
	pseudoCDS = sequence[start:] #Create a new string starting from the start codon

wdecoster / renameCountIdentifiers.py

Last active November 13, 2016 19:42

	#Convert gene identifiers in a countfile to those mined from a gtf file, swapping gene_id to gene_name attribute
	#Execute as `python renameCountIdentifiers.py yourannotation.gtf yourcountfile.txt`
	#Question for which this was written: https://www.biostars.org/p/221278/
	#Did I mention I like dict comprehensions, thereby sacrificing code readability?
	#wdecoster

	import sys

	def gtf2dict(gtfF):
	"""

wdecoster / FilterFasta.py

Last active November 25, 2016 14:33

Written for https://www.biostars.org/p/223495

	import sys, os, glob
	from Bio import SeqIO

	def getfiles():
	files = glob.glob("*.csv") + glob.glob(".tfa")
	print("Found {} files to filter".format(len(files)))
	return(files)

	def filter(inputfile):
	countAll = 0

wdecoster / DEA.R

Last active December 9, 2016 17:26

	#!/usr/bin/Rscript
	#Script to automate differential expression analysis using one of the following R algorithms: DESeq2, edgeR, DEXSeq or limma
	#Based on manuals, pieces of code found on the internet and helpful comments of colleagues
	###Required input is:###
	#1) Either a matrix of counts (features*samples) with features (genes) on lines and samples on columns OR a directory of bam files to use featureCounts on
	#2) A sampleInfo File matching the samples containing at least the fields 'condition' and 'gender'. Reference level is 'CON'
	######

	#Author: wdecoster
	#Email: wouter.decoster AT molgen DOT vib-ua DOT be

wdecoster / Script2Extract.py

Last active December 23, 2016 14:01

	#Usage: python Script2Extract.py valuesfile.txt intervalsfile.txt
	#Written for https://www.biostars.org/p/228655/
	#For now unclear whether intervals have to be inclusive or exclusive begin and end position
	#wdecoster

	import sys

	with open(sys.argv[1]) as data:
	#datadict = {line.split('\t')[1] : (line.split('\t')[2], line.strip().split('\t')[3]) for line in data if not line == ""} #Create a dictionary mapping the position to the values
	datadict = {}

wdecoster / PlotTileDups.R

Last active January 10, 2017 15:22

	library(ggplot2)
	dups = read.table("coordinates.duplicates1")
	names(dups) <- c("X", "Y")
	uniq = read.table("coordinates.unique1")
	names(uniq) <- names(dups)
	df <- rbind(cbind(group= "unique", uniq), cbind(group= "dups", dups))

	scatter <-ggplot(data=df, aes(x=X, y=Y)) + geom_point(aes(colour = group), size=0.1)
	ggsave(file="DuplicatesNextSeq_Scatter.jpeg", scatter)