library('TCGAbiolinks')
library('plyr')
library('devtools')
projects<- "TCGA-LUAD"
clin <- lapply(projects, function(p) {
#install UMAP from https://github.com/lmcinnes/umap | |
#install.packages("rPython") | |
umap <- function(x,n_neighbors=10,min_dist=0.1,metric="euclidean"){ | |
x <- as.matrix(x) | |
colnames(x) <- NULL | |
rPython::python.exec( c( "def umap(data,n,mdist,metric):", | |
"\timport umap" , | |
"\timport numpy", | |
"\tembedding = umap.UMAP(n_neighbors=n,min_dist=mdist,metric=metric).fit_transform(data)", |
Gene ID Transcript ID Human associated gene name Human gene stable ID Associated Gene Name | |
ENSMUSG00000064336 ENSMUST00000082387 mt-Tf | |
ENSMUSG00000064337 ENSMUST00000082388 mt-Rnr1 | |
ENSMUSG00000064338 ENSMUST00000082389 mt-Tv | |
ENSMUSG00000064339 ENSMUST00000082390 mt-Rnr2 | |
ENSMUSG00000064340 ENSMUST00000082391 mt-Tl1 | |
ENSMUSG00000064341 ENSMUST00000082392 MT-ND1 ENSG00000198888 mt-Nd1 | |
ENSMUSG00000064342 ENSMUST00000082393 mt-Ti | |
ENSMUSG00000064343 ENSMUST00000082394 mt-Tq | |
ENSMUSG00000064344 ENSMUST00000082395 mt-Tm |
The script was copied from http://genomespot.blogspot.com/2016/12/msigdb-gene-sets-for-mouse.html by Mark Ziemann.
Save the script below as convert_gmt_perline.sh
:
This function translates the human gene symbols on each line to mouse gene symbols.
#! /bin/bash | |
set -e | |
set -u | |
set -o pipefail | |
root=`pwd` | |
mkdir macs14_pbs | |
cat bam_names.txt | while read -r IP Input |
I downloaded the manifest file from GDC, but it only has the UUIDs, so I will need to convert them to TCGA barcodes.
read this https://support.bioconductor.org/p/89315/ and https://support.bioconductor.org/p/89021/
A function from the link above
library(httr)
I have downloaded the enhancer-promoter (EP) interaction data from the paper "Reconstruction of enhancer–target networks in 935 samples of human primary cells, tissues and cell lines",
and I want to create a superset of EP interactions across all cell lines. It turns out not to be that easy...
tools to use: https://github.com/billgreenwald/pgltools
#only for encode and roadmap data
mkdir Encoderoadmap_EP_interaction
--- | |
title: "02_bioc_curated_TCGA_data" | |
author: "Ming Tang" | |
date: "November 9, 2017" | |
output: html_document | |
--- | |
```{r} | |
devtools::install_github(repo = "BioinformaticsFMRP/TCGAbiolinks") | |
library(here) |
#!/bin/bash | |
printf "\n *** BIS BATCH PRIMER version 2.0 ***" | |
printf "\n\n !!! 'Primer3 & fastx-toolkit' must be installed on the system.\n\n !!! Edit parameters (e.g. sizes, Tm, and etc) before start\n\n " | |
printf "\n\n Usage : \n ./mp_primer.sh FASTA PARAMETER \n\n" | |
printf " >>> input FASTA = "$1 | |
printf " \n >>> parameters = "$2 | |
printf "\n\n\n ()()() Running... \n\n" | |
if [ -f $1 -a -f $2 ]; then |
ssh mdaris337
conda create -n R330
source activate R330
conda install -c r r=3.3.2 r-essentials