###Jill Mesirov - Broad Institute
@broadinstitute
Broad has generated >150 TBPs in a year; how was this number calculated?
###Bas Burger - BT Global Commerce
{ | |
"metadata": { | |
"name": "Fitting SciLife" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ |
loadings = pca.components_ | |
# I've omitted the code to create ind, a list of the indexes of the | |
# loadings ordered by distance from the origin. | |
plt.scatter(*loadings, alpha=0.3, label="Loadings"); | |
plt.scatter(*loadings[:, ind[:3]], c='r', marker='o', | |
s=80, linewidths=1, facecolors="none", | |
edgecolors='r', | |
label="Contributes most\nto variance"); |
from sklearn.decomposition.pca import PCA | |
pca = PCA(n_components=2) | |
# Note, data.shape = (6, 26440) | |
pca.fit(data) | |
Yd = dict() | |
Yd["400"] = pca.transform(data.ix[:,"400"]) | |
Yd["5000"] = pca.transform(data.ix[:,"5000"]) |
{ | |
"metadata": { | |
"name": "Omics Data Analysis Task" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ |
{ | |
"metadata": { | |
"name": "Confounding factors" | |
}, | |
"nbformat": 3, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "heading", |
{ | |
"metadata": { | |
"name": "Sandbox" | |
}, | |
"nbformat": 3, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", |
###Jill Mesirov - Broad Institute
@broadinstitute
Broad has generated >150 TBPs in a year, how was this number calculated?
###Bas Burger - BT Global Commerce
log_entry = { | |
'date': '2011-03-31T00:00:00', | |
'machine': 'SN167', | |
'project': '0255_A81BF6ABXX', | |
'size': 277025390592L | |
} |
"""Randomly picks out some title / sequence / quality triples | |
from a given fastq file and writes them into a new fastq file. | |
This is to generate rudimentary test data which doesn't take too long to run. | |
Usage: make_random_test_fastq.py <fastq_file> <target file size in MB> | |
""" | |
import os | |
import sys | |
from random import random |
from __future__ import with_statement | |
import matplotlib.pyplot as plt | |
plt.rc('xtick', labelsize='x-small') | |
import yaml | |
def convert(yaml_file): | |
"""Convert the data in a yaml file generated by count_barcodes.py to a | |
.dat file with space-separated sorted fractions of barcode distribution. | |
""" |