###Jill Mesirov - Broad Institute
@broadinstitute
Broad has generated >150 TBPs in a year. How was this number calculated?
###Bas Burger - BT Global Commerce
| """Solve 'Towers of Hanoi'""" | |
| import pylab as p; | |
| import mpl_toolkits.mplot3d.axes3d as p3; | |
| def solve(g,n): | |
| X = [sum(g[0])] | |
| Y = [sum(g[1])] | |
| Z = [sum(g[2])] | |
| moved = 0 |
| from __future__ import with_statement | |
| import matplotlib.pyplot as plt | |
| plt.rc('xtick', labelsize='x-small') | |
| import yaml | |
| def convert(yaml_file): | |
| """Convert the data in a yaml file generated by count_barcodes.py to a | |
| .dat file with space-separated sorted fractions of barcode distribution. | |
| """ |
| """Randomly picks out some title / sequence / quality triples | |
| from a given fastq file and writes them into a new fastq file. | |
| This is to generate rudimentary test data which doesn't take too long to run. | |
| Usage: make_random_test_fastq.py <fastq_file> <target file size in MB> | |
| """ | |
| import os | |
| import sys | |
| from random import random |
| log_entry = { | |
| 'date': '2011-03-31T00:00:00', | |
| 'machine': 'SN167', | |
| 'project': '0255_A81BF6ABXX', | |
| 'size': 277025390592L | |
| } |
###Jill Mesirov - Broad Institute
@broadinstitute
Broad has generated >150 TBPs in a year. How was this number calculated?
###Bas Burger - BT Global Commerce
| { | |
| "metadata": { | |
| "name": "Sandbox" | |
| }, | |
| "nbformat": 3, | |
| "worksheets": [ | |
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", |
| { | |
| "metadata": { | |
| "name": "Confounding factors" | |
| }, | |
| "nbformat": 3, | |
| "worksheets": [ | |
| { | |
| "cells": [ | |
| { | |
| "cell_type": "heading", |
| { | |
| "metadata": { | |
| "name": "Omics Data Analysis Task" | |
| }, | |
| "nbformat": 3, | |
| "nbformat_minor": 0, | |
| "worksheets": [ | |
| { | |
| "cells": [ | |
| { |
| from sklearn.decomposition.pca import PCA | |
| pca = PCA(n_components=2) | |
| # Note, data.shape = (6, 26440) | |
| pca.fit(data) | |
| Yd = dict() | |
| Yd["400"] = pca.transform(data.ix[:,"400"]) | |
| Yd["5000"] = pca.transform(data.ix[:,"5000"]) |
| loadings = pca.components_ | |
| # I've omitted the code to create ind; a list of the indexes of the | |
| # loadings ordered by distance from origin. | |
| plt.scatter(*loadings, alpha=0.3, label="Loadings"); | |
| plt.scatter(*loadings[:, ind[:3]], c='r', marker='o', | |
| s=80, linewidths=1, facecolors="none", | |
| edgecolors='r', | |
| label="Contributes most\nto variance"); |