Valentine Svensson vals

### Jill Mesirov - Broad Institute

@broadinstitute

Broad has generated >150 TBPs in a year. How was this number calculated?

### Bas Burger - BT Global Commerce

	loadings = pca.components_

	# The construction of ind is left out here; it is a list of the loading
	# indexes ordered by distance from the origin.

	# Background layer: every loading, drawn semi-transparent.
	plt.scatter(*loadings, alpha=0.3, label="Loadings");

	# Foreground layer: highlight the loadings at the first three indexes of
	# ind in red so they stand out from the background cloud.
	top_three = loadings[:, ind[:3]]
	highlight_style = dict(
		c='r', marker='o', s=80, linewidths=1,
		facecolors="none", edgecolors='r',
		label="Contributes most\nto variance",
	)
	plt.scatter(*top_three, **highlight_style);

	# Import PCA from the public sklearn.decomposition package; the private
	# sklearn.decomposition.pca module path was deprecated and later removed.
	from sklearn.decomposition import PCA

	pca = PCA(n_components=2)

	# Note, data.shape = (6, 26440)
	pca.fit(data)

	# Project the "400" and "5000" column selections onto the fitted components.
	# .loc is the label-based indexer; the old .ix indexer has been removed
	# from pandas.
	Yd = {}
	Yd["400"] = pca.transform(data.loc[:, "400"])
	Yd["5000"] = pca.transform(data.loc[:, "5000"])

	{
	"metadata": {
	"name": "Omics Data Analysis Task"
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{

	"""Randomly picks out some title / sequence / quality triples
	from a given fastq file and writes them into a new fastq file.

	This is to generate rudimentary test data which doesn't take too long to run.

	Usage: make_random_test_fastq.py <fastq_file> <target file size in MB>
	"""
	import os
	import sys
	from random import random

	from __future__ import with_statement
	import matplotlib.pyplot as plt
	plt.rc('xtick', labelsize='x-small')
	import yaml


	def convert(yaml_file):
	"""Convert the data in a yaml file generated by count_barcodes.py to a
	.dat file with space-separated sorted fractions of barcode distribution.
	"""