Peter Kruczkiewicz peterk87

JS+D3: Zoomable, pannable scatterplot with Shift-key-enabled brush multiselect of data points

This JS+D3 gist plots the iris dataset from data.tsv as a scatterplot with zooming and panning enabled, plus a brush for selecting or deselecting points.

The "Get Selection" button gets the current selection of points and prints their IDs to the JS console (i.e., it calls console.log(selection);).

The "Clear Selection" button clears the current selection.

	def chunk_seq(seq_name, sequence, chunk_size, chunk_increment):
	"""
	Chunk up a sequence and return a list of tuples with the chunked up
	sequences and new sequence names with the position of the chunk in the
	original sequence.

	Args:
	seq_name: Sequence name.
	sequence: Nucleotide or amino acid sequence that is to be chunked up.
	chunk_size: Size of chunks (e.g. 30 bp)

	## {{{ http://code.activestate.com/recipes/578175/ (r1)
	### hierarchical_clustering.py
	#Copyright 2005-2012 J. David Gladstone Institutes, San Francisco California
	#Author Nathan Salomonis - [email protected]

	#Permission is hereby granted, free of charge, to any person obtaining a copy
	#of this software and associated documentation files (the "Software"), to deal
	#in the Software without restriction, including without limitation the rights
	#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	#copies of the Software, and to permit persons to whom the Software is furnished

	from scipy.spatial.distance import *
	from scipy.cluster.hierarchy import *
	import pandas as pd
	import numpy
	import matplotlib as plt
	from matplotlib.pylab import figure
	import pylab as pl
	import pp

	def num_clusters(hc, d):

	# This file contains a set of functions for parsing out some useful information
	# from BLAST results files saved in BLAST's tabular output format ("-outfmt 6").

	# Biopython is required for reading multifasta files and storing sequences.
	from Bio.Seq import Seq
	from Bio.SeqRecord import SeqRecord
	from Bio.Alphabet import IUPAC

	# if all of your genome sequences are within one multifasta file
	recs = [rec for rec in SeqIO.parse('all_genomes.fasta', 'fasta')]


	aln_snps = {}
	for aln in aln_files:
	recs = [f for f in SeqIO.parse(aln, 'fasta')]
	# strain names should be the last dash delimited element in fasta header
	strains = [rec.name.split('-')[-1] for rec in recs]
	# get a dictionary of strain names and sequences
	strain_seq = {rec.name.split('-')[-1]:''.join([nt for nt in rec.seq]) \
	for rec in recs}
	# get length of the MSA and check that all of the seq are the same length

	library(RColorBrewer)

	qualitative_colours <- function(n, light=FALSE) {
	# Get a specified number of qualitative colours if possible.
	# This function will default to a continuous color scheme if there are more
	# than 21 colours needed.

	# rainbow12equal <- c("#BF4D4D", "#BF864D", "#BFBF4D", "#86BF4D", "#4DBF4D", "#4DBF86", "#4DBFBF", "#4D86BF", "#4D4DBF", "#864DBF", "#BF4DBF", "#BF4D86")
	rich12equal <- c("#000040", "#000093", "#0020E9", "#0076FF", "#00B8C2", "#04E466", "#49FB25", "#E7FD09", "#FEEA02", "#FFC200", "#FF8500", "#FF3300")


	"""

	SAM-based reboot

	"""

	import sys, os, subprocess, itertools, array, datetime, socket, heapq, tempfile

	import argparse
	import textwrap
	import os
	import sys
	from datetime import timedelta, datetime


	# function for reading a multifasta file
	# returns a dictionary with sequence headers and nucleotide sequences
	def get_seqs_from_fasta(filepath):

	/* bitbucket.org dark css theme */

	body, aside {
	background: #222 !important;
	background-color: #222 !important;
	color: #bbb !important;
	}
	h1, h2, h3, h4, h5, span {
	background-color: transparent !important;
	color: #FFC963 !important;