Install Python
$ brew install readline sqlite gdbm
$ brew install python --universal --framework
$ python --version
Python 2.7
Symlinks...
# Written by Brendan O'Connor, [email protected], www.anyall.org | |
# * Originally written Aug. 2005 | |
# * Posted to gist.github.com/16173 in Oct. 2008 | |
# Copyright (c) 2003-2006 Open Source Applications Foundation | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# |
# In case you already have a non-standard Python installation | |
# NOTE: .pydistutils.cfg does not seem to be compatible with `brew install python` | |
# Areas I needed to clean up before installation: | |
# clean up ~/Library/Python | |
# clean up .local | |
# preconditions: | |
# Xcode with the command line tools installed | |
xcode-select --install |
Install Python
$ brew install readline sqlite gdbm
$ brew install python --universal --framework
$ python --version
Python 2.7
Symlinks...
import mock | |
class AlmostAlwaysTrue(object): | |
def __init__(self, total_iterations=1): | |
self.total_iterations = total_iterations | |
self.current_iteration = 0 | |
def __nonzero__(self): | |
if self.current_iteration < self.total_iterations: |
#!/usr/bin/env python | |
"""MailBox class for processing IMAP email. | |
(To use with Gmail: enable IMAP access in your Google account settings) | |
usage with GMail: | |
import mailbox |
def namedlist(typename, field_names): | |
"""Returns a new subclass of list with named fields. | |
>>> Point = namedlist('Point', ('x', 'y')) | |
>>> Point.__doc__ # docstring for the new class | |
'Point(x, y)' | |
>>> p = Point(11, y=22) # instantiate with positional args or keywords | |
>>> p[0] + p[1] # indexable like a plain list | |
33 | |
>>> x, y = p # unpack like a regular list |
# First install homebrew | |
ruby -e "$(curl -fsSL https://raw.github.com/mxcl/homebrew/go)" | |
# Create a virtualenv named simplecv | |
virtualenv simplecv --no-site-packages | |
source simplecv/bin/activate | |
# This activates the virtualenv; your command prompt should change from `$` to `(simplecv)$` | |
# Next the dependencies | |
sudo easy_install pip # Can be skipped if pip is already installed |
import org.apache.spark.sql.catalyst.expressions.Row | |
import org.apache.spark.sql.catalyst.types._ | |
import scala.collection.mutable.{ArrayBuffer} | |
object SparkRowFormatter { | |
def formatRowsWithSchema(rowArr: Array[Row], schema: StructType) = { | |
rowArr.map(r => formatStruct(schema.fields, r)) | |
} |
from pyspark import SparkContext | |
def main(): | |
sc = SparkContext(appName="Test Compression") | |
# RDD has to be key, value pairs | |
data = sc.parallelize([ | |
("key1", "value1"), | |
("key2", "value2"), | |
("key3", "value3"), |
import org.bdgenomics.formats.avro.AlignmentRecord | |
import org.bdgenomics.adam.rdd.ADAMContext._ | |
import org.bdgenomics.adam.projections.Projection | |
import org.apache.spark.rdd.RDD | |
import org.apache.parquet.filter2.dsl.Dsl._ | |
import org.apache.parquet.filter2.predicate.FilterPredicate | |
import org.bdgenomics.adam.projections.AlignmentRecordField._ | |
val adamFile = "/user/nikhilrp/encoded-data/mm10/chr1/ENCFF891NNX.adam" | |
val proj = Projection(readName, contig, start, end, qual) |