Install Python
$ brew install readline sqlite gdbm
$ brew install python --universal --framework
$ python --version
Python 2.7
Symlinks...
| # Written by Brendan O'Connor, [email protected], www.anyall.org | |
| # * Originally written Aug. 2005 | |
| # * Posted to gist.github.com/16173 in Oct. 2008 | |
| # Copyright (c) 2003-2006 Open Source Applications Foundation | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # |
| # In case you have a strange Python installation | |
| # NOTE: .pydistutils.cfg does not seem to be compatible with `brew install python` | |
| # Areas I needed to clean up before installation: | |
| # clean up ~/Library/Python | |
| # clean up .local | |
| # preconditions: | |
| # Xcode with the command-line tools installed | |
| xcode-select --install |
Install Python
$ brew install readline sqlite gdbm
$ brew install python --universal --framework
$ python --version
Python 2.7
Symlinks...
| import mock | |
| class AlmostAlwaysTrue(object): | |
| def __init__(self, total_iterations=1): | |
| self.total_iterations = total_iterations | |
| self.current_iteration = 0 | |
| def __nonzero__(self): | |
| if self.current_iteration < self.total_iterations: |
| #!/usr/bin/env python | |
| """MailBox class for processing IMAP email. | |
| (To use with Gmail: enable IMAP access in your Google account settings) | |
| usage with GMail: | |
| import mailbox |
| def namedlist(typename, field_names): | |
| """Returns a new subclass of list with named fields. | |
| >>> Point = namedlist('Point', ('x', 'y')) | |
| >>> Point.__doc__ # docstring for the new class | |
| 'Point(x, y)' | |
| >>> p = Point(11, y=22) # instantiate with positional args or keywords | |
| >>> p[0] + p[1] # indexable like a plain list | |
| 33 | |
| >>> x, y = p # unpack like a regular list |
| # First, install Homebrew | |
| ruby -e "$(curl -fsSL https://raw.github.com/mxcl/homebrew/go)" | |
| # Create a virtualenv named simplecv | |
| virtualenv simplecv --no-site-packages | |
| source simplecv/bin/activate | |
| # This activates the virtualenv; your command prompt should change from `$` to `(simplecv)$` | |
| # Next, install the dependencies | |
| sudo easy_install pip # Can be skipped if pip is already installed |
| import org.apache.spark.sql.catalyst.expressions.Row | |
| import org.apache.spark.sql.catalyst.types._ | |
| import scala.collection.mutable.{ArrayBuffer} | |
| object SparkRowFormatter { | |
| def formatRowsWithSchema(rowArr: Array[Row], schema: StructType) = { | |
| rowArr.map(r => formatStruct(schema.fields, r)) | |
| } |
| from pyspark import SparkContext | |
| def main(): | |
| sc = SparkContext(appName="Test Compression") | |
| # RDD has to be key, value pairs | |
| data = sc.parallelize([ | |
| ("key1", "value1"), | |
| ("key2", "value2"), | |
| ("key3", "value3"), |
| import org.bdgenomics.formats.avro.AlignmentRecord | |
| import org.bdgenomics.adam.rdd.ADAMContext._ | |
| import org.bdgenomics.adam.projections.Projection | |
| import org.apache.spark.rdd.RDD | |
| import org.apache.parquet.filter2.dsl.Dsl._ | |
| import org.apache.parquet.filter2.predicate.FilterPredicate | |
| import org.bdgenomics.adam.projections.AlignmentRecordField._ | |
| val adamFile = "/user/nikhilrp/encoded-data/mm10/chr1/ENCFF891NNX.adam" | |
| val proj = Projection(readName, contig, start, end, qual) |