Skip to content

Instantly share code, notes, and snippets.

View brentp's full-sized avatar

Brent Pedersen brentp

View GitHub Profile
from osgeo import gdal, gdal_array
import numpy
from osgeo.gdalconst import GDT_Float64
xsize, ysize = 10, 10
a = numpy.random.random((xsize, ysize)).astype(numpy.float64)
xmin, xmax = -121., -119.
ymin, ymax = 41., 43.
import sys
import nwalign as nw
print nw
from Bio.pairwise2 import align
# match, mismatch, gap_init, gap_extend
bioalign = align.globalms
N = 100
import numpy as np
seq = ... # get some sequence in an np array.
methyl = np.zeros(seq.shape, dtype=np.uint8)
# CG
c_idxs, = np.where(seq == 'C')
# where a 'G' follows a 'C'
first_g = seq[c_idxs + 1] == 'G'
def calc_methylation(sequence):
"""
>>> calc_methylation("CCC")
array([3, 3, 3], dtype=uint8)
>>> calc_methylation("CCCCC")
array([3, 3, 3, 3, 3], dtype=uint8)
>>> calc_methylation("GGGGG")
array([6, 6, 6, 6, 6], dtype=uint8)
"""
given a fasta file. for each chromosome, write a file containing:
0: no methylation possible (not a C or G)
1: + CG methylation
2: + CHG methylation
3: + CHH methylation
4: - CG methylation
5: - CHG methylation
6: - CHH methylation
"""
LUA_CFLAGS=`pkg-config lua5.1 --cflags`
all: stringext.so
stringext.so: stringext.c
gcc $(LUA_CFLAGS) -O3 -fPIC -o stringext.o -c stringext.c
gcc -shared -O3 stringext.o -o stringext.so
class FileIndex(object):
ext = ".fidx"
@classmethod
def create(cls, filename, get_next, allow_multiple=False):
fh = open(filename)
lines = sum(1 for line in fh)
bnum = lines if lines > 2**24 else lines * 2
fh.seek(0)
db = BDB()
db.open(filename + cls.ext, bnum=bnum, lcnum=2**19,
In [1]: from shapely.geometry import LineString
In [2]: l = LineString([(1, 2), (3, 4)])
In [3]: import geojson
In [4]: geojson.du
geojson.dump geojson.dumps
In [4]: geojson.dumps(l)
from itertools import chain, groupby
def as_pairs (xs, ys, grouper=lambda _: _):
"""
>>> aordered = iter([1, 1, 2, 4, 6, 9])
>>> bordered = iter([1, 3, 4, 5, 6, 7])
>>> for a, b in as_pairs(aordered, bordered):
... a, b
([1, 1], [1])
([2], None)
"""
find ssrs
"""
import itertools
from pyfasta import Fasta
import sys
pairs = ["%s%s" % p for p in itertools.product("ACGT", "ACGT")]
triples = ["%s%s%s" % p for p in itertools.product("ACGT", "ACGT", "ACGT")]