$ brew install apache-spark
A Python shell with a preconfigured SparkContext (available as `sc`). It is
def chunk_query(sql, cursor, chunksize=10):
    """Execute ``sql`` on ``cursor`` and yield the result rows in chunks.

    Args:
        sql: Statement passed unchanged to ``cursor.execute``.
        cursor: Object providing ``execute(sql)`` and ``fetchmany(size)``.
        chunksize: Number of rows requested from each ``fetchmany`` call.

    Yields:
        The value returned by ``cursor.fetchmany(chunksize)``, once per
        non-empty batch.
    """
    cursor.execute(sql)
    while True:
        nextrows = cursor.fetchmany(chunksize)
        # Stop at the first empty (falsy) batch instead of yielding it.
        if not nextrows:
            break
        yield nextrows
def iquery(sql, cursor, chunksize=10): |
import json

import boto3

# Read one S3 object and parse its body as JSON.
# NOTE(review): `bucket` and `key` are not defined in the visible snippet;
# they must be assigned before these lines run.
s3 = boto3.resource('s3')
obj = s3.Object(bucket, key)
# obj.get() returns a mapping whose 'Body' entry is read in full here.
data = obj.get()['Body'].read()
d = json.loads(data)
import signal | |
class GracefulInterruptHandler(object): | |
def __init__(self, sig=signal.SIGINT): | |
self.sig = sig | |
def __enter__(self): | |
self.interrupted = False |
from bokeh.plotting import figure, ColumnDataSource | |
from bokeh.models import HoverTool | |
def scatter_with_hover(df, x, y, | |
fig=None, cols=None, name=None, marker='x', | |
fig_width=500, fig_height=500, **kwargs): | |
""" | |
Plots an interactive scatter plot of `x` vs `y` using bokeh, with automatic | |
tooltips showing columns from `df`. |
import collections | |
def update(d, other): | |
"""Recursively merge or update dict-like objects. | |
>>> from pprint import pprint | |
>>> pprint(update({'k1': {'k2': 2}}, {'k1': {'k2': {'k3': 3}}, 'k4': 4})) | |
{'k1': {'k2': {'k3': 3}}, 'k4': 4} | |
>>> pprint(update({'k1': {'k2': 2}}, {'k1': {'k3': 3}})) | |
{'k1': {'k2': 2, 'k3': 3}} | |
>>> pprint(update({'k1': {'k2': 2}}, dict())) |
import pandas as pd | |
from io import StringIO | |
# In-memory sample table wrapped in a file-like object. The backslash right
# after the opening quotes keeps the text from starting with an empty line.
datastring = StringIO("""\
ticker avg_spread max_spread timestamp
a 0.22 1.84 2016-06-03 03:00:00
aa 0.01 0.10 2016-06-03 02:00:00
aaap 2.07 2.17 2016-06-03 01:00:00
aal 0.15 0.5 2016-06-03 04:00:00
""")
# Working example for my blog post at:
# https://danijar.github.io/structuring-your-tensorflow-models
import functools | |
import tensorflow as tf | |
import sets | |
def lazy_property(function): | |
attribute = '_' + function.__name__ |
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import os.path | |
import re | |
import sys | |
import tarfile | |
import numpy as np |