sort -k3 -r
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
from streamparse.bolt import Bolt | |
log = logging.getLogger("custom_code_bolt") | |
class CustomCodeBolt(Bolt): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
class InvalidVersionException(Exception):
    """Exception type declared alongside ``StormVersion``.

    Adds no behavior of its own beyond ``Exception``.
    NOTE(review): presumably raised for version strings that cannot be
    parsed -- confirm against the callers, which are not visible here.
    """
class StormVersion(object): | |
VERSION_RE = re.compile(r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)" | |
"(?P<older_patch>\.\d+)?(?P<other>.*)") | |
RC_RE = re.compile(r"-rc(?P<release_candidate>\d+)", re.IGNORECASE) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Licensed to the Apache Software Foundation (ASF) under one or more | |
* contributor license agreements. See the NOTICE file distributed with | |
* this work for additional information regarding copyright ownership. | |
* The ASF licenses this file to You under the Apache License, Version 2.0 | |
* (the "License"); you may not use this file except in compliance with | |
* the License. You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.context import SparkContext | |
from pyspark.serializers import BatchedSerializer, PickleSerializer | |
from pyspark.rdd import RDD | |
from py4j.java_gateway import java_import | |
class CassandraSparkContext(SparkContext): | |
def _do_init(self, *args, **kwargs): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark import SparkContext | |
def main(): | |
sc = SparkContext(appName="Test Compression") | |
# RDD has to be key, value pairs | |
data = sc.parallelize([ | |
("key1", "value1"), | |
("key2", "value2"), | |
("key3", "value3"), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
install.packages("jsonlite", dependencies = TRUE) | |
install.packages("RCurl", dependencies = TRUE) | |
library("jsonlite") | |
library("RCurl") | |
base_url <- "https://api.parsely.com/v2" | |
apikey <- "computerworld.com" | |
api_secret <- "YOUR SECRET KEY" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# Hitting CTRL-C kills the Django server as well as all tunnels that were created | |
TUNNEL_PIDS=() | |
# Send the default signal (SIGTERM) to every PID recorded in the global
# TUNNEL_PIDS array. Takes no arguments.
function kill_tunnels() {
    local tunnel_pid
    for tunnel_pid in "${TUNNEL_PIDS[@]}"
    do
        # Quoted so the PID is passed to kill as exactly one word, with no
        # word-splitting or glob expansion.
        kill "$tunnel_pid"
    done
}
The existing examples for this are good, but they miss a pretty critical observation: the number of partitions and how it affects the results.
Assume we have the following script, aggregate_by_key.py:
import pprint
from pyspark.context import SparkContext
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime as dt | |
import pprint | |
import pytz | |
print(pytz.__version__) | |
# '2015.4' | |
timezone = pytz.timezone('Europe/London') | |
tmsp = dt.datetime(2015, 3, 29, 1, tzinfo=pytz.utc) |