- Spark manipulates input data as RDDs (Resilient Distributed Datasets), which are basically distributed datasets.
- RDD transformations (e.g. map, filter) are lazy. They build a roadmap (lineage) of operations to apply over the dataset, but nothing actually runs yet.
- RDD actions (e.g. collect, count, reduce) trigger evaluation of the pending transformations and return the result.
- RDD transformations are re-evaluated on each action by default, unless you cache (persist) the RDD.
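The lazy-transformation vs. action distinction above can be sketched without Spark at all. This is a plain-Python analogy (not the Spark API): a generator expression plays the role of a lazy transformation, consuming it plays the role of an action, and materializing into a list plays the role of `cache()`. The `expensive_square` helper and its counter are invented here just to make recomputation visible.

```python
eval_count = 0  # counts how many times the "expensive" work actually runs

def expensive_square(x):
    global eval_count
    eval_count += 1
    return x * x

data = range(5)

# "Transformation": building the generator does no work yet (lazy).
squares = (expensive_square(x) for x in data)
assert eval_count == 0  # nothing computed so far

# "Action": consuming the generator triggers the computation.
total = sum(squares)  # 0 + 1 + 4 + 9 + 16 = 30
assert total == 30 and eval_count == 5

# Re-running the pipeline recomputes everything, like an uncached RDD
# hit by a second action.
total_again = sum(expensive_square(x) for x in data)
assert eval_count == 10

# "Caching": materialize once, then reuse without recomputation,
# like rdd.cache() followed by an action.
cached = [expensive_square(x) for x in data]
assert eval_count == 15
assert sum(cached) == 30 and max(cached) == 16
assert eval_count == 15  # reusing the cached values did no new work
```

In PySpark terms, the generator expression roughly corresponds to `rdd.map(...)`, `sum(...)` to an action like `reduce` or `count`, and the list to `rdd.cache()` before the first action.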
Tips