# Note: This file is managed by Puppet.
# Hue configuration file
# ===================================
#
# For complete documentation about the contents of this file, run
# $ <hue_root>/build/env/bin/hue config_help
#
# All .ini files under the current directory are treated equally. Their
# contents are merged to form the Hue configuration, which can
{
  "rev_timestamp": "2017-06-06T20:18:06Z",
  "rev_sha1": "fy9n1sdjob23lhe51hspnnvqmicrgm8",
  "rev_parent_id": 137977799,
  "rev_minor_edit": false,
  "rev_len": 14267,
  "rev_id": 137981314,
  "rev_content_model": "wikitext",
  "rev_content_format": "wikitext",
  "comment": "",
set session sql_log_bin=0;
-- Dropping EventLogging tables with no events more recent than 90 days ago (since )...
DROP TABLE `BannerImpression_5329872`;
DROP TABLE `CentralNoticeBannerHistory_13447710`;
DROP TABLE `ChangesListHighlights_16449602`;
DROP TABLE `ChangesListHighlights_16484288`;
DROP TABLE `CommandInvocation_15237653`;
DROP TABLE `CompletionSuggestions_13424343`;
DROP TABLE `CompletionSuggestions_13630018`;
DROP TABLE `ContentTranslationCTA_11616099`;
{
  "dataSources" : [
    {
      "spec" : {
        "dataSchema" : {
          "dataSource" : "banner_activity_minutely",
          "metricsSpec" : [
            {
              "name" : "request_count",
              "type" : "longSum",
// // TODO: instead of the following, what if we took the original outputDf, and
// // added null, cast as the proper type, for each table field not present in
// // outputDf's schema. Then we shouldn't need to re-read the data with the
// val table = hiveContext.table(tableName)
//
// // This is kinda working? Maybe we need:
//// val emptyTableDf = table.where("1=0")
//// val finalDf = emptyTableDf.unionAll(outputDf)
//
// // make sure the output df has the table's fields
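A minimal sketch of the null-cast idea from the TODO above, using the Spark DataFrame API (alignToTableSchema is a hypothetical helper name; tableDf would be the result of hiveContext.table(tableName)):

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.{col, lit}

// Add any table columns missing from outputDf as typed nulls, then
// select in the table's column order so unionAll lines up positionally.
def alignToTableSchema(outputDf: DataFrame, tableDf: DataFrame): DataFrame = {
  val withMissing = tableDf.schema.fields.foldLeft(outputDf) { (df, field) =>
    if (df.columns.contains(field.name)) df
    else df.withColumn(field.name, lit(null).cast(field.dataType))
  }
  withMissing.select(tableDf.schema.fieldNames.map(col): _*)
}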
object CoolStuff {
  implicit class StringImplicits(s: String) {
    def withCool(s2: String): String = s + " COOL " + s2
  }
  implicit class SeqStringImplicits(strings: Seq[String]) {
    def joinEm(sep: String = " | "): String = {
      // error: not found: value withCool
      // strings.map(withCool).mkString(sep)
      // But this will work: withCool is a method added to String by the
      // implicit class, so call it on each element ("yeah" is arbitrary):
      strings.map(_.withCool("yeah")).mkString(sep)
    }
  }
}
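A quick usage sketch of the implicits above (the string values are arbitrary):

import CoolStuff._

println("a".withCool("b"))        // prints: a COOL b
println(Seq("a", "b").joinEm())   // prints: a COOL yeah | b COOL yeah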
whitelist='(DiacriticsPoll|DiacriticsVisibility|Echo|EchoInteraction|Echo|EchoMail|Echo|Edit|EditorActivation|FlowReplies|GatherClicks|GatherFlags|GettingStartedNavbarNoArticle|GettingStartedOnRedirect|GettingStartedRedirectImpression|GuidedTourButtonClick|GuidedTour|GuidedTourExited|GuidedTourExternalLinkActivation|GuidedTourGuiderHidden|GuidedTourGuiderImpression|GuidedTour|GuidedTourInternalLinkActivation|GuidedTour|MediaWikiInstallPingback|MobileOptionsTracking|MobileWebBrowse|MobileWebClickTracking|MobileWebCta|MobileWebDiffClickTracking|MobileWebEditing|MobileWebInfobox|MobileWebLanguageSwitcher|MobileWebMainMenuClickTracking|MobileWebSearch|MobileWebSectionUsage|MobileWebUIClickTracking|MobileWebUploads|MobileWebWatching|MobileWebWatchlistClickTracking|MobileWebWikiGrok|MobileWebWikiGrokError|MobileWebWikiGrok|MobileWebWikiGrokResponse|MobileWebWikiGrok|MobileWikiAppAppearanceSettings|MobileWikiAppArticleSuggestions|MobileWikiAppCreateAccount|MobileWikiAppDailyStats|MobileWikiAppEdit|MobileWikiAppInsta
from sseclient import SSEClient
import json

url = 'https://stream.wikimedia.org/v2/stream/recentchange'

for event in SSEClient(url):
    if event.event == 'message' and event.data:
        change = json.loads(event.data)
        print('%(user)s edited %(title)s' % change)
    elif event.event == 'error':
        print('--- Encountered error', event.data)
# Where to output varnish log lines:
# kafka - (default) send to kafka broker
# stdout - just print to stdout (behave like varnishncsa)
# null - (test) collect all tags specified by format but don't output anything
output = stdout
# Log formatter
format.type = json
format = %{fake_tag0@hostname?fake-hostname-change-me-if-you-want}x %{@sequence!num?0}n %{end:%FT%T@dt}t %{Varnish:time_firstbyte@time_firstbyte!num?0.0}x %{X-Client-IP@ip}o %{X-Cache-Status@cache_status}o %{@http_status}s %{@response_size!num?0}b %{@http_method}m %{Host@uri_host}i %{@uri_path}U %{@uri_query}q %{Content-Type@content_type}o %{Referer@referer}i %{X-Forwarded-For@x_forwarded_for}i %{User-Agent@user_agent}i %{Accept-Language@accept_language}i %{X-Analytics@x_analytics}o %{Range@range}i %{X-Cache@x_cache}o
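Each %{field@name}x-style token above maps one varnish log field to a JSON key, so with format.type = json an output line would look roughly like the following. All values here are invented for illustration; the defaults (fake-hostname-change-me-if-you-want, 0, 0.0) come from the ?default suffixes in the format string:

{"hostname": "fake-hostname-change-me-if-you-want", "sequence": 0, "dt": "2017-06-06T20:18:06", "time_firstbyte": 0.0, "ip": "127.0.0.1", "cache_status": "hit-front", "http_status": "200", "response_size": 1532, "http_method": "GET", "uri_host": "en.wikipedia.org", "uri_path": "/wiki/Main_Page", "uri_query": "", "content_type": "text/html", "referer": "-", "x_forwarded_for": "-", "user_agent": "ExampleBot/1.0", "accept_language": "en", "x_analytics": "-", "range": "-", "x_cache": "-"}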
package org.wikimedia.analytics.refinery.job
import com.github.nscala_time.time.Imports.{LocalDate, Period}
import com.twitter.algebird.{QTree, QTreeSemigroup}
import org.apache.hadoop.fs.Path
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
import scopt.OptionParser
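These imports suggest a Spark job that computes approximate quantiles by merging Algebird QTrees across an RDD. A minimal, self-contained sketch of that pattern, assuming nothing about the original job beyond its imports (the data, the k = 6 resolution, and the object name are illustrative):

import com.twitter.algebird.{QTree, QTreeSemigroup}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object QuantileSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("QuantileSketch"))
    // Toy data; the real job would load something like session lengths.
    val values: RDD[Double] = sc.parallelize(Seq(0.3, 1.2, 0.7, 4.5, 2.2))

    // One QTree per value, merged pairwise by the semigroup.
    // k sets the tree's resolution: higher k, tighter quantile bounds.
    val semigroup = new QTreeSemigroup[Double](6)
    val qtree = values.map(QTree(_)).reduce(semigroup.plus)

    // quantileBounds returns (lower, upper) bounds on the true quantile.
    val (lower, upper) = qtree.quantileBounds(0.5)
    println(s"median is between $lower and $upper")
    sc.stop()
  }
}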