Andrew Otto (ottomata)

# Get EventLoggingSchemas config for TemplateWizard in beta
curl -s 'https://deployment.wikimedia.beta.wmflabs.org/w/load.php?debug=true&lang=en&modules=ext.centralNotice.geoIP%7Cext.centralauth.centralautologin%7Cext.dismissableSiteNotice%2CeventLogging%2CnavigationTiming%2Cpopups%2CwikimediaEvents%7Cext.uls.common%2Ccompactlinks%2Cinit%2Cinterface%2Cpreferences%2Cwebfonts%7Cext.urlShortener.toolbar%7Cjquery%2Csite%7Cjquery.client%2Ccookie%2CtextSelection%7Cjquery.uls.data%7Cmediawiki.String%2CTitle%2CUri%2Capi%2Cbase%2Ccldr%2Ccookie%2Cexperiments%2CjqueryMsg%2Clanguage%2Cstorage%2Cuser%2Cutil%7Cmediawiki.editfont.styles%7Cmediawiki.libs.pluralruleparser%7Cmediawiki.page.ready%2Cstartup%7Cmediawiki.ui.button%7Cskins.vector.js%7Cuser.defaults&skin=vector&version=vzm6n' | grep -i templatewizard
"TemplateWizard": "/analytics/legacy/templatewizard/1.0.0",
"eventlogging_TemplateWizard": [],
# Get EventLoggingSchemas config for TemplateWizard in beta enwiki
curl -s 'https://en.wik
#!/usr/bin/env node
'use strict';
const fetch = require('node-fetch');
const jsTools = require('@wikimedia/jsonschema-tools');
/**
 * Recurses through a schema, converting Draft 4 JSONSchema style
 * `required` to Draft 7 style.
 */
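The preview ends at the comment above. As a rough illustration of the conversion it describes, here is a minimal Python sketch, assuming the older style marks individual properties with a boolean `required: true` and Draft 7 expects an array of required property names on the parent object; the gist's actual JavaScript implementation is not shown, so the names and details below are assumptions.

def convert_required(schema):
    # Only dict (object) schemas can carry properties / required flags.
    if not isinstance(schema, dict):
        return schema
    props = schema.get('properties')
    if isinstance(props, dict):
        # Collect property names flagged with boolean required: true (assumed old style).
        required = [
            name for name, prop in props.items()
            if isinstance(prop, dict) and prop.get('required') is True
        ]
        for name in required:
            del props[name]['required']
        if required:
            # Draft 7 style: an array of required property names on the parent object.
            schema['required'] = schema.get('required', []) + required
        for prop in props.values():
            convert_required(prop)
    # Recurse into array item schemas as well.
    if isinstance(schema.get('items'), dict):
        convert_required(schema['items'])
    return schema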
import scala.collection.JavaConverters._
import scala.collection.immutable.ListMap
import org.wikimedia.analytics.refinery.spark.sql.JsonSchemaConverter
import org.wikimedia.analytics.refinery.core.jsonschema.EventSchemaLoader
import org.apache.spark.sql.functions.from_json
import org.apache.spark.sql.DataFrame
import org.wikimedia.analytics.refinery.job.refine._
import org.wikimedia.analytics.refinery.spark.connectors.DataFrameToHive
import org.wikimedia.analytics.refinery.spark.sql.PartitionedDataFrame
def get_all_schedule_b_results(
committee_id=['C00618389', 'C00637512'],
sort='-disbursement_date',
two_year_transaction_period=[2018, 2020]
):
"""
Gets all paginated results of schedule b results for the given parameters.
The returned value will be the list of all result object records.
"""
# schedule b keys.txt
committee_id
money_for_baths
# OR
keys = ['committee_id', 'money_for_baths']
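The preview cuts off before the function body. A minimal sketch of how such a pagination loop might look, assuming the openFEC /schedules/schedule_b/ endpoint and its keyset pagination via pagination.last_indexes; the api_key value is a placeholder, and this is not the gist's actual implementation.

import requests

API_URL = 'https://api.open.fec.gov/v1/schedules/schedule_b/'

def get_all_schedule_b_results(
    committee_id=['C00618389', 'C00637512'],
    sort='-disbursement_date',
    two_year_transaction_period=[2018, 2020],
    api_key='DEMO_KEY',  # placeholder API key
):
    """
    Gets all paginated Schedule B results for the given parameters.
    Returns a list of all result records.
    """
    params = {
        'api_key': api_key,
        'committee_id': committee_id,
        'sort': sort,
        'two_year_transaction_period': two_year_transaction_period,
        'per_page': 100,
    }
    results = []
    while True:
        response = requests.get(API_URL, params=params).json()
        results.extend(response['results'])
        # schedule_b paginates by keyset: pass last_indexes back until exhausted.
        last_indexes = response['pagination'].get('last_indexes')
        if not last_indexes:
            break
        params.update(last_indexes)
    return results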
15:32:04 [@logstash1010:/srv/kafka/data/udp_localhost-info-2] $ stat /srv/kafka/data/udp_localhost-info-2/00000000004911836348.log
File: /srv/kafka/data/udp_localhost-info-2/00000000004911836348.log
Size: 1073741181 Blocks: 2097152 IO Block: 4096 regular file
Device: 901h/2305d Inode: 10741850117 Links: 1
Access: (0644/-rw-r--r--) Uid: ( 499/ kafka) Gid: ( 499/ kafka)
Access: 2020-04-16 15:26:56.963215263 +0000
Modify: 2020-01-01 05:07:39.234657596 +0000
Change: 2020-01-01 05:07:39.234657596 +0000
Birth: -
// spark2-shell --driver-java-options='-Drefinery.log.level=DEBUG' --jars /srv/deployment/analytics/refinery/artifacts/refinery-hive.jar
sc.setLogLevel("DEBUG")
spark.sql("CREATE TEMPORARY FUNCTION geocode_data as 'org.wikimedia.analytics.refinery.hive.GetGeoDataUDF'")
case class Ip(ip: String) {}
val data = Seq(Ip("81.2.69.160"), Ip("81.2.69.160"), Ip("81.2.69.160"), Ip("81.2.69.160"))
val df = spark.createDataFrame(data).repartition(2)
df.selectExpr("geocode_data(ip)").show
// java.lang.NullPointerException
'use strict';
const got = require('got');
const jsTools = require('@wikimedia/jsonschema-tools');
const yaml = require('js-yaml');
/**
 * Tests if obj is an Object (not an array).
 * @param {*} obj value to test
 * @return {boolean} true if obj is a plain Object
 */
// Plausible completion (hypothetical name); the gist preview cuts off here.
function isObject(obj) {
    return typeof obj === 'object' && obj !== null && !Array.isArray(obj);
}
# !pip install git+https://github.com/dropbox/PyHive.git@437eefa7bceda1fd27051e5146e66cb8e4bdfea1
# !pip install requests-kerberos
import os
import socket
from pyhive import presto
def get_presto_cursor(
host="an-coord1001.eqiad.wmnet",
port=8281,
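The signature above is truncated in the preview. A plausible completion, assuming PyHive's presto.connect with Kerberos auth passed through requests_kwargs (the reason for the pinned PyHive commit above); the exact parameters are assumptions, not the gist's code.

from requests_kerberos import HTTPKerberosAuth, REQUIRED

def get_presto_cursor(
    host='an-coord1001.eqiad.wmnet',
    port=8281,
):
    # Assumed completion: connect over HTTPS with Kerberos authentication.
    connection = presto.connect(
        host=host,
        port=port,
        protocol='https',
        requests_kwargs={'auth': HTTPKerberosAuth(mutual_authentication=REQUIRED)},
    )
    return connection.cursor()

# Usage: run a query and fetch the results.
cursor = get_presto_cursor()
cursor.execute('SELECT 1')
print(cursor.fetchall())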
#!/usr/bin/env python
# coding: utf-8
# In[1]:
get_ipython().system('pip install --upgrade git+https://github.com/neilpquinn/wmfdata.git')
# In[2]: