Andrew Otto (ottomata)

# Get EventLoggingSchemas config for TemplateWizard in beta
curl -s 'https://deployment.wikimedia.beta.wmflabs.org/w/load.php?debug=true&lang=en&modules=ext.centralNotice.geoIP%7Cext.centralauth.centralautologin%7Cext.dismissableSiteNotice%2CeventLogging%2CnavigationTiming%2Cpopups%2CwikimediaEvents%7Cext.uls.common%2Ccompactlinks%2Cinit%2Cinterface%2Cpreferences%2Cwebfonts%7Cext.urlShortener.toolbar%7Cjquery%2Csite%7Cjquery.client%2Ccookie%2CtextSelection%7Cjquery.uls.data%7Cmediawiki.String%2CTitle%2CUri%2Capi%2Cbase%2Ccldr%2Ccookie%2Cexperiments%2CjqueryMsg%2Clanguage%2Cstorage%2Cuser%2Cutil%7Cmediawiki.editfont.styles%7Cmediawiki.libs.pluralruleparser%7Cmediawiki.page.ready%2Cstartup%7Cmediawiki.ui.button%7Cskins.vector.js%7Cuser.defaults&skin=vector&version=vzm6n' | grep -i templatewizard
"TemplateWizard": "/analytics/legacy/templatewizard/1.0.0",
"eventlogging_TemplateWizard": [],
# Get EventLoggingSchemas config for TemplateWizard in beta enwiki
curl -s 'https://en.wik
#!/usr/bin/env node
'use strict';
const fetch = require('node-fetch');
const jsTools = require('@wikimedia/jsonschema-tools');
/**
 * Recurses through a schema, converting Draft 4 JSONSchema style
 * `required` to Draft 7 style.
 */
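The preview ends at the comment above. As a rough illustration of the conversion it describes, here is a minimal Python sketch, assuming the older style marks individual properties with a boolean `required: true` and Draft 7 expects an array of required property names on the parent object; the gist's actual JavaScript implementation is not shown, so the names and details below are assumptions.

def convert_required(schema):
    # Only dict (object) schemas can carry properties / required flags.
    if not isinstance(schema, dict):
        return schema
    props = schema.get('properties')
    if isinstance(props, dict):
        # Collect property names flagged with boolean required: true (assumed old style).
        required = [
            name for name, prop in props.items()
            if isinstance(prop, dict) and prop.get('required') is True
        ]
        for name in required:
            del props[name]['required']
        if required:
            # Draft 7 style: an array of required property names on the parent object.
            schema['required'] = schema.get('required', []) + required
        for prop in props.values():
            convert_required(prop)
    # Recurse into array item schemas as well.
    if isinstance(schema.get('items'), dict):
        convert_required(schema['items'])
    return schema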
import scala.collection.JavaConverters._
import scala.collection.immutable.ListMap
import org.wikimedia.analytics.refinery.spark.sql.JsonSchemaConverter
import org.wikimedia.analytics.refinery.core.jsonschema.EventSchemaLoader
import org.apache.spark.sql.functions.from_json
import org.apache.spark.sql.DataFrame
import org.wikimedia.analytics.refinery.job.refine._
import org.wikimedia.analytics.refinery.spark.connectors.DataFrameToHive
import org.wikimedia.analytics.refinery.spark.sql.PartitionedDataFrame
def get_all_schedule_b_results(
committee_id=['C00618389', 'C00637512'],
sort='-disbursement_date',
two_year_transaction_period=[2018, 2020]
):
"""
Gets all paginated results of schedule b results for the given parameters.
The returned value will be the list of all result object records.
"""
# schedule b keys.txt
committee_id
money_for_baths
# OR
keys = ['committee_id', 'money_for_baths']
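The preview cuts off before the function body. A minimal sketch of how such a pagination loop might look, assuming the openFEC /schedules/schedule_b/ endpoint and its keyset pagination via pagination.last_indexes; the api_key value is a placeholder, and this is not the gist's actual implementation.

import requests

API_URL = 'https://api.open.fec.gov/v1/schedules/schedule_b/'

def get_all_schedule_b_results(
    committee_id=['C00618389', 'C00637512'],
    sort='-disbursement_date',
    two_year_transaction_period=[2018, 2020],
    api_key='DEMO_KEY',  # placeholder API key
):
    """
    Gets all paginated Schedule B results for the given parameters.
    Returns a list of all result records.
    """
    params = {
        'api_key': api_key,
        'committee_id': committee_id,
        'sort': sort,
        'two_year_transaction_period': two_year_transaction_period,
        'per_page': 100,
    }
    results = []
    while True:
        response = requests.get(API_URL, params=params).json()
        results.extend(response['results'])
        # schedule_b paginates by keyset: pass last_indexes back until exhausted.
        last_indexes = response['pagination'].get('last_indexes')
        if not last_indexes:
            break
        params.update(last_indexes)
    return results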
15:32:04 [@logstash1010:/srv/kafka/data/udp_localhost-info-2] $ stat /srv/kafka/data/udp_localhost-info-2/00000000004911836348.log
File: /srv/kafka/data/udp_localhost-info-2/00000000004911836348.log
Size: 1073741181 Blocks: 2097152 IO Block: 4096 regular file
Device: 901h/2305d Inode: 10741850117 Links: 1
Access: (0644/-rw-r--r--) Uid: ( 499/ kafka) Gid: ( 499/ kafka)
Access: 2020-04-16 15:26:56.963215263 +0000
Modify: 2020-01-01 05:07:39.234657596 +0000
Change: 2020-01-01 05:07:39.234657596 +0000
Birth: -
// spark2-shell --driver-java-options='-Drefinery.log.level=DEBUG' --jars /srv/deployment/analytics/refinery/artifacts/refinery-hive.jar
sc.setLogLevel("DEBUG")
spark.sql("CREATE TEMPORARY FUNCTION geocode_data as 'org.wikimedia.analytics.refinery.hive.GetGeoDataUDF'")
case class Ip(ip: String) {}
val data = Seq(Ip("81.2.69.160"), Ip("81.2.69.160"), Ip("81.2.69.160"), Ip("81.2.69.160"))
val df = spark.createDataFrame(data).repartition(2)
df.selectExpr("geocode_data(ip)").show
// java.lang.NullPointerException
'use strict';
const got = require('got');
const jsTools = require('@wikimedia/jsonschema-tools');
const yaml = require('js-yaml');
/**
 * Tests if obj is an Object (not an array).
 * @param {*} obj value to test
 * @return {boolean} true if obj is a plain Object
 */
// Plausible completion (hypothetical name); the gist preview cuts off here.
function isObject(obj) {
    return typeof obj === 'object' && obj !== null && !Array.isArray(obj);
}
# !pip install git+https://github.com/dropbox/PyHive.git@437eefa7bceda1fd27051e5146e66cb8e4bdfea1
# !pip install requests-kerberos
import os
import socket
from pyhive import presto
def get_presto_cursor(
host="an-coord1001.eqiad.wmnet",
port=8281,
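The signature above is truncated in the preview. A plausible completion, assuming PyHive's presto.connect with Kerberos auth passed through requests_kwargs (the reason for the pinned PyHive commit above); the exact parameters are assumptions, not the gist's code.

from requests_kerberos import HTTPKerberosAuth, REQUIRED

def get_presto_cursor(
    host='an-coord1001.eqiad.wmnet',
    port=8281,
):
    # Assumed completion: connect over HTTPS with Kerberos authentication.
    connection = presto.connect(
        host=host,
        port=port,
        protocol='https',
        requests_kwargs={'auth': HTTPKerberosAuth(mutual_authentication=REQUIRED)},
    )
    return connection.cursor()

# Usage: run a query and fetch the results.
cursor = get_presto_cursor()
cursor.execute('SELECT 1')
print(cursor.fetchall())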
#!/usr/bin/env python
# coding: utf-8
# In[1]:
get_ipython().system('pip install --upgrade git+https://github.com/neilpquinn/wmfdata.git')
# In[2]: