This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
import re | |
# anonymizes ua | |
def sanitize(ua): | |
# truncate version numbers to major.minor, e.g. 1.1.2 -> 1.1 | |
processed = re.sub(r'(\d+\.\d+)(\.(\d|\w)+)+', r'\1', ua) | |
# remove language headers en-EN | |
processed = re.sub(r'\s\w\w-\w\w(;|\s|\)){1}', r'\1', processed) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
# calculates per country weekly percentiles | |
# | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import matplotlib.mlab as mlab | |
import matplotlib.dates as md | |
import csv | |
import datetime | |
from scipy import stats |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from celery import Celery | |
from celery.signals import after_task_publish,task_success,task_prerun,task_postrun | |
# first argument, current module | |
app = Celery('tasks') | |
app.config_from_object('celeryconfig') | |
# To instantiate celery and import this module | |
# do: celery -A task worker --loglevel=info | |
# after, once celery is running, instantiate a python console: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
# Reads a file like <count>, raw browser string | |
# and processes it to output: | |
# percentage, normalized browser string | |
import sys | |
import md5 | |
from ua_parser import user_agent_parser | |
# beautify ua |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/lib/python | |
# read file | |
# File format is: | |
# {"browser_major":"1","os_family":"Android","os_major":"1","device_family":"Opus One","browser_family":"Android","os_minor":"5"} 5 | |
# {"browser_major":"1","os_family":"Android","os_major":"4","device_family":"icube 900","browser_family":"Baidu Explorer","os_minor":"2"} 1 | |
# hash and store values | |
# loop over values and |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin | |
import zmq | |
import io | |
import time | |
import sys | |
import re | |
# reads a file line by line and sends it | |
# to a tcp endpoint using zeromq | |
# handy to cat big files to a listener |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
node 'limn1.eqiad.wmflabs' { | |
include webserver::apache | |
# make sure /var/log/apache2 is readable by wikidevs for debugging. | |
# This won't make the actual log files readable, only the directory. | |
# Individual log files can be created and made readable by | |
# classes that manage individual sites. | |
file { '/var/log/apache2': | |
ensure => 'directory', | |
owner => 'root', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
C{ | |
#include<stdio.h> | |
#include <time.h> | |
#include <string.h> | |
#define vcl_string char | |
char* get_expiration() { | |
struct tm str_time; | |
time_t time_of_day; | |
char expiration[100]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
add jar /home/nuria/workplace/refinery/source/refinery-core/target/refinery-core-0.0.19-SNAPSHOT.jar; | |
add jar /home/nuria/workplace/refinery/source/refinery-hive/target/refinery-hive-0.0.19-SNAPSHOT.jar; | |
CREATE TEMPORARY FUNCTION isPageview as 'org.wikimedia.analytics.refinery.hive.IsPageviewUDF'; | |
CREATE TEMPORARY FUNCTION isAppPageview as 'org.wikimedia.analytics.refinery.hive.IsAppPageviewUDF'; | |
use wmf; | |
select isPageview(uri_host, uri_path, uri_query, http_status, content_type, user_agent) from webrequest where year=2015 and month=09 and day=04 and hour=01; | |
-- call hive like this, leaving hive.aux.jars.path empty | |
>hive --hiveconf hive.aux.jars.path= -f test-udf.hql |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- geocoded data on webrequest is like: | |
-- {"city":"Unknown","country_code":"--","longitude":"-1","postal_code":"Unknown","timezone":"Unknown","subdivision":"Unknown","continent":"Unknown","latitude":"-1","country":"Unknown"} | |
-- find records for cities where we have fewer than 10 unique IPs | |
use wmf; | |
select wr1.client_ip, geocoded_data["city"] from webrequest as wr1 where year=2015 and month=09 and hour=01 | |
and wr1.client_ip in (select wr2.client_ip from webrequest wr2 where year=2015 and month=09 and hour=01 group by wr2.client_ip having count(*) <10); |
OlderNewer