Skip to content

Instantly share code, notes, and snippets.

View ottomata's full-sized avatar

Andrew Otto ottomata

View GitHub Profile
#!/bin/bash
host=$1
sed -i -e "/$host/d" $HOME/.ssh/known_hosts
# dse hadoop fs -cat /user/otto/banner0/country_count-20110917-27.1/part*
DE 204078696
RU 116527863
FR 115311324
PL 70759051
ES 64887851
NL 40053782
SE 29960199
CH 21904803
BE 21522453
# dse hadoop fs -cat /user/otto/banner0/country_count-20110917-27.1/part* | sort
A1 111410
A2 18534
AD 112675
AE 69376
AF 2587
AG 876
AI 61
AL 24873
@ottomata
ottomata / gist:3801808
Created September 28, 2012 19:57
Banner Impressions 2011/2012 delta
Country 2011 2012 delta
A1 111410 93300 -18110
A2 18534 48700 30166
AD 112675 12400 -100275
AE 69376 259400 190024
AF 2587 3100 513
AG 876 6900 6024
AI 61 1400 1339
AL 24873 119500 94627
AM 38047 86000 47953
@ottomata
ottomata / Banner Impressions 20112012 delta
Created September 28, 2012 19:57
Banner Impressions 2011/2012 delta
Banner Impression Page Requests for Septempter 17-27 in 2011 and 2012.
Country 2011 2012 delta
A1 111410 93300 -18110
A2 18534 48700 30166
AD 112675 12400 -100275
AE 69376 259400 190024
AF 2587 3100 513
AG 876 6900 6024
AI 61 1400 1339
@ottomata
ottomata / gist:3812038
Created October 1, 2012 14:15
dse hadoop fs -cat /user/otto/banner0/country_count-filtered-20110917-27.0/part* | sort
CA 6
ES 41
FI 1
FR 1
GH 6
GR 2
IN 30
JM 1
JP 10
LB 1
REGISTER 'akela-0.5-SNAPSHOT.jar'
REGISTER 'maxmind-geoip-1.2.5.jar'
set pig.exec.nocombiner true
DEFINE GeoIpLookup com.mozilla.pig.eval.geoip.GeoIpLookup('GeoIPCity.dat');
LOG_FIELDS = LOAD '/user/otto/banner0/bannerImpressions-2011-11-16-05PM--30.log' USING PigStorage(' ') AS (hostname:chararray, udplog_sequence:chararray, timestamp:chararray, request_time:chararray, remote_addr:chararray, http_status:chararray, bytes_sent:chararray, request_method:chararray, uri:chararray, proxy_host:chararray, content_type:chararray, referer:chararray, x_forwarded_for:chararray, user_agent);
GEO_DATA = FOREACH LOG_FIELDS GENERATE FLATTEN (GeoIpLookup(remote_addr)) AS (country:chararray, country_code:chararray, region:chararray, city:chararray, postal_code:chararray, metro_code:int);
COUNTRY_CODE = FOREACH GEO_DATA GENERATE country_code;
COUNTRY_COUNT = FOREACH (GROUP COUNTRY_CODE BY $0 PARALLEL 28) GENERATE $0, COUNT($1) as num;
COUNTRY_COUNT_SORTED = ORDER COUNTRY_COUNT BY num DES
@ottomata
ottomata / uri_counts_BannerController.pig
Created October 3, 2012 20:01
404 URI Counts, matching BannerController
http://en.wikipedia.org/w/index.php?title=Special:BannerController&cache=/cn.js&303-4 444496
http://pl.wikipedia.org/w/index.php?title=Specjalna:BannerController&cache=/cn.js&303-4 354280
http://ru.wikipedia.org/w/index.php?title=%D0%A1%D0%BB%D1%83%D0%B6%D0%B5%D0%B1%D0%BD%D0%B0%D1%8F:BannerController&cache=/cn.js&303-4 318375
http://fr.wikipedia.org/w/index.php?title=Sp%C3%A9cial:BannerController&cache=/cn.js&303-4 229867
http://es.wikipedia.org/w/index.php?title=Especial:BannerController&cache=/cn.js&303-4 213239
http://it.wikipedia.org/w/index.php?title=Speciale:BannerController&cache=/cn.js&303-4 202884
http://cs.wikipedia.org/w/index.php?title=Speci%C3%A1ln%C3%AD:BannerController&cache=/cn.js&303-4 116938
http://pt.wikipedia.org/w/index.php?title=Especial:BannerController&cache=/cn.js&303-4 91700
http://sv.wikipedia.org/w/index.php?title=Special:BannerController&cache=/cn.js&303-4 67572
http://commons.wikimedia.org/w/index.php?title=Special:BannerController&cache=/cn.js&303-4 60926
LOG_FIELDS = LOAD '$input' USING PigStorage(' ') AS (hostname:chararray, udplog_sequence:chararray, timestamp:chararray, request_time:chararray, remote_addr:chararray, http_status:chararray, bytes_sent:chararray, request_method:chararray, uri:chararray, proxy_host:chararray, content_type:chararray, referer:chararray, x_forwarded_for:chararray, user_agent);
STATUS = FOREACH LOG_FIELDS GENERATE http_status;
FILTERED_STATUS = FILTER STATUS BY ($0 matches '.*(404|200|302).*');
STATUS_COUNT = FOREACH (GROUP FILTERED_STATUS BY $0 PARALLEL 28) GENERATE $0, COUNT($1) as num;
STATUS_COUNT_SORTED = ORDER STATUS_COUNT BY num DESC;
STORE STATUS_COUNT_SORTED into '$output';
@ottomata
ottomata / Status Counts
Created October 4, 2012 14:20
HTTP response status counts for 200, 404 and # 302 in 1:1 bannerImpression logs from 2012-09-30 and 2012-10-01
# HTTP response status counts for 200, 404 and
# 302 in 1:1 bannerImpression logs from 2012-09-30 and 2012-10-01.
200 655438604
404 137