Skip to content

Instantly share code, notes, and snippets.

#Dataframes (of strings. Glorious strings)
foo <- data.frame(c("foo","qux","baz","quux"),c("baz","bar","foo","corge"))
bar <- data.frame(c("foo","bar","baz"),c("baz","bar","foo"))
#Retrieve the subset of foo that matches entries in bar.
@Ironholds
Ironholds / thanks.r
Created October 11, 2013 22:57
Thanks users on Wikipedia.
#Read in config variables and associated packages.
source("config.r")
#Construct query function
sql.fun <- function(query_statement){
#Open a connection
con <- dbConnect(drv = "MySQL",
username = analytics_user,
password = analytics_pass,
@Ironholds
Ironholds / gist:10511927
Created April 12, 2014 00:32
example query
SELECT DISTINCT(rc_user_text) AS bfuser,
user_editcount AS edits,
min(rev_timestamp) AS firstedit
FROM recentchanges INNER JOIN user ON rc_user = user_id
INNER JOIN revision ON rc_user = rev_user
WHERE rc_type IN (1,0)
AND rc_bot = 0
AND rc_user > 0
GROUP BY bfuser
def country(x):
#Read in files, storing in memory for speed
ip4_geo = pygeoip.GeoIP(filename = sys.argv[4], flags = 1)
ip6_geo = pygeoip.GeoIP(filename = sys.argv[5], flags = 1)
#Check type
x = listcheck(x)
#Construct output list
cdm <- joinCountryData2Map(dF = x, joinCode = "ISO2", nameJoinColumn = "country")
fortified_polygons <- fortify(cdm)
cdm <- as.data.frame(cdm[,c("value")])
fortified_polygons <- merge(fortified_polygons)
> str(x)
'data.frame': 226 obs. of 2 variables:
$ country: chr "AD" "AE" "AF" "AG" ...
$ value : int 350 19655 644 206 1 6770 64038 59 880 123039 ...
foo <- function(data_frame, column, regex, ...){
#Run the regex over the column and add a new column to the object noting whether the value in that row matched or not
data_frame$matched <- grepl(x = data_frame[,column], pattern = regex, ...)
#Return
return(data_frame)
}
#Call. Works like a charm!
@Ironholds
Ironholds / gist:04d32b2018df09962f03
Last active August 29, 2015 14:05
bot-excludor
SELECT INTO TABLE blahblahblah rev_user_text, page_title, LEFT(rev_timestamp,6) AS ts COUNT(*) as edits FROM revision
JOIN page ON page_id = rev_page
JOIN user_groups ON ug_user = rev_user
WHERE page_namespace = 3
AND page_is_redirect = 0
AND rev_user NOT IN (SELECT user_id FROM user INNER JOIN user_groups ON user_id = ug_user AND ug_group = 'bot')
AND page_title NOT LIKE '%/%'
GROUP BY LEFT(rev_timestamp,6), rev_user_text, page_title
ORDER BY COUNT(*) DESC;
#Libraries and options
library(RMySQL)
library(ggplot2)
library(data.table)
options(quit = "no", scipen = 500)
npp <- function(){
#Retrieve and parse data
con <- dbConnect(drv = "MySQL",
@Ironholds
Ironholds / gist:c81dd7dcd35c1a5560d2
Created September 16, 2014 19:53
Fer breaking inter-time events up into sessions
output_list <- list(numeric())
for(i in seq_along(x)){
if(x[i] > intertime){
if(length(output_list[[length(output_list)]]) > 0){
output_list[[(length(output_list)+1)]] <- numeric()
}
@Ironholds
Ironholds / gaah
Created September 30, 2014 18:11
SELECT parse_url(concat('http://bla.org/',uri_path), 'FILE')
FROM wmf_raw.webrequest
WHERE year = 2014 AND month = 09 AND day = 23 AND
hour = 07 AND webrequest_source='upload' LIMIT 5;