A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
#Load sqldf package, which will load all others necessary | |
#By default, SQLite runs in the background to do the processing; you could use other DB engines if you wanted | |
library("sqldf") | |
#Import employees data | |
employees <- structure(list(id = 1:20, lastname = structure(c(5L, 14L, 13L, 15L, 6L, 16L, 9L, 1L, 3L, 12L, 10L, 8L, 12L, 3L, 11L, 13L, 10L, 7L, 2L, 4L), .Label = c("a", "b", "c", "f", "g", "h", "i", "j", "n", "o", "p", "r", "s", "t", "w", "z"), class = "factor"), firstname = structure(c(12L, 6L, 5L, 12L, 11L, 15L, 9L, 18L, 17L, 7L, 8L, 10L, 4L, 14L, 19L, 16L, 1L, 13L, 2L, 3L), .Label = c("chris", "dima", "drew", "eric", "hila", "jason", "jeremy", "joe", "jon", "jowanza", "lashanda", "matt", "michael", "michelle", "randy", "rudi", "solon", "stewart", "tim"), class = "factor"), gender = structure(c(2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("f", "m"), class = "factor")), .Names = c("id", "lastname", "firstname", "gender"), class = "data.frame", row.names = c(NA, -20 |
# sql.export.gbm(): save a GBM model as SQL | |
# v0.11 | |
# Copyright (c) 2013-2014 Shane Butler <shane dot butler at gmail dot com> | |
# | |
# sql.export.gbm is free software: you can redistribute it and/or modify it | |
# under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 2 of the License, or | |
# (at your option) any later version. | |
# | |
# sql.export.gbm is distributed in the hope that it will be useful, but |
from pylab import * | |
from numpy import * | |
from numpy.linalg import solve | |
from scipy.integrate import odeint | |
from scipy.stats import norm, uniform, beta | |
from scipy.special import jacobi | |
a = 0.0 |
// '.\' is the current path | |
// Define the path here, if required ... | |
SET basePath = '.\'; | |
TRACE ---------------------------------------------------------------; | |
TRACE Saving tables ... ; | |
TRACE ~~; | |
For i = 0 To NoOfTables() -1 | |
#First we are going to set up probability distributions for our beliefs about the inputs | |
#We've been told ARPU is about £7 and it's very unlikely to be higher than £10 or lower than £4 | |
#So we'll go for a normal distribution centred at 7 with 2.5% and 97.5% quantiles at 4 and 10 | |
#Show how we get the standard deviation | |
# Standard deviation chosen so that a distance of 3 from the mean equals 1.96 sd;
# 1.96 is the standard-normal 97.5% quantile, so mean +/- 3 is the central 95% interval.
arpu.sd<-3/1.96 | |
# Grid of candidate values from 0 to 15 in steps of 0.5.
x<-seq(0, 15,by=0.5) | |
# Normal density with mean 7 and sd arpu.sd, evaluated at each grid point.
d<-dnorm(x, 7, arpu.sd) | |
# Draw the density as a line plot.
plot(x, d, type='l') |
from matplotlib import use | |
from pylab import * | |
from scipy.stats import beta, norm, uniform | |
from random import random | |
from numpy import * | |
import numpy as np | |
import os | |
# Input data |
A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
# sql.export.rf(): save a randomForest model as SQL | |
# v0.04 | |
# Copyright (c) 2013-2014 Shane Butler <shane dot butler at gmail dot com> | |
# | |
# sql.export.rf is free software: you can redistribute it and/or modify it | |
# under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 2 of the License, or | |
# (at your option) any later version. | |
# | |
# sql.export.rf is distributed in the hope that it will be useful, but |
require(RMySQL) | |
require(ggplot2) | |
require(scales) | |
# Database credentials passed to dbConnect() below.
# NOTE(review): these are hard-coded in source; consider reading them from the
# environment (e.g. Sys.getenv()) instead of committing them.
myusername = "peter" | |
mypassword = "sekret" | |
# Start an SSH tunnel via the shell: -f backgrounds ssh, -N runs no remote command,
# -L forwards local port 3306 to port 3306 on the remote host, and
# ExitOnForwardFailure=yes makes ssh exit if the port forward cannot be set up.
system('ssh -f [email protected] -L 3306:localhost:3306 -N -o ExitOnForwardFailure=yes') | |
con <- dbConnect(MySQL(), | |
user=myusername, password=mypassword, |
#!/bin/bash | |
# | |
# Installs the latest RStudio daily desktop build for OSX/macOS and Ubuntu(amd64) | |
# | |
# https://support.rstudio.com/hc/en-us/articles/203842428-Getting-the-newest-RStudio-builds | |
set -e | |
install_macos_daily() { | |
REDIRECT_URL="https://www.rstudio.org/download/latest/daily/desktop/mac/RStudio-latest.dmg" |