Skip to content

Instantly share code, notes, and snippets.

View verajosemanuel's full-sized avatar
💭
RRRRRRRRRRRRRRRR

Jose Manuel Vera verajosemanuel

💭
RRRRRRRRRRRRRRRR
View GitHub Profile
@verajosemanuel
verajosemanuel / autoincrement_column
Last active November 14, 2017 08:36
Get a numeric ID column from a data frame so it can be inserted into a database as an autoincrement key #R #tibble #wrangling
# Add an `id` column to `df` for use as a database autoincrement key.
# library() stops with an error when tibble is not installed; require() would
# only return FALSE and let the script fail later at the first tibble call.
library(tibble)
# If the row names are numeric: move them into a new `id` column.
# rownames_to_column() stores them as character, so convert with as.integer()
# before inserting if the database column is numeric.
df <- rownames_to_column(df, var = "id")
# If the row names are not numeric: number the rows 1..nrow(df) instead.
# NOTE(review): the two statements look like alternatives; when both run, this
# assignment overwrites the `id` column created above.
# seq_len() yields an empty vector for a zero-row data frame, whereas
# seq.int(0) returns c(1, 0) and makes the assignment fail.
df$id <- seq_len(nrow(df))
@verajosemanuel
verajosemanuel / py_logging_example.R
Last active November 14, 2017 09:41
Python-style logging in #R with #py_logging #logs
# Example: Python-style logging using the helpers loaded from py_logging.R.
source('py_logging.R')
# Set up logging, passing a file name for the debug, info and error logs.
logger.setup(debugLog = "debug.log", infoLog = "info.log", errorLog = "error.log")
# Silence other warning messages
options(warn = -1) # -1=ignore, 0=save/print, 1=print, 2=error
# Fetch the most recent error message and interpolate it into the log line.
# Without the sprintf() call the `%s` placeholder is never filled and
# `err_msg` goes unused.
# NOTE(review): assumes logger.error() accepts a ready-made message string;
# confirm against py_logging.R.
err_msg <- geterrmessage()
logger.error(sprintf('Error on running some scripts: %s', err_msg))
@verajosemanuel
verajosemanuel / fftree.R
Last active November 14, 2017 09:40
#FFTrees #packages #R #ML
# Load FFTrees, read the semicolon-separated input and recode a "no"/"yes"
# column into 0/1.
library("FFTrees")
df <- read.csv("data.csv", sep = ";", stringsAsFactors = FALSE)
## Recoding
# Work on `df`, the data frame read above. `pdf` is not defined in this script
# and would resolve to the grDevices::pdf() function, which cannot be
# subsetted with `$`.
# Start from a copy of `original`, then overwrite the rows where TIPO is
# "no" or "yes".
df$recoded <- df$original
df$recoded[df$TIPO == "no"] <- "0"
df$recoded[df$TIPO == "yes"] <- "1"
# Rows matching neither value keep what was copied from `original`;
# as.numeric() turns any non-numeric leftovers into NA (with a warning).
df$recoded <- as.numeric(df$recoded)
@verajosemanuel
verajosemanuel / microbenchmark.R
Last active November 14, 2017 09:39
#microbenchmark in #R
# Time four candidate functions on the same input and plot the timings.
# Attach both packages up front: microbenchmark() is not available unless its
# package is loaded, and autoplot() comes from ggplot2.
library(microbenchmark)
library(ggplot2)
# function_1() .. function_4() and `df` are placeholders for the
# implementations and data being compared.
mb <- microbenchmark(
  function_1(df),
  function_2(df),
  function_3(df),
  function_4(df)
)
# Draw the distribution of timings per expression.
autoplot(mb)
@verajosemanuel
verajosemanuel / lookup.R
Last active November 14, 2017 09:39
#lookup #tables in #R #wrangling
# Codes to translate.
x <- c("m", "f", "u", "f", "f", "m", "m")
# Named character vector used as a lookup table: names are the codes,
# values are the labels ("u" maps to a missing label).
lookup <- setNames(c("Male", "Female", NA), c("m", "f", "u"))
# Indexing by name returns one label per code, still carrying the code names.
lookup[x]
# Same result without the names.
unname(lookup[x])
@verajosemanuel
verajosemanuel / match-merge.R
Last active November 14, 2017 09:39
matching and merging #df in #R
# Grades to look up.
grades <- c(1, 2, 2, 3, 1)
# Reference table with one row per grade: its description and whether it
# counts as a fail.
info <- data.frame(
  grade = 3:1,
  desc = c("Excellent", "Good", "Poor"),
  # Spell out TRUE/FALSE: T and F are ordinary variables that can be
  # reassigned, which would silently change these values.
  fail = c(FALSE, FALSE, TRUE)
)
# info table with grades
@verajosemanuel
verajosemanuel / expand_agg.R
Last active November 14, 2017 09:38
Expanding aggregates in #R #df #wrangling
# Aggregated data: each row stands for `n` identical observations.
df <- data.frame(x = c(2, 4, 1), y = c(9, 11, 6), n = c(3, 5, 1))
# Repeat each row number `n` times. seq_len() keeps the index empty for a
# zero-row data frame, where 1:nrow(df) would count down as c(1, 0).
row_index <- rep(seq_len(nrow(df)), df$n)
row_index
# Indexing with the repeated row numbers expands the aggregate to one row per
# observation.
df[row_index, ]
@verajosemanuel
verajosemanuel / set_operations.r
Last active November 14, 2017 09:35
Set operations #dplyr #R #wrangling
# Drop a column by name: keep every column of `df` whose name is not "y".
df[setdiff(names(df), "y")]
# Two sorted random samples, each with an NA appended; each is printed right
# after it is created.
x <- c(sort(sample(1:20, 9)), NA)
x
y <- c(sort(sample(3:23, 7)), NA)
y
# Values found in either vector.
union(x, y)
# Values found in both vectors.
intersect(x, y)
@verajosemanuel
verajosemanuel / assign_loop.R
Last active November 14, 2017 09:35
#loop to assign in #R
# Read df1.csv .. df4.csv (semicolon-separated) and bind each result to a
# variable named df_name1 .. df_name4 in the current environment.
for (i in seq_len(4)) {
  assign(
    x = paste0("df_name", i),
    value = read.csv(
      file = paste0("df", i, ".csv"),
      sep = ";",
      stringsAsFactors = FALSE
    )
  )
}
@verajosemanuel
verajosemanuel / janitor.R
Last active November 14, 2017 09:34
#janitor tools #R #packages #wrangling
library(janitor)
# Drop rows and columns that are entirely empty. remove_empty() replaces the
# deprecated remove_empty_rows() / remove_empty_cols() pair.
df <- remove_empty(df, which = c("rows", "cols"))
# clean names
# clean_names() returns the data frame with cleaned column names, so it can
# be assigned back directly.
df <- clean_names(df)