[TOC]
# Extract the first column while keeping a one-column data frame.
# `drop` is an argument of `[`, not of as.data.frame(), and the logical
# constant is spelled FALSE (lowercase `false` is just an undefined symbol).
as.data.frame(df[, 1, drop = FALSE])
# Add a new column that holds the values of an existing vector.
mydata$newvar <- oldvar
# Default merge() is an inner join on the shared column names: rows without a
# match in the other table are dropped.
new_writers_df <- merge(writers_df, data2)
# Left outer join: keep every row of writers_df; columns coming from data2 are
# NA where there is no match. (all.x = FALSE is the default inner join.)
merge(writers_df, data2, all.x = TRUE)
# Right outer join: keep every row of data2.
merge(writers_df, data2, all.y = TRUE)
# Join on key columns that are named differently in the two tables.
merge(writers_df, data2, by.x = "Age.At.Death", by.y = "Age")
# data.table: add column INI with the first character of column Name.
train[, INI := substr(Name, start = 1, stop = 1)]
# Base R: compute a new column from two existing ones.
ds[["temp_range"]] <- ds[["max_temp"]] - ds[["min_temp"]]
# transform() evaluates each expression with the table's columns in scope and
# returns the table with the new column(s) added.
train <- transform(
  train,
  avg_times = mon_since_first_don / no_don
)
ds <- transform(
  ds,
  temp_range = max_temp - min_temp,
  excess = rainfall - evaporation
)
# data.table conditional updates: only the rows selected in `i` are assigned.
# Flag rows whose initial is empty with named = 0.
dt[INI == "", named := 0]
# Start from a default, then overwrite the rows that match each pattern.
dt[, sex := "Unknown"]
dt[SexuponOutcome %like% "Male", sex := "Male"]
dt[SexuponOutcome %like% "Female", sex := "Female"]
# Where age is missing, fill it in from column imp_age.
full_imp[is.na(age), age := imp_age]
# Append a vector as a new column.
mydata <- cbind(mydata, newVector)
# Add a column by assigning one value per existing row.
writers_df$Location <- c(
  "Belgium", "United Kingdom", "United States", "United Kingdom"
)
# Build the new row as a list, not with c(): c() would coerce every value to
# character, and rbind() would then turn the numeric columns into character.
new_row <- list(50, 22, "Roberto", "Bolano", "MALE", "2003-07-15")
writers_df_large <- rbind(writers_df, new_row)
# data.table: rbindlist() takes ONE list of tables, e.g.
# rbindlist(list(DT1, DT2)), and is faster than rbind().
# `-1` with `with = FALSE` drops the first column of dt.train.
rbindlist(list(dt.train[, -1, with = FALSE], dt.test))
# Append a vector as a new row.
mydata <- rbind(mydata, newVector)
# NOTE: `merge` here is a data object, not the base merge() function.
# Drop every row that contains an NA in any column.
na.omit(merge)
# Keep only the rows whose 2nd and 3rd columns are both non-missing.
merge[complete.cases(merge[, 2:3]), ]
# Remove observations with a missing target, then count the NAs that remain.
ds <- ds[!is.na(ds[[target]]), ]
sum(is.na(ds[[target]]))
# Delete the `excess` column.
ds[["excess"]] <- NULL
# Impute NAs in the selected variables, then count the NAs that remain.
# na.roughfix() is presumably randomForest's median/mode imputation - confirm.
ds[vars] <- na.roughfix(ds[vars])
sum(is.na(ds[vars]))
# Blank out a single cell. A single cell cannot be deleted: assigning NULL to
# `writers_df[1, 3]` raises an error, so mark the value as missing with NA.
writers_df[1, 3] <- NA
# Drop a whole column by assigning NULL to that column of the data frame.
# A bare `Age.At.Death <- NULL` would only create an unrelated NULL variable.
writers_df$Age.At.Death <- NULL
# Logical row mask: TRUE keeps the row, FALSE drops it.
rows_to_keep <- rep(c(TRUE, FALSE), times = 2)
limited_writers_df <- writers_df[rows_to_keep, ]
# Keep the rows that satisfy a condition on one column.
fourty_sth_writers <- writers_df[writers_df[["Age.At.Death"]] > 40, ]
# Wide -> long: stack() puts the selected columns into a single `values`
# column and records the source column name in `ind`.
# observations_wide:
## Subject Gender Read Write Listen
## 1 1 M 10 8 7
## 2 2 F 7 4 6
long_format <- stack(observations_wide, select = c(Read, Write, Listen))
# long_format:
## values ind
## 1 10 Read
## 2 7 Read
## 3 8 Write
## 4 4 Write
## 5 7 Listen
## 6 6 Listen
#----------------------------------------
# Long -> wide: unstack() splits `Result` into one column per level of `Test`.
# observations_long:
## Subject Gender Test Result
## 1 1 M Read 10
## 2 2 F Write 4
## 3 1 M Write 8
## 4 2 F Listen 6
## 5 2 F Read 7
## 6 1 M Listen 7
unstack(observations_long, form = Result ~ Test)
# Result:
## Listen Read Write
## 1 6 10 4
## 2 7 7 8
#----------------------------------------
# Every lubridate call is namespace-qualified: data.table exports its own
# hour()/minute()/year()/... helpers, so an unqualified call would resolve to
# whichever package was attached last.
# Parse "year-month-day hour:minute:second" strings into date-times.
dt[, DateTime := lubridate::ymd_hms(DateTime)]
# Time of day as fractional hours, e.g. 13:30 -> 13.5.
dt[, time := lubridate::hour(DateTime) + lubridate::minute(DateTime) / 60]
# Calendar parts: year and month as factors, day of month as a number.
dt[, year := factor(lubridate::year(DateTime))]
dt[, month := factor(lubridate::month(DateTime))]
dt[, day := as.numeric(lubridate::day(DateTime))]
# Day of the week as a number (with lubridate's default settings 1 = Sunday).
dt[, weekday := lubridate::wday(DateTime)]
# Set the column names; names() and colnames() are interchangeable for a
# data frame, both variants are shown.
names(writers_df) <- c(
  "Age.At.Death", "Age.As.Writer", "Name", "Surname", "Gender", "Death"
)
colnames(writers_df) <- c(
  "Age.At.Death", "Age.As.Writer", "Name", "Surname", "Gender", "Death"
)
# Label the four rows ID1 ... ID4.
rownames(writers_df) <- paste0("ID", 1:4)
# Build a 2 x 3 numeric matrix row by row.
A <- rbind(
  c(2, 4, 3),
  c(1, 5, 7)
)
# Matrix -> data frame; unnamed columns become V1, V2, V3.
A_df <- as.data.frame(A)
# Data frame -> matrix. A matrix holds a single type, so if writers_df mixes
# numeric and text columns the result is a character matrix.
writers_matrix <- as.matrix(writers_df)
# Data frame -> plain list with one element per column.
writers_list <- as.list(writers_df)
# Convert free-text ages into a numeric `age` in days.
# Assumes non-empty values look like "<number> <unit>", e.g. "2 years".
# Empty ages get a two-token placeholder so they split like every other value.
dt[AgeuponOutcome == "", AgeuponOutcome := "unknown unknown"]
# strsplit() is already vectorised, so no sapply() wrapper is needed; rbind the
# per-row token vectors into a character matrix (column 1 = number, 2 = unit).
parsed_age <- do.call(rbind, strsplit(dt[, AgeuponOutcome], " ", fixed = TRUE))
dt[, c("num", "unit") := .(parsed_age[, 1], parsed_age[, 2])]
# Scale each unit to days (approximation: 365 days/year, 30.5 days/month).
dt[unit %like% "year", age := as.numeric(num) * 365]
dt[unit %like% "month", age := as.numeric(num) * 30.5]
dt[unit %like% "week", age := as.numeric(num) * 7]
dt[unit %like% "day", age := as.numeric(num)]
# NA_real_ keeps `age` numeric even if this statement is what creates it.
dt[unit == "unknown", age := NA_real_]
library(lubridate)
# Parse the date column (through its character form) as year-month-day dates.
ds[["date"]] <- ymd(as.character(ds[["date"]]))
# Treat the target variable as categorical.
ds[[target]] <- as.factor(ds[[target]])
# Bar chart of how often each target value occurs.
# NOTE(review): aes_string() is soft-deprecated in ggplot2 >= 3.0.0; kept
# here so the plot (including its axis label) stays unchanged.
p <- ggplot(ds, aes_string(x = target)) +
  geom_bar(width = 0.2)
print(p)