Created
July 23, 2019 04:00
-
-
Save tslumley/02d07670dbb92b2ca7e1d624c28c2823 to your computer and use it in GitHub Desktop.
Some ideas towards a multiple-response class
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "Multiple response" | |
author: "Thomas Lumley" | |
date: "7/23/2019" | |
output: html_document | |
--- | |
```{r setup, include=FALSE} | |
knitr::opts_chunk$set(echo = TRUE) | |
``` | |
Representation: logical vector with TRUE/FALSE/NA and labels | |
```{r} | |
ethnicity <- matrix( c(T,F,F,F,F, | |
T,T,F,F,F, | |
F,T,F,F,F, | |
F,T,T,F,F, | |
NA,NA,F,F,F, | |
T,NA,F,T,F), byrow=TRUE,ncol=5) | |
attr(ethnicity,"levels")<-c("European","Māori","Pacific","Asian","MELAA") | |
attr(ethnicity,"class")<-"mr" | |
``` | |
Coercion from `mr` objects | |
```{r} | |
as.logical.mr<-function(x){ | |
unclass(x) | |
} | |
as.character.mr<-function(x,sep="+",na.rm=TRUE){ | |
levels<-attr(x,"levels") | |
x<-unclass(x) | |
if (na.rm) { | |
x[is.na(x)]<-FALSE | |
x<-lapply(seq_len(NROW(x)), function(i) levels[x[i,]]) | |
} else { | |
z<-x | |
z[is.na(z)]<-TRUE | |
tmp<-lapply(seq_len(NROW(x)),function(i) ifelse(is.na(x[i,])[z[i,]], paste0("?",levels[z[i,]]), levels[z[i,]])) | |
x<-tmp | |
} | |
sapply(x,paste,collapse=sep) | |
} | |
as.character(ethnicity,na.rm=TRUE) | |
as.character(ethnicity,na.rm=FALSE) | |
``` | |
Basic manipulations | |
```{r} | |
"[.mr"<-function(x,i,j){ | |
levels<-attr(x,"levels") | |
x<-as.logical(x)[i,j,drop=FALSE] | |
new_levels<-levels[j] | |
attr(x,"levels")<-new_levels | |
class(x)<-"mr" | |
x | |
} | |
length.mr<-function(x) NROW(x) | |
length(ethnicity) | |
ethnicity[1,] | |
ethnicity[1:2,] | |
ethnicity[,1:2] | |
``` | |
Print method | |
```{r} | |
print.mr <-function(x,...,na.rm=FALSE,sep="+"){ | |
x<-as.character(x,na.rm=na.rm,sep=sep) | |
print(x) | |
} | |
ethnicity | |
ethnicity[1,] | |
ethnicity[1:2,] | |
ethnicity[,1:2] | |
``` | |
Relational operator | |
```{r} | |
"%has%"<- function(x,y) { | |
if (is.factor(y)) y<-as.character(y) | |
if (!is.character(y)) stop('needs to be character or factor') | |
if(length(y)==1) y<-rep(y,length(x)) | |
ifelse(y %in% levels(x), x[,match(y,levels(x))] ,FALSE) | |
} | |
ethnicity %has% "Māori" | |
response_count<-function(x,na.rm=TRUE) rowSums(x,na.rm=na.rm) | |
"%hasonly%"<- function(x,y) { | |
(x %has% y) & (response_count(x)==1) | |
} | |
ethnicity %hasonly% "Māori" | |
``` | |
Prioritised collapsing | |
```{r} | |
prioritise<-function(x, priorities){ | |
y<-rep(NA_character_,length(x)) | |
for(l in rev(priorities)){ | |
y<-ifelse(x %has% l,l,y) | |
} | |
factor(y,levels=levels(x)) | |
} | |
pr_eth<-prioritise(ethnicity, c("Māori","Pacific","Asian","MELAA","European")) | |
str(pr_eth) | |
``` | |
Tidying to individual indicators (like `gather`) | |
```{r} | |
stack.mr<-function(x,...,na.rm=FALSE){ | |
levels<-levels(x) | |
x<-unclass(x) | |
x[is.na(x)]<-!na.rm | |
r<-rowSums(x) | |
values<-do.call(c,lapply(seq_len(NROW(x)),function(i) levels[x[i,]])) | |
id<-rep(seq_len(NROW(x)),r) | |
data.frame(values=factor(values,levels=levels),id) | |
} | |
stack(ethnicity) | |
``` | |
```{r} | |
indicator<-function(x, ...){ | |
xx<-1L*unclass(x) | |
colnames(xx)<-levels(x) | |
attr(x,"levels")<-NULL | |
xx | |
} | |
indicator(ethnicity) | |
``` | |
```{r} | |
as.mr<-function(x,...) UseMethod("as.mr",x) | |
as.mr.mr<-function(x,...) x | |
as.mr.default<-function(x,levels=unique(x)){ | |
rval<-outer(x,levels,"==") | |
attr(rval,"levels")<-levels | |
class(rval)<-"mr" | |
rval | |
} | |
as.mr.factor<-function(x,...){ | |
rval<-outer(x,levels(x),"==") | |
attr(rval,"levels")<-levels(x) | |
class(rval)<-"mr" | |
rval | |
} | |
mtable<-function(x,y,na.rm=TRUE){ | |
x<-as.mr(x) | |
if (missing(y)){ | |
rval<-matrix(colSums(x,na.rm=na.rm),nrow=1) | |
dimnames(rval)<-list("",levels(x)) | |
rval | |
} else { | |
y<-as.mr(y) | |
xx<-unclass(x) | |
if(na.rm) xx[is.na(xx)]<-FALSE | |
yy<-unclass(y) | |
if(na.rm) yy[is.na(yy)]<-FALSE | |
rval<-crossprod(xx,yy) | |
dimnames(rval)<-list(levels(x),levels(y)) | |
rval | |
} | |
} | |
mtable(ethnicity) | |
mtable(1:6,ethnicity) | |
mtable(pr_eth,ethnicity) | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment