Last active
March 2, 2018 15:31
-
-
Save dgrapov/5384152 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#needed packages | |
library(andrews) | |
library(ggplot2) | |
#get some data here I use "mtcars" | |
data<-mtcars | |
fct<-as.factor(mtcars$cyl) # factor for intial grouping | |
#get andrews encoding (x and y coords) | |
make.andrews<-function (df, type = 1, clr = NULL, step = 100, ymax = 10,...) | |
{ | |
#add objects to store x and y | |
storelist<-list() | |
if (step < 1) | |
step <- 100 | |
n <- dim(df)[1] | |
m <- dim(df)[2] | |
#plot.new() | |
if ((type == 1) | (type == 2) | (type == 4)) { | |
xmin <- (-pi) | |
xmax <- pi | |
} | |
else if (type == 3) { | |
xmin <- 0 | |
xmax <- 4 * pi | |
} | |
# plot.window(c(xmin, xmax), c(-ymax, ymax)) | |
# title(main = main, sub = sub) | |
# axis(1) | |
# axis(2) | |
# box() | |
# lines(c(xmin, xmax), c(0, 0)) | |
for (i in 1:m) df[, i] <- normalize(df[, i]) | |
clx <- rep(1, n) | |
if (!is.null(clr)) { | |
if (!is.numeric(df[, clr])) { | |
for (i in 1:n) { | |
for (a in 1:nlevels(df[, clr])) if (levels(df[, | |
clr])[a] == df[i, clr]) | |
clx[i] <- rainbow(nlevels(df[, clr]))[a] | |
} | |
} | |
else { | |
for (i in 1:n) clx[i] <- hsv(0, 1, df[i, clr]) | |
} | |
} | |
dfm <- numarray(df) | |
m <- dim(dfm)[2] | |
coorx <- 0:step | |
for (p in 0:step) coorx[p + 1] <- (xmin + (xmax - xmin)/step * | |
p) | |
coory <- 0:step | |
for (i in 1:n) { | |
for (p in 0:step) { | |
coory[p + 1] <- 0 | |
tt <- (xmin + (xmax - xmin)/step * p) | |
for (a in 1:m) { | |
if (type == 1) { | |
if (a == 1) { | |
coory[p + 1] <- dfm[i, a]/(2^0.5) | |
} | |
else { | |
cnst <- (a - 2)%/%2 + 1 | |
if ((a - 2)%%2 == 0) | |
coory[p + 1] <- coory[p + 1] + dfm[i, a] * | |
sin(cnst * tt) | |
else coory[p + 1] <- coory[p + 1] + dfm[i, | |
a] * cos(cnst * tt) | |
} | |
} | |
else if (type == 2) { | |
cnst <- (a - 1)%/%2 + 1 | |
if ((a - 1)%%2 == 0) | |
coory[p + 1] <- coory[p + 1] + dfm[i, a] * | |
sin(cnst * tt) | |
else coory[p + 1] <- coory[p + 1] + dfm[i, | |
a] * cos(cnst * tt) | |
} | |
else if (type == 3) { | |
coory[p + 1] <- coory[p + 1] + dfm[i, a] * | |
cos((a * tt)^0.5) | |
} | |
else if (type == 4) { | |
if (a == 1) { | |
coory[p + 1] <- dfm[i, a] | |
} | |
else { | |
cnst <- (a - 2)%/%2 + 1 | |
if ((a - 2)%%2 == 0) | |
coory[p + 1] <- coory[p + 1] + dfm[i, a] * | |
(sin(cnst * tt) + cos(cnst * tt)) | |
else coory[p + 1] <- coory[p + 1] + dfm[i, | |
a] * (sin(cnst * tt) - cos(cnst * tt)) | |
} | |
coory[p + 1] <- coory[p + 1]/(2^0.5) | |
} | |
} | |
} | |
#lines(coorx, coory, col = clx[i]) | |
storelist[[i]]<-data.frame(x=coorx, y=coory) | |
} | |
return(storelist) | |
} | |
#plot | |
plot.data<-function(data,meta.data = NULL, kind="andrews",...){ | |
#change plotting and returned object using kind | |
#kind = c("andrews-polygon","andrews-line","PCA","andrews-PCA") | |
# meta data will be used to coor groups | |
# can be multi column data frame, in this case it is collapsed to single factor | |
#plotting theme for convenience (base'ish) | |
.theme2<- theme( | |
axis.line = element_line(colour = 'gray', size = .75), | |
panel.background = element_blank(), | |
plot.background = element_blank() | |
) | |
#meta data needs to be separated from numeric | |
num.id<-c(1:ncol(data))[sapply(data,is.numeric)] # numeric columns | |
df<-data[,num.id] | |
if(is.null(meta.data)){ | |
meta<-data[,-(num.id),drop=FALSE] # look for factors | |
if(length(meta)==0) {meta<-data.frame(rep(1,nrow(data)))} # fix emptyness | |
} else { | |
meta<-as.data.frame(meta.data) | |
} | |
#need to combine meta to 1 column | |
if(ncol(meta)>1) { | |
meta<-data.frame(sapply(1:nrow(meta),function(i) { | |
paste(as.character(unlist(meta[i,,drop=FALSE])),collapse = "|") | |
})) | |
} | |
if(kind == "pairs"){ | |
meta<-as.factor(as.character(unlist(meta))) | |
.rainbow<-function(x,alpha=NULL,...){ | |
if(is.null(alpha)){alpha<-1} | |
rainbow(nlevels(x),alpha=alpha)[x] | |
} | |
color<-.rainbow(meta,...) | |
plot(df,bg=color,col=color,...) | |
} | |
if(kind == "andrews-line"){ | |
a.data<-make.andrews(df,...) # makes list of x and y for each row | |
#need to add meta data cols to x and y to keep track | |
tmp<-lapply(1:length(a.data),function(i){ | |
cbind(a.data[[i]],meta=factor(meta[i,],levels=levels(as.factor(meta[,1]))),row.id=i) | |
}) | |
obj<-do.call("rbind",tmp) | |
#plot | |
#intersting to change line width | |
p<-ggplot(data=obj, aes(x=x, y=y,group=row.id,color=meta)) + #change group = meta for an interesting effct | |
geom_line(...) + #geom_point(,...) + | |
.theme2# | |
print(p) | |
return(obj) | |
} | |
if(kind == "andrews-polygon"){ | |
a.data<-make.andrews(df,...) # makes list of x and y for each row | |
#need to add meta data cols to x and y to keep track | |
tmp<-lapply(1:length(a.data),function(i){ | |
cbind(a.data[[i]],meta=factor(meta[i,],levels=levels(as.factor(meta[,1]))),row.id=i) | |
}) | |
obj<-do.call("rbind",tmp) | |
#plot | |
#intersting to change line width | |
p<-ggplot(data=obj, aes(x=x, y=y,group=meta,color=meta)) + #change group = meta for an interesting effct | |
geom_line(size=4,aes(color=meta),...) + geom_point(,...) + | |
.theme2# | |
print(p) | |
return(obj) | |
} | |
#PCA scores of andrews encoded data?? does the encoding help? define help? | |
# need to consider how to combine x an y | |
# could be a combination or relationship between | |
# try separeted | |
if(kind == "andrews-PCA"){ | |
a.data<-make.andrews(df,...) # makes list of x and y for each row | |
#need to make vector combine x and y | |
tmp<-lapply(1:length(a.data),function(i){ | |
c(a.data[[i]][,1],a.data[[i]][,2]) | |
}) | |
obj<-do.call("rbind",tmp) | |
#make PCA scores | |
#scaling changes effects so don't | |
full<-prcomp(obj) | |
scores<-data.frame(full$x, meta=as.factor(meta[,1])) | |
#plot | |
p<-ggplot(data=scores, aes(x=PC1, y=PC2 ,group=meta,color=meta)) + | |
geom_point(...) + | |
.theme2 | |
print(p) | |
return(full) | |
} | |
if(kind == "PCA"){ | |
#plot PCA scores | |
full<-prcomp(df) | |
scores<-data.frame(full$x, meta=as.factor(meta[,1])) | |
#intersting to change line width | |
p<-ggplot(data=scores, aes(x=PC1, y=PC2 ,group=meta,color=meta)) + | |
geom_point(...) + | |
.theme2 | |
print(p) | |
return(full) | |
} | |
} | |
#pairs plot for the first 5 columns coloring on number of cylinders | |
x11() # turn these off to prevent making new plotting device | |
plot.data(data,kind="pairs",meta.data=fct, cex=1, alpha=.5, pch=21) | |
#make classical andrews plot | |
x11() | |
andrews(cbind(fct,data),type=1,clr=1,ymax=4.25) # add cyl as factor to allow coloring | |
# factor for legend (colors) | |
legend("topleft",legend=levels(fct), fill=rainbow(length(levels(as.factor(mtcars$cyl)))) ) | |
#alteranative visualization of andrews plot | |
library(ggplot2) | |
x11() | |
plot.data(data,kind="andrews-polygon",meta.data=fct,type=1, size=1, alpha=.5) #andrews | |
#make PCA | |
x11() | |
plot.data(data,kind="PCA",meta.data=mtcars$cyl, size=5, alpha=.75) | |
#make pca from andrews encoded data | |
x11() | |
plot.data(data,kind="andrews-PCA",meta.data=fct, size=5, alpha=.75) | |
#note intersting groups structure not explained by cyl | |
# after some cheking find that this is perfectly explained by the mtcars$am | |
#very intersting that this not obvious untill after encoding | |
#Make combined factor | |
fct<-cbind(mtcars$cyl,mtcars$am) | |
x11() | |
plot.data(mtcars,kind="PCA",meta.data=fct, size=5, alpha=.75) | |
x11() | |
plot.data(mtcars,kind="andrews-PCA",meta.data=fct, size=5, alpha=.75) | |
# judging by the clustering patterns, | |
# it looks like with this encoding cyl and am can be modeled (classified) relatively well | |
# there also seems to be a nother hidden covariate based on the pattern for 8 cylinder and 0 am (8|0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment