Last active
December 29, 2015 07:08
-
-
Save vsoch/7633498 to your computer and use it in GitHub Desktop.
Export a data matrix and associated demographic variables to an ARFF file for import into Weka. All variables are assumed to be numeric; columns are features (the column names are the feature names) and rows are data objects (with appropriate row names). Missing values, currently coded as -9999 and NA, are recoded as "?". Change this section (line 28) to…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Export a data matrix (or data frame) to an ARFF file for import into Weka.
#
# Arguments:
#   data          Matrix or data frame with features in columns (the column
#                 names become the attribute names) and data objects in rows.
#                 The row names are exported as the first attribute, "uid",
#                 and should be the data labels.
#   relation_name Name of the relation written to the "@relation" header.
#   outname       Output path WITHOUT extension; ".arff" is appended.
#
# Every column is declared as a numeric attribute. Values equal to -9999,
# the string "NA", or a real NA are treated as missing and written as "?",
# which is what Weka expects. Edit `recode_missing` below if your data uses
# a different missing-value code.
#
# If you have a nominal outcome variable (e.g. you want to color your data
# by a label in Weka), change that variable's type in the output file, e.g.
#   @attribute groupVar {1,2}
# and select the variable in the dropdown next to "visualize all".
#
# Returns NULL invisibly; the function is called for its side effect of
# writing the file.
exportRWeka <- function(data, relation_name, outname) {
  # Work on a data frame so that matrices and data frames take the same path
  # and single rows never collapse into a plain vector.
  frame <- as.data.frame(data, stringsAsFactors = FALSE)

  # Open the output arff file for writing; on.exit guarantees the connection
  # is released even if one of the writes below fails.
  con <- file(paste0(outname, ".arff"), "w")
  on.exit(close(con), add = TRUE)

  # Name the analysis
  writeLines(paste0("@relation \"", relation_name, "\""), con)

  # Define the rownames as the first variable
  writeLines("@attribute uid string", con)

  # One numeric attribute per data column. sprintf() yields character(0) for
  # a zero-column input, so nothing spurious is written in that case.
  writeLines(sprintf("@attribute %s numeric", colnames(frame)), con)

  # Weka expects missing values to be "?". Columns without any missing value
  # are left untouched so write.table() formats them as plain numbers.
  recode_missing <- function(column) {
    is_missing <- is.na(column) | column == -9999 | column == "NA"
    if (!any(is_missing)) {
      return(column)
    }
    recoded <- as.character(column)
    recoded[is_missing] <- "?"
    recoded
  }
  if (ncol(frame) > 0) {
    frame[] <- lapply(frame, recode_missing)
  }

  # Write the data rows, comma separated, each prefixed with its row name.
  writeLines("@data", con)
  write.table(frame, file = con, sep = ",", row.names = TRUE,
              col.names = FALSE, quote = FALSE)

  invisible(NULL)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment