Created
April 19, 2012 20:02
-
-
Save mushkevych/2423775 to your computer and use it in GitHub Desktop.
Class illustrating integration of R into stand-alone Java application
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.log4j.Logger; | |
import org.rosuda.JRI.REXP; | |
import org.rosuda.JRI.RMainLoopCallbacks; | |
import org.rosuda.JRI.Rengine; | |
import java.util.*; | |
/** | |
* @author Bohdan Mushkevych | |
* date Apr 2012 | |
* Description: Example illustrating Java->R integration | |
*/ | |
public class RIntegrationExample { | |
private static Logger log = Logger.getLogger(RIntegrationExample.class); | |
protected Map<Integer, Map<String, Integer>> terms = new HashMap<Integer, Map<String, Integer>>(); | |
protected Map<Integer, Double> value = new HashMap<Integer, Double>(); | |
/** | |
* static class for JRI Callbacks | |
*/ | |
static class LoggingConsole implements RMainLoopCallbacks { | |
private Logger log; | |
LoggingConsole(Logger log) { | |
this.log = log; | |
} | |
public void rWriteConsole(Rengine re, String text, int oType) { | |
log.info("*** Logging Console rWriteConsole(" + text + ")"); | |
} | |
public void rBusy(Rengine re, int which) { | |
log.info("*** Logging Console rBusy(" + which + ")"); | |
} | |
public void rShowMessage(Rengine re, String message) { | |
log.info("*** Logging Console rShowMessage \"" + message + "\""); | |
} | |
public String rReadConsole(Rengine re, String prompt, int addToHistory) { | |
return null; | |
} | |
public String rChooseFile(Rengine re, int newFile) { | |
return null; | |
} | |
public void rFlushConsole(Rengine re) { | |
} | |
public void rLoadHistory(Rengine re, String filename) { | |
} | |
public void rSaveHistory(Rengine re, String filename) { | |
} | |
} | |
public RIntegrationExample() { | |
Map<String, Integer> term_1 = new HashMap<String, Integer>(); | |
term_1.put("term_1", 10); | |
term_1.put("term_2", 11); | |
term_1.put("term_3", 7); | |
term_1.put("term_4", 8); | |
term_1.put("term_5", 9); | |
Map<String, Integer> term_2 = new HashMap<String, Integer>(); | |
term_2.put("term_1", 8); | |
term_2.put("term_2", 12); | |
term_2.put("term_5", 10); | |
Map<String, Integer> term_3 = new HashMap<String, Integer>(); | |
term_3.put("term_3", 5); | |
term_3.put("term_4", 10); | |
term_3.put("term_5", 12); | |
terms.put(20120101, term_1); | |
terms.put(20120102, term_2); | |
terms.put(20120103, term_3); | |
value.put(20120101, 70.5); | |
value.put(20120102, 50.5); | |
value.put(20120103, 48.2); | |
} | |
/** | |
* method takes terms map and value map and performs two transformation: | |
* 1. transforms terms from "long" to "wide" format | |
* details in: http://mushkevych.blogspot.com/2012/04/r-from-long-to-wide.html | |
* 2. computes Multiple Linear Regression | |
* details in: http://mushkevych.blogspot.com/2012/04/r-multiple-linear-regression.html | |
* | |
* @param terms in format: | |
* datetime : {term_1 : value_1, term_2 : value_2, ...} | |
* @param values in format: | |
* datetime : {term_1 : value_1, term_2 : value_2, ...} | |
* @return linear regression's coefficients in format | |
* {(Intercept) : value, term_1 : value, ..., term_N : value} | |
*/ | |
public Map<String, Double> calculateCoefficient(Map<Integer, Map<String, Integer>> terms, Map<Integer, Double> values) { | |
// Call R and perform coefficient computing | |
// just making sure we have the right version of everything | |
if (!Rengine.versionCheck()) { | |
throw new IllegalStateException("*** Version mismatch - Java files don't match R library version."); | |
} | |
Rengine re = new Rengine(new String[]{"--no-restore", "--slave", "--no-save"}, false, new LoggingConsole(log)); | |
// the engine creates R is a new thread, so we should wait until it's ready | |
if (!re.waitForR()) { | |
throw new IllegalStateException("*** Cannot load R."); | |
} | |
int aggregatedLength = 0; | |
for (Map<String, Integer> value : terms.values()) { | |
aggregatedLength += value.size(); | |
} | |
Map<String, Double> result = null; | |
try { | |
// declare data frame for terms | |
re.eval("library(reshape)"); | |
re.eval("N <- " + aggregatedLength); | |
re.eval("DF <- data.frame(date=rep(NA, N), term=rep(\"\", N), value=rep(NA, N), stringsAsFactors=FALSE)"); | |
int index = 1; | |
List<Integer> keys = new ArrayList<Integer>(terms.keySet()); | |
Collections.sort(keys); | |
// fill terms data frame with actual data | |
for (Integer key : keys) { | |
Map<String, Integer> map = terms.get(key); | |
for (Map.Entry<String, Integer> entry : map.entrySet()) { | |
StringBuilder builder = new StringBuilder(); | |
builder.append("DF[").append(index).append(",]"); | |
builder.append("<-c(").append(key).append(",\""); | |
builder.append(entry.getKey()).append("\","); | |
builder.append(entry.getValue()).append(")"); | |
re.eval(builder.toString()); | |
index++; | |
log.info("debug: " + builder.toString()); | |
} | |
} | |
//transform "value" column to numeric type | |
re.eval("DF <- transform(DF, value = as.numeric(value))"); | |
// declare data frame for value | |
re.eval("M <- " + values.size()); | |
re.eval("VAL <- data.frame(date=rep(NA, M), value=rep(NA, M), stringsAsFactors=FALSE)"); | |
index = 1; | |
keys = new ArrayList<Integer>(values.keySet()); | |
Collections.sort(keys); | |
// fill data frame with | |
for (Integer key : keys) { | |
Double value = values.get(key); | |
StringBuilder builder = new StringBuilder(); | |
builder.append("VAL[").append(index).append(",]"); | |
builder.append("<-c(").append(key).append(",").append(value).append(")"); | |
re.eval(builder.toString()); | |
index++; | |
log.info("debug: " + builder.toString()); | |
} | |
re.eval("Y <- VAL$value"); | |
re.eval("wide <- cast(data=DF, formula=date~term, value=\"value\", fun.aggregate=sum, fill=0)"); | |
re.eval("X <- wide[, 2:length(wide)]"); | |
re.eval("X. <- data.matrix(X)"); | |
re.eval("LMR = lm(formula=Y ~ X.)"); | |
re.eval("COEF = coefficients(LMR)"); | |
REXP names = re.eval("names(COEF)"); | |
REXP coef = re.eval("COEF"); | |
result = new HashMap<String, Double>(); | |
String[] arrayNames = names.asStringArray(); | |
double[] arrayCoef = coef.asDoubleArray(); | |
for (int i = 0; i < arrayCoef.length; i++) { | |
String name = arrayNames[i].startsWith("X.") ? arrayNames[i].substring(2) : arrayNames[i]; | |
result.put(name, arrayCoef[i]); | |
} | |
} catch (Exception e) { | |
log.error("*** Exception on R stage: ", e); | |
} | |
return result; | |
} | |
public void testCoefficientCalculation() { | |
Map<String, Double> result = calculateCoefficient(terms, value); | |
for (Map.Entry<String, Double> entry : result.entrySet()) { | |
System.out.println(entry.getKey() + " : " + entry.getValue()); | |
} | |
} | |
public static void main(String[] args) { | |
RIntegrationExample example = new RIntegrationExample(); | |
example.testCoefficientCalculation(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
R_HOME=/usr/lib64/R | |
R_SHARE_DIR=/usr/share/R/share | |
R_INCLUDE_DIR=/usr/share/R/include | |
JRI_HOME=/usr/lib/R/site-library/rJava/jri | |
CLASSPATH=.:${JRI_HOME}/JRI.jar:${JRI_HOME}/REngine.jar:${JRI_HOME}/JRIEngine.jar:log4j-1.2.15.jar | |
LD_LIBRARY_PATH=${R_HOME}/lib:${R_HOME}/bin:${JRI_HOME}/ | |
export R_HOME R_SHARE_DIR R_INCLUDE_DIR CLASSPATH LD_LIBRARY_PATH | |
JAVA=/usr/lib/jvm/java-6-sun/jre/bin/java | |
${JAVA} -Djava.library.path=.:${JRI_HOME} -cp ${CLASSPATH} RIntegrationExample |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment