Last active
October 22, 2017 19:15
-
-
Save MonteShaffer/b7c8748eecd8dc83aec035a519debd38 to your computer and use it in GitHub Desktop.
Helper class to interface between R and python using rpy2 (From R persp)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# helper class to interface between R and python using rpy2 | |
# © Monte J. Shaffer | |
# [email protected] | |
# MIT license | |
import numpy as np | |
import rpy2 | |
import rpy2.robjects as robjects | |
import rpy2.robjects.packages as rpackages | |
from rpy2.robjects.packages import importr | |
import rpy2.robjects.numpy2ri as npr | |
npr.activate() | |
base = importr('base') | |
utils = importr('utils') | |
stats = importr('stats') | |
# helper class to interface between R and python using rpy2 | |
class classR: | |
def __init__(self, plotme=True): | |
self.version = {} | |
self.version["rpy2"] = rpy2.__version__ | |
from rpy2.rinterface import R_VERSION_BUILD | |
self.version["R"] = R_VERSION_BUILD | |
system = base.Sys_info() # base.set_seed(123) | |
#utils.str(system) | |
self.version["bit"] = system.rx2("machine") | |
print(system) | |
# rather than doing a bunch of 'from ... import' | |
# common functions here | |
# http://rpy.sourceforge.net/rpy2/doc-2.2/html/vector.html#creating-vectors | |
self.Vector = robjects.Vector | |
self.IntVector = robjects.IntVector | |
self.FloatVector = robjects.FloatVector | |
self.FactorVector = robjects.FactorVector | |
self.BoolVector = robjects.BoolVector | |
self.StrVector = robjects.StrVector | |
self.ListVector = robjects.ListVector | |
self.plotme = plotme | |
if(plotme == True): | |
# http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyqt4 | |
# pip install .\PyQt... | |
# https://github.com/matplotlib/matplotlib/issues/2134/ | |
import matplotlib | |
matplotlib.use("qt4agg") | |
from matplotlib import pyplot as plt | |
self.plt = plt | |
print("pyplot loaded") | |
def mirror(self,idx=1): | |
utils.chooseCRANmirror(False,ind=idx) # F and FALSE don't work, False and 0 works | |
def library(self,package): | |
return importr(package) | |
def install(self,package): | |
utils.install_packages(package) | |
def print(self,what): | |
base.print(what) | |
def summary(self,what): | |
base.summary(what) | |
def str(self,what): | |
utils.str(what) | |
def castValue(self,val): | |
vlen = len(val) | |
if vlen == 1: | |
return val[0] | |
else: | |
return np.asarray(val) | |
return val | |
def getValue(self,what): # may not work on dataframe element rx not rx2 | |
a = what.split("$") # partition | |
e = str(a[0]) | |
for i in range(1,len(a)): | |
e = e + ".rx2(" + '"' + str(a[i]) + '"' + ")" | |
val = eval(e) | |
return self.castValue(val) | |
# http://ipython.readthedocs.io/en/stable/interactive/tutorial.html | |
# POWERSHELL ... cd C:\Python3.6.3\_monte_\ | |
# ipython | |
# %run -d classR.py | |
##################################################################### | |
# USAGE | |
##################################################################### | |
""" | |
# web delivery of data 'points' | |
import pickle | |
import urllib.request | |
raw = urllib.request.urlopen('http://www.mshaffer.com/arizona/dissertation/points').read() | |
points = pickle.loads(raw) # notice plural | |
""" | |
""" | |
# local delivery of data 'points' | |
import pickle | |
with open ('points', 'rb') as fp: | |
points = pickle.load(fp) | |
# initiate class | |
R = classR() | |
# R.mirror(1) # choose mirror [1 - Cloud] before installing package | |
# R.install("fpc") | |
fpc = R.library("fpc") | |
cluster = R.library('cluster') | |
k = R.IntVector(range(3, 8)) # r-syntax 3:7 # I expect 5 | |
pamk_clusters = fpc.pamk(points,k) # %time # very slow [Wall time: 1min 11s] | |
# k = 5 ... as expected | |
pam_clusters = cluster.pam(points,5) # %time # much slower [Wall time: 10.6 s] | |
kmeans_clusters = stats.kmeans(points,5) # %time # much faster [Wall time: 5.5 ms] | |
print(kmeans_clusters) | |
R.print(kmeans_clusters) | |
R.str(kmeans_clusters) | |
centers = R.getValue("kmeans_clusters$centers") | |
wss = R.getValue("kmeans_clusters$withinss") | |
tot = R.getValue("kmeans_clusters$tot.withinss") | |
wss/tot | |
size = R.getValue("kmeans_clusters$size") | |
member = R.getValue("kmeans_clusters$cluster") # cluster may be package | |
nmember = member.reshape(len(member),1) | |
npoints = np.hstack((points,nmember)) | |
plt.figure("scatter") | |
plt.scatter(npoints[:,0],npoints[:,1],c=npoints[:,2]) | |
plt.show(block=False) | |
sidx = np.argmax(size) | |
smax = size[sidx] | |
smember = 1 + sidx | |
scenter = centers[sidx] | |
spoints = npoints[npoints[:,2]==smember] | |
minx = min(spoints[:,0]) | |
maxx = max(spoints[:,0]) | |
cx = (minx + maxx)/2 | |
miny = min(spoints[:,1]) | |
maxy = max(spoints[:,1]) | |
cy = (miny + maxy)/2 | |
deltax = cx - scenter[0] | |
deltay = cy - scenter[1] | |
w = maxx - minx | |
h = maxy - miny | |
from matplotlib.patches import Ellipse | |
plt.figure("scatter") | |
plt.scatter(spoints[:,0],spoints[:,1],c=spoints[:,2]) | |
plt.scatter(scenter[0],scenter[1]) | |
ax = plt.gca() | |
ellipse = Ellipse(xy=(scenter[0] + deltax,scenter[1] + deltay), width=w, height=h, | |
edgecolor='r', fc='None', lw=2) | |
ax.add_patch(ellipse) | |
plt.show(block=False) | |
""" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment