Skip to content

Instantly share code, notes, and snippets.

@brodieG
Last active May 20, 2020 16:27
Show Gist options
  • Save brodieG/6f0e68a6718de1994ca16450860e0ab5 to your computer and use it in GitHub Desktop.
Save brodieG/6f0e68a6718de1994ca16450860e0ab5 to your computer and use it in GitHub Desktop.
Sum rows vs cols
library(inline)
frow <- cfunction(sig=c(x='list'), body='
R_xlen_t J = XLENGTH(x);
R_xlen_t n = XLENGTH(VECTOR_ELT(x, 0));
SEXP out = PROTECT(allocVector(INTSXP, n));
int *outp = INTEGER(out);
for (int j=0; j<J; j++) {
int *col = INTEGER(VECTOR_ELT(x, j));
for (int i=0; i<n; i++) {
if(j) {outp[i] += col[i];} else {outp[i] = col[i];}
}
}
UNPROTECT(1);
return out;
')
fcol <- cfunction(sig=c(x='list'), body='
R_xlen_t J = XLENGTH(x);
R_xlen_t n = XLENGTH(VECTOR_ELT(x, 0));
// Avoid VECTOR_ELT in main loop
int** cols = (int**)R_alloc(J, sizeof(int*));
for(int j=0; j < J; ++j) {
cols[j] = INTEGER(VECTOR_ELT(x, j));
}
SEXP out = PROTECT(allocVector(INTSXP, n));
int *outp = INTEGER(out);
int a, b;
for(int j=0; j <= J; j+=4) {
// Put more ops in flight, if we just do outp[] += x
// next sum depends on prior sum. Diminishing returns
// so stop at 4 simultaneous accesses
if(J >= j + 4) {
for (int i=0; i<n; i++) {
a = cols[j][i] + cols[j+1][i];
b = cols[j+2][i] + cols[j+3][i];
if(j == 0) {outp[i] = a + b;} else {outp[i] += a + b;}
}
} else if (J - j == 3) {
for (int i=0; i<n; i++) {
a = cols[j][i] + cols[j+1][i];
b = cols[j+2][i];
if(j == 0) {outp[i] = a + b;} else {outp[i] += a + b;}
}
} else if (J - j == 2) {
for (int i=0; i<n; i++) {
a = cols[j][i];
b = cols[j+1][i];
if(j == 0) {outp[i] = a + b;} else {outp[i] += a + b;}
}
} else if (J - j == 1) {
for (int i=0; i<n; i++) {
a = cols[j][i];
if(j == 0) {outp[i] = a;} else {outp[i] += a;}
}
}
}
UNPROTECT(1);
return out;
')
# this one does four rows at a time; make sure
# input has multiple of 4 rows!
frow2 <- cfunction(sig=c(x='list'), body='
R_xlen_t J = XLENGTH(x);
R_xlen_t n = XLENGTH(VECTOR_ELT(x, 0));
SEXP out = PROTECT(allocVector(INTSXP, n));
int *outp = INTEGER(out);
for (int j=0; j<J; j++) {
int *col = INTEGER(VECTOR_ELT(x, j));
for (int i=0; i<n; i+=4) {
if(j) {
outp[i] += col[i];
outp[i+1] += col[i+1];
outp[i+2] += col[i+2];
outp[i+3] += col[i+3];
} else {
outp[i] = col[i];
outp[i+1] = col[i+1];
outp[i+2] = col[i+2];
outp[i+3] = col[i+3];
}
}
}
UNPROTECT(1);
return out;
')
n <- 5e7
L <- replicate(16, sample(-2:2, n, rep=TRUE), simplify=FALSE)
l <- L[1:15]
system.time(rr <- frow(l)) # rows are inner loop
system.time(rc3 <- fcol3(l)) # cols are inner loop
identical(rc3, rr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment