In [3]:
len = 100000
x1 = 1:len ### x1 INTEGER vector
dim(x1) = c(len/2,2)
x2 = x1 + .1 ### x2 REAL vector
x3 = rep(TRUE,len) ### x3 LOGICAL vector
dim(x3) = c(len/2,2)
x4 = complex(r=1:len,i=1:len)
dim(x4) = c(len/2,2) ### x4 COMPLEX vector
measure <- function(n, x)
{ print(n); print(system.time(for (i in 1:1000) colMeans(x))) }
### Examples of polymophism, colMeans works on all those datatypes
measure("Integer",x1) # .53
measure("Float", x2) # .49
measure("Logical",x3) # .54
measure("Complex",x4) #2.77
In [4]:
## A simplified version of colMeans written entirely in R
## using loops
colMeans2 <- function (x, na.rm = FALSE, dims = 1L) {
dn = dim(x)
id = 1:dims
n = prod(dn[id])
dn = dn[-id]
pdn = prod(dn)
res = 0
for (j in 0:(pdn-1)) {
sum = 0
cnt = 0
off = (j * n)
for (i in 1:n) {
v = x[[i+off]]
cnt = cnt+ 1
sum = sum+ v
}
res[j+1] = sum/cnt
}
res
}
measure2 <- function(n, x)
{ print(n); print(system.time(for (i in 1:1000) colMeans2(x))) }
### Warning this is SLOW!!!
#measure2("Integer",x1) # 222.6
require(compiler)
enableJIT(3)
measure2("Integer",x1) # 33.9
Out[4]:
In [ ]:
## For performance colMeans is split in a dimension-polymorphic function in R
## and more special-cased function in C
colMeans <- function (x, na.rm = FALSE, dims = 1L) {
dn = dim(x)
id = 1:dims
n = prod(dn[id])
dn = dn[-id]
pdn = prod(dn)
z <- if (is.complex(x))
.Internal(colMeans(Re(x),n,pdn,na.rm))+(0+1i)*.Internal(colMeans(Im(x),n,pdn,na.rm))
else .Internal(colMeans(x, n, pdn, na.rm))
if (length(dn) > 1L) {
dim(z) <- dn
dimnames(z) <- dimnames(x)[-id]
} else names(z) <- dimnames(x)[[dims + 1L]]
z
}
In [ ]:
/* colSums(x, n, p, na.rm) and friends */
SEXP attribute_hidden do_colsum(SEXP call, SEXP op, SEXP args, SEXP rho)
{
SEXP x, ans = R_NilValue;
int type;
Rboolean NaRm, keepNA;
checkArity(op, args);
x = CAR(args); args = CDR(args);
R_xlen_t n = asVecSize(CAR(args)); args = CDR(args);
R_xlen_t p = asVecSize(CAR(args)); args = CDR(args);
NaRm = asLogical(CAR(args));
if (n == NA_INTEGER || n < 0)
error(_("invalid '%s' argument"), "n");
if (p == NA_INTEGER || p < 0)
error(_("invalid '%s' argument"), "p");
if (NaRm == NA_LOGICAL) error(_("invalid '%s' argument"), "na.rm");
keepNA = !NaRm;
int OP = PRIMVAL(op);
switch (type = TYPEOF(x)) {
case LGLSXP: break;
case INTSXP: break;
case REALSXP: break;
default:
error(_("'x' must be numeric"));
}
if (OP == 0 || OP == 1) { /* columns */
PROTECT(ans = allocVector(REALSXP, p));
for (R_xlen_t j = 0; j < p; j++) {
R_xlen_t cnt = n, i;
LDOUBLE sum = 0.0;
switch (type) {
case REALSXP: {
double *rx = REAL(x) + (R_xlen_t)n*j;
if (keepNA)
for (sum = 0., i = 0; i < n; i++) sum += *rx++;
else
for (cnt = 0, sum = 0., i = 0; i < n; i++, rx++)
if (!ISNAN(*rx)) {cnt++; sum += *rx;}
break;
}
case INTSXP: {
int *ix = INTEGER(x) + (R_xlen_t)n*j;
for (cnt = 0, sum = 0., i = 0; i < n; i++, ix++)
if (*ix != NA_INTEGER) {cnt++; sum += *ix;}
else if (keepNA) {sum = NA_REAL; break;}
break;
}
case LGLSXP: {
int *ix = LOGICAL(x) + (R_xlen_t)n*j;
for (cnt = 0, sum = 0., i = 0; i < n; i++, ix++)
if (*ix != NA_LOGICAL) {cnt++; sum += *ix;}
else if (keepNA) {sum = NA_REAL; break;}
break;
}
}
if (OP == 1) sum /= cnt; /* gives NaN for cnt = 0 */
REAL(ans)[j] = (double) sum;
}
} else { /* rows */
PROTECT(ans = allocVector(REALSXP, n));
/* allocate scratch storage to allow accumulating by columns
to improve cache hits */
int *Cnt = NULL;
LDOUBLE *rans;
if(n <= 10000) {
R_CheckStack2(n * sizeof(LDOUBLE));
rans = (LDOUBLE *) alloca(n * sizeof(LDOUBLE));
Memzero(rans, n);
} else rans = Calloc(n, LDOUBLE);
if (!keepNA && OP == 3) Cnt = Calloc(n, int);
for (R_xlen_t j = 0; j < p; j++) {
LDOUBLE *ra = rans;
switch (type) {
case REALSXP:
{
double *rx = REAL(x) + (R_xlen_t)n * j;
if (keepNA)
for (R_xlen_t i = 0; i < n; i++) *ra++ += *rx++;
else
for (R_xlen_t i = 0; i < n; i++, ra++, rx++)
if (!ISNAN(*rx)) {
*ra += *rx;
if (OP == 3) Cnt[i]++;
}
break;
}
case INTSXP:
{
int *ix = INTEGER(x) + (R_xlen_t)n * j;
for (R_xlen_t i = 0; i < n; i++, ra++, ix++)
if (keepNA) {
if (*ix != NA_INTEGER) *ra += *ix;
else *ra = NA_REAL;
}
else if (*ix != NA_INTEGER) {
*ra += *ix;
if (OP == 3) Cnt[i]++;
}
break;
}
case LGLSXP:
{
int *ix = LOGICAL(x) + (R_xlen_t)n * j;
for (R_xlen_t i = 0; i < n; i++, ra++, ix++)
if (keepNA) {
if (*ix != NA_LOGICAL) *ra += *ix;
else *ra = NA_REAL;
}
else if (*ix != NA_LOGICAL) {
*ra += *ix;
if (OP == 3) Cnt[i]++;
}
break;
}
}
}
if (OP == 3) {
if (keepNA)
for (R_xlen_t i = 0; i < n; i++) rans[i] /= p;
else
for (R_xlen_t i = 0; i < n; i++) rans[i] /= Cnt[i];
}
for (R_xlen_t i = 0; i < n; i++) REAL(ans)[i] = (double) rans[i];
if (!keepNA && OP == 3) Free(Cnt);
if(n > 10000) Free(rans);
}
UNPROTECT(1);
return ans;
}