In [3]:
len = 100000        
x1 = 1:len              ### x1 INTEGER vector
dim(x1) = c(len/2,2)
x2 = x1 + .1            ### x2 REAL vector
x3 = rep(TRUE,len)      ### x3 LOGICAL vector
dim(x3) = c(len/2,2)
x4 = complex(r=1:len,i=1:len)
dim(x4) = c(len/2,2)    ### x4 COMPLEX vector

measure <- function(n, x) 
  { print(n); print(system.time(for (i in 1:1000) colMeans(x))) }
  
### Examples of polymophism, colMeans works on all those datatypes                                
measure("Integer",x1)  # .53
measure("Float",  x2)  # .49
measure("Logical",x3)  # .54
measure("Complex",x4)  #2.77


[1] "Integer"
   user  system elapsed 
  0.527   0.002   0.530 
[1] "Float"
   user  system elapsed 
  0.496   0.001   0.498 
[1] "Logical"
   user  system elapsed 
  0.543   0.001   0.545 
[1] "Complex"
   user  system elapsed 
  2.056   0.675   2.775 

In [ ]:
## For performance colMeans is split in a dimension-polymorphic function in R
## and more special-cased function in C
colMeans <- function (x, na.rm = FALSE, dims = 1L) {
    dn  = dim(x)
    id  = 1:dims
    n   = prod(dn[id])
    dn  = dn[-id]
    pdn = prod(dn)
    z <- if (is.complex(x)) 
        .Internal(colMeans(Re(x),n,pdn,na.rm))+(0+1i)*.Internal(colMeans(Im(x),n,pdn,na.rm))
    else .Internal(colMeans(x, n, pdn, na.rm))
    if (length(dn) > 1L) {
        dim(z) <- dn
        dimnames(z) <- dimnames(x)[-id]
    } else names(z) <- dimnames(x)[[dims + 1L]]
    z
}

In [5]:
## A simplified version of colMeans written entirely in R
## using loops
colMeans2 <- function (x, na.rm = FALSE, dims = 1L) {
    dn  = dim(x)
    id  = 1:dims
    n   = prod(dn[id])
    dn  = dn[-id]
    pdn = prod(dn)
    res = 0
    for (j in 0:(pdn-1)) {
        sum = 0
        cnt = 0
        off = (j * n)
        for (i in 1:n) {
            v   = x[[i+off]]
            cnt = cnt+ 1
            sum = sum+ v
         }
         res[j+1] = sum/cnt
   }
   res
}
      
measure2 <- function(n, x) 
  { print(n);  print(system.time(for (i in 1:1000) colMeans2(x))) }
### Warning this is SLOW!!!        
#measure2("Integer",x1)  # 222.6
require(compiler)
enableJIT(3)
measure2("Integer",x1)  #  33.9


Out[5]:
3
[1] "Integer"
   user  system elapsed 
 33.960   0.191  34.800 

In [ ]:
/* colSums(x, n, p, na.rm) and friends */
SEXP attribute_hidden do_colsum(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    SEXP x, ans = R_NilValue;
    int type;
    Rboolean NaRm, keepNA;

    checkArity(op, args);
    x = CAR(args); args = CDR(args);
    R_xlen_t n = asVecSize(CAR(args)); args = CDR(args);
    R_xlen_t p = asVecSize(CAR(args)); args = CDR(args);
    NaRm = asLogical(CAR(args));
    if (n == NA_INTEGER || n < 0)
        error(_("invalid '%s' argument"), "n");
    if (p == NA_INTEGER || p < 0)
        error(_("invalid '%s' argument"), "p");
    if (NaRm == NA_LOGICAL) error(_("invalid '%s' argument"), "na.rm");
    keepNA = !NaRm;

    int OP = PRIMVAL(op);
    switch (type = TYPEOF(x)) {
    case LGLSXP: break;
    case INTSXP: break;
    case REALSXP: break;
    default:
        error(_("'x' must be numeric"));
    }

    if (OP == 0 || OP == 1) { /* columns */
        PROTECT(ans = allocVector(REALSXP, p));
        for (R_xlen_t j = 0; j < p; j++) {
            R_xlen_t  cnt = n, i;
            LDOUBLE sum = 0.0;
            switch (type) {
            case REALSXP:   {
                double *rx = REAL(x) + (R_xlen_t)n*j;
                if (keepNA)
                    for (sum = 0., i = 0; i < n; i++) sum += *rx++;
                else 
                    for (cnt = 0, sum = 0., i = 0; i < n; i++, rx++)
                        if (!ISNAN(*rx)) {cnt++; sum += *rx;}
                break;
            }
            case INTSXP: {
                int *ix = INTEGER(x) + (R_xlen_t)n*j;
                for (cnt = 0, sum = 0., i = 0; i < n; i++, ix++)
                    if (*ix != NA_INTEGER) {cnt++; sum += *ix;}
                    else if (keepNA) {sum = NA_REAL; break;}
                break;
            }
            case LGLSXP: {
                int *ix = LOGICAL(x) + (R_xlen_t)n*j;
                for (cnt = 0, sum = 0., i = 0; i < n; i++, ix++)
                    if (*ix != NA_LOGICAL) {cnt++; sum += *ix;}
                    else if (keepNA) {sum = NA_REAL; break;}
            break;
            }
            }
            if (OP == 1) sum /= cnt; /* gives NaN for cnt = 0 */
            REAL(ans)[j] = (double) sum;
        }
    } else { /* rows */
	PROTECT(ans = allocVector(REALSXP, n));

	/* allocate scratch storage to allow accumulating by columns
	   to improve cache hits */
	int *Cnt = NULL;
	LDOUBLE *rans;
	if(n <= 10000) {
	    R_CheckStack2(n * sizeof(LDOUBLE));
	    rans = (LDOUBLE *) alloca(n * sizeof(LDOUBLE));
	    Memzero(rans, n);
	} else rans = Calloc(n, LDOUBLE);
	if (!keepNA && OP == 3) Cnt = Calloc(n, int);

	for (R_xlen_t j = 0; j < p; j++) {
	    LDOUBLE *ra = rans;
	    switch (type) {
	    case REALSXP:
	    {
		double *rx = REAL(x) + (R_xlen_t)n * j;
		if (keepNA)
		    for (R_xlen_t i = 0; i < n; i++) *ra++ += *rx++;
		else
		    for (R_xlen_t i = 0; i < n; i++, ra++, rx++)
			if (!ISNAN(*rx)) {
			    *ra += *rx;
			    if (OP == 3) Cnt[i]++;
			}
		break;
	    }
	    case INTSXP:
	    {
		int *ix = INTEGER(x) + (R_xlen_t)n * j;
		for (R_xlen_t i = 0; i < n; i++, ra++, ix++)
		    if (keepNA) {
			if (*ix != NA_INTEGER) *ra += *ix;
			else *ra = NA_REAL;
		    }
		    else if (*ix != NA_INTEGER) {
			*ra += *ix;
			if (OP == 3) Cnt[i]++;
		    }
		break;
	    }
	    case LGLSXP:
	    {
		int *ix = LOGICAL(x) + (R_xlen_t)n * j;
		for (R_xlen_t i = 0; i < n; i++, ra++, ix++)
		    if (keepNA) {
			if (*ix != NA_LOGICAL) *ra += *ix;
			else *ra = NA_REAL;
		    }
		    else if (*ix != NA_LOGICAL) {
			*ra += *ix;
			if (OP == 3) Cnt[i]++;
		    }
		break;
	    }
	    }
	}
	if (OP == 3) {
	    if (keepNA)
		for (R_xlen_t i = 0; i < n; i++) rans[i] /= p;
	    else
		for (R_xlen_t i = 0; i < n; i++) rans[i] /= Cnt[i];
	}
	for (R_xlen_t i = 0; i < n; i++) REAL(ans)[i] = (double) rans[i];

	if (!keepNA && OP == 3) Free(Cnt);
	if(n > 10000) Free(rans);
    }

    UNPROTECT(1);
    return ans;
}