Arrays

Definition: A multidimensional array $A$ of dimension $d$ is a collection of individual data points indexed by $d$ numbers: i.e., an individual data point is denoted by

$$A[i_1, i_2, \dots, i_d]$$

where $i_l = 1,\dots, n_l$ is the index corresponding to the $l$-th axis, and where $n_l$ is the number of data points stored along the $l$-th axis.

The shape of an array is the tuple

$$(n_1,\dots,n_d).$$

Remark 1: The dimension above is different from the notion of dimension in linear algebra, which is the number of entries in the array (i.e. $n_1n_2\dots n_d$).

Remark 2: The dimension in the definition above emphasizes the fact that a multidimensional array of dimension $d$ can be geometrically regarded as a $d$-dimensional cube of numbers sitting in $\mathbb R^d$.


In [1]:
%load_ext rmagic

Numerical arrays


In [ ]:
%%R

# Build a numeric array from scratch so the cell runs on a fresh kernel:
# start from a random permutation of 1..18 (a plain vector) ...
x = sample(18)
# ... and give it a shape; 2 * 3 * 3 = 18 matches the number of entries.
dim(x) = c(2,3,3)
print(class(x))
print(dim(x))
print(x)

In [ ]:
%%R

# Two independent random permutations of 1..18, each shaped as a 2 x 3 x 3 array.
x = array(sample(18), dim = c(2, 3, 3))
y = array(sample(18), dim = c(2, 3, 3))

In [ ]:
%%R

# Arithmetic operators act element by element on the arrays x and y.
# Each line overwrites z, so only the last result (x ^ y) is left in z
# when the cell finishes.
z = x + y
z = x * y
z = x ^ y

In [ ]:
%%R

# Show the result left in z by the previous cell.
print(z)

Boolean arrays


In [ ]:
%%R

# Draw 18 random logical values (with replacement) for each vector.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
x = sample(c(TRUE, FALSE), 18, replace=TRUE)
y = sample(c(TRUE, FALSE), 18, replace=TRUE)
print(y)

In [ ]:
%%R

# Give both logical vectors the same 2 x 3 x 3 shape.
dim(x) = c(2,3,3)
dim(y) = c(2,3,3)

# The logical operators below are applied element by element.
print(x | y) # elementwise OR  (counterpart of Python's `or`)
print(x & y) # elementwise AND (counterpart of Python's `and`)
print(!x)    # elementwise NOT (counterpart of Python's `not`)

Character arrays


In [ ]:
%%R

# Draw 18 random letters (with replacement) from the alphabet A, C, G, T.
# `replace=TRUE` is spelled out because T is a reassignable variable,
# not a reserved word.
x = sample(c('A','C','G','T'), 18, replace=TRUE)
y = sample(c('A','C','G','T'), 18, replace=TRUE)

print(x)

In [ ]:
%%R

# Reshape both character vectors into 2 x 3 x 3 arrays.
dim(x) = c(2,3,3)
dim(y) = c(2,3,3)

In [ ]:
%%R

# Glue x and y together element by element, with no separator.
# The result is a plain character vector: the array shape is not carried over.
z = paste0(x, y)
print(z)

In [ ]:
%%R

# Put the pasted strings back into a 2 x 3 x 3 array shape.
dim(z) = c(2,3,3)

print(z)

Subsetting


In [5]:
%%R

# Random permutation of 1..64 laid out as an 8 x 8 matrix.
A = array(sample(64), dim = c(8, 8))
print(class(A))
print(A)


[1] "matrix"
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,]   13    1   26   27   62   41   14    4
[2,]   30   50   60   17    5   59   25   38
[3,]   33   29   22   48   24   42   19   34
[4,]   40   23    2   10   52   11   37   20
[5,]   39    3   44    6   18   55   16   56
[6,]   43   57    9   12   54   21   35   45
[7,]   47   31   61   51   46   53   49   28
[8,]    8   64   58   15    7   63   32   36

In [7]:
%%R

# Single element: row 2, column 3 (indices start at 1).
print(A[2,3])


[1] 60

In [11]:
%%R

# Index vectors select a sub-matrix: rows 4, 1, 5 and columns 4, 2,
# returned in exactly the order requested.
print(A[c(4, 1, 5), c(4,2)])


     [,1] [,2]
[1,]   10   23
[2,]   27    1
[3,]    6    3

In [12]:
%%R

# The same kind of selection can appear on the left-hand side:
# every selected entry is overwritten with 999 (A is modified in place).
A[c(4,1,5), c(4,2)] = 999
print(A)


     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,]   13  999   26  999   62   41   14    4
[2,]   30   50   60   17    5   59   25   38
[3,]   33   29   22   48   24   42   19   34
[4,]   40  999    2  999   52   11   37   20
[5,]   39  999   44  999   18   55   16   56
[6,]   43   57    9   12   54   21   35   45
[7,]   47   31   61   51   46   53   49   28
[8,]    8   64   58   15    7   63   32   36

In [17]:
%%R

# Logical mask with the same shape as A: TRUE where 50 < A < 80.
# Entries previously set to 999 fail the upper bound and come out FALSE.
ind = (A > 50) & (A < 80)
print(ind)


      [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]
[1,] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE
[2,] FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE
[3,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[4,] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE
[5,] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE
[6,] FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE
[7,] FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE
[8,] FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE

In [18]:
%%R

# Indexing with the logical mask returns the selected entries as a flat
# vector, read column by column.
print(A[ind])


 [1] 57 64 60 61 58 51 62 52 54 59 55 53 63 56

In [19]:
%%R

# Assignment through the mask: every entry where ind is TRUE becomes 0.
A[ind] = 0

print(A)


     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,]   13  999   26  999    0   41   14    4
[2,]   30   50    0   17    5    0   25   38
[3,]   33   29   22   48   24   42   19   34
[4,]   40  999    2  999    0   11   37   20
[5,]   39  999   44  999   18    0   16    0
[6,]   43    0    9   12    0   21   35   45
[7,]   47   31    0    0   46    0   49   28
[8,]    8    0    0   15    7    0   32   36

Recycling


In [27]:
%%R

# x has four elements, y only one: a minimal setup for recycling.
x = c(1,2,3,4)
y =1

In [32]:
%%R

# The shorter operand is recycled to the length of the longer one before
# the elementwise addition is carried out.
z = x + y
print(z)


[1] 2 3 4 5

In [34]:
%%R

# y (3 elements) is shorter than x (4 elements), so y is recycled:
# the 4th product pairs x[4] with y[1].
# NOTE(review): R is expected to warn here because 4 is not a multiple of 3.
x = c(1,    2, 3, 4)
y = c(10, 100, 0    )
z = x * y

print(z)


[1]  10 200   0  40

In [ ]:

Applying


In [35]:
%%R

# Random permutation of 1..5 used as input for the functions below.
x = sample(5)
print(x)


[1] 2 4 5 1 3

In [42]:
%%R

# f: raise every element of x to its own power.
# Works on a whole vector at once because ^ is applied element by element.
f = function(x) {
    return(x^x)
}

In [43]:
%%R

# f is applied to the whole vector in one call; no explicit loop is needed.
z = f(x)

print(z)


[1]    4  256 3125    1   27

In [44]:
%%R

# g: for input x, return the concatenation of x^x, x*x and x+x.
# For a vector input the three result vectors are laid end to end.
g = function(x) {
    powered = x^x
    squared = x * x
    twice   = x + x
    c(powered, squared, twice)
}

In [47]:
%%R

# Scalar input: g returns the three values 2^2, 2*2 and 2+2.
z = g(2)
print(z)


[1] 4 4 4

In [49]:
%%R 

# Vector input: the result is one flat vector of length 9
# (all powers first, then all squares, then all doubles).
z = g(c(1, 2, 3))

print(z)


[1]  1  4 27  1  4  9  2  4  6

In [51]:
%%R

# sapply calls g once per input element; each 3-element result becomes
# one column of the returned matrix (column j holds g(j)).
z = sapply(c(1,2,3), g)

print(z)


     [,1] [,2] [,3]
[1,]    1    4   27
[2,]    1    4    9
[3,]    2    4    6

In [52]:
%%R

# sum adds up every entry of the matrix, regardless of its shape.
S = sum(z) 

print(S)


[1] 58

In [55]:
%%R

# Margin 1: apply mean to each row of z, giving one value per row.
# NOTE(review): the name suggests a grade book (one row per student), but z
# here is whatever the earlier cells left in it -- presumably a stand-in; confirm.
stdGrade = apply(z, 1, mean)

print(stdGrade)


[1] 10.666667  4.666667  4.000000

In [56]:
%%R

# Margin 2: apply mean to each column of z, giving one value per column.
# NOTE(review): the name suggests one column per exam; z is presumably a
# stand-in for a real grade book -- confirm.
examAvg = apply(z, 2, mean)

print(examAvg)


[1]  1.333333  4.000000 14.000000

In [57]:
%%R

# Random permutation of 1..27 arranged as a 3 x 3 x 3 array.
A = array(sample(27), dim = c(3, 3, 3))

print(A)


, , 1

     [,1] [,2] [,3]
[1,]   19    8   25
[2,]   10   22   14
[3,]    5   24    6

, , 2

     [,1] [,2] [,3]
[1,]   27   16    1
[2,]   13    2    9
[3,]    3   18   21

, , 3

     [,1] [,2] [,3]
[1,]   11   15   17
[2,]   23   26    4
[3,]   20    7   12


In [60]:
%%R

# Margin 1 on a 3-d array: for each value of the first index, average over
# all entries of the remaining two axes (one mean per first-axis slice).
Z = apply(A, 1, mean)

print(Z)


[1] 15.44444 13.66667 12.88889

Simulating


In [280]:
%%R -r 86 -w 400 -h 300

# Sample size; 81 = 9 * 9, so the sample can later be reshaped into a 9 x 9 matrix.
n = 81
# Alternative input (disabled): a normally distributed sample.
#x = rnorm(n, mean=50, sd=10)
# Active input: n draws from the uniform distribution on [0, 100].
x = runif(n, min=0, max=100)
#print(x)

# Histogram with fixed axis limits: x from -10 to 110, counts from 0 to n/2.
hist(x, xlim=c(-10, 110), ylim=c(0, n/2))



In [282]:
%%R

# Round the simulated values to whole numbers and reshape them into a
# 9 x 9 matrix. Note that x is overwritten in place.
x = round(x)
dim(x) = c(9,9)
print(x)


      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
 [1,]   58   29   29   30   34   78   49   16   40
 [2,]   41   65   17    1    1   38    8   24   40
 [3,]   49   41   60   33   40   54   84   64   28
 [4,]   91   33   54    4   30   87   26   21   74
 [5,]   28   11   51   88   91   94   65    2   68
 [6,]   91   41   50   23   29   22   14   74   19
 [7,]   15   29   39   68   81   73   20   20    6
 [8,]   71    9    7    9   95    7   45   85   16
 [9,]   51   53   56   11   23   94   61   61   25

In [285]:
%%R

# Ten simulated exam scores drawn from a normal distribution with mean 70
# and standard deviation 30. Nothing restricts the draws to a 0-100 range.
exam = rnorm(10, mean = 70, sd=30)
print(exam)


 [1]  46.41281  12.48709  85.27709  67.67440  51.69012  43.94118  56.02645
 [8]  14.24480  52.94803 104.91452

In [295]:
%%R

# replicate evaluates the rnorm call twice; the two length-10 samples become
# the columns of a 10 x 2 matrix.
# NOTE(review): mean=40 with sd=79 yields many negative and very large values;
# confirm these parameters are intended for something called a grade book.
gradeBook = replicate(2, rnorm(10, mean=40, sd=79))

print(gradeBook)


             [,1]        [,2]
 [1,]   28.045536   -3.067969
 [2,]  134.619287   22.228820
 [3,]   93.674670   24.769019
 [4,]   67.318250 -121.065588
 [5,]  -80.228689   37.217535
 [6,] -169.620782   90.604733
 [7,]   69.177760  -74.533917
 [8,]    3.464685   53.340418
 [9,]  -95.297423   23.155349
[10,]  -45.939948  -28.437872

Linear algebra


In [296]:
%%R

# Random permutation of 1..25 arranged as a 5 x 5 matrix.
A = array(sample(25), dim = c(5, 5))
print(A)


     [,1] [,2] [,3] [,4] [,5]
[1,]   16    1   12   10   15
[2,]   25    5   24    6   23
[3,]   22    4   21   20    3
[4,]   13   19    2   17   14
[5,]    9   18    7    8   11

In [301]:
%%R

# Five values drawn from 1..100, shaped as a 5 x 1 column matrix so that
# it can be used as the right-hand operand of a matrix product with A.
B = sample(1:100, 5)
dim(B) = c(5, 1)

print(B)

print(class(B))


     [,1]
[1,]   61
[2,]   14
[3,]   40
[4,]   26
[5,]   59
[1] "matrix"

In [302]:
%%R

# %*% is the matrix product (not the elementwise *): (5 x 5) times (5 x 1)
# gives a 5 x 1 result.
C = A %*% B

print(C)


     [,1]
[1,] 2615
[2,] 4068
[3,] 2935
[4,] 2407
[5,] 1938

In [303]:
%%R

# solve with a single matrix argument returns the inverse of A.
AInv = solve(A)

print(AInv)


            [,1]        [,2]         [,3]        [,4]        [,5]
[1,] -0.38605960  0.24576088  0.025903745  0.26749405 -0.33493037
[2,] -0.09289631  0.03310123  0.005177141  0.02616350  0.02275417
[3,]  0.18443250 -0.11088462  0.014809871 -0.18857060  0.21631070
[4,]  0.22113789 -0.14909084  0.012104048 -0.09004979  0.12149145
[5,]  0.18968540 -0.07625009 -0.047893066 -0.07618154  0.10169926

In [306]:
%%R

# Sanity check: A times its inverse should be the identity matrix.
# The product is rounded before printing because the entries are doubles
# and are only equal to 0/1 up to floating-point error.
D = A %*% AInv

print(round(D))
print(typeof(D[1,1]))


     [,1] [,2] [,3] [,4] [,5]
[1,]    1    0    0    0    0
[2,]    0    1    0    0    0
[3,]    0    0    1    0    0
[4,]    0    0    0    1    0
[5,]    0    0    0    0    1
[1] "double"

In [309]:
%%R

# Determinant of A; it is non-zero here, consistent with A being invertible.
d = det(A)
print(d)


[1] 277180

In [ ]: