1) R Basics 1.1) Arithmetics: +,-, *,^, %% 1.2) Data types 1.3) Vectors: names(), sum(), selection vector 1.4) Matrices: matrix(), colnames(), rownames(), dimnames(), cbind(), rbind(), matrix arithmetics 1.5) Factors: factor(), levels(), summary(), selection, comparison 1.6) DataFrames: head(), tail(), str(), summary(), data.frame(), selection, subset(), order(), 1.7) Lists: create list, slice list, concat list
2) R Intermediate 2.1) Relational Operators: ==, !=, >, <
For simple arithmetics use: +, -, *, /, ^ and %% (modulo).
For matrix multiplication use %*%. The ordinary operators (*, /, and integer division %/%) work element by element, which is not the usual matrix arithmetic.
Numerics : 4.5
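Integers : 4L (a plain 4 is stored as a numeric; the L suffix makes it an integer)
Logicals (Boolean) : TRUE, FALSE (T and F also work, but they are ordinary variables that can be reassigned, so prefer TRUE and FALSE)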
Characters : 'this is a text', 'abc'
Vectors, Matrices, Factors and Lists are made using the basic data types.
To find the type of a variable use: class(variable_name)
In [95]:
    
# Store a decimal number in a variable called var.
var <- 4.1
    
In [96]:
    
# Ask R which class (data type) the value held in var has.
class(var)
    
    Out[96]:
In [97]:
    
# c() combines individual values into a vector.
myvec <- c(1, 3, 5.5)
myvec
    
    Out[97]:
Naming the elements of a vector with names():
In [98]:
    
# Attach one name per element, in order, then show the named vector.
names(myvec) <- c("a", "b", "c")
myvec
    
    Out[98]:
Summing the elements of a vector:
In [99]:
    
# Add up every element of the vector.
sum(myvec)
    
    Out[99]:
Select a vector index:
In [100]:
    
myvec[2]        # second element: R indexes from 1, not 0
    
    Out[100]:
In [101]:
    
myvec[c(1,2)]   # select several elements by passing a vector of positions
    
    Out[101]:
In [102]:
    
myvec[1:2]      # slicing: 1:2 expands to the positions 1 and 2
    
    Out[102]:
In [103]:
    
myvec[c('a','b')] # select elements by the names assigned with names()
    
    Out[103]:
Selection by vector comparison:
In [104]:
    
# Poker amounts; the comparison below flags the positive ones.
poker_vector <- c(140, -50, 20, -120, 240)

# Comparing a vector with a number yields one TRUE/FALSE per element.
selection <- poker_vector > 0
selection
    
    Out[104]:
In [105]:
    
# Indexing with a logical vector keeps only the positions that are TRUE.
winnings_vector <- poker_vector[selection]
winnings_vector
    
    Out[105]:
In [106]:
    
# Build a 3-row matrix from 1..9.
# nrow sets the number of rows; byrow = TRUE fills row by row.
matrix(1:9, nrow = 3, byrow = TRUE)
    
    Out[106]:
create a matrix using vectors:
In [107]:
    
# One vector per film, each holding two box-office figures.
new_hope <- c(460.998, 314.4)
empire_strikes <- c(290.475, 247.900)
return_jedi <- c(309.306, 165.8)

# Concatenate the three vectors and lay them out one film per row.
# Assignment uses `<-` like the rest of the file.
star_wars_matrix <- matrix(
  c(new_hope, empire_strikes, return_jedi),
  nrow = 3,
  byrow = TRUE
)
star_wars_matrix
    
    Out[107]:
Name your columns, rows
In [108]:
    
# Label the rows with the film titles and the columns with the regions.
rownames(star_wars_matrix) <- c(
  "A New Hope",
  "The Empire Strikes Back",
  "Return of the Jedi"
)
colnames(star_wars_matrix) <- c("US", "non-US")
star_wars_matrix
    
    Out[108]:
Summing rows and cols:
In [109]:
    
# Total of each column (one sum per column).
colSums(star_wars_matrix)
    
    Out[109]:
In [110]:
    
# Total of each row (one sum per row).
rowSums(star_wars_matrix)
    
    Out[110]:
In [111]:
    
# Same data as a single flat vector, plus the labels for both dimensions.
box_office_all <- c(461, 314.4, 290.5, 247.9, 309.3, 165.8)
movie_names <- c(
  "A New Hope",
  "The Empire Strikes Back",
  "Return of the Jedi"
)
col_titles <- c("US", "non-US")

# dimnames takes list(row names, column names), so the matrix is built and
# labelled in a single call.
star_wars_matrix <- matrix(
  box_office_all,
  nrow = 3,
  byrow = TRUE,
  dimnames = list(movie_names, col_titles)
)
star_wars_matrix
    
    Out[111]:
Adding columns and rows : cbind(), rbind()
In [112]:
    
# Row totals (US + non-US per film) ...
worldwide_vector <- rowSums(star_wars_matrix)

# ... appended as an extra column; cbind() names it after the variable.
all_wars_matrix <- cbind(star_wars_matrix, worldwide_vector)
all_wars_matrix
    
    Out[112]:
In [113]:
    
# A second matrix with the same two columns, built for three more films.
box_office_2 <- c(474, 552, 310, 338, 380, 468)
movie_names_2 <- c(
  "The Phantom Menace",
  "Attack of the Clones",
  "Revenge of the Sith"
)
col_titles_2 <- c("US", "non-US")
star_wars_matrix_2 <- matrix(
  box_office_2,
  nrow = 3,
  byrow = TRUE,
  dimnames = list(movie_names_2, col_titles_2)
)

# rbind() stacks the rows of the second matrix underneath the first.
star_wars_all <- rbind(star_wars_matrix, star_wars_matrix_2)
star_wars_all
    
    Out[113]:
Selecting elements of matrix
In [114]:
    
star_wars_all[1,2] # single cell: 1st row, 2nd column
    
    Out[114]:
In [115]:
    
star_wars_all[ ,2] # empty row index = ALL rows; returns the 2nd column
    
    Out[115]:
In [116]:
    
star_wars_all[1, ] # empty column index = ALL columns; returns the 1st row
    
    Out[116]:
In [117]:
    
star_wars_all[1:2,] # slicing: rows 1 and 2, all columns
    
    Out[117]:
In [118]:
    
# Mean over every entry of the first two rows (all columns pooled together).
mean(star_wars_all[1:2,])
    
    Out[118]:
Arithmetics with matrices
In [119]:
    
star_wars_all *2 # element-wise: every entry is multiplied by 2
    
    Out[119]:
In [120]:
    
# factor() turns a character vector into a categorical variable whose
# levels are the distinct values.
gender_vector <- c("Male", "Female", "Female", "Male", "Male")
factor_gender_vector <- factor(gender_vector)
factor_gender_vector
    
    Out[120]:
There are two types of categorical variables: nominal (no natural order) and ordinal (ordered). You can use the `ordered` and `levels` arguments of the factor() function to define the levels of an ordinal variable and their order.
For example:
In [121]:
    
animals_vector <- c("Elephant", "Giraffe", "Donkey", "Horse")
temperature_vector <- c("High", "Low", "High", "Low", "Medium")

# Nominal factor: no order between the categories.
factor_animals_vector <- factor(animals_vector)
factor_animals_vector

# Ordinal factor: `levels` lists the categories from lowest to highest and
# `ordered = TRUE` tells R that this order is meaningful. The argument is
# spelled out in full rather than relying on partial matching of `order`.
factor_temperature_vector <- factor(
  temperature_vector,
  ordered = TRUE,
  levels = c("Low", "Medium", "High")
)
factor_temperature_vector
# In the R terminal this also shows the levels:
# for the nominal factor: Levels: Donkey Elephant Giraffe Horse
# for the ordinal factor: Levels: Low < Medium < High
    
    Out[121]:
    Out[121]:
You can change the names of the levels by levels() function:
In [122]:
    
# Survey answers coded as single letters, converted to a factor.
survey_vector <- c("M", "F", "F", "M", "M")
factor_survey_vector <- factor(survey_vector)
factor_survey_vector
    
    Out[122]:
but I want 'Male' and 'Female' !
In [123]:
    
# The new names are matched to the existing levels by position, so they must
# be given in the order the levels are stored.
levels(factor_survey_vector) <- c("Female", "Male")
    
In [124]:
    
# Display the factor again to see the renamed levels.
factor_survey_vector
    
    Out[124]:
Summary of factors
In [125]:
    
# summary() of the plain character vector (compare with the factor version).
summary(survey_vector)
    
    Out[125]:
In [126]:
    
# summary() of the factor counts how many entries fall in each level.
summary(factor_survey_vector)
    
    Out[126]:
Selection
In [127]:
    
# First entry of the factor.
factor_survey_vector[1]
    
    Out[127]:
In [128]:
    
# Second entry of the factor.
factor_survey_vector[2]
    
    Out[128]:
Comparison of factors
Comparison does not work for nominal factors, but works fine for ordinal!!
In [129]:
    
# `>` is not meaningful for an unordered (nominal) factor.
factor_survey_vector[1]>factor_survey_vector[2] ## gives NA (with a warning):
                                                ## nominal factors have no order
    
    
    Out[129]:
In [130]:
    
speed_vector <- c("Fast", "Slow", "Slow", "Fast", "Ultra-fast")

# Ordered factor: levels are listed from lowest to highest.
factor_speed_vector <- factor(
  speed_vector,
  ordered = TRUE,
  levels = c("Slow", "Fast", "Ultra-fast")
)

# Is data analyst 2 faster than data analyst 5?
compare_them <- factor_speed_vector[2] > factor_speed_vector[5]
compare_them
    
    Out[130]:
In [131]:
    
# mtcars is a data frame that is included with R; print it in full.
mtcars
    
    Out[131]:
Quick look at a data frame: head(), tail(), str() [structure]
In [132]:
    
# First two rows of the data frame.
head(mtcars, n = 2)
    
    Out[132]:
In [133]:
    
# Last four rows of the data frame.
tail(mtcars, n = 4)
    
    Out[133]:
In [134]:
    
str(mtcars) # str = "structure": compact overview of the columns and their types
    
    
In [135]:
    
summary(mtcars)  # returns summary statistics for each column
    
    Out[135]:
Creating a data.frame()
You can create a dataframe using simple vectors by calling the data.frame() function
In [136]:
    
# One vector per future column; all of them have eight entries.
planets <- c(
  "Mercury", "Venus", "Earth", "Mars",
  "Jupiter", "Saturn", "Uranus", "Neptune"
)
type <- c(
  "Terrestrial planet", "Terrestrial planet",
  "Terrestrial planet", "Terrestrial planet",
  "Gas giant", "Gas giant", "Gas giant", "Gas giant"
)
diameter <- c(0.382, 0.949, 1, 0.532, 11.209, 9.449, 4.007, 3.883)
rotation <- c(58.64, -243.02, 1, 1.03, 0.41, 0.43, -0.72, 0.67)
rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)

# data.frame() numbers the rows and names each column after the vector
# that was passed in.
planets_df <- data.frame(planets, type, diameter, rotation, rings)
planets_df
    
    Out[136]:
Slicing a data frame
As with matrices you can use [1,2], [1, ], [1:3, ] etc
OR USE THE NAME OF THE COLUMN!!!
In [137]:
    
# Rows 1 to 3 of the first column.
planets_df[1:3,1]
    
    Out[137]:
In [138]:
    
# Rows 3 to 8 of the column called "diameter" (column chosen by name).
planets_df[3:8, "diameter"]
    
    Out[138]:
In [139]:
    
planets_df$diameter # $ extracts a whole column as a vector
## gives the same result as planets_df[ ,"diameter"]
    
    Out[139]:
Selection of df entries using selector vector
In [140]:
    
# The rings column is already TRUE/FALSE, so it can serve directly as a row selector.
selection_rings <- planets_df$rings
    
In [141]:
    
# Keep the rows where selection_rings is TRUE; the empty column index keeps
# every column.
planets_with_rings <- planets_df[selection_rings, ]
    
In [142]:
    
# Display the filtered data frame.
planets_with_rings
    
    Out[142]:
Subset()
Use the subset function to select entries from a df
In [143]:
    
# subset() evaluates the condition inside the data frame, so the column can be
# referred to by its bare name; a logical column is already a valid condition.
planets_with_rings_2 <- subset(planets_df, subset = rings)
planets_with_rings_2
    
    Out[143]:
Order
The order() function returns the positions of a vector's elements in sorted (by default ascending) order, i.e. the index vector that would sort it:
In [144]:
    
# Small example vector for order().
a <- c(100, 150, 101)
    
In [145]:
    
# Positions of the elements of a from smallest to largest, so a[order(a)] is sorted.
order(a)
    
    Out[145]:
This can be used with data frames. For example, order the planets data frame so that the largest planet is at the top.
To do this, call order() with the decreasing = TRUE argument to get a vector of row positions, and then use that vector as the row index of the data frame:
In [146]:
    
# Row positions ordered by diameter, largest first.
positions <- order(planets_df$diameter, decreasing=TRUE)
    
In [147]:
    
# Reorder the rows by indexing with the position vector; all columns are kept.
largest_first_df <- planets_df[positions, ]
    
In [148]:
    
# Display the reordered data frame.
largest_first_df
    
    Out[148]:
In [149]:
    
# Three objects of different kinds to put into a list:
# an integer vector, a 3-column matrix filled row by row, and the first
# ten rows of the built-in mtcars data frame.
vec <- 1:10
mat <- matrix(1:9, ncol = 3, byrow = TRUE)
mdf <- mtcars[1:10, ]
    
In [151]:
    
# A list can hold objects of different kinds side by side.
my_list <- list(vec, mat, mdf)
my_list
    
    Out[151]:
Assign to names(my_list) to name the objects your list holds:
In [155]:
    
# One name per list element, in the same order as the elements.
names(my_list) <- c("vector", "matrix", "dataframe")
    
In [156]:
    
# Display the list again, now with named elements.
my_list
    
    
    Out[156]:
Select elements from list
To select from the list use the double bracket [[ ]]
In [159]:
    
my_list[[1]] # [[ ]] returns the stored object itself: element 1 (the vector)
    
    Out[159]:
In [160]:
    
# Second element of the list (the matrix).
my_list[[2]]
    
    Out[160]:
In [164]:
    
my_list[[1]][2] # first take the 1st object of the list (vec),
                # then the 2nd element of that object
    
    Out[164]:
Append to list
To append an item to a list, use the c() function. To also name the new item when concatenating, use the syntax:
c(list, name = new_var)
In [166]:
    
# c() concatenates onto the list; the `myyear = ` tag becomes the name of
# the appended element.
year <- 1980
my_list_2 <- c(my_list, myyear = year)
my_list_2
    
    
    Out[166]:
In [167]:
    
# Is 3 greater than 5?
3>5
    
    Out[167]:
In [168]:
    
# Equality test between two logical values.
FALSE == TRUE
    
    Out[168]:
In [169]:
    
FALSE < TRUE  # logicals compare as numbers: TRUE = 1, FALSE = 0
    
    Out[169]:
In [171]:
    
"Hello" > "Goodbye" # strings compare in alphabetical order (G < H < I ...)
    
    Out[171]:
For vectors:
In [172]:
    
# Example numeric vector with five entries.
linkedin <- c(10, 20, 30, 40, 60)
    
In [173]:
    
# Element-wise comparison: one TRUE/FALSE per element of the vector.
linkedin > 30
    
    Out[173]:
In [176]:
    
facebook <- c(10,50,20,56) # only 4 entries on purpose: comparing it with a longer vector triggers a warning
    
In [177]:
    
# Element-wise comparison of vectors of different lengths: R recycles the
# shorter one and warns when the longer length is not a multiple of the shorter.
linkedin<facebook
    
    
    Out[177]:
In [ ]: