# Vectors ----

# Vectors allow us to store many scalar elements in a single object.  Note
# that all elements must be of the same type (double, logical or string).
# Later, we will discuss lists, which do not have this requirement.


# Creation ----

# R offers several ways to build a vector, e.g. the colon operator or c():

vx <- 1:3  # or:
vx <- c(1, 2, 3)

show(vx)


# By default, such a vector behaves like a vertical vector (3x1 in these
# examples).  The printed output does not reveal this, but it becomes relevant
# in matrix algebra (see below).

# Transposing the vector vx is done with t():

t(vx)


# The vector vx is of type 'double', not of type 'logical' or 'character'
# (i.e. string):

typeof(vx)
is.double(vx)

is.logical(vx)
is.character(vx)


# At the same time, it is a vector (as opposed to a matrix), and it also counts
# as 'numeric' (the distinction between double and numeric rarely matters):

is.vector(vx)
is.matrix(vx)

is.numeric(vx)


# Vectors of strings or logicals are created in the same way:

vy <- c("a", "b")  # vector of strings

vz <- c(TRUE, FALSE)  # vector of logicals

typeof(vy)
typeof(vz)

# both give TRUE:
is.vector(vy)
is.vector(vz)

# both give FALSE:
is.matrix(vy)
is.matrix(vz)

# both give FALSE:
is.numeric(vy)
is.numeric(vz)

# Converting between doubles and logicals works on whole vectors, just as it
# does for single values:

as.logical(c(1, 0))
as.double(c(TRUE, FALSE))
as.numeric(c(TRUE, FALSE))


# Converting between doubles and strings works on whole vectors as well:

as.character(c(1, 3))
as.double(c("1", "3"))
as.numeric(c("1", "3"))
# More advanced ways to create vectors:


seq(from = -4, to = 4, by = 0.8)  # from -4 to 4 in increments of 0.8

seq(from = -4, to = 4, length.out = 5)  # from -4 to 4 with 5 elements


rep(1, times = 5)  # (5x1)-vector of ones
rep(TRUE, times = 5)  # (5x1)-vector with elements TRUE
rep("a", times = 5)  # (5x1)-vector with elements 'a'

rep(vx, times = 2)  # new vector that repeats the whole vector 'vx' twice
# (works the same way for vectors of strings or logicals)

rep(vx, each = 2)  # new vector that repeats each element of 'vx' twice
# (works the same way for vectors of strings or logicals)


rev(vx)  # elements of vx in reverse order ('flip vx around')
# (works the same way for vectors of strings or logicals)


vy <- c(3, 2, 5, 1)
c(vx, vy)  # new vector obtained by stacking the two vectors
# (works the same way for vectors of strings or logicals)


# Basic Operations ----

# Standard algebraic operations work on vectors:

vx <- c(1, 4, 3)
vy <- c(1, 5, 7)

vx + vy
vx - vy

vy / 3

vx * vy  # element-wise multiplication

vx %*% t(vy)  # matrix multiplication


# Careful: because ':' is evaluated before '*', these are not the same:

2 * 1:6
(2 * 1):6

# Vectors can also be used in comparisons, which yield logicals:

# Element-wise comparison

vx >= 2  # compares each element of vx to 2, returns a vector of logicals

vx >= vy  # compares each element of vx to its counterpart in vy (logicals)


# Overall comparison of two vectors
identical(vx, vy)  # returns a single logical

# Many of the functions we applied to scalars also accept vectors.  (This is
# not surprising once you realize that R treats a number like a 1x1-vector;
# type 'is.vector(3)' to see.)  Given a vector, these functions are applied to
# each element individually:

sqrt(vx)  # square root of each element in vx
exp(vx)
log(vx)
cos(vx)
# Many functions only make sense for vectors (not for single numbers):

vx <- c(1.2, 3.3, 4.2, 2.7, 5)
vy <- c(3.4, 5.5, 3.2, 8.9, 0.8)

length(vx)  # number of elements

min(vx)  # minimum
max(vx)  # maximum
range(vx)  # (min, max)
sum(vx)  # sum of elements
prod(vx)  # product of elements

mean(vx)
median(vx)
sd(vx)  # standard deviation
var(vx)  # variance
cov(vx, vy)  # covariance of the two vectors
cor(vx, vy)  # correlation of the two vectors
# kurtosis() and skewness() come from the package 'timeDate':
library(timeDate)
kurtosis(vx)
skewness(vx)

# Percentiles (these only make sense for longer vectors, since the entries are
# taken to be draws from some underlying continuous distribution):
quantile(vx)
quantile(vx, probs = 0.95)  # 95th percentile
quantile(vx, probs = seq(0, 1, by = 0.125))
# Vectors may contain NA elements, in which case the above functions return
# NA.  The functions can, however, be told to skip the NA-entries:

vx <- c(1.2, NA, 4.2, 2.7, 5)

mean(vx)  # returns NA
mean(vx, na.rm = TRUE)  # mean of the remaining elements, NA-entries ignored
# CAUTION!  Some functions in R do not prevent you from performing operations
# with two vectors of different length, but they recycle the shorter one so
# that the two match in length:

vx <- c(1, 3, 4, 2)
vy <- c(1, 3, 5)


# These all give nonsensical output:

vx + vy
vx == vy

vx == c(1, 2)


# However, some (smart) functions do throw an error when applied to vectors of
# different length, e.g. cor().  The call is wrapped in try() so that the error
# message is still displayed but the remainder of the script keeps running:
try(cor(vx, vy))
## Error in cor(vx, vy): incompatible dimensions


# Indexing ----

# A subset of a vector can be extracted.  This is referred to as 'indexing',
# and it can be done in several ways.


vx <- c(5, 1, 7)

# 'Direct' indexing (by position):

vx[2]  # second element

vx[-2]  # everything except the second element

vx[c(1, 3)]  # first and third element

vx[-c(1, 3)]  # everything except the first and third element

# Indexing using logicals:

vx[c(FALSE, TRUE, FALSE)]  # again the second element (all but first and third)

vx[vx >= 2]  # all elements greater than or equal to 2

# Indexing using entry names (= strings):

names(vx) <- c("a", "b", "c")  # name the vector entries

vx["b"]  # again the second element (= element 'b')

vx[c("a", "c")]  # first and third element

# Indexing also lets us overwrite specific entries of a vector while leaving
# the rest untouched:

vx[2] <- 4  # set the second element to 4

vx[c(2, 3)] <- c(3, 9)  # set the second and third elements to 3 and 9
# i.e. replace the sub-vector of the second and third element by c(3, 9)

vx <- vx[-1]  # delete the first element


# Sorting & Ordering ----

vx <- c(2, 1, 3, 2, 5)
vy <- c(2, 3, 4, 1, 5)


sort(vx)  # sorted values, in increasing order

sort(vx, decreasing = TRUE)  # sorted values, in decreasing order


order(vx)  # like sort(), but returns the indices of the sorted elements

order(vx, decreasing = TRUE)

# note: for strings, sort() and order() work alphabetically


# Element Identification ----

# Find the elements of a single vector that satisfy some condition:

which(vx == 2)  # indices of all entries equal to 2

max(which(vx == 2))  # largest index (last element) satisfying the condition

which.max(vx)  # index of the maximum (largest entry)

which.min(vx)  # index of the minimum (smallest entry)

# Look up the elements of one vector in another vector:


# Is each element of vy contained in vx?  Returns a vector of logicals that
# has the length of vy.
vy %in% vx


# Position in vx of each element of vy:
match(x = vy, table = vx)
# (NA is returned for an element that does not occur in vx)


# Set Operations ----

# Vectors essentially denote a set of elements (numbers, logicals or strings),
# so set operations can be applied to them as well:


vz <- c(2, 8, 4, 7, 5)

union(vx, vy)  # union of two sets (vectors)

intersect(vx, vy)  # intersection of two sets (vectors)

Reduce(f = intersect, x = list(vx, vy, vz))  # intersection of three sets

setdiff(vx, vy)  # 'vx complement vy'; all elements of vx that are not in vy
# the order of the arguments matters here!


# Operations for/with String-Vectors ----

# Creating and modifying strings:

# The function 'paste()' was introduced previously.  It builds a string by
# combining a number (double) and a string, e.g.:
paste(1, "mouse", sep = ".")
paste(1, "mouse")


# paste() also accepts vectors.  Without 'collapse' it returns a vector of
# strings, one per element:

paste(1:3, "mouse", sep = ".")  # returns '1.mouse' '2.mouse' '3.mouse'

# With 'collapse', the elements are joined into a single string:
paste(1:3, collapse = ".")  # returns '1.2.3'
paste(1:3, collapse = "_")  # returns '1_2_3'
paste(1:3, collapse = "+")  # returns '1+2+3'


# Splitting a string yields a vector of strings, e.g. split a string at every
# position where an 'i' is located:
strsplit("Mississippi", split = "i")

# Identifying elements in strings:

vsCountries <- c("Zimbabwe", "Cameroon", "Kenya", "Rwanda", "Djibouti")
# logical vector telling which string-entries contain a 'w':
grepl(pattern = "w", x = vsCountries)
# the logical vector can be used for indexing:
vsCountries[grepl(pattern = "w", x = vsCountries)]
# Working with dates:

library(lubridate)

sDate <- "2020-04-01"

year(sDate)  # show year corresponding to the date in sDate
month(sDate)  # show month
day(sDate)  # show day
ymd("2012-03-26")  # parse a 'YYYY-MM-DD' string into a date


# Note that the date needs to be in format 'YYYY-MM-DD'.  A string in another
# format, such as the one below, is not read as intended.  The call is wrapped
# in try() so that a possible error does not abort the rest of the script.
# NOTE(review): depending on the R/lubridate version this either raises an
# error or is silently misparsed (not giving the year 2020) -- confirm.
try(year("01-04-2020"))

# To convert another format to this format, use
sNewDate <- as.Date("01.04.2020", "%d.%m.%Y")
# or:
sNewDate <- as.Date("01-04-2020", format = "%d-%m-%Y")
# or:
sNewDate <- as.POSIXlt("01-04-2020", format = "%d-%m-%Y")

year(sNewDate)


# Identify weeks from a vector of dates; for each date in vDates, find the
# preceding Monday:
vDates <- paste0("2020-04-", 10:30)
cut(as.Date(vDates), "week")


# Get month-abbreviations from January to April:
month.abb[1:4]


# Some more useful things for working with dates will be discussed later when
# talking about time series methods and about plotting.