Plotting

# A primary reason behind the popularity of R is the fact that one can create
# plots very flexibly.  Before we discuss more advanced plotting-techniques
# based on the package 'ggplot2', let's start with some basic plots that can be
# constructed using pre-built commands in R.  They are useful for checking
# quickly how something looks in the data-exploratory process (and for
# inclusion in preliminary reports).

# Generate data to be plotted:

set.seed(50)

vx = seq(0.1, 7, by = 0.1)
vNormPDF = dnorm(vx, 4, 2)
vChi2PDF = dchisq(vx, 3)
vtPDF = dt(vx, 10)

library(mvtnorm)
vMeans = c(2, 5, 7)
mVariance = diag(c(2, 1, 3))
mVariance[1, 2] = 1
mVariance[2, 1] = 1
mNormDraws = rmvnorm(30, vMeans, mVariance)

# Load further data from actual dataset:
library(carData)
data(Hartnagel)
data(mtcars)

Simple Plots Based on Built-In-Functions

# Plot single line:

# default:
plot(vNormPDF)

# to get line instead of dots:
plot(vNormPDF, type = "l")
# for line and dots:
plot(vNormPDF, type = "o")


# Note that by default, index is on the x-axis (1,2,...).  To specify x-axis:
plot(vx, vNormPDF, type = "l")


# specify y-limits, and x-limits:
plot(vx, vNormPDF, type = "l", ylim = c(0, 0.25), xlim = c(-2, 9))
# specify title, x-axis-label and y-axis-label:
plot(vx, vNormPDF, type = "l", main = "My Title Here", xlab = "my x lab", ylab = "my y lab")
# note: can also be used to delete default labs: (default has no title)
plot(vx, vNormPDF, type = "l", xlab = "", ylab = "")


# specify color of markers (line and/or dots): (most standard colors are
# supported)
plot(vx, vNormPDF, type = "l", col = "blue")
plot(vx, vNormPDF, col = "blue")

# Make line thicker:
plot(vx, vNormPDF, type = "l", lwd = 4)


# Remove ticks for y-axis:
plot(vx, vNormPDF, type = "l", yaxt = "n")
# Remove ticks for x-axis:
plot(vx, vNormPDF, type = "l", xaxt = "n")
# (these commands make more sense if the corresponding axis-label is also
# deleted)
# With package 'latex2exp', we can add math to labels (x-lab, y-lab, title):

library(latex2exp)
sXlab = TeX("$\\theta_t $ (I made it up)")
sTitle = TeX("$f(\\theta_t)$")
plot(vx, vNormPDF, type = "l", xlab = sXlab, ylab = "", main = sTitle)

# The latex-part of the expression starts and ends with a dollar sign.  The
# code is just as in latex, except that one has to use two backlashes to call
# latex-symbols.
# Taken together, nice single-line plot:

plot(vx, vNormPDF, type = "l", col = "blue", lwd = 4, main = "my pretty plot of N(4,4)",
    xlab = "", ylab = "", yaxt = "n", ylim = c(0, 0.22), xlim = c(-1, 8))

# We can easily add further lines to such an existing plot:

# Initial plot:
plot(vx, vNormPDF, type = "l", col = "blue")

# Add another line (chi^2-disttribution, in red):
lines(vx, vChi2PDF, type = "l", col = "red")

# Note that the y-axis is not adjusted automatically so that the new line fits.


# Add mathematically defined lines:
abline(v = 3, col = "green")  # add vertical line at x=3
abline(h = 0.08, col = "gray")  # add horizontal line at y=0.08
abline(a = -0.2, b = 0.1, col = "orange")  # add line with specific intercept and slope
# Taken together, nice lines plot:

plot(vx, vNormPDF, type = "l", col = "blue", lwd = 4, main = "my pretty plot of N(4,4) and Chi2(3)",
    xlab = "", ylab = "", ylim = c(0, 0.3), xlim = c(-1, 8))
lines(vx, vChi2PDF, type = "l", col = "red", lwd = 4)

# Note that we can also directly plot a function, without first manually
# specifying a grid of x-values and evaluating the function at those values:

# Plot standard normal pdf:
plot(dnorm, xlim = c(-2, 8))

# Plot pdf of N(4,4):
plot(function(x) {
    dnorm(x, 4, 2)
}, xlim = c(-2, 8))

# Plot exponential function:
plot(exp, xlim = c(1, 3))

# Of course, all of the options discussed above can also be applied for these
# plots.  They also work for all of the plots discussed below (scatter,
# histogram, boxplot, ...).
# Saving a plot as a .png or .pdf file:

# 1. specify your path:
sMyPlotPath = "/Users/markomlikota/Dropbox/myfolder"
setwd(sMyPlotPath)

# 2. specify name:
png("blabla.png")  # specify width and height here for nice plots

# 3. create the plot (or print an existing, already saved one with 'print()'):
plot(vx, vNormPDF, type = "l", col = "blue", lwd = 4, main = "my pretty plot of N(4,4) and Chi2(3)",
    xlab = "", ylab = "", ylim = c(0, 0.3), xlim = c(-1, 8))
lines(vx, vChi2PDF, type = "l", col = "red", lwd = 4)

# 4. tell R that you're done: (no further lines or so will be added)
dev.off()


# Note that it is not possible, without rather complicated adjustments, to save
# such a plot as an object.  This is different for plots created via 'ggplot2'.
# Scatter-Plot:

# To obtain a scatter plot, simply specify two vectors (as before) and use dots
# rather than a line as markers:
plot(mNormDraws[, 1], mNormDraws[, 2])

# Change marker type:
plot(mNormDraws[, 1], mNormDraws[, 2], pch = 8)
plot(mNormDraws[, 1], mNormDraws[, 2], pch = 15)
# (see e.g.
# https://www.math.ucla.edu/~anderson/rw1001/library/base/html/points.html for
# further marker-types)


# add further cloud of points in different color:
points(mNormDraws[, 1], mNormDraws[, 3], col = "red")

# add regression line for first cloud of points:
abline(lm(mNormDraws[, 2] ~ mNormDraws[, 1]))
# for second:
abline(lm(mNormDraws[, 3] ~ mNormDraws[, 1]), col = "red")

# Should enlarge x- and y-axis to make this more appealing.
# Taken together, nice scaatter-plot:

plot(mNormDraws[, 1], mNormDraws[, 2], col = "blue", pch = 4, lwd = 2, main = "my pretty scatterplot of Normal draws",
    xlab = "", ylab = "", ylim = c(2, 11), xlim = c(-2, 5))
points(mNormDraws[, 1], mNormDraws[, 3], col = "red", pch = 4, lwd = 2)
abline(lm(mNormDraws[, 2] ~ mNormDraws[, 1]), col = "blue")
abline(lm(mNormDraws[, 3] ~ mNormDraws[, 1]), col = "red")

# Histogram:

# Default:
hist(mNormDraws[, 1])

# Add color:
hist(mNormDraws[, 1], col = "blue")
# Add border color:
hist(mNormDraws[, 1], col = "blue", border = "white")
# Remove fill-color:
hist(mNormDraws[, 1], col = NULL, border = "blue")


# Use density rather than frequency on y-axis: (so that area of bins sums up to
# 1)
hist(mNormDraws[, 1], freq = FALSE)

# Specify number of bins by specifying location of bin-breaks:
hist(mNormDraws[, 1], breaks = seq(-2, 6, by = 0.5))
# Specify number of bins:
hist(mNormDraws[, 1], breaks = 8)
# (doesn't work perfectly; R uses it only as suggestion; see ?hist)
# Taken together, nice histogram:

hist(mNormDraws[, 1], breaks = seq(-1, 5, by = 0.5), col = "blue", border = "white",
    main = "my pretty histogram of Normal draws", xlab = "", ylab = "")

# Boxplot:

# Boxplot of first vector of Normal draws:
boxplot(mNormDraws[, 1])
# of first two:
boxplot(mNormDraws[, 1], mNormDraws[, 2])
# of all three: (i.e. of all columns in supplied matrix)
boxplot(mNormDraws)

# Naming columns in that matrix changes x-tick-labels:
colnames(mNormDraws) <- c("first", "second", "third")
boxplot(mNormDraws)


# With dataframes, one can directly choose to get boxplots of one variable as a
# function of (values of) another:
boxplot(hp ~ cyl, data = mtcars, xlab = "cyls", ylab = "", main = "Boxplot of HP by #cyls")
# (rather than first manually creating vectors that show hp for each value of
# cyl)
# QQ-Plot:

qqnorm(vtPDF, xlab = "N(0,1)", ylab = "t(10)", col = "blue")
qqline(vtPDF, col = "red")
# Display multiple plots as panels in a single figure:

library(dyn)

par(mfrow = c(1, 3))  # 1 x 3 plot
hist(mNormDraws[, 1], breaks = seq(-1, 8, by = 1), col = "blue", border = "white",
    main = "hist var1", xlab = "", ylab = "")
hist(mNormDraws[, 2], breaks = seq(-1, 8, by = 1), col = "blue", border = "white",
    main = "hist var2", xlab = "", ylab = "")
plot(mNormDraws[, 1], mNormDraws[, 2], col = "blue", pch = 4, lwd = 2, main = "scatter",
    xlab = "var1", ylab = "var2", ylim = c(2, 11), xlim = c(-2, 5))
abline(lm(mNormDraws[, 2] ~ mNormDraws[, 1]), col = "blue")
par(mfrow = c(1, 1))


# Adjust margins and specify overarching title:

par(mfrow = c(1, 3), oma = c(2, 2, 2, 2), mar = c(2, 2, 2, 2))
# (see https://www.r-graph-gallery.com/74-margin-and-oma-cheatsheet.html)
hist(mNormDraws[, 1], breaks = seq(-1, 8, by = 1), col = "blue", border = "white",
    main = "hist var1", xlab = "", ylab = "")
hist(mNormDraws[, 2], breaks = seq(-1, 8, by = 1), col = "blue", border = "white",
    main = "hist var2", xlab = "", ylab = "")
plot(mNormDraws[, 1], mNormDraws[, 2], col = "blue", pch = 4, lwd = 2, main = "scatter",
    xlab = "var1", ylab = "var2", ylim = c(2, 11), xlim = c(-2, 5))
abline(lm(mNormDraws[, 2] ~ mNormDraws[, 1]), col = "blue")
mtext("Bivariate Normal Draws", outer = TRUE, cex = 1.5)
par(mfrow = c(1, 1))
# Empty plot: (useful when creating e.g. a plot with 3 x 3 subplots and you
# have only 8 plots)
plot(c(0, 0), type = "n", ylab = "", xlab = "", axes = FALSE)
# Plotting xts objects:

# When plotting xts objects, we get a plot with lots of default-adjustments (R
# realizes that the x-axis refers to time, returns a grid, etc.):

# To illustrate this, we download data on GDP and inflation directly from the
# FRED database:
library(quantmod)
# Inflation (based on GDP deflator):
getSymbols("GDPDEF", src = "FRED")
GDPDEF = GDPDEF["1960-01-01/2025-01-01"]
vInfl = 100 * 4 * diff(log(GDPDEF))[-1]
# GDP growth:
getSymbols("GDP", src = "FRED")
GDP = GDP["1960-01-01/2025-01-01"]
vGDPg = 100 * 4 * diff(log(GDP))[-1]


plot(vInfl, main = "Inflation (quarterly, annualized, %)")

# delete y-axis ticks on the right:
plot(vInfl, main = "Inflation (quarterly, annualized, %)", yaxis.right = FALSE)


# Plot two series on same graph:

plot(cbind(vInfl, vGDPg), main = "Inflation & GDP Growth")

# (better than manually;) plot(vInfl, main = 'Inflation & GDP Growth',
# yaxis.right = FALSE) lines(vGDPg, col='red', yaxis.right = FALSE)

Plots Based on “ggplot2”

# With the package 'ggplot2', one has much more flexibility in creating plots.

# We'll start with a plot containing a single line and then discuss multiple
# lines, scatter plots, histograms and other plot-types.  Various plot-options
# are discussed, and each is introduced as part of the discussion of the
# plot-type for which it is best explained.

# (A further popular package for plotting in R is 'plotly'.  However, in my
# view 'ggplot2' is better in all aspects except for creating interactive
# plots.)

Single-Line Plot

library(ggplot2)

mData = data.frame(xvar = vx, norm = vNormPDF)
# Plot single line based on two vectors:

ggplot() + geom_line(aes(x = vx, y = vNormPDF))


# Plot single line based on a dataframe:

ggplot(data = mData) + geom_line(aes(x = xvar, y = norm))


# To change appearance of line, specify further attributes in the option
# 'geom_line()'.  e.g. change color, size and linetype:
ggplot(data = mData) + geom_line(aes(x = xvar, y = norm), color = "blue", size = 0.9,
    linetype = "dashed")
# see http://sape.inf.usi.ch/quick-reference/ggplot2/linetype for linetypes
# available note: can specify linetype also as number (1 is solid, 2 is dashed,
# etc.)

# Add further options with pluses:
ggplot(data = mData) + geom_line(aes(x = xvar, y = norm), color = "blue", size = 0.9) +
    labs(x = "my x-lab", y = "my y-lab", title = "my title") + theme_bw() + theme(aspect.ratio = 5/8)
# Save plot to object:

pp1 = ggplot(data = mData) + geom_line(aes(x = xvar, y = norm), color = "blue", size = 0.9) +
    labs(x = "my x-lab", y = "my y-lab", title = "my title")

# Then, can add further options to already existing plot:
pp1 + theme_bw() + theme(aspect.ratio = 5/8)

# And can save to new object (or override existing one):
pp = pp1 + theme_bw() + theme(aspect.ratio = 5/8)


# You may want to re-use some options over and over again.  Instead of typing
# them every time, store them as a list and call them at once all together:

plotOptions = list(theme_bw(), theme(aspect.ratio = 5/8))

# Show the plot 'pp' with these options:
pp1 + plotOptions
# Add these options to plot 'pp1':
pp1 = pp1 + plotOptions
# Save plot as .png or .pdf file:

# 1. create plot or print/call existing one:
pp
# 2. save:
ggsave("myprettyplot.png")

# save with advanced options:
ggsave("myprettyplot.png", width = 15, height = 15 * (8/5), units = "cm")
# note: width/height should match previously specified aspect ratio

# Note: for inclusion in latex-document, save plot best as pdf
# As before, can have latex-code for labels:

library(latex2exp)

sXlab = TeX("$\\theta_t $ (I made it up)")
sTitle = TeX("$ f(\\theta_t) $")
pp1 + labs(x = sXlab, y = "", title = sTitle)
# There are many more options to customize the x-axis, y-axis and title:

# Adjust limits and ticks of x-axis and limits of y-axis:
pp + scale_x_continuous(breaks = seq(-1, 8, 3), limits = c(-1, 8)) + scale_y_continuous(limits = c(0,
    0.25))

# Equivalent to the latter option:
pp + ylim(c(0, 0.25))
# Also equivalent:
pp + expand_limits(y = c(0, 0.25))
# When changing only upper limit, type:
pp + expand_limits(y = c(NA, 0.25))
# (analogously for only lower limit)


# Manually specify labels of ticks on x-axis:
pp + scale_x_continuous(limits = c(-1, 8), breaks = c(0, 3, 4, 6), labels = paste(c(0,
    3, 4, 6), "a", sep = ""))
# note: by specifying empty labels for certain breaks, we can have ticks
# without labels:
vLabs = paste(c(0, 3, 4, 6), "a", sep = "")
vLabs[2] = ""
pp + scale_x_continuous(limits = c(-1, 8), breaks = c(0, 3, 4, 6), labels = vLabs)

# Change axis values from decimals to percent (e.g. 0.01 becomes 1%): (just for
# illustration; it doesn't make any sense here)
pp + scale_y_continuous(labels = scales::percent)

# Have x-axis in units of pi:
vxDivPi = vx/pi
pp + scale_x_continuous(limits = c(-1, 8), breaks = pi * (0:3), labels = paste(0:3,
    "pi"))
# (and analogously for any other transformation)

# Force all tick-labels to have same, specified number of decimals
scaleFUN = function(x) sprintf("%.3f", x)
pp + scale_y_continuous(limits = c(0, 0.25), breaks = seq(0, 0.25, by = 0.05), lab = scaleFUN)
# (without this option, we'd have two decimals in this case) (the option is
# also useful in some cases where R by default gives different ticks with
# different numbers of decimals )


# Remove axis ticks and tick-labels:
pp + theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())

# To remove axis-label, can use +labels(ylab='') as above, or:
pp + theme(axis.title.y = element_blank())

# Change font size for x-axis label:
pp + theme(axis.title.x = element_text(size = 12))

# Change font size of tick labels and space between axis and tick labels:
pp + theme(axis.text.x = element_text(size = 12, vjust = -1))

# Rotate x-axis labels:
pp + theme(axis.text.x = element_text(angle = 30, hjust = 0.5, vjust = 0.8))


# Change font size and family of title:
pp + theme(plot.title = element_text(family = "sans", size = 14))
# Change also margins:
pp + theme(plot.title = element_text(family = "sans", size = 14, margin = margin(0,
    0, 2, 0)))
# (instead of margin, can also specify vjust and hjust)
# If you have time on the x-axis, use the option 'scale_x_date()' rather than
# 'scale_x_manual()':

# Obtain quarterly GDP growth and inflation for illustration:

library(quantmod)

getSymbols("GDPDEF", src = "FRED")
GDPDEF = GDPDEF["1960-01-01/2025-01-01"]
vInfl = 100 * 4 * diff(log(GDPDEF))[-1]

getSymbols("GDP", src = "FRED")
GDP = GDP["1960-01-01/2025-01-01"]
vGDPg = 100 * 4 * diff(log(GDP))[-1]

mDataTS = data.frame(date = index(vGDPg), GDPgrowth = as.numeric(vGDPg), inflation = as.numeric(vInfl))
# sidenote: without 'as.numeric()', the column names would be given by the
# column names of the xts-objects 'vGDPg' and 'vInfl' and cannot be overwritten


# Plot GDP growth:
ppTS = ggplot(mDataTS) + geom_line(aes(x = date, y = GDPgrowth), size = 0.9) + scale_x_date(date_labels = "%Y",
    date_breaks = "2 years", limits = as.Date(c("2000-01-01", "2020-01-01")))


# Sometimes, the breaks don't appear as desired. e.g.
ppTS = ggplot(mDataTS) + geom_line(aes(x = date, y = GDPgrowth), size = 0.9) + scale_x_date(date_labels = "%Y",
    date_breaks = "5 years", limits = as.Date(c("2000-01-01", "2025-01-01")))
# In such cases, it's a good idea to set 'breaks' rather than 'date_breaks',
# for which it's useful to load package 'lubridate':
library(lubridate)
ppTS = ggplot(mDataTS) + geom_line(aes(x = date, y = GDPgrowth), size = 0.9) + scale_x_date(date_labels = "%Y",
    breaks = seq(ymd("2000/01/01"), ymd("2025/01/01"), by = "5 years"), limits = as.Date(c("2000-01-01",
        "2025-01-01")))


# The comments from the section on string-dates above apply; if your dates are
# not in the format required by R ('YYYY-MM-DD'), you must first change them to
# this format using the command 'as.Date()'. e.g.
as.Date("31.01.2020", "%d.%m.%Y")
# Further useful options:


# Remove grid:
pp + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Remove panel border (the box around the main plot area):
pp + theme(panel.border = element_blank())

# Remove panel border but add x- and y-axis lines:
pp + theme(panel.border = element_blank()) + theme(axis.line = element_line(color = "black",
    size = 0.2))


# Add margins of 1cm all around plot:

pp + theme(plot.margin = unit(c(1, 1, 1, 1), "cm"))


# Shading of specified area: (useful for confidence intervals)
pp + geom_ribbon(aes(ymin = vNormPDF * 0.9, ymax = vNormPDF * 1.1, x = vx), alpha = 0.2,
    fill = "blue")
# Note. can add several layers:
pp + geom_ribbon(aes(ymin = vNormPDF * 0.9, ymax = vNormPDF * 1.1, x = vx), alpha = 0.2,
    fill = "blue") + geom_ribbon(aes(ymin = vNormPDF * 0.8, ymax = vNormPDF * 1.2,
    x = vx), alpha = 0.2, fill = "blue")


# Annotate text at given (x,y)-point:
pp + annotate("text", label = "abc", x = 2, y = 0.07)
# (could specify hjust and vjust for further customization)

# note: with date on x-axis, need to put wrapper 'as.Date()' around date:
ppTS + annotate("text", label = "abc", x = as.Date("2003-01-31"), y = 0.1)


# Flip axes:
pp + coord_flip()
# Note that the options can be combined, and they must be if they belong to the
# same option-command; e.g. don't write
theme(panel.border = element_blank())
# and then
theme(axis.title.x = element_text(size = 12))
# but write
theme(panel.border = element_blank(), axis.title.x = element_text(size = 12))

# Note that this can create issues for the use of a list of pre-defined
# options.  Sometimes when the same option is called several times, the order
# how they are called can change the outcome.
# Taken together, nice single-line plots:

plotOptions = list(theme_bw(), theme(aspect.ratio = 5/8, axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12), axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
    plot.title = element_text(family = "sans", size = 14)))

ggplot(data = mData) + plotOptions + geom_line(aes(x = xvar, y = norm), color = "blue",
    size = 0.9) + labs(x = sXlab, y = "", title = sTitle) + scale_x_continuous(breaks = seq(-1,
    8, 3), limits = c(-1, 8)) + scale_y_continuous(limits = c(0, 0.25)) + geom_ribbon(aes(ymin = vNormPDF *
    0.9, ymax = vNormPDF * 1.1, x = vx), alpha = 0.2, fill = "blue") + geom_ribbon(aes(ymin = vNormPDF *
    0.8, ymax = vNormPDF * 1.2, x = vx), alpha = 0.2, fill = "blue")

ggplot(mDataTS) + plotOptions + geom_line(aes(x = date, y = GDPgrowth), color = "blue",
    size = 0.9) + labs(x = "", y = "", title = "US GDP Growth (quarterly, annualized)") +
    scale_y_continuous(breaks = seq(-30, 30, by = 10)) + scale_x_date(date_labels = "%Y",
    breaks = seq(ymd("2000/01/01"), ymd("2025/01/01"), by = "5 years"), limits = as.Date(c("2000-01-01",
        "2025-01-01")))

Scatter-Plot

mData = data.frame(norm1 = mNormDraws[, 1], norm2 = mNormDraws[, 2])
# A scatter plot is created analogously to a line plot.  We just use the option
# geom_point rather than geom_line.


# Based on two vectors:

ggplot() + geom_point(aes(x = mNormDraws[, 1], y = mNormDraws[, 2]))


# Based on data frame:

ggplot(data = mData) + geom_point(aes(x = norm1, y = norm2))


# Change marker appearance:

pp1 = ggplot(data = mData) + geom_point(aes(x = norm1, y = norm2), size = 1.5, shape = 4,
    color = "blue")
# see http://sape.inf.usi.ch/quick-reference/ggplot2/shape for shapes available

# Some markers (21 - 25) allow for filling:
pp2 = ggplot(data = mData) + geom_point(aes(x = norm1, y = norm2), size = 1.5, shape = 21,
    color = "blue", fill = "blue")


# Can of course add all other options from above, e.g.

pp1 = pp1 + plotOptions + labs(x = "my x-lab", y = "my y-lab", title = "my nice scatter")


# Add labels to scatter-points:

vMyLabels = paste("a", 1:nrow(mData), sep = "")  # some arbitrary labels

pp1 + geom_text(aes(x = norm1, y = norm2, label = vMyLabels), size = 2, color = "purple",
    hjust = 1, vjust = -0.5)

# add labels only to some points (e.g. only first 4):
vMyLabels[5:length(vMyLabels)] = ""  # can also set to NA
pp1 + geom_text(aes(x = norm1, y = norm2, label = vMyLabels), size = 2, color = "purple",
    hjust = 1, vjust = -0.5)



# Add lines to plot:

# line with specified intercept and slope (e.g. could add 45-degree line):
pp1 + geom_abline(intercept = 3, slope = 2, size = 1, color = "black", linetype = "dotted")
# vertical line:
pp1 + geom_vline(xintercept = 2, size = 1, color = "black", linetype = "dotted")
# regression line:
pp1 + geom_smooth(aes(x = norm1, y = norm2), method = lm, se = FALSE, size = 1, color = "black",
    linetype = "dotted")
# regression line with 90% confidence bands:
pp1 + geom_smooth(aes(x = norm1, y = norm2), method = lm, se = TRUE, level = 0.9,
    size = 1, color = "black", linetype = "dotted")
# (note: single linear regression with homoskedasticity...)
# Taken together, nice scatter plot:

ggplot(data = mData) + geom_point(aes(x = norm1, y = norm2), size = 2, shape = 21,
    color = "blue", fill = "blue") + geom_smooth(aes(x = norm1, y = norm2), method = lm,
    se = FALSE, size = 0.5, color = "darkblue", linetype = "solid") + labs(x = "first Normal",
    y = "second Normal", title = "Scatter of Bivariate Normal Draws") + plotOptions

Plot With Multiple Lines

# Rather than supplying several vectors, it is easier to supply a dataframe
# with all the lines one wishes to plot:

mData = data.frame(xvar = vx, norm = vNormPDF, chi2 = vChi2PDF, t = vtPDF)

# Note: the names of the different variables will be the line-labels.  They are
# a bit more difficult to change than the labels of the x- and y-axes.
# TWO LINES


ggplot(mData, aes(x = xvar)) + geom_line(aes(y = norm)) + geom_line(aes(y = t))

# we should distinguish the different lines by color or shape or linetype; e.g.
ggplot(mData, aes(x = xvar)) + geom_line(aes(y = norm), color = "blue") + geom_line(aes(y = t),
    color = "red")
# MANY (>=3) LINES


# In this case, it's best to first put the dataframe into long format, (as
# there could be many lines, and adding each one separately like above would be
# cumbersome)

library(reshape2)
mDat = melt(mData, id.vars = "xvar")

# the different variables are now distinguished in the column 'variable', and
# their values are shown in the column 'value'
# Plot the three lines, distinguishing them by color:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
    size = 0.9)  # could specify linetype (for all lines)

# distinguish by linetype:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(linetype = variable),
    size = 0.9)  # could specify color (for all lines)

# distinguish by both:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable,
    linetype = variable))

# distinguish by line-color, point-color and point-shapes:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
    size = 0.5) + geom_point(aes(color = variable, shape = variable), size = 1)

# Of course, we can also just add the same markers for all lines:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
    size = 0.9) + geom_point(aes(color = variable), shape = 1)


# Set linetypes manually:

ppL = ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(linetype = variable),
    size = 0.9) + plotOptions

ppL + scale_linetype_manual(values = c("solid", "dashed", "dotdash"))
# see http://sape.inf.usi.ch/quick-reference/ggplot2/linetype for linetypes
# available note: can specify linetype also as number (1 is solid, 2 is dashed,
# etc.)

# Note: provided vector may be longer than the number of lines you have.
# That's why you can create a long vector with your preferred linetypes and
# then just call this vector, even for plots with differing number of lines.

# When setting linetypes manually, can specify/change line labels:
ppL + scale_linetype_manual(values = c("solid", "dashed", "dotdash"), labels = c("mycurve1",
    "mycurve2", "mycurve3"))


# Set colors manually:

ppC = ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
    size = 0.9) + plotOptions

ppC + scale_color_manual(values = c("blue", "red", "purple"))

# again can specify/change line labels:
ppC + scale_color_manual(values = c("blue", "red", "purple"), labels = c("mycurve1",
    "mycurve2", "mycurve3"))
# more choice of colors is given by package 'colorspace':
library(colorspace)

# take three colors (evenly spaced) from palette 'Blues 2':
vMyColors = sequential_hcl(3, palette = "Blues 2")
ppC + scale_color_manual(values = vMyColors)
# to avoid too light first color, construct a vector with more colors, and then
# take the few ones you need from the beginning and middle of the colorspace:
vMyColors = sequential_hcl(7, palette = "Blues 2")[c(1, 3, 5)]
ppC + scale_color_manual(values = vMyColors)


# Similarly, we can specify the colors of the markers as well as lines:

ppCM = ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
    size = 0.5) + geom_point(aes(color = variable), size = 1) + plotOptions

ppCM + scale_color_manual(values = vMyColors)


# Set point-shapes manually:

ppM = ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
    size = 0.5) + geom_point(aes(color = variable, shape = variable), size = 1) +
    plotOptions

ppM + scale_shape_manual(values = c(7, 9, 24))

ppM + scale_shape_manual(values = c(7, 9, 24), labels = c("mycurve1", "mycurve2",
    "mycurve3"))
# see http://sape.inf.usi.ch/quick-reference/ggplot2/shape for shapes available
# Legend options:

# Take plot for illustration:

ppC = ppC + scale_color_manual(values = vMyColors)


# Remove legend title:

ppC + theme(legend.title = element_blank())


# Remove legend altogether:

ppC + theme(legend.position = "none")


# Change position and direction: (by default, legend is outside the plot
# (middle-right) and vertical)

# put to middle-left:
ppC + theme(legend.position = "left")

# put to bottom-right:
ppC + theme(legend.justification = "bottom")

# make horizontal and put to bottom of plot:
ppC + theme(legend.direction = "horizontal", legend.position = "bottom", legend.justification = "left")

# put inside the plot:
ppC + theme(legend.position = c(0.8, 0.75))
# ( top right is (1,1), top left is (0,1))


# In some cases, it's hard to put the legend inside the plot, but without
# crossing the lines/markers. e.g.
ppC + theme(legend.position = c(0.5, 0.5))
# Unfortunately, one cannot make the legend background fully transparent.  Make
# legend-text transparent at least:
ppC + theme(legend.position = c(0.5, 0.5), legend.background = element_rect(fill = alpha("white",
    0)))
# Taken together, nice multiple-line plot:

ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
    size = 0.9) + plotOptions + scale_color_manual(values = vMyColors) + theme(legend.title = element_blank(),
    legend.position = c(0.8, 0.75)) + labs(x = "", y = "", title = "Three Important Distributions")

Scatter-plot with multiple variables

# Analogous to above, just with geom_point instead of geom_line option.


# Create dataframe with variables (vectors) to plot:
mData = as.data.frame(mNormDraws)
colnames(mData) = c("norm1", "norm2", "norm3")

# Melt to long format:
mDat = melt(mData, id.vars = "norm1")

# Note: variable taken here as id.var will be on x-axis.  In a scatter plot,
# this could in principle be any variable.  (In contrast, under the line plot,
# it was clear what the x-axis is.)
# Plot, distinguishing variables by markers:

ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(shape = variable),
    size = 2, color = "blue")

# Plot, distinguishing variables by colors:

ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable),
    size = 2)

# Plot, distinguishing variables by both:

ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable,
    shape = variable), size = 2)


# Add options:

ppMS = ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable,
    shape = variable), size = 2) + plotOptions


# Specify marker-shapes and colors manually:

ppMS + scale_color_manual(values = vMyColors) + scale_shape_manual(values = c(1,
    4))

# Also re-label variables:
ppMS + scale_color_manual(values = vMyColors, labels = c("1st Normal", "2nd Normal")) +
    scale_shape_manual(values = c(1, 4), labels = c("1st Normal", "2nd Normal"))
# (note: just doing it in one of the two options will produce two legends)

# Note: when distinguishing by color, by default, R will choose a round marker
# that is filled (as above). e.g.
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable),
    size = 2)
# When you change the markers manually, like here, then they will not be
# filled.  To add filling, must let filling also change by variable.  e.g. this
# produces unfilled triangles:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable),
    size = 2, shape = 2)
# to get filled triangles:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable,
    fill = variable), size = 2, shape = 24)
# Nice, final scatter-plot with multiple variables:

ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable),
    size = 2) + scale_color_manual(values = c("blue", "red"), labels = c("2nd Normal",
    "3rd Normal")) + plotOptions + scale_y_continuous(limits = c(0, 12), breaks = c(0,
    4, 8, 12)) + theme(legend.title = element_blank(), legend.position = c(0.8, 0.15)) +
    labs(x = "1st Normal", y = "", title = "Multivariate Normal Draws (k=3)")

“Grouped” Scatter-Plot

# Above, we plotted three variables in a single scatter plot, i.e. we plotted
# two variables against a third.

# In the same way, we could have plotted, say, 5 variables by plotting 4
# variables against a fifth one, yielding 4 'clouds' of points.

# Sometimes, we indeed want to plot two variables, but distinguish the points
# in the scatter by a third variable.  The number of 'clouds' of points is then
# determined by how many values this third variable has.


# DISCRETE CASE


# Plot hp against hp, with different colors for different numbers of cylinders:

ggplot(mtcars, aes(x = mpg, y = hp, group = cyl)) + geom_point(aes(color = cyl))
# Note that R thinks of cyl as a continuous variable, even though it takes only
# three values.  To fix that, code it as type 'character' rather than
# 'numeric':

mtcars2 = mtcars  # create new dataframe..
mtcars2$cyl = as.character(mtcars2$cyl)  #.. in which cyl is of type character
ppMS = ggplot(mtcars2, aes(x = mpg, y = hp, group = cyl)) + geom_point(aes(color = cyl))

# We can change the colors as above; e.g.
ppMS + scale_color_manual(values = vMyColors, labels = c("4 cyl.", "6 cyl.", "8 cyl."))


# Also, could of course distinguish points for different numbers of cylinders
# by markers rather than colors (or both).

# In this case, cyl really needs to be of type character, because, in contrast
# to colors, markers cannot vary continuously.



# CONTINUOUS CASE


# Plot hp against mpg, and distinguish points by color based on value of qsec:
ppMS = ggplot(mtcars, aes(x = mpg, y = hp, group = qsec)) + geom_point(aes(color = qsec))

# To specify colors, need to use scale_color_gradient rather than
# scale_color_manual:
ppMS + scale_color_gradient(low = "white", high = "blue")

# The analogous holds for the filling (if marker-shapes are specified that can
# be filled):
ppMS = ggplot(mtcars, aes(x = mpg, y = hp, group = qsec)) + geom_point(aes(color = qsec),
    shape = 21) + scale_color_gradient(low = "white", high = "blue") + scale_fill_gradient(low = "white",
    high = "blue")
# Nice, final grouped scatter-plot:

ggplot(mtcars, aes(x = mpg, y = hp, group = qsec)) + geom_point(aes(color = qsec,
    fill = qsec), size = 3, shape = 21) + scale_color_gradient(low = "white", high = "blue") +
    scale_fill_gradient(low = "white", high = "blue") + plotOptions + scale_y_continuous(limits = c(0,
    300), breaks = seq(0, 300, by = 100)) + theme(legend.position = "none") + labs(title = "A Nice Grouped-Scatter Plot")

Histogram

# SINGLE HISTOGRAM


# Based on single vector:

ggplot() + geom_histogram(aes(x = mNormDraws[, 2]))

# Based on column in dataframe:
ggplot(data = mData) + geom_histogram(aes(x = norm2))

# note that here we only need to specify the x-variable in aes()


# Change y-axis from frequency to density:

ggplot() + geom_histogram(aes(x = mNormDraws[, 2], y = ..density..))

ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..))


# Specify number of bins directly:
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..), bins = 10)

# Specify number of bins with binwidth:
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..), binwidth = 0.5)


# Change color and filling:
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..), color = "black",
    fill = "grey", alpha = 0.2)


# Add line: (here: add actual pdf of N(5,1))
vThisNormPDF = dnorm(vx, 5, 1)
ggplot() + geom_histogram(aes(x = mNormDraws[, 2], y = ..density..)) + geom_line(aes(x = vx,
    y = vThisNormPDF))

# Could also do this with dataframe, but then we need same number of draws as
# 'elements' in the vector that represents the line (here: function
# evaluations)



# MULTIPLE HISTOGRAMS IN SINGLE PLOT


ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..), color = "blue",
    fill = "blue", alpha = 0.2) + geom_histogram(aes(x = norm3, y = ..density..),
    color = "red", fill = "red", alpha = 0.2)


# A separate histogram of one variable for each value of some third variable:
# e.g. a separate histogram of hp for each value of cyl, each with different
# color:

ggplot(mtcars2, aes(x = hp, fill = cyl)) + geom_histogram(aes(x = hp, y = ..density..,
    color = cyl), position = "identity")

# Should make bars a bit transparent:
ggplot(mtcars2, aes(x = hp, fill = cyl)) + geom_histogram(aes(x = hp, y = ..density..,
    color = cyl), position = "identity", alpha = 0.6)
# Taken together, nice histogram plots:

vThisNormPDF = dnorm(vx, 5, 1)

ggplot() + plotOptions + geom_histogram(aes(x = mNormDraws[, 2], y = ..density..),
    color = "blue", fill = "blue", alpha = 0.2) + geom_line(aes(x = vx, y = vThisNormPDF),
    color = "purple") + labs(x = "", y = "", title = "Histogram of N(5,1) draws") +
    theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())

ggplot(mtcars2, aes(x = hp, fill = cyl)) + plotOptions + geom_histogram(aes(x = hp,
    y = ..density.., color = cyl), position = "identity", alpha = 0.6) + theme(legend.title = element_blank(),
    legend.position = c(0.8, 0.75)) + labs(x = "", y = "", title = "Histogram of hp by cyl.") +
    theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())

Bar-Plot

# Compute mean of Normal draws for each column of mNormDraws (dimension of
# multiv. Normal):

mDataBP = data.frame(norms = c(1, 2, 3), means = apply(mNormDraws, 2, mean))
# Illustrate:

ggplot(data = mDataBP, aes(x = norms, y = means)) + geom_bar(stat = "identity")

# Adjust appearance of bars:
ggplot(data = mDataBP, aes(x = norms, y = means)) + geom_bar(stat = "identity", color = "blue",
    fill = "blue", alpha = 0.2)
# To change x-axis, it is best to first make R realize that it is a discrete
# rather than continuous variable:
mDataBP$norms = as.character(mDataBP$norms)
ggplot(data = mDataBP, aes(x = norms, y = means)) + geom_bar(stat = "identity", color = "blue",
    fill = "blue", alpha = 0.2) + scale_x_discrete(labels = c("1st", "2nd", "3rd"))
# Taken together, nice bar-plot:

ggplot(data = mDataBP, aes(x = norms, y = means)) + plotOptions + geom_bar(stat = "identity",
    color = "blue", fill = "blue", alpha = 0.2) + scale_x_discrete(labels = c("1st",
    "2nd", "3rd")) + labs(x = "", y = "", title = "Mean of Normal Draws")

Plot-Combinations Into Single Figure

# Note: when producing a latex document with these plots, it is a better idea
# to produce the plots individually and assemble them in latex.  (more
# flexibility)

# Three plots for illustration:
plotOptionsCommonHere = list(plotOptions, ylim(c(0, 6)), scale_x_continuous(limits = c(-2,
    10), breaks = seq(-2, 10, by = 2)))
pp1 = ggplot(data = mData) + plotOptionsCommonHere + geom_histogram(aes(x = norm1),
    color = "blue", fill = "blue", alpha = 0.2) + labs(x = "", y = "", title = "norm1")

pp2 = ggplot(data = mData) + plotOptionsCommonHere + geom_histogram(aes(x = norm2),
    color = "blue", fill = "blue", alpha = 0.2) + labs(x = "", y = "", title = "norm2")

pp3 = ggplot(data = mData) + plotOptionsCommonHere + geom_histogram(aes(x = norm3),
    color = "blue", fill = "blue", alpha = 0.2) + labs(x = "", y = "", title = "norm3")


# It might be useful to create an empty plot: (e.g. when creating a 3x3 figure,
# but have 8 plots)

ggplot() + theme_void()



# FIRST APPROACH


library(gridExtra)
library(ggpubr)
# (need both packages)

# Create 1 x 2 plot:
grid.arrange(pp1, pp2, ncol = 2)

# To save:
ggsave("myplot1.pdf", arrangeGrob(pp1, pp2, ncol = 2))


# Add title:
title = text_grob("Draws from two Normals", size = 14, face = "bold", vjust = 5)
grid.arrange(pp1, pp2, ncol = 2, top = title)

# Can specify widths:
grid.arrange(pp1, pp2, ncol = 2, top = title, widths = c(2, 2))

# Can also specify ordering of plots (useful when many more than just 2 plots
# are added):
grid.arrange(pp1, pp2, ncol = 2, top = title, widths = c(2, 2), layout_matrix = rbind(c(1,
    2)))


# All of these options need to be specified also in 'arrangeGrob()' when saving
# the plot.

# Note: it would make sense to delete y-axis ticks on the right plot as they
# are common to both and already shown in the left plot.



# SECOND APPROACH


library(patchwork)

# Create 1 x 2 plot as above:
(pp1 | pp2)

# Here we have the advantage that we can save the resulting plot and call it
# later:
ppTotal = (pp1 | pp2)
ppTotal

# Add title:
ppTotal + plot_annotation(title = "Draws from two Normals")

# With this approach, we do not need a special command for saving the plots,
# but can use 'ggsave()'.  However, one needs to create a large plot, because
# with many plots, default size window is usually too small:
ggsave("myplot2.pdf", width = 40, height = 25, units = "cm")
# Note: changing the width and height will also change the spacing between
# figures.


# 2 x 3 plots:
(pp1 | pp1 | pp1)/(pp1 | pp1 | pp1)

# Can have even more flexible plot-arrangement:
layout = "
      AABB
      AABB
      EEEE
      EEEE
    "
pp1 + pp1 + pp1 + plot_layout(design = layout)
ggsave("myplot2.pdf", width = 40, height = 25, units = "cm")

# For plots with legends: If all plots have the same legend, you can collect
# all them into one single legend using the following commands:
ppTotal + plot_layout(guides = "collect") + theme(legend.position = "bottom")

Further

# There are many more things that ggplot2 can do.  For example, to create a
# grid-plot, use 'geom_raster()' or 'geom_rect()', to create an area-plot, use
# 'geom_area()'.  A google search usually shows quick how to use these
# commands.


# A final, important note: when iteratively adding layers to a ggplot in a loop
# (e.g. adding one line at a time to an existing plot), you need to use double
# quotation marks.  Otherwise, ggplot creates the plot only once the loop is
# finished, which may lead to unexpected behavior.  See this post:
# https://stackoverflow.com/questions/52671480/adding-multiple-ribbons-in-ggplot.