# A primary reason behind the popularity of R is the fact that one can create
# plots very flexibly. Before we discuss more advanced plotting-techniques
# based on the package 'ggplot2', let's start with some basic plots that can be
# constructed using pre-built commands in R. They are useful for checking
# quickly how something looks in the data-exploratory process (and for
# inclusion in preliminary reports).
# Generate data to be plotted:
set.seed(50)
vx = seq(0.1, 7, by = 0.1)
vNormPDF = dnorm(vx, 4, 2)
vChi2PDF = dchisq(vx, 3)
vtPDF = dt(vx, 10)
library(mvtnorm)
vMeans = c(2, 5, 7)
mVariance = diag(c(2, 1, 3))
mVariance[1, 2] = 1
mVariance[2, 1] = 1
mNormDraws = rmvnorm(30, vMeans, mVariance)
# Load further data from actual dataset:
library(carData)
data(Hartnagel)
data(mtcars)# Plot single line:
# default:
plot(vNormPDF)
# to get line instead of dots:
plot(vNormPDF, type = "l")
# for line and dots:
plot(vNormPDF, type = "o")
# Note that by default, index is on the x-axis (1,2,...). To specify x-axis:
plot(vx, vNormPDF, type = "l")
# specify y-limits, and x-limits:
plot(vx, vNormPDF, type = "l", ylim = c(0, 0.25), xlim = c(-2, 9))
# specify title, x-axis-label and y-axis-label:
plot(vx, vNormPDF, type = "l", main = "My Title Here", xlab = "my x lab", ylab = "my y lab")
# note: can also be used to delete default labs: (default has no title)
plot(vx, vNormPDF, type = "l", xlab = "", ylab = "")
# specify color of markers (line and/or dots): (most standard colors are
# supported)
plot(vx, vNormPDF, type = "l", col = "blue")
plot(vx, vNormPDF, col = "blue")
# Make line thicker:
plot(vx, vNormPDF, type = "l", lwd = 4)
# Remove ticks for y-axis:
plot(vx, vNormPDF, type = "l", yaxt = "n")
# Remove ticks for x-axis:
plot(vx, vNormPDF, type = "l", xaxt = "n")
# (these commands make more sense if the corresponding axis-label is also
# deleted)# With package 'latex2exp', we can add math to labels (x-lab, y-lab, title):
library(latex2exp)
sXlab = TeX("$\\theta_t $ (I made it up)")
sTitle = TeX("$f(\\theta_t)$")
plot(vx, vNormPDF, type = "l", xlab = sXlab, ylab = "", main = sTitle)
# The latex-part of the expression starts and ends with a dollar sign. The
# code is just as in latex, except that one has to use two backlashes to call
# latex-symbols.# Taken together, nice single-line plot:
plot(vx, vNormPDF, type = "l", col = "blue", lwd = 4, main = "my pretty plot of N(4,4)",
xlab = "", ylab = "", yaxt = "n", ylim = c(0, 0.22), xlim = c(-1, 8))# We can easily add further lines to such an existing plot:
# Initial plot:
plot(vx, vNormPDF, type = "l", col = "blue")
# Add another line (chi^2-disttribution, in red):
lines(vx, vChi2PDF, type = "l", col = "red")
# Note that the y-axis is not adjusted automatically so that the new line fits.
# Add mathematically defined lines:
abline(v = 3, col = "green") # add vertical line at x=3
abline(h = 0.08, col = "gray") # add horizontal line at y=0.08
abline(a = -0.2, b = 0.1, col = "orange") # add line with specific intercept and slope# Taken together, nice lines plot:
plot(vx, vNormPDF, type = "l", col = "blue", lwd = 4, main = "my pretty plot of N(4,4) and Chi2(3)",
xlab = "", ylab = "", ylim = c(0, 0.3), xlim = c(-1, 8))
lines(vx, vChi2PDF, type = "l", col = "red", lwd = 4)# Note that we can also directly plot a function, without first manually
# specifying a grid of x-values and evaluating the function at those values:
# Plot standard normal pdf:
plot(dnorm, xlim = c(-2, 8))
# Plot pdf of N(4,4):
plot(function(x) {
dnorm(x, 4, 2)
}, xlim = c(-2, 8))
# Plot exponential function:
plot(exp, xlim = c(1, 3))
# Of course, all of the options discussed above can also be applied for these
# plots. They also work for all of the plots discussed below (scatter,
# histogram, boxplot, ...).# Saving a plot as a .png or .pdf file:
# 1. specify your path:
sMyPlotPath = "/Users/markomlikota/Dropbox/myfolder"
setwd(sMyPlotPath)
# 2. specify name:
png("blabla.png") # specify width and height here for nice plots
# 3. create the plot (or print an existing, already saved one with 'print()'):
plot(vx, vNormPDF, type = "l", col = "blue", lwd = 4, main = "my pretty plot of N(4,4) and Chi2(3)",
xlab = "", ylab = "", ylim = c(0, 0.3), xlim = c(-1, 8))
lines(vx, vChi2PDF, type = "l", col = "red", lwd = 4)
# 4. tell R that you're done: (no further lines or so will be added)
dev.off()
# Note that it is not possible, without rather complicated adjustments, to save
# such a plot as an object. This is different for plots created via 'ggplot2'.# Scatter-Plot:
# To obtain a scatter plot, simply specify two vectors (as before) and use dots
# rather than a line as markers:
plot(mNormDraws[, 1], mNormDraws[, 2])
# Change marker type:
plot(mNormDraws[, 1], mNormDraws[, 2], pch = 8)
plot(mNormDraws[, 1], mNormDraws[, 2], pch = 15)
# (see e.g.
# https://www.math.ucla.edu/~anderson/rw1001/library/base/html/points.html for
# further marker-types)
# add further cloud of points in different color:
points(mNormDraws[, 1], mNormDraws[, 3], col = "red")
# add regression line for first cloud of points:
abline(lm(mNormDraws[, 2] ~ mNormDraws[, 1]))
# for second:
abline(lm(mNormDraws[, 3] ~ mNormDraws[, 1]), col = "red")
# Should enlarge x- and y-axis to make this more appealing.# Taken together, nice scaatter-plot:
plot(mNormDraws[, 1], mNormDraws[, 2], col = "blue", pch = 4, lwd = 2, main = "my pretty scatterplot of Normal draws",
xlab = "", ylab = "", ylim = c(2, 11), xlim = c(-2, 5))
points(mNormDraws[, 1], mNormDraws[, 3], col = "red", pch = 4, lwd = 2)
abline(lm(mNormDraws[, 2] ~ mNormDraws[, 1]), col = "blue")
abline(lm(mNormDraws[, 3] ~ mNormDraws[, 1]), col = "red")# Histogram:
# Default:
hist(mNormDraws[, 1])
# Add color:
hist(mNormDraws[, 1], col = "blue")
# Add border color:
hist(mNormDraws[, 1], col = "blue", border = "white")
# Remove fill-color:
hist(mNormDraws[, 1], col = NULL, border = "blue")
# Use density rather than frequency on y-axis: (so that area of bins sums up to
# 1)
hist(mNormDraws[, 1], freq = FALSE)
# Specify number of bins by specifying location of bin-breaks:
hist(mNormDraws[, 1], breaks = seq(-2, 6, by = 0.5))
# Specify number of bins:
hist(mNormDraws[, 1], breaks = 8)
# (doesn't work perfectly; R uses it only as suggestion; see ?hist)# Taken together, nice histogram:
hist(mNormDraws[, 1], breaks = seq(-1, 5, by = 0.5), col = "blue", border = "white",
main = "my pretty histogram of Normal draws", xlab = "", ylab = "")# Boxplot:
# Boxplot of first vector of Normal draws:
boxplot(mNormDraws[, 1])
# of first two:
boxplot(mNormDraws[, 1], mNormDraws[, 2])
# of all three: (i.e. of all columns in supplied matrix)
boxplot(mNormDraws)
# Naming columns in that matrix changes x-tick-labels:
colnames(mNormDraws) <- c("first", "second", "third")
boxplot(mNormDraws)
# With dataframes, one can directly choose to get boxplots of one variable as a
# function of (values of) another:
boxplot(hp ~ cyl, data = mtcars, xlab = "cyls", ylab = "", main = "Boxplot of HP by #cyls")
# (rather than first manually creating vectors that show hp for each value of
# cyl)# QQ-Plot:
qqnorm(vtPDF, xlab = "N(0,1)", ylab = "t(10)", col = "blue")
qqline(vtPDF, col = "red")# Display multiple plots as panels in a single figure:
library(dyn)
par(mfrow = c(1, 3)) # 1 x 3 plot
hist(mNormDraws[, 1], breaks = seq(-1, 8, by = 1), col = "blue", border = "white",
main = "hist var1", xlab = "", ylab = "")
hist(mNormDraws[, 2], breaks = seq(-1, 8, by = 1), col = "blue", border = "white",
main = "hist var2", xlab = "", ylab = "")
plot(mNormDraws[, 1], mNormDraws[, 2], col = "blue", pch = 4, lwd = 2, main = "scatter",
xlab = "var1", ylab = "var2", ylim = c(2, 11), xlim = c(-2, 5))
abline(lm(mNormDraws[, 2] ~ mNormDraws[, 1]), col = "blue")
par(mfrow = c(1, 1))
# Adjust margins and specify overarching title:
par(mfrow = c(1, 3), oma = c(2, 2, 2, 2), mar = c(2, 2, 2, 2))
# (see https://www.r-graph-gallery.com/74-margin-and-oma-cheatsheet.html)
hist(mNormDraws[, 1], breaks = seq(-1, 8, by = 1), col = "blue", border = "white",
main = "hist var1", xlab = "", ylab = "")
hist(mNormDraws[, 2], breaks = seq(-1, 8, by = 1), col = "blue", border = "white",
main = "hist var2", xlab = "", ylab = "")
plot(mNormDraws[, 1], mNormDraws[, 2], col = "blue", pch = 4, lwd = 2, main = "scatter",
xlab = "var1", ylab = "var2", ylim = c(2, 11), xlim = c(-2, 5))
abline(lm(mNormDraws[, 2] ~ mNormDraws[, 1]), col = "blue")
mtext("Bivariate Normal Draws", outer = TRUE, cex = 1.5)
par(mfrow = c(1, 1))# Empty plot: (useful when creating e.g. a plot with 3 x 3 subplots and you
# have only 8 plots)
plot(c(0, 0), type = "n", ylab = "", xlab = "", axes = FALSE)# Plotting xts objects:
# When plotting xts objects, we get a plot with lots of default-adjustments (R
# realizes that the x-axis refers to time, returns a grid, etc.):
# To illustrate this, we download data on GDP and inflation directly from the
# FRED database:
library(quantmod)
# Inflation (based on GDP deflator):
getSymbols("GDPDEF", src = "FRED")
GDPDEF = GDPDEF["1960-01-01/2025-01-01"]
vInfl = 100 * 4 * diff(log(GDPDEF))[-1]
# GDP growth:
getSymbols("GDP", src = "FRED")
GDP = GDP["1960-01-01/2025-01-01"]
vGDPg = 100 * 4 * diff(log(GDP))[-1]
plot(vInfl, main = "Inflation (quarterly, annualized, %)")
# delete y-axis ticks on the right:
plot(vInfl, main = "Inflation (quarterly, annualized, %)", yaxis.right = FALSE)
# Plot two series on same graph:
plot(cbind(vInfl, vGDPg), main = "Inflation & GDP Growth")
# (better than manually;) plot(vInfl, main = 'Inflation & GDP Growth',
# yaxis.right = FALSE) lines(vGDPg, col='red', yaxis.right = FALSE)# With the package 'ggplot2', one has much more flexibility in creating plots.
# We'll start with a plot containing a single line and then discuss multiple
# lines, scatter plots, histograms and other plot-types. Various plot-options
# are discussed, and each is introduced as part of the discussion of the
# plot-type for which it is best explained.
# (A further popular package for plotting in R is 'plotly'. However, in my
# view 'ggplot2' is better in all aspects except for creating interactive
# plots.)library(ggplot2)
mData = data.frame(xvar = vx, norm = vNormPDF)# Plot single line based on two vectors:
ggplot() + geom_line(aes(x = vx, y = vNormPDF))
# Plot single line based on a dataframe:
ggplot(data = mData) + geom_line(aes(x = xvar, y = norm))
# To change appearance of line, specify further attributes in the option
# 'geom_line()'. e.g. change color, size and linetype:
ggplot(data = mData) + geom_line(aes(x = xvar, y = norm), color = "blue", size = 0.9,
linetype = "dashed")
# see http://sape.inf.usi.ch/quick-reference/ggplot2/linetype for linetypes
# available note: can specify linetype also as number (1 is solid, 2 is dashed,
# etc.)
# Add further options with pluses:
ggplot(data = mData) + geom_line(aes(x = xvar, y = norm), color = "blue", size = 0.9) +
labs(x = "my x-lab", y = "my y-lab", title = "my title") + theme_bw() + theme(aspect.ratio = 5/8)# Save plot to object:
pp1 = ggplot(data = mData) + geom_line(aes(x = xvar, y = norm), color = "blue", size = 0.9) +
labs(x = "my x-lab", y = "my y-lab", title = "my title")
# Then, can add further options to already existing plot:
pp1 + theme_bw() + theme(aspect.ratio = 5/8)
# And can save to new object (or override existing one):
pp = pp1 + theme_bw() + theme(aspect.ratio = 5/8)
# You may want to re-use some options over and over again. Instead of typing
# them every time, store them as a list and call them at once all together:
plotOptions = list(theme_bw(), theme(aspect.ratio = 5/8))
# Show the plot 'pp' with these options:
pp1 + plotOptions
# Add these options to plot 'pp1':
pp1 = pp1 + plotOptions# Save plot as .png or .pdf file:
# 1. create plot or print/call existing one:
pp
# 2. save:
ggsave("myprettyplot.png")
# save with advanced options:
ggsave("myprettyplot.png", width = 15, height = 15 * (8/5), units = "cm")
# note: width/height should match previously specified aspect ratio
# Note: for inclusion in latex-document, save plot best as pdf# As before, can have latex-code for labels:
library(latex2exp)
sXlab = TeX("$\\theta_t $ (I made it up)")
sTitle = TeX("$ f(\\theta_t) $")pp1 + labs(x = sXlab, y = "", title = sTitle)# There are many more options to customize the x-axis, y-axis and title:
# Adjust limits and ticks of x-axis and limits of y-axis:
pp + scale_x_continuous(breaks = seq(-1, 8, 3), limits = c(-1, 8)) + scale_y_continuous(limits = c(0,
0.25))
# Equivalent to the latter option:
pp + ylim(c(0, 0.25))
# Also equivalent:
pp + expand_limits(y = c(0, 0.25))
# When changing only upper limit, type:
pp + expand_limits(y = c(NA, 0.25))
# (analogously for only lower limit)
# Manually specify labels of ticks on x-axis:
pp + scale_x_continuous(limits = c(-1, 8), breaks = c(0, 3, 4, 6), labels = paste(c(0,
3, 4, 6), "a", sep = ""))
# note: by specifying empty labels for certain breaks, we can have ticks
# without labels:
vLabs = paste(c(0, 3, 4, 6), "a", sep = "")
vLabs[2] = ""
pp + scale_x_continuous(limits = c(-1, 8), breaks = c(0, 3, 4, 6), labels = vLabs)
# Change axis values from decimals to percent (e.g. 0.01 becomes 1%): (just for
# illustration; it doesn't make any sense here)
pp + scale_y_continuous(labels = scales::percent)
# Have x-axis in units of pi:
vxDivPi = vx/pi
pp + scale_x_continuous(limits = c(-1, 8), breaks = pi * (0:3), labels = paste(0:3,
"pi"))
# (and analogously for any other transformation)
# Force all tick-labels to have same, specified number of decimals
scaleFUN = function(x) sprintf("%.3f", x)
pp + scale_y_continuous(limits = c(0, 0.25), breaks = seq(0, 0.25, by = 0.05), lab = scaleFUN)
# (without this option, we'd have two decimals in this case) (the option is
# also useful in some cases where R by default gives different ticks with
# different numbers of decimals )
# Remove axis ticks and tick-labels:
pp + theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())
# To remove axis-label, can use +labels(ylab='') as above, or:
pp + theme(axis.title.y = element_blank())
# Change font size for x-axis label:
pp + theme(axis.title.x = element_text(size = 12))
# Change font size of tick labels and space between axis and tick labels:
pp + theme(axis.text.x = element_text(size = 12, vjust = -1))
# Rotate x-axis labels:
pp + theme(axis.text.x = element_text(angle = 30, hjust = 0.5, vjust = 0.8))
# Change font size and family of title:
pp + theme(plot.title = element_text(family = "sans", size = 14))
# Change also margins:
pp + theme(plot.title = element_text(family = "sans", size = 14, margin = margin(0,
0, 2, 0)))
# (instead of margin, can also specify vjust and hjust)# If you have time on the x-axis, use the option 'scale_x_date()' rather than
# 'scale_x_manual()':
# Obtain quarterly GDP growth and inflation for illustration:
library(quantmod)
getSymbols("GDPDEF", src = "FRED")
GDPDEF = GDPDEF["1960-01-01/2025-01-01"]
vInfl = 100 * 4 * diff(log(GDPDEF))[-1]
getSymbols("GDP", src = "FRED")
GDP = GDP["1960-01-01/2025-01-01"]
vGDPg = 100 * 4 * diff(log(GDP))[-1]
mDataTS = data.frame(date = index(vGDPg), GDPgrowth = as.numeric(vGDPg), inflation = as.numeric(vInfl))
# sidenote: without 'as.numeric()', the column names would be given by the
# column names of the xts-objects 'vGDPg' and 'vInfl' and cannot be overwritten
# Plot GDP growth:
ppTS = ggplot(mDataTS) + geom_line(aes(x = date, y = GDPgrowth), size = 0.9) + scale_x_date(date_labels = "%Y",
date_breaks = "2 years", limits = as.Date(c("2000-01-01", "2020-01-01")))
# Sometimes, the breaks don't appear as desired. e.g.
ppTS = ggplot(mDataTS) + geom_line(aes(x = date, y = GDPgrowth), size = 0.9) + scale_x_date(date_labels = "%Y",
date_breaks = "5 years", limits = as.Date(c("2000-01-01", "2025-01-01")))
# In such cases, it's a good idea to set 'breaks' rather than 'date_breaks',
# for which it's useful to load package 'lubridate':
library(lubridate)
ppTS = ggplot(mDataTS) + geom_line(aes(x = date, y = GDPgrowth), size = 0.9) + scale_x_date(date_labels = "%Y",
breaks = seq(ymd("2000/01/01"), ymd("2025/01/01"), by = "5 years"), limits = as.Date(c("2000-01-01",
"2025-01-01")))
# The comments from the section on string-dates above apply; if your dates are
# not in the format required by R ('YYYY-MM-DD'), you must first change them to
# this format using the command 'as.Date()'. e.g.
as.Date("31.01.2020", "%d.%m.%Y")# Further useful options:
# Remove grid:
pp + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# Remove panel border (the box around the main plot area):
pp + theme(panel.border = element_blank())
# Remove panel border but add x- and y-axis lines:
pp + theme(panel.border = element_blank()) + theme(axis.line = element_line(color = "black",
size = 0.2))
# Add margins of 1cm all around plot:
pp + theme(plot.margin = unit(c(1, 1, 1, 1), "cm"))
# Shading of specified area: (useful for confidence intervals)
pp + geom_ribbon(aes(ymin = vNormPDF * 0.9, ymax = vNormPDF * 1.1, x = vx), alpha = 0.2,
fill = "blue")
# Note. can add several layers:
pp + geom_ribbon(aes(ymin = vNormPDF * 0.9, ymax = vNormPDF * 1.1, x = vx), alpha = 0.2,
fill = "blue") + geom_ribbon(aes(ymin = vNormPDF * 0.8, ymax = vNormPDF * 1.2,
x = vx), alpha = 0.2, fill = "blue")
# Annotate text at given (x,y)-point:
pp + annotate("text", label = "abc", x = 2, y = 0.07)
# (could specify hjust and vjust for further customization)
# note: with date on x-axis, need to put wrapper 'as.Date()' around date:
ppTS + annotate("text", label = "abc", x = as.Date("2003-01-31"), y = 0.1)
# Flip axes:
pp + coord_flip()# Note that the options can be combined, and they must be if they belong to the
# same option-command; e.g. don't write
theme(panel.border = element_blank())
# and then
theme(axis.title.x = element_text(size = 12))
# but write
theme(panel.border = element_blank(), axis.title.x = element_text(size = 12))
# Note that this can create issues for the use of a list of pre-defined
# options. Sometimes when the same option is called several times, the order
# how they are called can change the outcome.# Taken together, nice single-line plots:
plotOptions = list(theme_bw(), theme(aspect.ratio = 5/8, axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12), axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
plot.title = element_text(family = "sans", size = 14)))
ggplot(data = mData) + plotOptions + geom_line(aes(x = xvar, y = norm), color = "blue",
size = 0.9) + labs(x = sXlab, y = "", title = sTitle) + scale_x_continuous(breaks = seq(-1,
8, 3), limits = c(-1, 8)) + scale_y_continuous(limits = c(0, 0.25)) + geom_ribbon(aes(ymin = vNormPDF *
0.9, ymax = vNormPDF * 1.1, x = vx), alpha = 0.2, fill = "blue") + geom_ribbon(aes(ymin = vNormPDF *
0.8, ymax = vNormPDF * 1.2, x = vx), alpha = 0.2, fill = "blue")ggplot(mDataTS) + plotOptions + geom_line(aes(x = date, y = GDPgrowth), color = "blue",
size = 0.9) + labs(x = "", y = "", title = "US GDP Growth (quarterly, annualized)") +
scale_y_continuous(breaks = seq(-30, 30, by = 10)) + scale_x_date(date_labels = "%Y",
breaks = seq(ymd("2000/01/01"), ymd("2025/01/01"), by = "5 years"), limits = as.Date(c("2000-01-01",
"2025-01-01")))mData = data.frame(norm1 = mNormDraws[, 1], norm2 = mNormDraws[, 2])# A scatter plot is created analogously to a line plot. We just use the option
# geom_point rather than geom_line.
# Based on two vectors:
ggplot() + geom_point(aes(x = mNormDraws[, 1], y = mNormDraws[, 2]))
# Based on data frame:
ggplot(data = mData) + geom_point(aes(x = norm1, y = norm2))
# Change marker appearance:
pp1 = ggplot(data = mData) + geom_point(aes(x = norm1, y = norm2), size = 1.5, shape = 4,
color = "blue")
# see http://sape.inf.usi.ch/quick-reference/ggplot2/shape for shapes available
# Some markers (21 - 25) allow for filling:
pp2 = ggplot(data = mData) + geom_point(aes(x = norm1, y = norm2), size = 1.5, shape = 21,
color = "blue", fill = "blue")
# Can of course add all other options from above, e.g.
pp1 = pp1 + plotOptions + labs(x = "my x-lab", y = "my y-lab", title = "my nice scatter")
# Add labels to scatter-points:
vMyLabels = paste("a", 1:nrow(mData), sep = "") # some arbitrary labels
pp1 + geom_text(aes(x = norm1, y = norm2, label = vMyLabels), size = 2, color = "purple",
hjust = 1, vjust = -0.5)
# add labels only to some points (e.g. only first 4):
vMyLabels[5:length(vMyLabels)] = "" # can also set to NA
pp1 + geom_text(aes(x = norm1, y = norm2, label = vMyLabels), size = 2, color = "purple",
hjust = 1, vjust = -0.5)
# Add lines to plot:
# line with specified intercept and slope (e.g. could add 45-degree line):
pp1 + geom_abline(intercept = 3, slope = 2, size = 1, color = "black", linetype = "dotted")
# vertical line:
pp1 + geom_vline(xintercept = 2, size = 1, color = "black", linetype = "dotted")
# regression line:
pp1 + geom_smooth(aes(x = norm1, y = norm2), method = lm, se = FALSE, size = 1, color = "black",
linetype = "dotted")
# regression line with 90% confidence bands:
pp1 + geom_smooth(aes(x = norm1, y = norm2), method = lm, se = TRUE, level = 0.9,
size = 1, color = "black", linetype = "dotted")
# (note: single linear regression with homoskedasticity...)# Taken together, nice scatter plot:
ggplot(data = mData) + geom_point(aes(x = norm1, y = norm2), size = 2, shape = 21,
color = "blue", fill = "blue") + geom_smooth(aes(x = norm1, y = norm2), method = lm,
se = FALSE, size = 0.5, color = "darkblue", linetype = "solid") + labs(x = "first Normal",
y = "second Normal", title = "Scatter of Bivariate Normal Draws") + plotOptions# Rather than supplying several vectors, it is easier to supply a dataframe
# with all the lines one wishes to plot:
mData = data.frame(xvar = vx, norm = vNormPDF, chi2 = vChi2PDF, t = vtPDF)
# Note: the names of the different variables will be the line-labels. They are
# a bit more difficult to change than the labels of the x- and y-axes.# TWO LINES
ggplot(mData, aes(x = xvar)) + geom_line(aes(y = norm)) + geom_line(aes(y = t))
# we should distinguish the different lines by color or shape or linetype; e.g.
ggplot(mData, aes(x = xvar)) + geom_line(aes(y = norm), color = "blue") + geom_line(aes(y = t),
color = "red")# MANY (>=3) LINES
# In this case, it's best to first put the dataframe into long format, (as
# there could be many lines, and adding each one separately like above would be
# cumbersome)
library(reshape2)
mDat = melt(mData, id.vars = "xvar")
# the different variables are now distinguished in the column 'variable', and
# their values are shown in the column 'value'# Plot the three lines, distinguishing them by color:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
size = 0.9) # could specify linetype (for all lines)
# distinguish by linetype:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(linetype = variable),
size = 0.9) # could specify color (for all lines)
# distinguish by both:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable,
linetype = variable))
# distinguish by line-color, point-color and point-shapes:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
size = 0.5) + geom_point(aes(color = variable, shape = variable), size = 1)
# Of course, we can also just add the same markers for all lines:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
size = 0.9) + geom_point(aes(color = variable), shape = 1)
# Set linetypes manually:
ppL = ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(linetype = variable),
size = 0.9) + plotOptions
ppL + scale_linetype_manual(values = c("solid", "dashed", "dotdash"))
# see http://sape.inf.usi.ch/quick-reference/ggplot2/linetype for linetypes
# available note: can specify linetype also as number (1 is solid, 2 is dashed,
# etc.)
# Note: provided vector may be longer than the number of lines you have.
# That's why you can create a long vector with your preferred linetypes and
# then just call this vector, even for plots with differing number of lines.
# When setting linetypes manually, can specify/change line labels:
ppL + scale_linetype_manual(values = c("solid", "dashed", "dotdash"), labels = c("mycurve1",
"mycurve2", "mycurve3"))
# Set colors manually:
ppC = ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
size = 0.9) + plotOptions
ppC + scale_color_manual(values = c("blue", "red", "purple"))
# again can specify/change line labels:
ppC + scale_color_manual(values = c("blue", "red", "purple"), labels = c("mycurve1",
"mycurve2", "mycurve3"))# more choice of colors is given by package 'colorspace':
library(colorspace)
# take three colors (evenly spaced) from palette 'Blues 2':
vMyColors = sequential_hcl(3, palette = "Blues 2")ppC + scale_color_manual(values = vMyColors)# to avoid too light first color, construct a vector with more colors, and then
# take the few ones you need from the beginning and middle of the colorspace:
vMyColors = sequential_hcl(7, palette = "Blues 2")[c(1, 3, 5)]ppC + scale_color_manual(values = vMyColors)
# Similarly, we can specify the colors of the markers as well as lines:
ppCM = ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
size = 0.5) + geom_point(aes(color = variable), size = 1) + plotOptions
ppCM + scale_color_manual(values = vMyColors)
# Set point-shapes manually:
ppM = ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
size = 0.5) + geom_point(aes(color = variable, shape = variable), size = 1) +
plotOptions
ppM + scale_shape_manual(values = c(7, 9, 24))
ppM + scale_shape_manual(values = c(7, 9, 24), labels = c("mycurve1", "mycurve2",
"mycurve3"))
# see http://sape.inf.usi.ch/quick-reference/ggplot2/shape for shapes available# Legend options:
# Take plot for illustration:
ppC = ppC + scale_color_manual(values = vMyColors)
# Remove legend title:
ppC + theme(legend.title = element_blank())
# Remove legend altogether:
ppC + theme(legend.position = "none")
# Change position and direction: (by default, legend is outside the plot
# (middle-right) and vertical)
# put to middle-left:
ppC + theme(legend.position = "left")
# put to bottom-right:
ppC + theme(legend.justification = "bottom")
# make horizontal and put to bottom of plot:
ppC + theme(legend.direction = "horizontal", legend.position = "bottom", legend.justification = "left")
# put inside the plot:
ppC + theme(legend.position = c(0.8, 0.75))
# ( top right is (1,1), top left is (0,1))
# In some cases, it's hard to put the legend inside the plot, but without
# crossing the lines/markers. e.g.
ppC + theme(legend.position = c(0.5, 0.5))
# Unfortunately, one cannot make the legend background fully transparent. Make
# legend-text transparent at least:
ppC + theme(legend.position = c(0.5, 0.5), legend.background = element_rect(fill = alpha("white",
0)))# Taken together, nice multiple-line plot:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
size = 0.9) + plotOptions + scale_color_manual(values = vMyColors) + theme(legend.title = element_blank(),
legend.position = c(0.8, 0.75)) + labs(x = "", y = "", title = "Three Important Distributions")# Analogous to above, just with geom_point instead of geom_line option.
# Create dataframe with variables (vectors) to plot:
mData = as.data.frame(mNormDraws)
colnames(mData) = c("norm1", "norm2", "norm3")
# Melt to long format:
mDat = melt(mData, id.vars = "norm1")
# Note: variable taken here as id.var will be on x-axis. In a scatter plot,
# this could in principle be any variable. (In contrast, under the line plot,
# it was clear what the x-axis is.)# Plot, distinguishing variables by markers:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(shape = variable),
size = 2, color = "blue")
# Plot, distinguishing variables by colors:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable),
size = 2)
# Plot, distinguishing variables by both:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable,
shape = variable), size = 2)
# Add options:
ppMS = ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable,
shape = variable), size = 2) + plotOptions
# Specify marker-shapes and colors manually:
ppMS + scale_color_manual(values = vMyColors) + scale_shape_manual(values = c(1,
4))
# Also re-label variables:
ppMS + scale_color_manual(values = vMyColors, labels = c("1st Normal", "2nd Normal")) +
scale_shape_manual(values = c(1, 4), labels = c("1st Normal", "2nd Normal"))
# (note: just doing it in one of the two options will produce two legends)
# Note: when distinguishing by color, by default, R will choose a round marker
# that is filled (as above). e.g.
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable),
size = 2)
# When you change the markers manually, like here, then they will not be
# filled. To add filling, must let filling also change by variable. e.g. this
# produces unfilled triangles:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable),
size = 2, shape = 2)
# to get filled triangles:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable,
fill = variable), size = 2, shape = 24)# Nice, final scatter-plot with multiple variables:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable),
size = 2) + scale_color_manual(values = c("blue", "red"), labels = c("2nd Normal",
"3rd Normal")) + plotOptions + scale_y_continuous(limits = c(0, 12), breaks = c(0,
4, 8, 12)) + theme(legend.title = element_blank(), legend.position = c(0.8, 0.15)) +
labs(x = "1st Normal", y = "", title = "Multivariate Normal Draws (k=3)")# Above, we plotted three variables in a single scatter plot, i.e. we plotted
# two variables against a third.
# In the same way, we could have plotted, say, 5 variables by plotting 4
# variables against a fifth one, yielding 4 'clouds' of points.
# Sometimes, we indeed want to plot two variables, but distinguish the points
# in the scatter by a third variable. The number of 'clouds' of points is then
# determined by how many values this third variable has.
# DISCRETE CASE
# Plot hp against hp, with different colors for different numbers of cylinders:
ggplot(mtcars, aes(x = mpg, y = hp, group = cyl)) + geom_point(aes(color = cyl))# Note that R thinks of cyl as a continuous variable, even though it takes only
# three values. To fix that, code it as type 'character' rather than
# 'numeric':
mtcars2 = mtcars # create new dataframe..
mtcars2$cyl = as.character(mtcars2$cyl) #.. in which cyl is of type characterppMS = ggplot(mtcars2, aes(x = mpg, y = hp, group = cyl)) + geom_point(aes(color = cyl))
# We can change the colors as above; e.g.
ppMS + scale_color_manual(values = vMyColors, labels = c("4 cyl.", "6 cyl.", "8 cyl."))
# Also, could of course distinguish points for different numbers of cylinders
# by markers rather than colors (or both).
# In this case, cyl really needs to be of type character, because, in contrast
# to colors, markers cannot vary continuously.
# CONTINUOUS CASE
# Plot hp against mpg, and distinguish points by color based on value of qsec:
ppMS = ggplot(mtcars, aes(x = mpg, y = hp, group = qsec)) + geom_point(aes(color = qsec))
# To specify colors, need to use scale_color_gradient rather than
# scale_color_manual:
ppMS + scale_color_gradient(low = "white", high = "blue")
# The analogous holds for the filling (if marker-shapes are specified that can
# be filled):
ppMS = ggplot(mtcars, aes(x = mpg, y = hp, group = qsec)) + geom_point(aes(color = qsec),
shape = 21) + scale_color_gradient(low = "white", high = "blue") + scale_fill_gradient(low = "white",
high = "blue")# Nice, final grouped scatter-plot:
ggplot(mtcars, aes(x = mpg, y = hp, group = qsec)) + geom_point(aes(color = qsec,
fill = qsec), size = 3, shape = 21) + scale_color_gradient(low = "white", high = "blue") +
scale_fill_gradient(low = "white", high = "blue") + plotOptions + scale_y_continuous(limits = c(0,
300), breaks = seq(0, 300, by = 100)) + theme(legend.position = "none") + labs(title = "A Nice Grouped-Scatter Plot")# SINGLE HISTOGRAM
# Based on single vector:
ggplot() + geom_histogram(aes(x = mNormDraws[, 2]))
# Based on column in dataframe:
ggplot(data = mData) + geom_histogram(aes(x = norm2))
# note that here we only need to specify the x-variable in aes()
# Change y-axis from frequency to density:
ggplot() + geom_histogram(aes(x = mNormDraws[, 2], y = ..density..))
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..))
# Specify number of bins directly:
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..), bins = 10)
# Specify number of bins with binwidth:
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..), binwidth = 0.5)
# Change color and filling:
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..), color = "black",
fill = "grey", alpha = 0.2)
# Add line: (here: add actual pdf of N(5,1))
vThisNormPDF = dnorm(vx, 5, 1)
ggplot() + geom_histogram(aes(x = mNormDraws[, 2], y = ..density..)) + geom_line(aes(x = vx,
y = vThisNormPDF))
# Could also do this with dataframe, but then we need same number of draws as
# 'elements' in the vector that represents the line (here: function
# evaluations)
# MULTIPLE HISTOGRAMS IN SINGLE PLOT
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..), color = "blue",
fill = "blue", alpha = 0.2) + geom_histogram(aes(x = norm3, y = ..density..),
color = "red", fill = "red", alpha = 0.2)
# A separate histogram of one variable for each value of some third variable:
# e.g. a separate histogram of hp for each value of cyl, each with different
# color:
ggplot(mtcars2, aes(x = hp, fill = cyl)) + geom_histogram(aes(x = hp, y = ..density..,
color = cyl), position = "identity")
# Should make bars a bit transparent:
ggplot(mtcars2, aes(x = hp, fill = cyl)) + geom_histogram(aes(x = hp, y = ..density..,
color = cyl), position = "identity", alpha = 0.6)# Taken together, nice histogram plots:
vThisNormPDF = dnorm(vx, 5, 1)
ggplot() + plotOptions + geom_histogram(aes(x = mNormDraws[, 2], y = ..density..),
color = "blue", fill = "blue", alpha = 0.2) + geom_line(aes(x = vx, y = vThisNormPDF),
color = "purple") + labs(x = "", y = "", title = "Histogram of N(5,1) draws") +
theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())ggplot(mtcars2, aes(x = hp, fill = cyl)) + plotOptions + geom_histogram(aes(x = hp,
y = ..density.., color = cyl), position = "identity", alpha = 0.6) + theme(legend.title = element_blank(),
legend.position = c(0.8, 0.75)) + labs(x = "", y = "", title = "Histogram of hp by cyl.") +
theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())# Compute mean of Normal draws for each column of mNormDraws (dimension of
# multiv. Normal):
mDataBP = data.frame(norms = c(1, 2, 3), means = apply(mNormDraws, 2, mean))# Illustrate:
ggplot(data = mDataBP, aes(x = norms, y = means)) + geom_bar(stat = "identity")
# Adjust appearance of bars:
ggplot(data = mDataBP, aes(x = norms, y = means)) + geom_bar(stat = "identity", color = "blue",
fill = "blue", alpha = 0.2)# To change x-axis, it is best to first make R realize that it is a discrete
# rather than continuous variable:
mDataBP$norms = as.character(mDataBP$norms)ggplot(data = mDataBP, aes(x = norms, y = means)) + geom_bar(stat = "identity", color = "blue",
fill = "blue", alpha = 0.2) + scale_x_discrete(labels = c("1st", "2nd", "3rd"))# Taken together, nice bar-plot:
ggplot(data = mDataBP, aes(x = norms, y = means)) + plotOptions + geom_bar(stat = "identity",
color = "blue", fill = "blue", alpha = 0.2) + scale_x_discrete(labels = c("1st",
"2nd", "3rd")) + labs(x = "", y = "", title = "Mean of Normal Draws")# Note: when producing a latex document with these plots, it is a better idea
# to produce the plots individually and assemble them in latex. (more
# flexibility)
# Three plots for illustration:
plotOptionsCommonHere = list(plotOptions, ylim(c(0, 6)), scale_x_continuous(limits = c(-2,
10), breaks = seq(-2, 10, by = 2)))
pp1 = ggplot(data = mData) + plotOptionsCommonHere + geom_histogram(aes(x = norm1),
color = "blue", fill = "blue", alpha = 0.2) + labs(x = "", y = "", title = "norm1")
pp2 = ggplot(data = mData) + plotOptionsCommonHere + geom_histogram(aes(x = norm2),
color = "blue", fill = "blue", alpha = 0.2) + labs(x = "", y = "", title = "norm2")
pp3 = ggplot(data = mData) + plotOptionsCommonHere + geom_histogram(aes(x = norm3),
color = "blue", fill = "blue", alpha = 0.2) + labs(x = "", y = "", title = "norm3")
# It might be useful to create an empty plot: (e.g. when creating a 3x3 figure,
# but have 8 plots)
ggplot() + theme_void()
# FIRST APPROACH
library(gridExtra)
library(ggpubr)
# (need both packages)
# Create 1 x 2 plot:
grid.arrange(pp1, pp2, ncol = 2)
# To save:
ggsave("myplot1.pdf", arrangeGrob(pp1, pp2, ncol = 2))
# Add title:
title = text_grob("Draws from two Normals", size = 14, face = "bold", vjust = 5)
grid.arrange(pp1, pp2, ncol = 2, top = title)
# Can specify widths:
grid.arrange(pp1, pp2, ncol = 2, top = title, widths = c(2, 2))
# Can also specify ordering of plots (useful when many more than just 2 plots
# are added):
grid.arrange(pp1, pp2, ncol = 2, top = title, widths = c(2, 2), layout_matrix = rbind(c(1,
2)))
# All of these options need to be specified also in 'arrangeGrob()' when saving
# the plot.
# Note: it would make sense to delete y-axis ticks on the right plot as they
# are common to both and already shown in the left plot.
# SECOND APPROACH
library(patchwork)
# Create 1 x 2 plot as above:
(pp1 | pp2)
# Here we have the advantage that we can save the resulting plot and call it
# later:
ppTotal = (pp1 | pp2)
ppTotal
# Add title:
ppTotal + plot_annotation(title = "Draws from two Normals")
# With this approach, we do not need a special command for saving the plots,
# but can use 'ggsave()'. However, one needs to create a large plot, because
# with many plots, default size window is usually too small:
ggsave("myplot2.pdf", width = 40, height = 25, units = "cm")
# Note: changing the width and height will also change the spacing between
# figures.
# 2 x 3 plots:
(pp1 | pp1 | pp1)/(pp1 | pp1 | pp1)
# Can have even more flexible plot-arrangement:
layout = "
AABB
AABB
EEEE
EEEE
"
pp1 + pp1 + pp1 + plot_layout(design = layout)
ggsave("myplot2.pdf", width = 40, height = 25, units = "cm")
# For plots with legends: If all plots have the same legend, you can collect
# all them into one single legend using the following commands:
ppTotal + plot_layout(guides = "collect") + theme(legend.position = "bottom")# There are many more things that ggplot2 can do. For example, to create a
# grid-plot, use 'geom_raster()' or 'geom_rect()', to create an area-plot, use
# 'geom_area()'. A google search usually shows quick how to use these
# commands.
# A final, important note: when iteratively adding layers to a ggplot in a loop
# (e.g. adding one line at a time to an existing plot), you need to use double
# quotation marks. Otherwise, ggplot creates the plot only once the loop is
# finished, which may lead to unexpected behavior. See this post:
# https://stackoverflow.com/questions/52671480/adding-multiple-ribbons-in-ggplot.