# A primary reason behind the popularity of R is the fact that one can create
# plots very flexibly. Before we discuss more advanced plotting-techniques
# based on the package 'ggplot2', let's start with some basic plots that can be
# constructed using pre-built commands in R. They are useful for checking
# quickly how something looks in the data-exploratory process (and for
# inclusion in preliminary reports).
# Generate data to be plotted:
set.seed(50)
= seq(0.1, 7, by = 0.1)
vx = dnorm(vx, 4, 2)
vNormPDF = dchisq(vx, 3)
vChi2PDF = dt(vx, 10)
vtPDF
library(mvtnorm)
= c(2, 5, 7)
vMeans = diag(c(2, 1, 3))
mVariance 1, 2] = 1
mVariance[2, 1] = 1
mVariance[= rmvnorm(30, vMeans, mVariance)
mNormDraws
# Load further data from actual dataset:
library(carData)
data(Hartnagel)
data(mtcars)
# Plot single line:
# default:
plot(vNormPDF)
# to get line instead of dots:
plot(vNormPDF, type = "l")
# for line and dots:
plot(vNormPDF, type = "o")
# Note that by default, index is on the x-axis (1,2,...). To specify x-axis:
plot(vx, vNormPDF, type = "l")
# specify y-limits, and x-limits:
plot(vx, vNormPDF, type = "l", ylim = c(0, 0.25), xlim = c(-2, 9))
# specify title, x-axis-label and y-axis-label:
plot(vx, vNormPDF, type = "l", main = "My Title Here", xlab = "my x lab", ylab = "my y lab")
# note: can also be used to delete default labs: (default has no title)
plot(vx, vNormPDF, type = "l", xlab = "", ylab = "")
# specify color of markers (line and/or dots): (most standard colors are
# supported)
plot(vx, vNormPDF, type = "l", col = "blue")
plot(vx, vNormPDF, col = "blue")
# Make line thicker:
plot(vx, vNormPDF, type = "l", lwd = 4)
# Remove ticks for y-axis:
plot(vx, vNormPDF, type = "l", yaxt = "n")
# Remove ticks for x-axis:
plot(vx, vNormPDF, type = "l", xaxt = "n")
# (these commands make more sense if the corresponding axis-label is also
# deleted)
# With package 'latex2exp', we can add math to labels (x-lab, y-lab, title):
library(latex2exp)
= TeX("$\\theta_t $ (I made it up)")
sXlab = TeX("$f(\\theta_t)$")
sTitle plot(vx, vNormPDF, type = "l", xlab = sXlab, ylab = "", main = sTitle)
# The latex-part of the expression starts and ends with a dollar sign. The
# code is just as in latex, except that one has to use two backlashes to call
# latex-symbols.
# Taken together, nice single-line plot:
plot(vx, vNormPDF, type = "l", col = "blue", lwd = 4, main = "my pretty plot of N(4,4)",
xlab = "", ylab = "", yaxt = "n", ylim = c(0, 0.22), xlim = c(-1, 8))
# We can easily add further lines to such an existing plot:
# Initial plot:
plot(vx, vNormPDF, type = "l", col = "blue")
# Add another line (chi^2-disttribution, in red):
lines(vx, vChi2PDF, type = "l", col = "red")
# Note that the y-axis is not adjusted automatically so that the new line fits.
# Add mathematically defined lines:
abline(v = 3, col = "green") # add vertical line at x=3
abline(h = 0.08, col = "gray") # add horizontal line at y=0.08
abline(a = -0.2, b = 0.1, col = "orange") # add line with specific intercept and slope
# Taken together, nice lines plot:
plot(vx, vNormPDF, type = "l", col = "blue", lwd = 4, main = "my pretty plot of N(4,4) and Chi2(3)",
xlab = "", ylab = "", ylim = c(0, 0.3), xlim = c(-1, 8))
lines(vx, vChi2PDF, type = "l", col = "red", lwd = 4)
# Note that we can also directly plot a function, without first manually
# specifying a grid of x-values and evaluating the function at those values:
# Plot standard normal pdf:
plot(dnorm, xlim = c(-2, 8))
# Plot pdf of N(4,4):
plot(function(x) {
dnorm(x, 4, 2)
xlim = c(-2, 8))
},
# Plot exponential function:
plot(exp, xlim = c(1, 3))
# Of course, all of the options discussed above can also be applied for these
# plots. They also work for all of the plots discussed below (scatter,
# histogram, boxplot, ...).
# Saving a plot as a .png or .pdf file:
# 1. specify your path:
= "/Users/markomlikota/Dropbox/myfolder"
sMyPlotPath setwd(sMyPlotPath)
# 2. specify name:
png("blabla.png") # specify width and height here for nice plots
# 3. create the plot (or print an existing, already saved one with 'print()'):
plot(vx, vNormPDF, type = "l", col = "blue", lwd = 4, main = "my pretty plot of N(4,4) and Chi2(3)",
xlab = "", ylab = "", ylim = c(0, 0.3), xlim = c(-1, 8))
lines(vx, vChi2PDF, type = "l", col = "red", lwd = 4)
# 4. tell R that you're done: (no further lines or so will be added)
dev.off()
# Note that it is not possible, without rather complicated adjustments, to save
# such a plot as an object. This is different for plots created via 'ggplot2'.
# Scatter-Plot:
# To obtain a scatter plot, simply specify two vectors (as before) and use dots
# rather than a line as markers:
plot(mNormDraws[, 1], mNormDraws[, 2])
# Change marker type:
plot(mNormDraws[, 1], mNormDraws[, 2], pch = 8)
plot(mNormDraws[, 1], mNormDraws[, 2], pch = 15)
# (see e.g.
# https://www.math.ucla.edu/~anderson/rw1001/library/base/html/points.html for
# further marker-types)
# add further cloud of points in different color:
points(mNormDraws[, 1], mNormDraws[, 3], col = "red")
# add regression line for first cloud of points:
abline(lm(mNormDraws[, 2] ~ mNormDraws[, 1]))
# for second:
abline(lm(mNormDraws[, 3] ~ mNormDraws[, 1]), col = "red")
# Should enlarge x- and y-axis to make this more appealing.
# Taken together, nice scaatter-plot:
plot(mNormDraws[, 1], mNormDraws[, 2], col = "blue", pch = 4, lwd = 2, main = "my pretty scatterplot of Normal draws",
xlab = "", ylab = "", ylim = c(2, 11), xlim = c(-2, 5))
points(mNormDraws[, 1], mNormDraws[, 3], col = "red", pch = 4, lwd = 2)
abline(lm(mNormDraws[, 2] ~ mNormDraws[, 1]), col = "blue")
abline(lm(mNormDraws[, 3] ~ mNormDraws[, 1]), col = "red")
# Histogram:
# Default:
hist(mNormDraws[, 1])
# Add color:
hist(mNormDraws[, 1], col = "blue")
# Add border color:
hist(mNormDraws[, 1], col = "blue", border = "white")
# Remove fill-color:
hist(mNormDraws[, 1], col = NULL, border = "blue")
# Use density rather than frequency on y-axis: (so that area of bins sums up to
# 1)
hist(mNormDraws[, 1], freq = FALSE)
# Specify number of bins by specifying location of bin-breaks:
hist(mNormDraws[, 1], breaks = seq(-2, 6, by = 0.5))
# Specify number of bins:
hist(mNormDraws[, 1], breaks = 8)
# (doesn't work perfectly; R uses it only as suggestion; see ?hist)
# Taken together, nice histogram:
hist(mNormDraws[, 1], breaks = seq(-1, 5, by = 0.5), col = "blue", border = "white",
main = "my pretty histogram of Normal draws", xlab = "", ylab = "")
# Boxplot:
# Boxplot of first vector of Normal draws:
boxplot(mNormDraws[, 1])
# of first two:
boxplot(mNormDraws[, 1], mNormDraws[, 2])
# of all three: (i.e. of all columns in supplied matrix)
boxplot(mNormDraws)
# Naming columns in that matrix changes x-tick-labels:
colnames(mNormDraws) <- c("first", "second", "third")
boxplot(mNormDraws)
# With dataframes, one can directly choose to get boxplots of one variable as a
# function of (values of) another:
boxplot(hp ~ cyl, data = mtcars, xlab = "cyls", ylab = "", main = "Boxplot of HP by #cyls")
# (rather than first manually creating vectors that show hp for each value of
# cyl)
# QQ-Plot:
qqnorm(vtPDF, xlab = "N(0,1)", ylab = "t(10)", col = "blue")
qqline(vtPDF, col = "red")
# Display multiple plots as panels in a single figure:
library(dyn)
par(mfrow = c(1, 3)) # 1 x 3 plot
hist(mNormDraws[, 1], breaks = seq(-1, 8, by = 1), col = "blue", border = "white",
main = "hist var1", xlab = "", ylab = "")
hist(mNormDraws[, 2], breaks = seq(-1, 8, by = 1), col = "blue", border = "white",
main = "hist var2", xlab = "", ylab = "")
plot(mNormDraws[, 1], mNormDraws[, 2], col = "blue", pch = 4, lwd = 2, main = "scatter",
xlab = "var1", ylab = "var2", ylim = c(2, 11), xlim = c(-2, 5))
abline(lm(mNormDraws[, 2] ~ mNormDraws[, 1]), col = "blue")
par(mfrow = c(1, 1))
# Adjust margins and specify overarching title:
par(mfrow = c(1, 3), oma = c(2, 2, 2, 2), mar = c(2, 2, 2, 2))
# (see https://www.r-graph-gallery.com/74-margin-and-oma-cheatsheet.html)
hist(mNormDraws[, 1], breaks = seq(-1, 8, by = 1), col = "blue", border = "white",
main = "hist var1", xlab = "", ylab = "")
hist(mNormDraws[, 2], breaks = seq(-1, 8, by = 1), col = "blue", border = "white",
main = "hist var2", xlab = "", ylab = "")
plot(mNormDraws[, 1], mNormDraws[, 2], col = "blue", pch = 4, lwd = 2, main = "scatter",
xlab = "var1", ylab = "var2", ylim = c(2, 11), xlim = c(-2, 5))
abline(lm(mNormDraws[, 2] ~ mNormDraws[, 1]), col = "blue")
mtext("Bivariate Normal Draws", outer = TRUE, cex = 1.5)
par(mfrow = c(1, 1))
# Empty plot: (useful when creating e.g. a plot with 3 x 3 subplots and you
# have only 8 plots)
plot(c(0, 0), type = "n", ylab = "", xlab = "", axes = FALSE)
# Plotting xts objects:
# When plotting xts objects, we get a plot with lots of default-adjustments (R
# realizes that the x-axis refers to time, returns a grid, etc.):
# To illustrate this, we download data on GDP and inflation directly from the
# FRED database:
library(quantmod)
# Inflation (based on GDP deflator):
getSymbols("GDPDEF", src = "FRED")
= GDPDEF["1960-01-01/2025-01-01"]
GDPDEF = 100 * 4 * diff(log(GDPDEF))[-1]
vInfl # GDP growth:
getSymbols("GDP", src = "FRED")
= GDP["1960-01-01/2025-01-01"]
GDP = 100 * 4 * diff(log(GDP))[-1]
vGDPg
plot(vInfl, main = "Inflation (quarterly, annualized, %)")
# delete y-axis ticks on the right:
plot(vInfl, main = "Inflation (quarterly, annualized, %)", yaxis.right = FALSE)
# Plot two series on same graph:
plot(cbind(vInfl, vGDPg), main = "Inflation & GDP Growth")
# (better than manually;) plot(vInfl, main = 'Inflation & GDP Growth',
# yaxis.right = FALSE) lines(vGDPg, col='red', yaxis.right = FALSE)
# With the package 'ggplot2', one has much more flexibility in creating plots.
# We'll start with a plot containing a single line and then discuss multiple
# lines, scatter plots, histograms and other plot-types. Various plot-options
# are discussed, and each is introduced as part of the discussion of the
# plot-type for which it is best explained.
# (A further popular package for plotting in R is 'plotly'. However, in my
# view 'ggplot2' is better in all aspects except for creating interactive
# plots.)
library(ggplot2)
= data.frame(xvar = vx, norm = vNormPDF) mData
# Plot single line based on two vectors:
ggplot() + geom_line(aes(x = vx, y = vNormPDF))
# Plot single line based on a dataframe:
ggplot(data = mData) + geom_line(aes(x = xvar, y = norm))
# To change appearance of line, specify further attributes in the option
# 'geom_line()'. e.g. change color, size and linetype:
ggplot(data = mData) + geom_line(aes(x = xvar, y = norm), color = "blue", size = 0.9,
linetype = "dashed")
# see http://sape.inf.usi.ch/quick-reference/ggplot2/linetype for linetypes
# available note: can specify linetype also as number (1 is solid, 2 is dashed,
# etc.)
# Add further options with pluses:
ggplot(data = mData) + geom_line(aes(x = xvar, y = norm), color = "blue", size = 0.9) +
labs(x = "my x-lab", y = "my y-lab", title = "my title") + theme_bw() + theme(aspect.ratio = 5/8)
# Save plot to object:
= ggplot(data = mData) + geom_line(aes(x = xvar, y = norm), color = "blue", size = 0.9) +
pp1 labs(x = "my x-lab", y = "my y-lab", title = "my title")
# Then, can add further options to already existing plot:
+ theme_bw() + theme(aspect.ratio = 5/8)
pp1
# And can save to new object (or override existing one):
= pp1 + theme_bw() + theme(aspect.ratio = 5/8)
pp
# You may want to re-use some options over and over again. Instead of typing
# them every time, store them as a list and call them at once all together:
= list(theme_bw(), theme(aspect.ratio = 5/8))
plotOptions
# Show the plot 'pp' with these options:
+ plotOptions
pp1 # Add these options to plot 'pp1':
= pp1 + plotOptions pp1
# Save plot as .png or .pdf file:
# 1. create plot or print/call existing one:
pp# 2. save:
ggsave("myprettyplot.png")
# save with advanced options:
ggsave("myprettyplot.png", width = 15, height = 15 * (8/5), units = "cm")
# note: width/height should match previously specified aspect ratio
# Note: for inclusion in latex-document, save plot best as pdf
# As before, can have latex-code for labels:
library(latex2exp)
= TeX("$\\theta_t $ (I made it up)")
sXlab = TeX("$ f(\\theta_t) $") sTitle
+ labs(x = sXlab, y = "", title = sTitle) pp1
# There are many more options to customize the x-axis, y-axis and title:
# Adjust limits and ticks of x-axis and limits of y-axis:
+ scale_x_continuous(breaks = seq(-1, 8, 3), limits = c(-1, 8)) + scale_y_continuous(limits = c(0,
pp 0.25))
# Equivalent to the latter option:
+ ylim(c(0, 0.25))
pp # Also equivalent:
+ expand_limits(y = c(0, 0.25))
pp # When changing only upper limit, type:
+ expand_limits(y = c(NA, 0.25))
pp # (analogously for only lower limit)
# Manually specify labels of ticks on x-axis:
+ scale_x_continuous(limits = c(-1, 8), breaks = c(0, 3, 4, 6), labels = paste(c(0,
pp 3, 4, 6), "a", sep = ""))
# note: by specifying empty labels for certain breaks, we can have ticks
# without labels:
= paste(c(0, 3, 4, 6), "a", sep = "")
vLabs 2] = ""
vLabs[+ scale_x_continuous(limits = c(-1, 8), breaks = c(0, 3, 4, 6), labels = vLabs)
pp
# Change axis values from decimals to percent (e.g. 0.01 becomes 1%): (just for
# illustration; it doesn't make any sense here)
+ scale_y_continuous(labels = scales::percent)
pp
# Have x-axis in units of pi:
= vx/pi
vxDivPi + scale_x_continuous(limits = c(-1, 8), breaks = pi * (0:3), labels = paste(0:3,
pp "pi"))
# (and analogously for any other transformation)
# Force all tick-labels to have same, specified number of decimals
= function(x) sprintf("%.3f", x)
scaleFUN + scale_y_continuous(limits = c(0, 0.25), breaks = seq(0, 0.25, by = 0.05), lab = scaleFUN)
pp # (without this option, we'd have two decimals in this case) (the option is
# also useful in some cases where R by default gives different ticks with
# different numbers of decimals )
# Remove axis ticks and tick-labels:
+ theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())
pp
# To remove axis-label, can use +labels(ylab='') as above, or:
+ theme(axis.title.y = element_blank())
pp
# Change font size for x-axis label:
+ theme(axis.title.x = element_text(size = 12))
pp
# Change font size of tick labels and space between axis and tick labels:
+ theme(axis.text.x = element_text(size = 12, vjust = -1))
pp
# Rotate x-axis labels:
+ theme(axis.text.x = element_text(angle = 30, hjust = 0.5, vjust = 0.8))
pp
# Change font size and family of title:
+ theme(plot.title = element_text(family = "sans", size = 14))
pp # Change also margins:
+ theme(plot.title = element_text(family = "sans", size = 14, margin = margin(0,
pp 0, 2, 0)))
# (instead of margin, can also specify vjust and hjust)
# If you have time on the x-axis, use the option 'scale_x_date()' rather than
# 'scale_x_manual()':
# Obtain quarterly GDP growth and inflation for illustration:
library(quantmod)
getSymbols("GDPDEF", src = "FRED")
= GDPDEF["1960-01-01/2025-01-01"]
GDPDEF = 100 * 4 * diff(log(GDPDEF))[-1]
vInfl
getSymbols("GDP", src = "FRED")
= GDP["1960-01-01/2025-01-01"]
GDP = 100 * 4 * diff(log(GDP))[-1]
vGDPg
= data.frame(date = index(vGDPg), GDPgrowth = as.numeric(vGDPg), inflation = as.numeric(vInfl))
mDataTS # sidenote: without 'as.numeric()', the column names would be given by the
# column names of the xts-objects 'vGDPg' and 'vInfl' and cannot be overwritten
# Plot GDP growth:
= ggplot(mDataTS) + geom_line(aes(x = date, y = GDPgrowth), size = 0.9) + scale_x_date(date_labels = "%Y",
ppTS date_breaks = "2 years", limits = as.Date(c("2000-01-01", "2020-01-01")))
# Sometimes, the breaks don't appear as desired. e.g.
= ggplot(mDataTS) + geom_line(aes(x = date, y = GDPgrowth), size = 0.9) + scale_x_date(date_labels = "%Y",
ppTS date_breaks = "5 years", limits = as.Date(c("2000-01-01", "2025-01-01")))
# In such cases, it's a good idea to set 'breaks' rather than 'date_breaks',
# for which it's useful to load package 'lubridate':
library(lubridate)
= ggplot(mDataTS) + geom_line(aes(x = date, y = GDPgrowth), size = 0.9) + scale_x_date(date_labels = "%Y",
ppTS breaks = seq(ymd("2000/01/01"), ymd("2025/01/01"), by = "5 years"), limits = as.Date(c("2000-01-01",
"2025-01-01")))
# The comments from the section on string-dates above apply; if your dates are
# not in the format required by R ('YYYY-MM-DD'), you must first change them to
# this format using the command 'as.Date()'. e.g.
as.Date("31.01.2020", "%d.%m.%Y")
# Further useful options:
# Remove grid:
+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
pp
# Remove panel border (the box around the main plot area):
+ theme(panel.border = element_blank())
pp
# Remove panel border but add x- and y-axis lines:
+ theme(panel.border = element_blank()) + theme(axis.line = element_line(color = "black",
pp size = 0.2))
# Add margins of 1cm all around plot:
+ theme(plot.margin = unit(c(1, 1, 1, 1), "cm"))
pp
# Shading of specified area: (useful for confidence intervals)
+ geom_ribbon(aes(ymin = vNormPDF * 0.9, ymax = vNormPDF * 1.1, x = vx), alpha = 0.2,
pp fill = "blue")
# Note. can add several layers:
+ geom_ribbon(aes(ymin = vNormPDF * 0.9, ymax = vNormPDF * 1.1, x = vx), alpha = 0.2,
pp fill = "blue") + geom_ribbon(aes(ymin = vNormPDF * 0.8, ymax = vNormPDF * 1.2,
x = vx), alpha = 0.2, fill = "blue")
# Annotate text at given (x,y)-point:
+ annotate("text", label = "abc", x = 2, y = 0.07)
pp # (could specify hjust and vjust for further customization)
# note: with date on x-axis, need to put wrapper 'as.Date()' around date:
+ annotate("text", label = "abc", x = as.Date("2003-01-31"), y = 0.1)
ppTS
# Flip axes:
+ coord_flip() pp
# Note that the options can be combined, and they must be if they belong to the
# same option-command; e.g. don't write
theme(panel.border = element_blank())
# and then
theme(axis.title.x = element_text(size = 12))
# but write
theme(panel.border = element_blank(), axis.title.x = element_text(size = 12))
# Note that this can create issues for the use of a list of pre-defined
# options. Sometimes when the same option is called several times, the order
# how they are called can change the outcome.
# Taken together, nice single-line plots:
= list(theme_bw(), theme(aspect.ratio = 5/8, axis.title.x = element_text(size = 12),
plotOptions axis.title.y = element_text(size = 12), axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
plot.title = element_text(family = "sans", size = 14)))
ggplot(data = mData) + plotOptions + geom_line(aes(x = xvar, y = norm), color = "blue",
size = 0.9) + labs(x = sXlab, y = "", title = sTitle) + scale_x_continuous(breaks = seq(-1,
8, 3), limits = c(-1, 8)) + scale_y_continuous(limits = c(0, 0.25)) + geom_ribbon(aes(ymin = vNormPDF *
0.9, ymax = vNormPDF * 1.1, x = vx), alpha = 0.2, fill = "blue") + geom_ribbon(aes(ymin = vNormPDF *
0.8, ymax = vNormPDF * 1.2, x = vx), alpha = 0.2, fill = "blue")
ggplot(mDataTS) + plotOptions + geom_line(aes(x = date, y = GDPgrowth), color = "blue",
size = 0.9) + labs(x = "", y = "", title = "US GDP Growth (quarterly, annualized)") +
scale_y_continuous(breaks = seq(-30, 30, by = 10)) + scale_x_date(date_labels = "%Y",
breaks = seq(ymd("2000/01/01"), ymd("2025/01/01"), by = "5 years"), limits = as.Date(c("2000-01-01",
"2025-01-01")))
= data.frame(norm1 = mNormDraws[, 1], norm2 = mNormDraws[, 2]) mData
# A scatter plot is created analogously to a line plot. We just use the option
# geom_point rather than geom_line.
# Based on two vectors:
ggplot() + geom_point(aes(x = mNormDraws[, 1], y = mNormDraws[, 2]))
# Based on data frame:
ggplot(data = mData) + geom_point(aes(x = norm1, y = norm2))
# Change marker appearance:
= ggplot(data = mData) + geom_point(aes(x = norm1, y = norm2), size = 1.5, shape = 4,
pp1 color = "blue")
# see http://sape.inf.usi.ch/quick-reference/ggplot2/shape for shapes available
# Some markers (21 - 25) allow for filling:
= ggplot(data = mData) + geom_point(aes(x = norm1, y = norm2), size = 1.5, shape = 21,
pp2 color = "blue", fill = "blue")
# Can of course add all other options from above, e.g.
= pp1 + plotOptions + labs(x = "my x-lab", y = "my y-lab", title = "my nice scatter")
pp1
# Add labels to scatter-points:
= paste("a", 1:nrow(mData), sep = "") # some arbitrary labels
vMyLabels
+ geom_text(aes(x = norm1, y = norm2, label = vMyLabels), size = 2, color = "purple",
pp1 hjust = 1, vjust = -0.5)
# add labels only to some points (e.g. only first 4):
5:length(vMyLabels)] = "" # can also set to NA
vMyLabels[+ geom_text(aes(x = norm1, y = norm2, label = vMyLabels), size = 2, color = "purple",
pp1 hjust = 1, vjust = -0.5)
# Add lines to plot:
# line with specified intercept and slope (e.g. could add 45-degree line):
+ geom_abline(intercept = 3, slope = 2, size = 1, color = "black", linetype = "dotted")
pp1 # vertical line:
+ geom_vline(xintercept = 2, size = 1, color = "black", linetype = "dotted")
pp1 # regression line:
+ geom_smooth(aes(x = norm1, y = norm2), method = lm, se = FALSE, size = 1, color = "black",
pp1 linetype = "dotted")
# regression line with 90% confidence bands:
+ geom_smooth(aes(x = norm1, y = norm2), method = lm, se = TRUE, level = 0.9,
pp1 size = 1, color = "black", linetype = "dotted")
# (note: single linear regression with homoskedasticity...)
# Taken together, nice scatter plot:
ggplot(data = mData) + geom_point(aes(x = norm1, y = norm2), size = 2, shape = 21,
color = "blue", fill = "blue") + geom_smooth(aes(x = norm1, y = norm2), method = lm,
se = FALSE, size = 0.5, color = "darkblue", linetype = "solid") + labs(x = "first Normal",
y = "second Normal", title = "Scatter of Bivariate Normal Draws") + plotOptions
# Rather than supplying several vectors, it is easier to supply a dataframe
# with all the lines one wishes to plot:
= data.frame(xvar = vx, norm = vNormPDF, chi2 = vChi2PDF, t = vtPDF)
mData
# Note: the names of the different variables will be the line-labels. They are
# a bit more difficult to change than the labels of the x- and y-axes.
# TWO LINES
ggplot(mData, aes(x = xvar)) + geom_line(aes(y = norm)) + geom_line(aes(y = t))
# we should distinguish the different lines by color or shape or linetype; e.g.
ggplot(mData, aes(x = xvar)) + geom_line(aes(y = norm), color = "blue") + geom_line(aes(y = t),
color = "red")
# MANY (>=3) LINES
# In this case, it's best to first put the dataframe into long format, (as
# there could be many lines, and adding each one separately like above would be
# cumbersome)
library(reshape2)
= melt(mData, id.vars = "xvar")
mDat
# the different variables are now distinguished in the column 'variable', and
# their values are shown in the column 'value'
# Plot the three lines, distinguishing them by color:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
size = 0.9) # could specify linetype (for all lines)
# distinguish by linetype:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(linetype = variable),
size = 0.9) # could specify color (for all lines)
# distinguish by both:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable,
linetype = variable))
# distinguish by line-color, point-color and point-shapes:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
size = 0.5) + geom_point(aes(color = variable, shape = variable), size = 1)
# Of course, we can also just add the same markers for all lines:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
size = 0.9) + geom_point(aes(color = variable), shape = 1)
# Set linetypes manually:
= ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(linetype = variable),
ppL size = 0.9) + plotOptions
+ scale_linetype_manual(values = c("solid", "dashed", "dotdash"))
ppL # see http://sape.inf.usi.ch/quick-reference/ggplot2/linetype for linetypes
# available note: can specify linetype also as number (1 is solid, 2 is dashed,
# etc.)
# Note: provided vector may be longer than the number of lines you have.
# That's why you can create a long vector with your preferred linetypes and
# then just call this vector, even for plots with differing number of lines.
# When setting linetypes manually, can specify/change line labels:
+ scale_linetype_manual(values = c("solid", "dashed", "dotdash"), labels = c("mycurve1",
ppL "mycurve2", "mycurve3"))
# Set colors manually:
= ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
ppC size = 0.9) + plotOptions
+ scale_color_manual(values = c("blue", "red", "purple"))
ppC
# again can specify/change line labels:
+ scale_color_manual(values = c("blue", "red", "purple"), labels = c("mycurve1",
ppC "mycurve2", "mycurve3"))
# more choice of colors is given by package 'colorspace':
library(colorspace)
# take three colors (evenly spaced) from palette 'Blues 2':
= sequential_hcl(3, palette = "Blues 2") vMyColors
+ scale_color_manual(values = vMyColors) ppC
# to avoid too light first color, construct a vector with more colors, and then
# take the few ones you need from the beginning and middle of the colorspace:
= sequential_hcl(7, palette = "Blues 2")[c(1, 3, 5)] vMyColors
+ scale_color_manual(values = vMyColors)
ppC
# Similarly, we can specify the colors of the markers as well as lines:
= ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
ppCM size = 0.5) + geom_point(aes(color = variable), size = 1) + plotOptions
+ scale_color_manual(values = vMyColors)
ppCM
# Set point-shapes manually:
= ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
ppM size = 0.5) + geom_point(aes(color = variable, shape = variable), size = 1) +
plotOptions
+ scale_shape_manual(values = c(7, 9, 24))
ppM
+ scale_shape_manual(values = c(7, 9, 24), labels = c("mycurve1", "mycurve2",
ppM "mycurve3"))
# see http://sape.inf.usi.ch/quick-reference/ggplot2/shape for shapes available
# Legend options:
# Take plot for illustration:
= ppC + scale_color_manual(values = vMyColors)
ppC
# Remove legend title:
+ theme(legend.title = element_blank())
ppC
# Remove legend altogether:
+ theme(legend.position = "none")
ppC
# Change position and direction: (by default, legend is outside the plot
# (middle-right) and vertical)
# put to middle-left:
+ theme(legend.position = "left")
ppC
# put to bottom-right:
+ theme(legend.justification = "bottom")
ppC
# make horizontal and put to bottom of plot:
+ theme(legend.direction = "horizontal", legend.position = "bottom", legend.justification = "left")
ppC
# put inside the plot:
+ theme(legend.position = c(0.8, 0.75))
ppC # ( top right is (1,1), top left is (0,1))
# In some cases, it's hard to put the legend inside the plot, but without
# crossing the lines/markers. e.g.
+ theme(legend.position = c(0.5, 0.5))
ppC # Unfortunately, one cannot make the legend background fully transparent. Make
# legend-text transparent at least:
+ theme(legend.position = c(0.5, 0.5), legend.background = element_rect(fill = alpha("white",
ppC 0)))
# Taken together, nice multiple-line plot:
ggplot(mDat, aes(x = xvar, y = value, group = variable)) + geom_line(aes(color = variable),
size = 0.9) + plotOptions + scale_color_manual(values = vMyColors) + theme(legend.title = element_blank(),
legend.position = c(0.8, 0.75)) + labs(x = "", y = "", title = "Three Important Distributions")
# Analogous to above, just with geom_point instead of geom_line option.
# Create dataframe with variables (vectors) to plot:
= as.data.frame(mNormDraws)
mData colnames(mData) = c("norm1", "norm2", "norm3")
# Melt to long format:
= melt(mData, id.vars = "norm1")
mDat
# Note: variable taken here as id.var will be on x-axis. In a scatter plot,
# this could in principle be any variable. (In contrast, under the line plot,
# it was clear what the x-axis is.)
# Plot, distinguishing variables by markers:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(shape = variable),
size = 2, color = "blue")
# Plot, distinguishing variables by colors:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable),
size = 2)
# Plot, distinguishing variables by both:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable,
shape = variable), size = 2)
# Add options:
= ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable,
ppMS shape = variable), size = 2) + plotOptions
# Specify marker-shapes and colors manually:
+ scale_color_manual(values = vMyColors) + scale_shape_manual(values = c(1,
ppMS 4))
# Also re-label variables:
+ scale_color_manual(values = vMyColors, labels = c("1st Normal", "2nd Normal")) +
ppMS scale_shape_manual(values = c(1, 4), labels = c("1st Normal", "2nd Normal"))
# (note: just doing it in one of the two options will produce two legends)
# Note: when distinguishing by color, by default, R will choose a round marker
# that is filled (as above). e.g.
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable),
size = 2)
# When you change the markers manually, like here, then they will not be
# filled. To add filling, must let filling also change by variable. e.g. this
# produces unfilled triangles:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable),
size = 2, shape = 2)
# to get filled triangles:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable,
fill = variable), size = 2, shape = 24)
# Nice, final scatter-plot with multiple variables:
ggplot(mDat, aes(x = norm1, y = value, group = variable)) + geom_point(aes(color = variable),
size = 2) + scale_color_manual(values = c("blue", "red"), labels = c("2nd Normal",
"3rd Normal")) + plotOptions + scale_y_continuous(limits = c(0, 12), breaks = c(0,
4, 8, 12)) + theme(legend.title = element_blank(), legend.position = c(0.8, 0.15)) +
labs(x = "1st Normal", y = "", title = "Multivariate Normal Draws (k=3)")
# Above, we plotted three variables in a single scatter plot, i.e. we plotted
# two variables against a third.
# In the same way, we could have plotted, say, 5 variables by plotting 4
# variables against a fifth one, yielding 4 'clouds' of points.
# Sometimes, we indeed want to plot two variables, but distinguish the points
# in the scatter by a third variable. The number of 'clouds' of points is then
# determined by how many values this third variable has.
# DISCRETE CASE
# Plot hp against hp, with different colors for different numbers of cylinders:
ggplot(mtcars, aes(x = mpg, y = hp, group = cyl)) + geom_point(aes(color = cyl))
# Note that R thinks of cyl as a continuous variable, even though it takes only
# three values. To fix that, code it as type 'character' rather than
# 'numeric':
= mtcars # create new dataframe..
mtcars2 $cyl = as.character(mtcars2$cyl) #.. in which cyl is of type character mtcars2
= ggplot(mtcars2, aes(x = mpg, y = hp, group = cyl)) + geom_point(aes(color = cyl))
ppMS
# We can change the colors as above; e.g.
+ scale_color_manual(values = vMyColors, labels = c("4 cyl.", "6 cyl.", "8 cyl."))
ppMS
# Also, could of course distinguish points for different numbers of cylinders
# by markers rather than colors (or both).
# In this case, cyl really needs to be of type character, because, in contrast
# to colors, markers cannot vary continuously.
# CONTINUOUS CASE
# Plot hp against mpg, and distinguish points by color based on value of qsec:
= ggplot(mtcars, aes(x = mpg, y = hp, group = qsec)) + geom_point(aes(color = qsec))
ppMS
# To specify colors, need to use scale_color_gradient rather than
# scale_color_manual:
+ scale_color_gradient(low = "white", high = "blue")
ppMS
# The analogous holds for the filling (if marker-shapes are specified that can
# be filled):
= ggplot(mtcars, aes(x = mpg, y = hp, group = qsec)) + geom_point(aes(color = qsec),
ppMS shape = 21) + scale_color_gradient(low = "white", high = "blue") + scale_fill_gradient(low = "white",
high = "blue")
# Nice, final grouped scatter-plot:
ggplot(mtcars, aes(x = mpg, y = hp, group = qsec)) + geom_point(aes(color = qsec,
fill = qsec), size = 3, shape = 21) + scale_color_gradient(low = "white", high = "blue") +
scale_fill_gradient(low = "white", high = "blue") + plotOptions + scale_y_continuous(limits = c(0,
300), breaks = seq(0, 300, by = 100)) + theme(legend.position = "none") + labs(title = "A Nice Grouped-Scatter Plot")
# SINGLE HISTOGRAM
# Based on single vector:
ggplot() + geom_histogram(aes(x = mNormDraws[, 2]))
# Based on column in dataframe:
ggplot(data = mData) + geom_histogram(aes(x = norm2))
# note that here we only need to specify the x-variable in aes()
# Change y-axis from frequency to density:
ggplot() + geom_histogram(aes(x = mNormDraws[, 2], y = ..density..))
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..))
# Specify number of bins directly:
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..), bins = 10)
# Specify number of bins with binwidth:
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..), binwidth = 0.5)
# Change color and filling:
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..), color = "black",
fill = "grey", alpha = 0.2)
# Add line: (here: add actual pdf of N(5,1))
= dnorm(vx, 5, 1)
vThisNormPDF ggplot() + geom_histogram(aes(x = mNormDraws[, 2], y = ..density..)) + geom_line(aes(x = vx,
y = vThisNormPDF))
# Could also do this with dataframe, but then we need same number of draws as
# 'elements' in the vector that represents the line (here: function
# evaluations)
# MULTIPLE HISTOGRAMS IN SINGLE PLOT
ggplot(data = mData) + geom_histogram(aes(x = norm2, y = ..density..), color = "blue",
fill = "blue", alpha = 0.2) + geom_histogram(aes(x = norm3, y = ..density..),
color = "red", fill = "red", alpha = 0.2)
# A separate histogram of one variable for each value of some third variable:
# e.g. a separate histogram of hp for each value of cyl, each with different
# color:
ggplot(mtcars2, aes(x = hp, fill = cyl)) + geom_histogram(aes(x = hp, y = ..density..,
color = cyl), position = "identity")
# Should make bars a bit transparent:
ggplot(mtcars2, aes(x = hp, fill = cyl)) + geom_histogram(aes(x = hp, y = ..density..,
color = cyl), position = "identity", alpha = 0.6)
# Taken together, nice histogram plots:
= dnorm(vx, 5, 1)
vThisNormPDF
ggplot() + plotOptions + geom_histogram(aes(x = mNormDraws[, 2], y = ..density..),
color = "blue", fill = "blue", alpha = 0.2) + geom_line(aes(x = vx, y = vThisNormPDF),
color = "purple") + labs(x = "", y = "", title = "Histogram of N(5,1) draws") +
theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())
ggplot(mtcars2, aes(x = hp, fill = cyl)) + plotOptions + geom_histogram(aes(x = hp,
y = ..density.., color = cyl), position = "identity", alpha = 0.6) + theme(legend.title = element_blank(),
legend.position = c(0.8, 0.75)) + labs(x = "", y = "", title = "Histogram of hp by cyl.") +
theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())
# Compute mean of Normal draws for each column of mNormDraws (dimension of
# multiv. Normal):
= data.frame(norms = c(1, 2, 3), means = apply(mNormDraws, 2, mean)) mDataBP
# Illustrate:
ggplot(data = mDataBP, aes(x = norms, y = means)) + geom_bar(stat = "identity")
# Adjust appearance of bars:
ggplot(data = mDataBP, aes(x = norms, y = means)) + geom_bar(stat = "identity", color = "blue",
fill = "blue", alpha = 0.2)
# To change x-axis, it is best to first make R realize that it is a discrete
# rather than continuous variable:
$norms = as.character(mDataBP$norms) mDataBP
ggplot(data = mDataBP, aes(x = norms, y = means)) + geom_bar(stat = "identity", color = "blue",
fill = "blue", alpha = 0.2) + scale_x_discrete(labels = c("1st", "2nd", "3rd"))
# Taken together, nice bar-plot:
ggplot(data = mDataBP, aes(x = norms, y = means)) + plotOptions + geom_bar(stat = "identity",
color = "blue", fill = "blue", alpha = 0.2) + scale_x_discrete(labels = c("1st",
"2nd", "3rd")) + labs(x = "", y = "", title = "Mean of Normal Draws")
# Note: when producing a latex document with these plots, it is a better idea
# to produce the plots individually and assemble them in latex. (more
# flexibility)
# Three plots for illustration:
= list(plotOptions, ylim(c(0, 6)), scale_x_continuous(limits = c(-2,
plotOptionsCommonHere 10), breaks = seq(-2, 10, by = 2)))
= ggplot(data = mData) + plotOptionsCommonHere + geom_histogram(aes(x = norm1),
pp1 color = "blue", fill = "blue", alpha = 0.2) + labs(x = "", y = "", title = "norm1")
= ggplot(data = mData) + plotOptionsCommonHere + geom_histogram(aes(x = norm2),
pp2 color = "blue", fill = "blue", alpha = 0.2) + labs(x = "", y = "", title = "norm2")
= ggplot(data = mData) + plotOptionsCommonHere + geom_histogram(aes(x = norm3),
pp3 color = "blue", fill = "blue", alpha = 0.2) + labs(x = "", y = "", title = "norm3")
# It might be useful to create an empty plot: (e.g. when creating a 3x3 figure,
# but have 8 plots)
ggplot() + theme_void()
# FIRST APPROACH
library(gridExtra)
library(ggpubr)
# (need both packages)
# Create 1 x 2 plot:
grid.arrange(pp1, pp2, ncol = 2)
# To save:
ggsave("myplot1.pdf", arrangeGrob(pp1, pp2, ncol = 2))
# Add title:
= text_grob("Draws from two Normals", size = 14, face = "bold", vjust = 5)
title grid.arrange(pp1, pp2, ncol = 2, top = title)
# Can specify widths:
grid.arrange(pp1, pp2, ncol = 2, top = title, widths = c(2, 2))
# Can also specify ordering of plots (useful when many more than just 2 plots
# are added):
grid.arrange(pp1, pp2, ncol = 2, top = title, widths = c(2, 2), layout_matrix = rbind(c(1,
2)))
# All of these options need to be specified also in 'arrangeGrob()' when saving
# the plot.
# Note: it would make sense to delete y-axis ticks on the right plot as they
# are common to both and already shown in the left plot.
# SECOND APPROACH
library(patchwork)
# Create 1 x 2 plot as above:
| pp2)
(pp1
# Here we have the advantage that we can save the resulting plot and call it
# later:
= (pp1 | pp2)
ppTotal
ppTotal
# Add title:
+ plot_annotation(title = "Draws from two Normals")
ppTotal
# With this approach, we do not need a special command for saving the plots,
# but can use 'ggsave()'. However, one needs to create a large plot, because
# with many plots, default size window is usually too small:
ggsave("myplot2.pdf", width = 40, height = 25, units = "cm")
# Note: changing the width and height will also change the spacing between
# figures.
# 2 x 3 plots:
| pp1 | pp1)/(pp1 | pp1 | pp1)
(pp1
# Can have even more flexible plot-arrangement:
= "
layout AABB
AABB
EEEE
EEEE
"
+ pp1 + pp1 + plot_layout(design = layout)
pp1 ggsave("myplot2.pdf", width = 40, height = 25, units = "cm")
# For plots with legends: If all plots have the same legend, you can collect
# all them into one single legend using the following commands:
+ plot_layout(guides = "collect") + theme(legend.position = "bottom") ppTotal
# There are many more things that ggplot2 can do. For example, to create a
# grid-plot, use 'geom_raster()' or 'geom_rect()', to create an area-plot, use
# 'geom_area()'. A google search usually shows quick how to use these
# commands.
# A final, important note: when iteratively adding layers to a ggplot in a loop
# (e.g. adding one line at a time to an existing plot), you need to use double
# quotation marks. Otherwise, ggplot creates the plot only once the loop is
# finished, which may lead to unexpected behavior. See this post:
# https://stackoverflow.com/questions/52671480/adding-multiple-ribbons-in-ggplot.