- Choosing the "right" plot
- Best practices and deadly sins of data visualization
- How to construct plots in (base) R
- Tips & tricks for efficient visualization
Stefan Hartmann
experiences with R and R studio?
If you happen to have a laptop with you, you can download the toy datasets here: https://tinyurl.com/neuchaRtel
or you can find them on Github along with the slides: https://github.com/empirical-linguistics/dataviz_neuchatel
You can view the entire presentation here: https://empirical-linguistics.github.io/dataviz_neuchatel/
this <- is(what, code) {
looks, like
}
this <- is(what, code) {
looks, like
}
this <- is(what, code) {
looks, like
}
For yourself
For others
"The essence of a graphic display is that a set of numbers having both magnitudes and an order are represented by an appropriate visual metaphor - the magnitude and order of the metaphorical representation match the numbers." (Wainer 1984: 139)
# Toy data shared by the scatterplot examples.
x <- c(1, 3, 3, 4, 7, 8)
y <- c(1, 1, 3, 9, 8, 5)

# Bare scatterplot with default settings.
plot(x, y)

# Add axis labels and a title.
plot(x, y, main = "main", xlab = "xlab", ylab = "ylab")

# Color the points by name ...
plot(x, y, main = "main", xlab = "xlab", ylab = "ylab", col = "red")

# ... or by RGB components, here red at 50% opacity.
plot(
  x, y,
  main = "main", xlab = "xlab", ylab = "ylab",
  col = rgb(1, 0, 0, alpha = 0.5)
)
rainbow
, heat.colors
, terrain.colors
RColorBrewer
package with a number of color-blind friendly palettes (argument colorblindFriendly = TRUE)
plot(x, y, xlab = "xlab", ylab = "ylab", main = "main", col = "red")
# Annotate the current plot at (5, 5); "\n" starts a second label line.
# The duplicated word in the label ("the the") is corrected.
text(x = 5, y = 5, labels = "Note the added text! \n In a different color!",
     col = "blue")
# pch selects the point character; 20 is a small filled dot.
plot(x, y, xlab = "xlab", ylab = "ylab", main = "main", col = "red",
pch = 20)
# pch also accepts a single character (here U+263A); cex = 2 doubles the
# symbol size.
plot(x, y, xlab = "xlab", ylab = "ylab", main = "main", col = "red",
pch = "\u263A", cex = 2)
# Default margins, given as c(bottom, left, top, right) in lines of text.
par(mar = c(5, 4, 4, 2) + 0.1)
# Save only the settable graphics parameters: a plain par() also returns
# read-only entries (e.g. "cin", "din"), and handing those back to par()
# raises a warning for each of them.
par_cur <- par(no.readonly = TRUE)
par(mar = c(1, 1, 1, 1)) # shrink the margins for the overview plot
# Show point characters 1 to 20 in one row, with their numbers underneath.
plot(1:20, rep(10, 20), pch = 1:20, cex = 1.5,
     xlab = "", ylab = "", xaxt = "n", yaxt = "n")
text(1:20, rep(8.5, 20), labels = 1:20)
par(par_cur) # restore the saved graphics parameters
# xlim / ylim fix the axis ranges; here both axes run from 0 to the maximum.
plot(x, y, xlab = "xlab", ylab = "ylab", main = "main", col = "red",
xlim = c(0, max(x)),
ylim = c(0, max(y)))
# Same plot with grid lines added on top. nx / ny are the numbers of grid
# cells in x and y direction.
# NOTE(review): nx = 0 is presumably meant to suppress the vertical lines;
# the documented value for "no lines in this direction" is NA - confirm.
plot(x, y, xlab = "xlab", ylab = "ylab", main = "main", col = "red",
xlim = c(0, max(x)), ylim = c(0, max(y)))
grid(nx = 0, ny=10)
# Incremental example: the same plot four times, enabling one more cex
# argument at each step (the commented-out arguments are the ones still to
# come).

# Step 0: all size arguments commented out, i.e. default sizes.
plot(x, y,
# cex = 2,
# cex.axis = 2,
xlab = "xlab", ylab = "ylab", # cex.lab = 2,
main = "main", # cex.main = 2,
xlim = c(0, max(x)), ylim = c(0, max(y)))
# Step 1: cex = 2 doubles the size of the plotting symbols.
plot(x, y,
cex = 2,
# cex.axis = 2,
xlab = "xlab", ylab = "ylab", # cex.lab = 2,
main = "main", # cex.main = 2,
xlim = c(0, max(x)), ylim = c(0, max(y)))
# Step 2: cex.axis = 2 additionally doubles the axis annotation.
plot(x, y,
cex = 2,
cex.axis = 2,
xlab = "xlab", ylab = "ylab", # cex.lab = 2,
main = "main", # cex.main = 2,
xlim = c(0, max(x)), ylim = c(0, max(y)))
# Step 3: cex.lab and cex.main also double the axis labels and the title.
plot(x, y,
cex = 2,
cex.axis = 2,
xlab = "xlab", ylab = "ylab", cex.lab = 2,
main = "main", cex.main = 2,
xlim = c(0, max(x)), ylim = c(0, max(y)))
# First pass: red dots plus a second series added with points().
plot(x, y, pch = 20, col = "red")
points(c(4, 5, 6), c(2, 6, 8), pch = 2, col = "green")

# Second pass: the same figure, now with a legend explaining both series.
plot(x, y, pch = 20, col = "red")
points(c(4, 5, 6), c(2, 6, 8), pch = 2, col = "green")
legend(
  "topleft",
  legend = c("red dots", "green triangles"),
  col = c("red", "green"), # colors, in the order of the labels
  pch = c(20, 2),          # matching point characters
  inset = c(0.01, 0.01)    # distance from the margins
)
lm
function for generating the model and the abline
function, which adds straight lines to a plot
plot(x, y)
# Fit a linear regression of y on x and add the fitted line to the plot.
model <- lm(y ~ x)
abline(model)
# Smoothers: lowess() computes a smoothed curve that lines() adds to the plot ...
plot(x,y, main = "lowess")
lines(lowess(x, y))
# ... while scatter.smooth() draws the points and a loess curve in one call.
scatter.smooth(x,y, main = "loess")
# type = "l" draws lines only, type = "b" both points and lines.
plot(x, y, type = "l")
plot(x, y, type = "b")
lty
parameter:
plot(x, y, type = "b", lty = 2)
lines(x = c(2:7), y = c(4:9), lty = 3, col = "darkgrey")
read.csv
to read in the dataframe height_weight.csv
hw <- read.csv("examples/height_weight.csv")
# Scatterplot of weight against height, with a dashed grey regression line.
plot(
  hw$height, hw$weight,
  main = "Height~Weight", xlab = "Height", ylab = "Weight"
)
model_hw <- lm(hw$weight ~ hw$height)
abline(model_hw, lty = 2, col = "darkgrey")
read.csv
to read in the dataframe Pokemon.csv
col
parameter to show the color of each Pokémon, as indicated in the "Color" column.
pch
parameter to show the form of each Pokémon, as indicated in the "Form" column.
pch
part, first look what happens when you try as.numeric(pok$Form)
# read data
pok <- read.csv("examples/Pokemon.csv")
# plot
# Since R 4.0 read.csv() returns text columns as character vectors, and
# as.numeric() on text yields only NAs (with a warning). Going through
# factor() gives one integer code per distinct form in every R version.
form_codes <- as.integer(factor(pok$Form))
# NOTE(review): col is passed as read from the file; this assumes the Color
# column holds valid R color names - confirm against the CSV.
plot(pok$Height_m, pok$Weight_kg, col = pok$Color, pch = form_codes,
     xlab = "Height", ylab = "Weight", main = "Height~Weight, Pokémon")
# regression line of weight on height
model_pok <- lm(pok$Weight_kg ~ pok$Height_m)
abline(model_pok)
barplot()
function is height
# Heights of the two bars, one value per bar.
bar_heights <- c(50, 80)
# The vector is handed to barplot() as its `height` argument.
barplot(height = bar_heights)
names.arg
:
barplot(bar_heights, names.arg = c("stuff", "more\nstuff"))
# Add a title and axis labels; the cex.* arguments double the size of the
# title, the axis labels, the axis annotation and the bar names.
barplot(bar_heights, names.arg = c("stuff", "more\nstuff"),
main = "I'm a barplot", xlab = "I'm the x label", ylab = "I'm the y label",
cex.main = 2, cex.lab = 2, cex.axis = 2, cex.names = 2)
space
argument defines the space between bars, default is 0.2 if height
is a vector
space
argument defines the space between bars, default is 0.2 if height
is a vector
space
argument defines the space between bars, default is 0.2 if height
is a vector
space
argument defines the space between bars, default is 0.2 if height
is a vector
space
argument defines the space between bars, default is 0.2 if height
is a vector
space = 0
# Bars show relative frequencies. With space = 0 and two bars the labels are
# placed at x = 0.5 and 1.5, slightly below the bar tops, and print the
# absolute counts.
rel_freq <- bar_heights / sum(bar_heights)
barplot(rel_freq, space = 0, names.arg = c("stuff", "more\nstuff"))
text(x = 0:1 + 0.5, y = rel_freq - 0.05, labels = bar_heights)
0.5:1.5
yields {0.5,1.5}, 0.2 * 1:2
yields {0.2, 0.4} (= 0.2 * 1, 0.2 * 2)
barplot(bar_heights / sum(bar_heights), # get relative frequencies
names.arg = c("stuff", "more\nstuff"))
text(x = (0.5:1.5) + (0.2 * 1:2),
y = (bar_heights / sum(bar_heights)) - 0.05,
labels = bar_heights)
# 2 x 4 matrix, filled column by column; each column becomes one bar (or one
# group of bars).
bar_matrix <- matrix(c(2, 4, 5, 4, 3, 3, 7, 6), nrow = 2)
bar_matrix
## [,1] [,2] [,3] [,4]
## [1,] 2 5 3 7
## [2,] 4 4 3 6
# Default: the two rows are stacked within each column's bar.
barplot(bar_matrix)
# beside = TRUE draws them next to each other instead. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
barplot(bar_matrix, beside = TRUE)
Hands-on example: action-sentence compatibility task
read.csv
head
, str
, and View
direction == "distractor"
# Read the data and keep only the rows whose direction is not "distractor".
rt <- read.csv("examples/actionsentence.csv", fileEncoding = "UTF8")
rt <- subset(rt, direction != "distractor")
# Reshape to long format: ID, sentence and direction stay as identifier
# columns, all remaining columns are stacked into "variable" / "value".
# (The code below repeats because the slides reveal it step by step.)
library(reshape2)
rt2 <- melt(rt, id.vars = c("ID", "sentence", "direction"))
library(reshape2)
rt2 <- melt(rt, id.vars = c("ID", "sentence", "direction"))
# same result but with (imho) more complicated syntax: "gather" from tidyr package
# NOTE(review): tidyr documents gather() as superseded by pivot_longer().
library(tidyr)
rt3 <- gather(rt, variable, value, -ID, -sentence, -direction)
all(rt2==rt3) # checks if all values are identical
library(reshape2)
rt2 <- melt(rt, id.vars = c("ID", "sentence", "direction"))
# same result but with (imho) more complicated syntax: "gather" from tidyr package
library(tidyr)
rt3 <- gather(rt, variable, value, -ID, -sentence, -direction)
all(rt2==rt3) # checks if all values are identical
## [1] TRUE
# get mean values for the "away" and "toward" subsets:
rt2_away <- subset(rt2, direction == "away")
rt2_toward <- subset(rt2, direction == "toward")
mean_away <- mean(rt2_away$value)
mean_toward <- mean(rt2_toward$value)
# combine both to one vector
rt_means <- c(mean_away, mean_toward)
rt_means
## [1] 1867.9 1789.5
# one bar per direction, labelled in the same order as rt_means
barplot(rt_means, names.arg = c("away", "toward"))
t.test()
function
t_away <- t.test(rt2_away$value)
str(t_away)
## List of 9
## $ statistic : Named num 14.8
## ..- attr(*, "names")= chr "t"
## $ parameter : Named num 29
## ..- attr(*, "names")= chr "df"
## $ p.value : num 4.65e-15
## $ conf.int : num [1:2] 1610 2126
## ..- attr(*, "conf.level")= num 0.95
## $ estimate : Named num 1868
## ..- attr(*, "names")= chr "mean of x"
## $ null.value : Named num 0
## ..- attr(*, "names")= chr "mean"
## $ alternative: chr "two.sided"
## $ method : chr "One Sample t-test"
## $ data.name : chr "rt2_away$value"
## - attr(*, "class")= chr "htest"
arrows
function to plot confidence intervals
arrows
is usually used to draw arrows (duh!)
plot(c(1:10), c(1:10), type = "n")
arrows(x0 = 2, x1 = 4,y0 = 5, y1 = 5)
arrows(x0 = 8, x1 = 8, y0 = 5, y1 = 9,
angle = 90, # set angle to 90 degrees = flat arrow head
code = 3) # draw arrow head on BOTH ends of the "arrow"
# 95% confidence intervals of the two means (default level of t.test()).
ci_away <- t.test(rt2_away$value)$conf.int
ci_toward <- t.test(rt2_toward$value)$conf.int
# barplot() returns the bar midpoints; using them instead of hard-coded
# x positions keeps the error bars centred if width or spacing changes.
bar_mid <- barplot(rt_means, names.arg = c("away", "toward"))
# The upper CI limits lie above the bars, so drawing outside the plot region
# is allowed temporarily; the previous setting is restored afterwards.
old_par <- par(xpd = TRUE)
# Flat "arrow heads" (angle = 90) on both ends (code = 3) give error bars.
arrows(x0 = bar_mid, x1 = bar_mid,
       y0 = c(ci_away[1], ci_toward[1]), y1 = c(ci_away[2], ci_toward[2]),
       angle = 90, code = 3, length = 0.2)
par(old_par)
avengers.csv
barplot()
needs a matrix as input. This is a bit tricky - toy around with matrix()
to create a matrix that looks like this (without the row and column names)
## Avengers1 Avengers2 Avengers3
## Iron Man 37 45 18
## Thor 25 14 14
avengers[order(avengers$Character),]
you can sort them by the "Character" column.
# read in data
avengers <- read.csv("examples/avengers.csv")
# sort by "Character" column
avengers <- avengers[order(avengers$Character), ]
# Fill the matrix row by row, three values per row.
# NOTE(review): assumes three films per character, listed in film order, so
# that rows are characters and columns are films - confirm against the CSV.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
avengers_matrix <- matrix(avengers$Screentime, ncol = 3, byrow = TRUE)
# plot
barplot(avengers_matrix, beside = TRUE,
        names.arg = c("Avengers 1", "Avengers 2", "Avengers 3"),
        legend.text = c("Iron Man", "Thor"))
With the help of graphical parameters, you can change the appearance of your plot. See ?par
for more information. Some of the most important ones:
mar
: margins.
xpd
: If TRUE, you can plot outside the plot region. If FALSE (the default), plotting is confined to the plot region.
mfrow
: number of c(rows, columns)
bg
: background color (or no color if you choose "transparent"; default is white)
par()
to see the current settings (= the default values if you haven't changed them). This can come in handy if you want to change parameters and then restore the defaults afterwards.
par_default <- par()
and later on restore the current settings via par(par_default).
png()
, tiff()
, jpeg()
, or bmp()
instead.
# The png() device writes PNG data, so the file name gets a matching ".png"
# extension (it was "myplot.jpg").
png(filename = "myplot.png")
plot(x, y)
dev.off() # close the device so that the file is written
par(mfrow=c(nrow,ncol))
layout
# Layout matrix: figure 1 occupies the left column, figure 2 the two right
# columns; both span the two rows. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
m <- matrix(c(1, 2, 2,
              1, 2, 2),
            nrow = 2, byrow = TRUE)
layout(m)
barplot(c(20, 40), names.arg = c("a", "b")) # figure 1
plot(x = c(7, 9, 15, 24), y = c(8, 15, 40, 32), type = "l",
     ylab = "y", xlab = "x") # figure 2
par(par_cur) # back to the graphics parameters saved in par_cur
png()
(or the like) and dev.off()
:
# 7 x 7 inch PNG at 300 dpi. `units` is spelled out: the abbreviation `un`
# only works through partial argument matching.
png("myfile.png", width = 7, height = 7, units = "in", res = 300)
layout(m)
barplot(c(20, 40), names.arg = c("a", "b"))
plot(x = c(7, 9, 15, 24), y = c(8, 15, 40, 32), type = "l",
     ylab = "y", xlab = "x")
dev.off()
## quartz_off_screen
## 2
par(par_cur)
code snippets
Let's explore them in turn.
# Adds 2 to its argument; the value of the last expression is returned.
myfunction <- function(x) {
  x + 2
}
myfunction(40)
## [1] 42
# Point-and-line plot with a fixed house style: filled dots (pch = 20),
# dashed lines (lty = 2) of double width (lwd = 2).
#
# x, y: coordinates, as for plot().
# ...:  further arguments (e.g. main, xlab, xlim, col) handed on to plot().
#       pch, type, lty and lwd are fixed here, so supplying them again is an
#       error.
myplot <- function(x, y, ...) {
  plot(x, y, pch = 20, type = "b", lty = 2, lwd = 2, ...)
}
myplot(x = c(1,3,7), y = c(7,3,1))
snippet plotexp
	png("filename.png", width = 6.5, height = 5, units = "in", res = 300)
	dev.off()
snippet plot2pan
	png("filename.png", width = 13, height = 5, units = "in", res = 300)
	par(mfrow = c(1, 2))
	dev.off()