library(tidyverse)
library(gapminder)
##################################################################################
#################################### Examples ####################################
##################################################################################
################################# Cars Examples ##################################
data(mpg)
# Question of interest: Do cars with big engines use more fuel than cars with
# small engines?
# First, replicate the first ggplot from the pre-recorded videos
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy))
# We colored the points according to class in the lecture notes, but what if
# instead we just wanted all the points to be blue? Pass color as a plain
# argument of geom_point(), outside aes(). The string must use straight ASCII
# quotes: typographic ("smart") quotes are not string delimiters in R and
# make the script fail to parse.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy), color = "blue")
# We will further explore mapping aesthetics to a "constant," as opposed to
# mapping aesthetics to variables, in next week's pre-recorded videos.
# What's wrong here? (The color is deliberately placed inside aes() for this
# example; only the quote characters have been made valid R.)
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = "blue"))
# Hint: The color "blue" above doesn't convey information about a variable; it
# only changes the appearance of the plot (see online text).
# More sensible to color the points according to an additional variable: class
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = class))
################################ Sunspot Example #################################
# Load the built-in monthly sunspot time series
data(sunspot.month)
# Plain numeric copy of the series, so the column is an ordinary vector
spot_num <- as.numeric(sunspot.month)
# Running month index 1, 2, ..., n. seq_along() stays correct for an empty
# input, unlike 1:length(x), which would yield c(1, 0)
month <- seq_along(spot_num)
# Build the data frame directly from the two vectors rather than via cbind(),
# which would first coerce both columns through a matrix
sspot <- data.frame(spot_num = spot_num, month = month)
# Have created a data frame containing two variables: "spot_num", the monthly
# average number of sunspots, and "month", the running index of the month in
# which the average was taken.
# Can examine our data frame
head(sspot)
# Scatterplot of the sunspot data with ggplot and geom_point. The two calls
# below differ only in where the aesthetics are declared: globally in ggplot()
# versus locally in geom_point()
ggplot(data = sspot, mapping = aes(x = month, y = spot_num)) +
  geom_point()
ggplot(data = sspot) +
  geom_point(mapping = aes(x = month, y = spot_num))
# This is time series data, so connect the observations with geom_line()
ggplot(data = sspot, mapping = aes(x = month, y = spot_num)) +
  geom_line()
# Same line plot, now with cleaner axis labels and a title
ggplot(data = sspot, mapping = aes(x = month, y = spot_num)) +
  geom_line() +
  labs(
    x = "month",
    y = "monthly average sunspot number",
    title = "sunspot numbers 1749-1997"
  )
################################ Faceting Examples ################################
# Faceted scatterplot from the pre-recorded videos, minus geom_smooth()
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  geom_point() +
  facet_wrap(~ class, nrow = 2)
# We can facet on class because it is a categorical variable. What if instead
# we facet on a continuous variable?
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  geom_point() +
  facet_wrap(~ hwy, nrow = 2)
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  geom_point() +
  facet_wrap(~ displ, nrow = 2)
# Obviously, the above two examples are not very useful data visualizations!
# Finally, compare facet_wrap() (faceting on one variable) with facet_grid()
# (faceting on two variables)
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = class)) +
  facet_grid(drv ~ cyl)
# Different code can often produce the same result, so it is important to be
# able to both *write* code and *read* someone else's code. (Otherwise, you
# don't really understand the language!)
# Version 1: facet_grid() with "." on the row side of the formula, which
# suppresses faceting on the rows dimension and facets on cyl across columns
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(. ~ cyl)
# Version 2: facet_wrap() on cyl, with all panels placed in a single row
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~ cyl, nrow = 1)
############################# Layering Plots Examples ############################
# Add a log10-transformed GDP per capita column to the gapminder data
gapminder <- gapminder %>%
  mutate(log10GdpPercap = log10(gdpPercap))
# Base plot object: data and global aesthetics only, no geom yet
p <- ggplot(
  gapminder,
  aes(x = log10GdpPercap, y = lifeExp, color = continent)
)
# Point layer with alpha = 0.1, stored on its own and then added to the base
g1 <- geom_point(alpha = 0.1)
p1 <- p + g1
# Display the layers
p
g1
p1
# The same kind of plot written as a single expression, with alpha = 0.5 ...
ggplot(
  gapminder,
  aes(x = log10GdpPercap, y = lifeExp, color = continent)
) +
  geom_point(alpha = 0.5)
# ... and built from the stored base plot plus a second point layer
g2 <- geom_point(alpha = 0.5)
p2 <- p + g2
# Show the alpha = 0.1 and alpha = 0.5 versions side by side
library(gridExtra)
grid.arrange(p1, p2, nrow = 1)
# Building plots layer-by-layer can be helpful with more complicated plots, or when
# there are certain aesthetics that you want to keep "fixed" while you experiment
# with others (e.g., keeping aes(x=log10GdpPercap,y=lifeExp,color=continent) fixed
# as global aesthetics while changing the alpha values)
# Next week's lecture (or, rather, the pre-recorded videos for such) will further
# explore the concept of ggplot layers and why they are useful ways to construct
# data visualizations
######################## Further Geoms and Transformations #######################
data(diamonds)
head(diamonds)
# Bar chart with one bar per cut
ggplot(diamonds) +
  geom_bar(aes(x = cut))
# We can generally use geoms and stats interchangeably, since every geom is
# associated with a stat and vice versa
ggplot(diamonds) +
  stat_count(aes(x = cut))
# Can use the "fill" aesthetic to provide more information in the
# visualization, in this case the clarity of the diamonds.
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity))
# Further details given in the online text; impossible to cover it all!
# Interesting comparison between a bar chart and a coxcomb, aka "polar
# area diagram" or "Nightingale rose diagram".
bar <- ggplot(diamonds) +
  geom_bar(
    aes(x = cut, fill = cut),
    show.legend = FALSE,
    width = 1
  ) +
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL)
# Same stored plot under two coordinate systems
bar + coord_flip()
bar + coord_polar()
##################################################################################
################################### Exercises ####################################
##################################################################################
################################### Exercise 1 ###################################
# In pre-recorded videos we made the following scatterplot:
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = class))
# Redo the above scatterplot but using the aesthetic shape=class to plot
# different shapes for different kinds of cars:
# NOTE(review): ggplot2's default shape palette is understood to cover only 6
# discrete values, so expect a warning and unplotted points if class has more
# levels than that -- confirm when running.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, shape = class))
##################################################################################
##################################################################################
################################### Exercise 2 ###################################
# In pre-recorded videos we did the following faceted plot:
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~ class, nrow = 2)
# Repeat the above, but omit the nrow=2 argument to facet_wrap(), leaving the
# panel layout to facet_wrap()'s defaults
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~ class)
##################################################################################
##################################################################################
################################### Exercise 3 ###################################
# **Gapminder Example**
# The variable year is quantitative, but can still be used as a grouping
# variable. Do scatterplots of lifeExp versus log10GdpPercap with points
# colored by year. Add a scatterplot smoother with (i) no grouping variable
# and (ii) year as the grouping variable. Set the SE to FALSE for your
# smoothers.
gapminder <- gapminder %>%
  mutate(log10GdpPercap = log10(gdpPercap))
# For (i): no group aesthetic on the smoother
ggplot(gapminder, aes(x = log10GdpPercap, y = lifeExp, color = year)) +
  geom_point() +
  geom_smooth(se = FALSE)
# For (ii): year supplied as the group aesthetic of the smoother
ggplot(gapminder, aes(x = log10GdpPercap, y = lifeExp, color = year)) +
  geom_point() +
  geom_smooth(aes(group = year), se = FALSE)
##################################################################################
##################################################################################