Data Visualization Lab Key

Data Visualization Lab Key

Data Visualization Lab Key

Copyright By PowCoder代写 加微信 powcoder

library(readr)
library(ggplot2)
library(tidyr)
library(dplyr)
library(lubridate)
library(stringr)

Read in Data

Read in the charm city circulator dataset:

# Read the Charm City Circulator ridership CSV with readr.
# (String literals use plain ASCII quotes; typographic quotes do not parse in R.)
circ <- read_csv("data/Charm_City_Circulator_Ridership.csv")

## Rows: 1146 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: “,”
## chr (2): day, date
## dbl (13): orangeBoardings, orangeAlightings, orangeAverage, purpleBoardings,…
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Convert the date column from month/day/year text to Date values.
circ <- mutate(circ, date = mdy(date))
# Insert a "." in front of each measurement suffix
# (e.g. orangeBoardings -> orange.Boardings) so the column names can be
# split into a route part and a type part after reshaping.
colnames(circ) <- colnames(circ) %>%
  str_replace("Board", ".Board") %>%
  str_replace("Alight", ".Alight") %>%
  str_replace("Average", ".Average")

# Reshape to long format: every orange/purple/green/banner column is stacked
# into a key column (`var`, the old column name) and a value column (`number`).
long <- gather(
  circ, "var", "number",
  starts_with("orange"),
  starts_with("purple"), starts_with("green"),
  starts_with("banner")
)
# Split the old column names on the literal "." into route and type,
# e.g. "orange.Boardings" -> route = "orange", type = "Boardings".
long <- separate(long, var, into = c("route", "type"), sep = "[.]")

## Keep only the "Average" rows for each route, then drop the rows whose
## value is missing.
avg <- filter(long, type == "Average")
avg <- filter(avg, !is.na(number))

# Spread the values of `type` back out into their own columns, filled from
# `number`, then preview the first rows of the result.
type_wide <- long %>%
  spread(key = type, value = number)
head(type_wide)

## # A tibble: 6 × 7
## day date daily route Alightings Average Boardings
##
## 1 Friday 2010-01-15 1644 banner NA NA NA
## 2 Friday 2010-01-15 1644 green NA NA NA
## 3 Friday 2010-01-15 1644 orange 1643 1644 1645
## 4 Friday 2010-01-15 1644 purple NA NA NA
## 5 Friday 2010-01-22 1394. banner NA NA NA
## 6 Friday 2010-01-22 1394. green NA NA NA

In these questions, try to use ggplot2 if possible.

Plot average ridership (avg data set) by date.

# Quick scatterplot of average ridership against date.
q <- qplot(x = date, y = number, data = avg)
# Limit the x-axis to a fixed date window; points outside the window are
# removed, which produces the "Removed ... rows" warning.
q + xlim(ymd("2011/05/03", "2012/06/04"))

## Warning: Removed 1871 rows containing missing values (geom_point).

# Same scatterplot built with ggplot(): base object first, then a point layer.
g <- ggplot(avg, aes(x = date, y = number))
g + geom_point()

# Point layer again, with the x-axis limited to the same date window.
g + geom_point() + xlim(ymd("2011/05/03", "2012/06/04"))

## Warning: Removed 1871 rows containing missing values (geom_point).

Color the points by route (orange, purple, green, banner)

# qplot version, drawn immediately.
qplot(
  x = date, y = number,
  data = avg,
  colour = route
)

# Same plot, stored in an object and then printed explicitly.
first_plot <- qplot(
  x = date, y = number,
  data = avg,
  colour = route
)
print(first_plot)

# ggplot version: route is mapped to colour in the base aesthetics.
g <- ggplot(
  avg,
  aes(x = date, y = number, color = route)
)
g + geom_point()

add black smoothed curves for each route

# qplot: points coloured by route plus one black smoother per route.
# `group = route` is required because a fixed colour on the smoothing layer
# replaces the colour mapping that would otherwise split the data by route.
qplot(x = date, y = number, data = avg, colour = route) +
  geom_smooth(aes(group = route), colour = "black")

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Without a fixed colour the smoothers inherit the route colours.
qplot(x = date, y = number, data = avg, colour = route) + geom_smooth()

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# ggplot: black smoother per route. The explicit group keeps one curve per
# route; with only `color = "black"` a single pooled curve would be drawn.
g + geom_point() + geom_smooth(aes(group = route), color = "black")

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# ggplot: smoothers coloured by route.
g + geom_point() + geom_smooth()

## `geom_smooth()` using method = ‘gam’ and formula ‘y ~ s(x, bs = “cs”)’

Color the points by day of the week

# qplot: colour the points by day of the week.
qplot(x = date, y = number, data = avg, colour = day)

qplot(x = date, y = number, data = avg, colour = route) + geom_smooth()

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Store the weekday as a factor with Monday-to-Sunday level order so the
# legend follows the calendar week instead of alphabetical order.
avg <- avg %>%
  mutate(
    dayFactor = factor(
      day,
      levels = c(
        "Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday"
      )
    )
  )
g <- ggplot(avg, aes(x = date, y = number, color = dayFactor))
g + geom_point()

Replot 1a where the colors of the points are the name of the route (with banner –> blue)

# Named palette: each route name maps to its display colour
# (the banner route is drawn in blue).
pal <- c(
  banner = "blue", purple = "purple",
  green = "darkgreen", orange = "orange"
)
# qplot version with the manual colour scale.
qplot(x = date, y = number, data = avg, colour = route) +
  scale_colour_manual(values = pal)

# ggplot version with the same manual colour scale.
g <- ggplot(avg, aes(x = date, y = number, color = route))
g + geom_point() + scale_colour_manual(values = pal)

plot average ridership by date with one panel per route

# One panel per route, using qplot's own `facets` argument.
qplot(
  x = date, y = number,
  data = avg,
  facets = ~route
)

# The same panels, added as a facet_wrap() layer instead.
qplot(x = date, y = number, data = avg) + facet_wrap(~route)

# All panels forced onto a single row of four columns.
qplot(x = date, y = number, data = avg) + facet_wrap(~route, ncol = 4)

# Panels by route, with points also coloured by route via the manual palette.
qplot(
  x = date, y = number,
  data = avg,
  facets = ~route,
  colour = route
) +
  scale_colour_manual(values = pal)

# ggplot version of the coloured, faceted plot.
g <- ggplot(avg, aes(x = date, y = number, color = route))
g +
  geom_point() +
  facet_wrap(~route) +
  scale_colour_manual(values = pal)

Plot average ridership by date with separate panels by day of the week, colored by route

# qplot: one panel per day of the week, points coloured by route.
qplot(
  x = date, y = number,
  data = avg,
  facets = ~day,
  colour = route
) +
  scale_colour_manual(values = pal)

# ggplot: the same plot built from explicit layers.
ggplot(aes(x = date, y = number, colour = route), data = avg) +
  geom_point() +
  facet_wrap(~day) +
  scale_colour_manual(values = pal)

Plot average ridership (avg) by date, colored by route (same as 1a). (Do not take an average; use the average column for each route.) Make the x-label "Year". Make the y-label "Number of People". Use the black and white theme, theme_bw(). Change the text size with theme(text = element_text(size = 20)).

# Base plot: average ridership by date, coloured by route with the manual palette.
first_plot <- ggplot(avg, aes(x = date, y = number, color = route)) +
  geom_point() +
  scale_colour_manual(values = pal)

# Add axis labels, switch to the black-and-white theme, and enlarge all text.
# theme() comes after theme_bw() so the text size is not reset by the theme.
first_plot +
  xlab("Year") + ylab("Number of People") + theme_bw() +
  theme(text = element_text(size = 20))

Plot average ridership on the orange route versus date as a solid line, and add dashed "error" lines based on the boardings and alightings. The line colors should be orange. (Hint: linetype is an aesthetic for lines; see also scale_linetype and scale_linetype_manual.) Use Alightings = "dashed", Boardings = "dashed", Average = "solid".

# Keep every measurement type (boardings, alightings, average) for the
# orange route only.
orange <- long %>% filter(route == "orange")

# Line type is dashed: linetype and colour are set as constants outside aes(),
# so they apply to the whole layer.
ggplot(orange, aes(x = date, y = number)) +
  geom_line(linetype = "dashed", colour = "orange")

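Don't do this: inside aes(), "orange" is treated as a data value to map rather than as a color name, so the line is drawn in a default palette color and gets a legend.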

# Anti-example, kept on purpose: putting the colour inside aes() maps the
# string "orange" as data instead of setting the line colour.
ggplot(orange, aes(x = date, y = number)) +
  geom_line(linetype = "dashed", aes(colour = "orange"))

now line dashedness varies by the type

# Map linetype to `type` so each measurement gets its own line style, while
# the colour stays a fixed constant for the layer.
ggplot(orange, aes(x = date, y = number)) +
  geom_line(aes(linetype = type), colour = "orange")

# This one as a quick plot. qplot() treats its arguments as aesthetic
# mappings, so the constant colour is wrapped in I() to be used as-is;
# a bare "orange" would be mapped as data and drawn in a default colour.
qplot(
  data = orange, x = date, y = number,
  linetype = type, geom = "line", colour = I("orange")
)

# Manual line types given as an unnamed vector. Unnamed values are matched to
# the scale's levels in order (usually alphabetical), so which type ends up
# solid depends on that order rather than on the intent.
ggplot(orange, aes(x = date, y = number)) +
  geom_line(aes(linetype = type), colour = "orange") +
  scale_linetype_manual(values = c("dashed", "dashed", "solid"))

# Manual line types given as a named vector: values are matched by name, so
# Average is solid and the two "error" lines are dashed regardless of order.
ggplot(orange, aes(x = date, y = number)) +
  geom_line(aes(linetype = type), colour = "orange") +
  scale_linetype_manual(
    values = c(
      Alightings = "dashed",
      Boardings = "dashed",
      Average = "solid"
    )
  )

程序代写 CS代考 加微信: powcoder QQ: 1823890830 Email: powcoder@163.com