Data Class Lab Key
Data Class Lab Key
Copyright By PowCoder代写 加微信 powcoder
Bike Lanes Dataset: BikeBaltimore is the Department of Transportation’s bike program. The data is from http://data.baltimorecity.gov/Transportation/Bike-Lanes/xzfj-gyms
You can download it as a CSV file into your current working directory.
library(readr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ dplyr 1.0.7
## ✓ tibble 3.1.5 ✓ stringr 1.4.0
## ✓ tidyr 1.1.4 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(lubridate)
## Attaching package: ‘lubridate’
## The following objects are masked from ‘package:base’:
## date, intersect, setdiff, union
# Read the downloaded Bike Lanes CSV with readr. The path has to be written
# with plain ASCII double quotes for the call to parse.
bike = read_csv(
  "data/Bike_Lanes.csv")
## Rows: 1631 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: “,”
## chr (6): subType, name, block, type, project, route
## dbl (3): numLanes, length, dateInstalled
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(jhur)
# read_bike() is presumably supplied by jhur (attached just above) -- TODO
# confirm. The summary printed below reports the same 1631 rows x 9 columns
# as the read_csv() call, so either way of loading `bike` can be used.
bike = read_bike()
## Rows: 1631 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: “,”
## chr (6): subType, name, block, type, project, route
## dbl (3): numLanes, length, dateInstalled
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Get all the different bike types from the type column. Use sort(unique()). Assign this to an object btypes. Type dput(btypes).
# Peek at type as a factor to see the default (alphabetical) level order.
head(factor(bike$type))
## [1] BIKE BOULEVARD SIDEPATH SIGNED ROUTE SIDEPATH BIKE LANE
## [6] SIGNED ROUTE
## 7 Levels: BIKE BOULEVARD BIKE LANE CONTRAFLOW SHARED BUS BIKE ... SIGNED ROUTE
# Sorted unique bike types; sort() drops NA by default.
btypes = sort(unique(bike$type))
# The same seven values typed by hand, with SIDEPATH moved to the front.
x = c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW",
      "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE")
# dput() prints R code that recreates the object.
dput(btypes)
## c("BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIDEPATH", "SIGNED ROUTE")
# Subsetting AFTER dput(): dput() first prints the full, unordered vector, and
# then the reordered subset is auto-printed as an ordinary character vector.
dput(btypes)[c(6,1:5,7)]
## c("BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIDEPATH", "SIGNED ROUTE")
## [1] "SIDEPATH" "BIKE BOULEVARD" "BIKE LANE" "CONTRAFLOW"
## [5] "SHARED BUS BIKE" "SHARROW" "SIGNED ROUTE"
# Subsetting INSIDE dput(): prints code for the reordered vector, ready to be
# pasted back into the script.
dput(btypes[c(6,1:5,7)])
## c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIGNED ROUTE")
# The pasted dput() output, kept as the desired level order.
lev = c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW",
        "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE")
Recode type as a factor that has the SIDEPATH level first. Print head(bike$type). Note what you see. Run table(bike$type) afterwards and note the order
# Option 1: convert to a factor, then move SIDEPATH to the first level.
bike$type = factor(bike$type)
bike$type = relevel(bike$type, "SIDEPATH")
# Option 2: hand factor() the complete level order. dput() echoes the vector
# (the line printed below) and returns it, so it can be used inline.
bike$type = factor(bike$type,
                   levels = dput(btypes[c(6,1:5,7)]))
## c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIGNED ROUTE")
# Option 3: the same recode written with dplyr::mutate().
bike = bike %>% mutate(type = factor(type,
                                     levels = dput(btypes[c(6,1:5,7)])))
## c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIGNED ROUTE")
# table() reports counts in level order, so SIDEPATH now comes first.
table(bike$type)
## SIDEPATH BIKE BOULEVARD BIKE LANE CONTRAFLOW SHARED BUS BIKE
## 7 49 621 13 39
## SHARROW SIGNED ROUTE
## 589 304
Make a column type2, which is a factor of type, with the levels c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE"). Run table(bike$type2) with the option useNA = "always". Note that we do not have to make type a character again before doing this.
# Build type2 from type, keeping only three levels; every value that is not
# one of the listed levels becomes NA.
bike = bike %>%
  mutate(type2 = factor(type,
                        levels = c("SIDEPATH", "BIKE BOULEVARD",
                                   "BIKE LANE")))
table(bike$type)
## SIDEPATH BIKE BOULEVARD BIKE LANE CONTRAFLOW SHARED BUS BIKE
## 7 49 621 13 39
## SHARROW SIGNED ROUTE
## 589 304
# By default table() hides the NA values ...
table(bike$type2)
## SIDEPATH BIKE BOULEVARD BIKE LANE
## 7 49 621
# ... useNA = "always" adds an <NA> column with their count.
table(bike$type2, useNA = "always")
## SIDEPATH BIKE BOULEVARD BIKE LANE <NA>
## 7 49 621 954
Reassign dateInstalled into a character using as.character. Run head(bike$dateInstalled).
# Overwrite dateInstalled with its character representation.
bike = bike %>%
  mutate(dateInstalled =
           as.character(dateInstalled))
head(bike$dateInstalled)
## [1] "0" "2010" "2010" "0" "2011" "2007"
Reassign dateInstalled as a factor, using the default levels. Run head(bike$dateInstalled).
# Overwrite dateInstalled with a factor; the default levels are the sorted
# unique values, so "0" is the first level.
bike = bike %>%
  mutate(dateInstalled =
           factor(dateInstalled))
head(bike$dateInstalled)
## [1] 0 2010 2010 0 2011 2007
## Levels: 0 2006 2007 2008 2009 2010 2011 2012 2013
# Explicit levels: years with no rows show up with a count of 0, and values
# that are not among the levels (here "0") become NA.
table(factor(bike$dateInstalled, levels = 2005:2017))
## 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017
## 0 2 368 206 86 625 101 107 10 0 0 0 0
# useNA = "ifany" adds an <NA> column because some values were dropped.
table(factor(bike$dateInstalled, levels = 2005:2017),
      useNA = "ifany")
## 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 <NA>
## 0 2 368 206 86 625 101 107 10 0 0 0 0 126
Do not reassign dateInstalled, but simply run head(as.numeric(bike$dateInstalled)). We are looking to see what happens when we try to go from factor to numeric.
# as.numeric() on a factor returns the integer position of each value's level,
# not the label: "0" is level 1 and "2010" is level 6, hence 1 6 6 1 7 3 below.
head(as.numeric(bike$dateInstalled))
## [1] 1 6 6 1 7 3
Do not reassign dateInstalled, but simply run head(as.numeric(as.character(bike$dateInstalled))). This is how you get "numeric" values back if they were "incorrectly" stored as factors.
# Go through as.character() first so that the level labels ("0", "2010", ...)
# are what gets converted to numbers, rather than the internal level codes.
head(as.numeric(as.character(
bike$dateInstalled)))
## [1] 0 2010 2010 0 2011 2007
Convert type back to a character. Make a column type2 (replacing the old one) where, if the type is one of the categories c("CONTRAFLOW", "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE"), it is called "OTHER". Use %in% and ifelse. Make type2 a factor with the levels c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "OTHER").
# The four categories that get collapsed into "OTHER".
x = c("CONTRAFLOW", "SHARED BUS BIKE",
      "SHARROW", "SIGNED ROUTE")
# Solution with %in% and ifelse(): type goes back to character, the four
# categories are relabelled "OTHER", and type2 becomes a factor with an
# explicit level order. Rows whose type is NA stay NA (%in% is FALSE for them).
bike = bike %>% mutate(
  type = as.character(type),
  type2 = ifelse(type %in% c("CONTRAFLOW", "SHARED BUS BIKE",
                             "SHARROW", "SIGNED ROUTE"), "OTHER", type),
  type2 = factor(type2, levels = c("SIDEPATH", "BIKE BOULEVARD",
                                   "BIKE LANE", "OTHER")))
table(bike$type2)
## SIDEPATH BIKE BOULEVARD BIKE LANE OTHER
## 7 49 621 945
# Alternative with dplyr::recode_factor(): both steps must sit inside mutate().
# recode_factor() puts the replacement level first, which is why OTHER leads
# the table below.
bike2 = bike %>%
  mutate(
    type = factor(type,
                  levels = c("SIDEPATH", "BIKE BOULEVARD",
                             "BIKE LANE", "CONTRAFLOW",
                             "SHARED BUS BIKE",
                             "SHARROW", "SIGNED ROUTE")),
    type2 = recode_factor(type,
                          "CONTRAFLOW" = "OTHER",
                          "SHARED BUS BIKE" = "OTHER",
                          "SHARROW" = "OTHER",
                          "SIGNED ROUTE" = "OTHER"))
table(bike2$type2)
## OTHER SIDEPATH BIKE BOULEVARD BIKE LANE
## 945 7 49 621
Parse the following dates with the correct lubridate function:
“2014/02-14”
# Year-month-day order; ymd() copes with the mixed "/" and "-" separators.
ymd("2014/02-14")
## [1] "2014-02-14"
“04/22/14 03:20”
# Month/day/two-digit year followed by hours and minutes.
mdy_hm("04/22/14 03:20")
## [1] "2014-04-22 03:20:00 UTC"
“4/5/2016 03:2:22” assume mdy
# Month/day/year (as the exercise says to assume) with hours, minutes, seconds;
# the single-digit minute "2" is parsed as 02.
mdy_hms("4/5/2016 03:2:22")
## [1] "2016-04-05 03:02:22 UTC"
程序代写 CS代考 加微信: powcoder QQ: 1823890830 Email: powcoder@163.com