# Chaining operations with the tidyverse pipe

#install.packages("tidyverse") # install package
library(tidyverse) # load package

## Import a csv file
# The path is relative, so the CSV has to be in the working directory.
TB <- read.csv("TB_burden_countries_2020-01-09.csv")
View(TB)

# We can view some summary statistics with summary()
# but it is not easy to read
summary(TB)

# We can use library stargazer to produce better summary output
library(stargazer)
stargazer(TB, type = "text")
# How can you add the median to stargazer output? (use the help)

# Useful Tidyverse functions are filter and select
# select keeps only the columns we ask for
# e.g., select all columns from 'country' to 'e_pop_num'
TB1 <- TB %>%
  select(country:e_pop_num)
View(TB1)

# filter keeps only the observations (rows) that satisfy a condition
# e.g., keep only the rows belonging to region EUR
TB2 <- TB %>%
  filter(g_whoregion == "EUR")
View(TB2)

# We can also apply both transformations together:
# first keep the columns from 'country' to 'e_pop_num',
# then keep only the rows belonging to region EUR
TB3 <- TB %>%
  select(country:e_pop_num) %>%
  filter(g_whoregion == "EUR")
View(TB3)

# with basic R we would have needed the following code to get TB3
# TB3 <- TB[TB$g_whoregion == "EUR", 1:7]
# which can get complicated and difficult to read the more operations
# we perform on TB

# Another useful function is gather
# it can be used to stack variables one below the other
# in the code below I am stacking one below the other all the variables
# from 'e_mort_exc_tbhiv_100k' to 'e_mort_num_hi':
# the original column names go into 'e_mort_key' and their values into
# 'e_mort_value'
# (gather is superseded in tidyr; pivot_longer is its current replacement)
TB4 <- TB %>%
  gather(e_mort_exc_tbhiv_100k:e_mort_num_hi,
         key = "e_mort_key",
         value = "e_mort_value")
View(TB4)

# we can also chain gather with other functions like select:
# keep the identifying columns plus the mortality columns,
# then stack the mortality columns into key/value pairs
TB %>%
  select(country:e_pop_num, e_mort_exc_tbhiv_100k:e_mort_num_hi) %>%
  gather(e_mort_exc_tbhiv_100k:e_mort_num_hi,
         key = "e_mort_key", value = "e_mort_value")

# mutate adds new variables to a data frame
# e.g., the population expressed in units of 100k
TB5 <- TB %>%
  mutate(e_pop_100k = e_pop_num / 1e5)
View(TB5)

# we can use group_by to perform operations like mutate by groups
# e.g., calculate the average population by region and year
# and add it as a new variable
TB6 <- TB %>%
  group_by(g_whoregion, year) %>%
  mutate(avg_pop = mean(e_pop_num)) %>%
  # drop the grouping so later operations on TB6 are not silently
  # performed per region/year
  ungroup()
View(TB6)

# if we only want one summary row per group we can use summarise
# instead of mutate: here, the average population by region and year
TB %>%
  group_by(g_whoregion, year) %>%
  summarise(avg_pop = mean(e_pop_num))

# finally, other R functions such as lm() can also be chained;
# the `.` placeholder passes the piped data to the `data` argument
TB %>%
  mutate(e_pop_mil = e_pop_num / 1e6) %>%
  lm(e_inc_100k ~ e_pop_mil, data = .) %>%
  summary()

# we can use stargazer instead of summary to get a better output
# (requires library(stargazer) to have been loaded)
TB %>%
  mutate(e_pop_mil = e_pop_num / 10^6) %>%
  lm(e_inc_100k ~ e_pop_mil, data = .) %>%
  stargazer(type = "text")

# group_by and lm can be chained to fit one linear model per group;
# this relies on tidy() from the broom package (install it if needed)
library(broom)
summary.ols <- TB %>%
  mutate(e_pop_mil = e_pop_num / 1e6) %>%
  group_by(country) %>%
  # .x is the subset of rows for the current country
  group_modify(~ tidy(lm(e_inc_100k ~ e_pop_mil, data = .x)))
View(summary.ols)

# Questions
# Is it sensible to remove all NAs in dataset ‘TB’?
# Take e_mort_100k as response, can you build a regression model based on this dataset? Which variables would you choose as covariates?
# Hint: Use the TB_data_dictionary file to understand the meaning of each variable.
# Which model has the smallest MSE?

# Related Posts