Homework 6 Solutions
Homework 6 Solutions
Gabriel Young (gjy2107)
April 23, 2018
Part 1
genres <- read.csv("moretti.csv", as.is = TRUE) head(genres) ## Name Begin End ## 1 Courtship 1740 1820 ## 2 Picareseque 1748 1790 ## 3 Oriental 1759 1787 ## 4 Epistolary 1766 1795 ## 5 Sentimental 1768 1790 ## 6 Spy 1770 1800 i. [10 pts] poisLoglik <- function(lambda, data) { return(sum(dpois(data, lambda = lambda, log = TRUE))) } data1 <- c(1, 0, 0, 1, 1) lambda1 <- 1 poisLoglik(lambda1, data1) ## [1] -5 poisLoglik(0, data1) ## [1] -Inf ii. [10 pts] count_new_genres <- function(year) { return(sum(genres$Begin == year)) } count_new_genres(1803) ## [1] 0 count_new_genres(1850) ## [1] 3 iii. [10 pts] years <- seq(1740, 1900) num.years <- length(years) new_genres <- rep(NA, num.years) names(new_genres) <- years 1 for (i in 1:num.years) { new_genres[i] <- count_new_genres(years[i]) } twoyears <- which(years == 1803 | years == 1850) new_genres[twoyears] ## 1803 1850 ## 0 3 head(new_genres) ## 1740 1741 1742 1743 1744 1745 ## 1 0 0 0 0 0 iv. [10 pts] lambda_seq <- seq(0, 3, by=.001) num.lambdas <- length(lambda_seq) Logliks <- rep(NA, num.lambdas) for (i in 1:num.lambdas) { Logliks[i] <- poisLoglik(lambda_seq[i], new_genres) } plot(lambda_seq,Logliks,type="l", main = "Plot of the Log Likelihood Function", ylab = "Log Likelihood", xlab = "Lambda", ylim=c(-450,-50)) 0.0 0.5 1.0 1.5 2.0 2.5 3.0 − 4 0 0 − 3 0 0 − 2 0 0 − 1 0 0 Plot of the Log Likelihood Function Lambda L o g L ik e lih o o d lambda_seq[which.max(Logliks)] ## [1] 0.273 2 v. [10 pts] NegpoisLoglik <- function(lambda, data) { return(-sum(dpois(data, lambda = lambda, log = TRUE))) } nlm(NegpoisLoglik, 1, new_genres) ## $minimum ## [1] 106.3349 ## ## $estimate ## [1] 0.2732914 ## ## $gradient ## [1] 2.984279e-07 ## ## $code ## [1] 1 ## ## $iterations ## [1] 11 vi. [10 pts] intergenre_intervals <- diff(sort(genres$Begin)) intergenre_intervals ## [1] 8 11 7 2 2 3 16 1 1 9 4 4 6 8 3 1 2 2 0 2 6 1 7 ## [24] 0 1 1 1 1 0 0 1 6 11 3 1 0 1 3 8 1 0 3 0 mean(intergenre_intervals) ## [1] 3.44186 sd(intergenre_intervals) ## [1] 3.705224 moretti.coef <- sd(intergenre_intervals)/mean(intergenre_intervals) vii. [20 pts] a. intergenre_calc <- function(new.genres) { names(new.genres) <- 1:length(new.genres) new.genres <- new.genres[new.genres !=0] years <- as.numeric(rep(names(new.genres), new.genres)) return(diff(sort(years))) } intergenre_intervals2 <- intergenre_calc(new_genres) all(intergenre_intervals == intergenre_intervals2) ## [1] TRUE b. Pois.sim <- function(num.years, mean.genres) { samples <- rpois(num.years, lambda = mean.genres) 3 intergenre_intervals <- intergenre_calc(samples) coef.of.var <- sd(intergenre_intervals)/mean(intergenre_intervals) return(list(intergenre_intervals = intergenre_intervals, coef.of.var = coef.of.var)) } for (i in 1:10) { res <- Pois.sim(161, 0.273) print(mean(res$intergenre_intervals)) } ## [1] 4.457143 ## [1] 3.636364 ## [1] 3.829268 ## [1] 3.555556 ## [1] 3.825 ## [1] 2.944444 ## [1] 4.875 ## [1] 3.347826 ## [1] 2.907407 ## [1] 4.903226 viii. [10 pts] n <- 10000 coef.of.var <- rep(NA, n) for (i in 1:n) { res <- Pois.sim(161, 0.273) coef.of.var[i] <- res$coef.of.var } mean(coef.of.var > moretti.coef)
## [1] 0.2299
ix.[10 pts]
Any answer in this question is fine. It would be more common to use permutation or bootstrap procedure or
answer this question.
4
Part 1