1.逻辑回归模拟
knitr::opts_chunk$set(echo = TRUE)
#install.packages (“tidyverse”)
#install.packages(“vctrs”)
#install.packages(“pillar”)
#install.packages(“readxl”)
#install.packages(“PKNCA”)
#install.packages(“stringr”)
library(readxl)
library(ggplot2)
library(MASS)
library(dplyr)
##
## Attaching package: ‘dplyr’
## The following object is masked from ‘package:MASS’:
##
## select
## The following objects are masked from ‘package:stats’:
##
## filter, lag
## The following objects are masked from ‘package:base’:
##
## intersect, setdiff, setequal, union
library(PKNCA)
library(stringr)
#Change your filepath to yours
tang <- read_excel("/Users/xw/Desktop/R/latency/shi-s-latency.xlsx")
tang_n<- tang %>% group_by(LogConR) %>%
count(LogConR)
N <- tang_n[,2]
tang_c<- tang %>% group_by(LogConR) %>%
summarise(Success = sum(Resp))%>%
cbind(N)%>%
mutate(Failure =n-Success, Prob=Success/n)
#Modeling
m1<-glm(Resp~ LogConR, data=tang, family="binomial")
summary(m1)
##
## Call:
## glm(formula = Resp ~ LogConR, family = "binomial", data = tang)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8099 -0.7900 0.1974 0.7448 1.9152
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.4187 0.1522 2.751 0.00594 **
## LogConR 5.8295 0.7012 8.313 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 448.55 on 323 degrees of freedom
## Residual deviance: 325.71 on 322 degrees of freedom
## AIC: 329.71
##
## Number of Fisher Scoring iterations: 5
m2<-glm(cbind(Success, Failure)~ LogConR, data=tang_c, family="quasibinomial")
summary(m2)
##
## Call:
## glm(formula = cbind(Success, Failure) ~ LogConR, family = "quasibinomial",
## data = tang_c)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.4169 -1.5415 0.2825 1.4364 1.7763
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4187 0.3806 1.100 0.3078
## LogConR 5.8295 1.7541 3.323 0.0127 *
## —
## Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ‘ 1
##
## (Dispersion parameter for quasibinomial family taken to be 6.256854)
##
## Null deviance: 153.75 on 8 degrees of freedom
## Residual deviance: 30.90 on 7 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
Calibration
#dose.p(m1, cf=1:2, p=0.50)
dose.p(m2, cf=1:2, p=0.50) #the se is a bit bigger with the quasibinomial
## Dose SE
## p = 0.5: -0.07181957 0.05879236
#calibrate <-expand.grid(Prob=seq(0.5,0.95,0.05))
#calibrate$LogConR<-NA
#calibrate$se<-NA
calibrate <-expand.grid(Prob=seq(0.05,0.95,0.01))
calibrate$LogConR<-NA
calibrate$se<-NA
for (i in 1:nrow(calibrate)){
calibrate$se[i]<-attributes(dose.p(m2, cf=1:2, p=calibrate$Prob[i]))$SE[,1]
calibrate$LogConR[i]<-dose.p(m2, cf=1:2, p=calibrate$Prob[i])[1]
#print(i)
}
tang_d <- calibrate
tang_d$min <- NA
tang_d$max <- NA
for (i in 1:nrow(tang_d)){
tang_d$min[i] <- (tang_d$LogConR[i] - tang_d$se[i])
tang_d$max[i] <- (tang_d$LogConR[i] + tang_d$se[i])
}
dose_a <- dose.p(m2, cf=1:2, p=0.01)
a1 <- dose_a[1]
dose_aSE <- data.frame(attributes(dose_a))
a2 <- dose_aSE[2]
dose_b <-dose.p(m2, cf=1:2, p=0.99)
b1 <- dose_b[1]
dose_bSE <- data.frame(attributes(dose_b))
b2 <- dose_bSE[2]
tang_e <- rbind(tang_d, list(0.01, a1, a2, (a1 - a2), (a1 + a2)))
tang_e <- rbind(tang_e, list(0.99, b1, b2, (b1 - b2), (b1 + b2)))
tang_e$LogConR <- as.numeric(tang_e$LogConR)
tang_e$min <- as.numeric(tang_e$min)
tang_e$max <- as.numeric(tang_e$max)
tang_e$se <- as.numeric(tang_e$se)
Graphing
f <- tang_e[46,2]
g <- 10^f
g <- roundString(g, 3)
#g is your Prob = 0.5 point, your threshold or EOR
tang_f <- tang_e
w <- tang_f[93, 1] #prob
e <- tang_f[93, 2] #LogConR
r <- tang_f[93, 3] #se
t <- tang_f[93, 5] #max
tang_f <- rbind(tang_f, list(w, e, r, t, t))
ggplot(tang_c, aes(x=LogConR, y=Prob))+
geom_area(tang_f, mapping = aes(x = (2*(min - LogConR) + LogConR), y = Prob), fill = "grey", alpha = 0.5)+
geom_area(tang_e, mapping = aes(x = (2*(max - LogConR) + LogConR), y = Prob), fill = "white")+
geom_point(data = tang_c)+
geom_smooth(data = tang_c, method = "glm", method.args = list(family="quasibinomial"), color = "black", size = 0.25, se = F) +
geom_errorbarh(data=tang_e, aes(y=Prob[46], xmax=LogConR[46]+2*se[46], xmin=LogConR[46]-2*se[46]), height=0.05, size= 0.125) +
geom_segment(aes(x = f, y = 0, xend = f, yend = 0.5), linetype = "dotted")+
ggtitle("Physalis Potatochip/Apple", g)+
theme_linedraw()+
theme(panel.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
结果图
2.线性回归模拟
lai.data <- read.csv("/Users/xw/Desktop/xw plot for Achem/latency for 4 subjects/lai data.csv")
a<-lai.data
prob1=sum((a$x==-450)&(a$y==1))/sum(a$x==-450)
xvector = unique(a$x)
probvector = vector(mode = "numeric",1)
for (i in 1:(length(xvector))){
probvector[i] =
sum((a$x==xvector[i])&(a$y==1))/sum(a$x==xvector[i])
}
probvector
y1<-probvector
x<-xvector
lm.out <- lm(y1 ~ x)
summary(lm.out)
newx = seq(min(x),max(x),by = 0.05)
conf_interval <- predict(lm.out, newdata=data.frame(x=newx), interval="confidence",
level = 0.95)
plot(x,y1,xlab="",ylab="",ylim=c(0,1), xaxt="n",yaxt ="n",bty = 'l',pch=16, col="red")
abline(lm.out, col="red", lwd=4)
lines(newx, conf_interval[,2], col="LightCoral", lwd=2)
lines(newx, conf_interval[,3], col="LightCoral", lwd=2)
axis(1, at = c(-450,-350, -250, -150, -50, 50, 150, 250, 350, 450), labels = c(-450,-350, -250, -150, -50, 50, 150, 250, 350, 450), cex.axis=0.9, mgp=c(3, 0.3, 0), tck=0.01)
axis(2, at = c(0,0.5,1), labels = c(0,0.5,1),las=1, cex.axis=0.9, mgp=c(3, 0.3, 0), tck=0.01)
结果图