2020/9/29 https://learn.canterbury.ac.nz/pluginfile.php/3327574/mod_resource/content/1/permutation.r
###############################################################
# Permutation tests r code
###############################################################
###############################################################
# First example: Doing a two-sample permutation test in R
x = c(90, 11, 94, 118)
y = c(197, 107, 752)
# Sample size
nx = length(x)
ny = length(y)
# Combined dataset and corresponding labels
xy = c(x, y)
xylabels = c(rep(“x”, nx), rep(“y”, ny))
# t-test statistic for original sample
teststat = (mean(y) – mean(x)) / sqrt(var(x)/nx + var(y)/ny)
teststat
# Number of permutation samples to take
N = 10000
set.seed(1)
# Create vector to store permutation t-statistics
tstat = numeric(N)
for (i in 1:N) {
newlabels = sample(xylabels, replace = F) # sample labels without replacement
# permute the data and obtain the t-test statistic
newx = xy[newlabels == “x”]
newy = xy[newlabels == “y”]
tstat[i] = (mean(newy) – mean(newx))/sqrt(var(newx)/nx + var(newy)/ny)
}
# Calculate upper tail one-sided p-value
# (proportion of permuted test statistics above observed value)
pvalue = mean(tstat >= teststat)
pvalue
# Output mean difference, t-test statistic and p-value
print(c(mean(y) – mean(x), teststat, pvalue))
# Plot permutation sampling distribution using kernel density estimate
plot(density(tstat, bw=0.1), xlab=”Test Statistics”,
main=”Permutation Distribution and Actual Test Statistic”)
abline(v = teststat, lwd = 2, col = “red”)
rug(tstat)
# To find the critical value
alpha = 0.05
# Sort permutation t-statistics and extract upper (1-alpha)% quantile
tstat = sort(tstat)
tcrit = tstat[ceiling((1 – alpha) * N)]
tcrit
# Calculate correct upper tail one-sided p-value
pvalue = (sum(tstat >= teststat) + 1) / (N + 1)
pvalue
https://learn.canterbury.ac.nz/pluginfile.php/3327574/mod_resource/content/1/permutation.r 1/5
2020/9/29 https://learn.canterbury.ac.nz/pluginfile.php/3327574/mod_resource/content/1/permutation.r
##################################################################################
## Third example: Two-sample, one-sided, permutation test with more data
# Simulate larger samples from Exponential populations with lambda=1 and lamba=0.5
nx = 10
ny = 20
set.seed(1)
x = rexp(nx, rate = 1)
y = rexp(ny, rate = 0.5)
# Combined dataset and corresponding labels
xy = c(x,y)
xylabels = c(rep(“x”, nx), rep(“y”, ny))
# t-test statistic for original sample
teststat = (mean(y) – mean(x)) / sqrt(var(x)/nx + var(y)/ny)
# Number of permutation samples to take
N = 1000
# Create vector to store permutation t-statistics
tstat = numeric(N)
for (i in 1:N) {
newlabels = sample(xylabels, replace = F) # sample labels without replacement
# permute data and obtain t-test statistic
newx = xy[newlabels == “x”]
newy = xy[newlabels == “y”]
tstat[i] = (mean(newy) – mean(newx)) / sqrt(var(newx)/nx + var(newy)/ny)
}
# Calculate upper tail one-sided p-value
pvalue = (sum(tstat >= teststat) + 1) / (N + 1)
# Sort permutation t-statistics and extract upper (1-alpha)% quantile
alpha = 0.05
tstat = sort(tstat)
tcrit = tstat[ceiling((1 – alpha) * N)]
# Output mean different, test statistic, critical value and p-value
print(c(mean(y) – mean(x), teststat, tcrit, pvalue))
# Plot permutation sampling distribution using kernel density estimate
hist(tstat, breaks = 100, freq = FALSE, xlab = “Test Statistics”,
ylim = c(0, 0.8), xlim = c(-4,4),
main = “Permutation Distribution and Actual Test Statistic”)
lines(density(tstat, bw = 0.25))
abline(v = teststat, col = “red”)
abline(v = tcrit, col = “blue”)
legend(“topleft”,
legend = c(“Kernel Density Estimate”, “Test Statistic”, “Critical Value”),
lty = 1, col = c(“black”, “red”, “blue”))
#################################################################################
## Classical $t$-test in R, using the same data as the third example
# This is a one sided t-test with mean(y) > mean(x) as alternative hypothesis
# (hence “greater” option)
t.test(y, x, alternative = “greater”)
# One sided t-test with mean(y) > mean(x) as alternative hypothesis
t.test(y, x, alternative = “greater”)
https://learn.canterbury.ac.nz/pluginfile.php/3327574/mod_resource/content/1/permutation.r 2/5
2020/9/29 https://learn.canterbury.ac.nz/pluginfile.php/3327574/mod_resource/content/1/permutation.r
# degrees of freedom for Welch’s test
ttestdf = (var(x)/nx+var(y)/ny)^2 / ((var(x)/nx)^2/(nx-1)+(var(y)/ny)^2/(ny-1))
ttestdf
# standard deviation for Welch’s test
ttestsd = sqrt((var(x)/nx + var(y)/ny))
ttestsd
# $t$-test statistic for Welch’s test
tteststat = (mean(y)-mean(x)) / ttestsd
tteststat
# p-value for Welch’s test
pt(tteststat, df = ttestdf, lower.tail = FALSE)
###############################################################
## Fourth example: An `Exact’ two-sample permutation test
x = c(90, 11, 94, 118)
y = c(197, 107, 752)
# Sample size
nx = length(x)
ny = length(y)
# Combined dataset and corresponding labels
xy = c(x, y)
xylabels = c(rep(“x”, nx), rep(“y”, ny))
# t-test statistic for original sample
teststat = (mean(y) – mean(x)) / sqrt(var(x)/nx + var(y)/ny)
# Number of permutation samples to take
N = choose(nx + ny, nx)
N
# Obtain all permutations
allpermutations = combn(nx + ny, nx)
allpermutations
# Create vector to store permutation t-statistics
tstat = numeric(N)
for (i in 1:N) {
# permute data and obtain t-test statistic
newx=xy[allpermutations[,i]]
newy=xy[-allpermutations[,i]]
tstat[i]=(mean(newy)-mean(newx))/sqrt(var(newx)/nx + var(newy)/ny)
}
# Calculate upper tail one-sided p-value
# (proportion of permuted test statistics above observed value)
pvalue=mean(tstat>=teststat)
# Sort permutation t-statistics and extract upper (1-alpha)% quantile
alpha=0.05
tstat=sort(tstat)
tcrit=tstat[ceiling((1-alpha)*N)]
# Output mean different, test statistic, critical value and p-value
print(c(mean(y)-mean(x), teststat, tcrit, pvalue))
table(tstat)
https://learn.canterbury.ac.nz/pluginfile.php/3327574/mod_resource/content/1/permutation.r 3/5
2020/9/29 https://learn.canterbury.ac.nz/pluginfile.php/3327574/mod_resource/content/1/permutation.r
length(table(tstat))
(1:35)/35
####################################################################
## Fifth example: p-value of 1
x = c(197, 107, 752, 118)
y = c(90, 11, 94)
# Sample size
nx = length(x)
ny = length(y)
# Combined dataset and corresponding labels
xy = c(x, y)
xylabels = c(rep(“x”, nx), rep(“y”, ny))
# t-test statistic for original sample
teststat = (mean(y) – mean(x)) / sqrt(var(x)/nx + var(y)/ny)
# Number of permutation samples to take
N = choose(nx + ny, nx)
# Obtain all permutations
allpermutations = combn(nx + ny, nx)
# Create vector to store permutation t-statistics
tstat = numeric(N)
for (i in 1:N) {
# permute data and obtain t-test statistic
newx = xy[allpermutations[, i]]
newy = xy[-allpermutations[, i]]
tstat[i] = (mean(newy) – mean(newx)) / sqrt(var(newx)/nx + var(newy)/ny)
}
# Calculate upper tail one-sided p-value
# (proportion of permuted test statistics above observed value)
pvalue = mean(tstat >= teststat)
pvalue
###############################################################
## Sixth example: Matched pairs permutation test
# Simulate correlated samples from standard normals
n = 20
set.seed(2)
x = rnorm(n)
y = rnorm(n, 0.5*x + 0.5)
# t-test statistic for original sample extracted from t.test() function
teststat = t.test(y, x, alternative=”greater”, paired=TRUE)$statistic
# Number of permutation samples to take
N = 1000
# Create vector to store permutation t-statistics
tstat = numeric(N)
for (i in 1:N) {
# decide which ones to interchange
newlabels = sample(c(TRUE, FALSE), size = n, replace = TRUE)
# permute pairs where newlabels=TRUE and obtain t-test statistic
newx = x
https://learn.canterbury.ac.nz/pluginfile.php/3327574/mod_resource/content/1/permutation.r 4/5
2020/9/29 https://learn.canterbury.ac.nz/pluginfile.php/3327574/mod_resource/content/1/permutation.r
newy = y
newx[newlabels] = y[newlabels]
newy[newlabels] = x[newlabels]
tstat[i] = t.test(newy, newx, alternative = “greater”, paired = TRUE)$statistic
}
# Calculate upper tail one-sided p-value and critical value
pvalue = (sum(tstat >= teststat) + 1) / (N + 1)
alpha = 0.05
tstat = sort(tstat)
tcrit = tstat[ceiling((1 – alpha) * N)]
print(c(mean(y) – mean(x), teststat, tcrit, pvalue))
# Plot the permutation sampling distribution using kernel density estimate
hist(tstat, breaks=100, freq=FALSE, xlab=”Test Statistics”,
ylim = c(0, 0.8), xlim = c(-4,4),
main = “Permutation Distribution and Actual Test Statistic”)
lines(density(tstat, bw = 0.25))
abline(v = teststat, col = “red”)
abline(v = tcrit, col = “blue”)
legend(“topleft”,
legend = c(“Kernel Density Estimate”, “Test Statistic”, “Critical Value”),
lty = 1, col = c(“black”, “red”, “blue”))
t.test(y, x, alternative = “greater”, paired = TRUE)
###############################################################
## End of code for Permutation tests.
https://learn.canterbury.ac.nz/pluginfile.php/3327574/mod_resource/content/1/permutation.r 5/5