#' Fit linear-regression weights by batch gradient descent
#'
#' Starting from standard-normal random weights, repeatedly steps against the
#' gradient of the mean squared error until the gradient norm drops to
#' `epsilon` or `iters` iterations have been performed. No intercept is added;
#' include a column of ones in `X` if one is wanted.
#'
#' @param y Numeric response with one value per row of `X` (vector or
#'   single-column matrix / data frame).
#' @param X Numeric design matrix (observations in rows, predictors in
#'   columns). Anything `as.matrix()` can turn into a numeric matrix works.
#' @param epsilon Convergence threshold on the Euclidean norm of the gradient.
#' @param r Learning rate (step size).
#' @param iters Maximum number of iterations.
#' @return A list with `w`, a one-column matrix of fitted weights (one row per
#'   predictor), and `J`, the cost (half the residual sum of squares) recorded
#'   at the start of every iteration that was run.
gradientDescent <- function(y, X, epsilon = 1 / 10, r = 0.1, iters = 1000) {
  X <- as.matrix(X)
  # Work with the response as a 1 x N row so it lines up with w %*% t(X).
  y_row <- matrix(as.numeric(as.matrix(y)), nrow = 1)
  stopifnot(
    is.numeric(X),
    ncol(y_row) == nrow(X),
    is.numeric(iters), length(iters) == 1L, !is.na(iters), iters >= 0
  )

  N <- nrow(X)
  X_t <- t(X) # loop invariant, hoisted
  w <- matrix(rnorm(n = ncol(X), mean = 0, sd = 1), nrow = 1) # Initialize w

  # Cost history buffer. The initial size is capped so that a very large (or
  # infinite) `iters` does not trigger a huge allocation; assigning past the
  # end extends the vector when needed.
  cost_buf <- numeric(min(iters, 1024))
  n_done <- 0
  # Inf guarantees the first iteration runs whenever iters > 0, so the norm
  # reported below is always a real gradient norm rather than a placeholder.
  grad_norm <- Inf

  while (grad_norm > epsilon && n_done < iters) {
    e <- y_row - w %*% X_t # residuals at the current weights
    n_done <- n_done + 1
    cost_buf[n_done] <- 1 / 2 * sum(e^2)
    # Step direction is the gradient of the mean squared error (2/N scaling);
    # it shares its minimiser with the recorded cost J.
    grad <- -(2 / N) * e %*% X
    w <- w - r * grad
    grad_norm <- sqrt(sum(grad^2))
    if (!is.finite(grad_norm)) {
      stop(
        "gradient descent diverged after ", n_done,
        " iteration(s); try a smaller learning rate `r`",
        call. = FALSE
      )
    }
  }

  if (grad_norm > epsilon) {
    warning(
      "gradient descent stopped after ", n_done,
      " iteration(s) without reaching epsilon = ", epsilon,
      call. = FALSE
    )
  }

  print(paste("Final gradient norm is", grad_norm))
  list(w = t(w), J = cost_buf[seq_len(n_done)])
}
# Demo: simulate 1000 observations of a response and two predictors, all
# independent standard-normal draws (rnorm defaults: mean 0, sd 1), then fit
# the weights with gradient descent using the default settings.
y <- rnorm(1000)
x1 <- rnorm(1000)
x2 <- rnorm(1000)
sol <- gradientDescent(y, cbind(x1 = x1, x2 = x2))