Trying gradient descent for linear regression

The best way to learn an algorith is to code it. So here it is, my take on Gradient Descent Algorithm for simple linear regression.

First, we fit a simple linear model with lm for comparison with gradient descent values.

#Load libraries

library(dplyr)
library(highcharter)

#Scaling length variables from iris dataset.

iris_demo <- iris[,c("Sepal.Length","Petal.Length")] %>%
  mutate(sepal_length = as.numeric(scale(Sepal.Length)),
         petal_length = as.numeric(scale(Petal.Length))) %>%
  select(sepal_length,petal_length)

#Fit a simple linear model to compare coefficients.

regression <- lm(iris_demo$petal_length~iris_demo$sepal_length)

coef(regression)
##            (Intercept) iris_demo$sepal_length 
##           4.643867e-16           8.717538e-01
iris_demo_reg <- iris_demo

iris_demo_reg$reg <- predict(regression,iris_demo)

#Plot the model with highcharter

highchart() %>%
  hc_add_series(data = iris_demo_reg, type = "scatter", hcaes(x = sepal_length, y = petal_length), name = "Sepal Length VS Petal Length") %>%
  hc_add_series(data = iris_demo_reg, type = "line", hcaes(x = sepal_length, y = reg), name = "Linear Regression") %>%
  hc_title(text = "Linear Regression")

open

We will try to acomplish the same coefficients, this time using Gradient Descent.

library(tidyr)


set.seed(135) #To reproduce results


#Auxiliary function

# y = mx + b

reg <- function(m,b,x)  return(m * x + b)


#Starting point

b <- runif(1)
m <- runif(1)


#Gradient descent function

gradient_desc <- function(b, m, data, learning_rate = 0.01){ # Small steps
  
  # Column names = Code easier to understand
  
  colnames(data) <- c("x","y")
  
  
  #Values for first iteration
  
  b_iter <- 0     
  m_iter <- 0
  n <- nrow(data)
  
  # Compute the gradient for Mean Squared Error function
  
  for(i in 1:n){
    
    # Partial derivative for b
    
    b_iter <- b_iter + (-2/n) * (data$y[i] - ((m * data$x[i]) + b))
    
    # Partial derivative for m
    
    m_iter <- m_iter + (-2/n) * data$x[i] * (data$y[i] - ((m * data$x[i]) + b))
    
  }
  
  
  # Move to the OPPOSITE direction of the derivative
  
  new_b <- b - (learning_rate * b_iter)
  new_m <- m - (learning_rate * m_iter)
  
  # Replace values and return
  
  new <- list(new_b,new_m)
  
  return(new)
  
}

# I need to store some values to make the motion plot

vect_m <- m
vect_b <- b


# Iterate to obtain better parameters

for(i in 1:1000){
  if(i %in% c(1,100,250,500)){ # I keep some values in the iteration for the plot
    vect_m <- c(vect_m,m)
    vect_b <- c(vect_b,b)
  } 
  x <- gradient_desc(b,m,iris_demo)
  b <- x[[1]]
  m <- x[[2]]
}

print(paste0("m = ", m))
## [1] "m = 0.871753774273602"
print(paste0("b = ", b))
## [1] "b = 5.52239677041512e-10"

The difference in the coefficients is minimal.

We can see how the iterations work in the next plot:

#Compute new values

iris_demo$preit    <- reg(vect_m[1],vect_b[1],iris_demo$sepal_length)
iris_demo$it1      <- reg(vect_m[2],vect_b[2],iris_demo$sepal_length)
iris_demo$it100    <- reg(vect_m[3],vect_b[3],iris_demo$sepal_length)
iris_demo$it250    <- reg(vect_m[4],vect_b[4],iris_demo$sepal_length)
iris_demo$it500    <- reg(vect_m[5],vect_b[5],iris_demo$sepal_length)
iris_demo$finalit  <- reg(m,b,iris_demo$sepal_length)


iris_gathered <- iris_demo %>% gather(key = gr, value = val, preit:finalit) %>%
  select(-petal_length) %>% 
  distinct()


iris_start <- iris_gathered %>%
  filter(gr == "preit")


iris_seq <- iris_gathered %>%
  group_by(sepal_length) %>%
  do(sequence = list_parse(select(., y = val)))


iris_data <- left_join(iris_start, iris_seq)

#Motion Plot

irhc2 <- highchart() %>%
  hc_add_series(data = iris_data, type = "line", hcaes(x = sepal_length, y = val), name = "Gradient Descent") %>%
  hc_motion(enabled = TRUE, series = 0, startIndex = 0,
            labels = c("Iteration 1","Iteration 100","Iteration 250","Iteration 500","Final Iteration")) %>%
  hc_add_series(data = iris_demo_reg, type = "scatter", hcaes(x = sepal_length, y = petal_length), name = "Sepal Length VS Petal Length") %>%
  hc_title(text = "Gradient Descent Iterations")

irhc2

open

Maybe in a future post we can try a multivariate regression model!