Linear Regression
Linear Regression
Linear Regression Workflow Diagram and Math
<img src=”https://raw.githubusercontent.com/hadleyhzy34/pytorch/master/resources/linear.png” alt=”drawing” width=85% height=85%/>
Linear Regression Implementation from Scratch
import modules
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
Prepare data
dataset obtained from kaggle: here
Read csv file by using panda module
real_estate = pd.read_csv('/Users/hadley/Documents/pytorch/resources/real_estate.csv')
drop dataset first column since it only containes index number of each rows
real_estate = real_estate.drop(['No'],axis=1)
print(real_estate.shape)
print(real_estate.info())
(414, 7)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 X1 transaction date 414 non-null float64
1 X2 house age 414 non-null float64
2 X3 distance to the nearest MRT station 414 non-null float64
3 X4 number of convenience stores 414 non-null int64
4 X5 latitude 414 non-null float64
5 X6 longitude 414 non-null float64
6 Y house price of unit area 414 non-null float64
dtypes: float64(6), int64(1)
memory usage: 22.8 KB
None
Data splitting to training and testing model
data_train, data_test = train_test_split(real_estate, train_size=0.70, test_size = 0.30, random_state=100)
print(data_train.shape, data_test.shape)
(289, 7) (125, 7)
Split data for input, output and standardize input data
y_train = data_train.pop('Y house price of unit area')
x_train = data_train
x_train = (x_train - x_train.mean())/(x_train.max()-x_train.min())
y_test = data_test.pop('Y house price of unit area')
x_test = data_test
x_test = (x_test - x_test.mean())/(x_test.max()-x_test.min())
Initialize weights and bias
input_size = x_train.shape[1]
w = np.zeros((input_size,))
b = 0
Model prediction: forward pass
def forward(x):
return np.dot(x, w) + b
Loss function
def loss(y, y_predicted):
square = np.square(y_predicted.T-y.T)
return square.mean()*(1/2)
Gradient Calculation
def gradient_weight(x,y,y_predicted):
dw = (1/input_size)*np.dot(x.T, y_predicted-y)
return dw
def gradient_bias(x,y,y_predicted):
db = (1/input_size)*np.sum(y_predicted-y)
return db
Training Model
learning_rate = 0.01
n_iters = 100
prepare list for future plot
plt_x = []
plt_y1 = []
plt_y2 = []
for epoch in range(n_iters):
#prediction = forward pass
y_pred = forward(x_train)
#loss
l = loss(y_train, y_pred)
# print(l)
# print(l.dtype)
#gradients
dw = gradient_weight(x_train, y_train, y_pred)
db = gradient_bias(x_train, y_train, y_pred)
#update wrights
w -= learning_rate * dw
b -= learning_rate * db
# if epoch % 1 ==0:
# print(f'epoch {epoch+1}: w={w}, loss = {l:.4f}')
##prepare plot data
plt_x.append(epoch)
plt_y1.append(l)
y_test_p = forward(x_test)
l_p = loss(y_test, y_test_p)
plt_y2.append(l_p)
### Result and plot analysis
Loss curve, learning and prediction result with respect on epochs
plt.plot(plt_x, plt_y1, 'b')
plt.plot(plt_x, plt_y2, 'r')
plt.xlabel("number of epochs")
plt.ylabel("mean square error")
plt.show()
<img src=”https://raw.githubusercontent.com/hadleyhzy34/pytorch/master/resources/loss_curve.png” alt=”drawing” width=50% height=50%/>
Final result and evaluation by using r2 score r2 = 1-(np.sum((y_predicted-y)2)/np.sum((y-y.mean())2))
y_test_pred = forward(x_test)
result = r2_score(y_true=y_test,y_pred=y_test_pred)
print(result)
0.6750734862366894
Final plot to check distance between predicted value and observation with repect to each feature
x_labels = ['Transaction Date','House Age','Distance to the nearest MRT Station','Number of convenience Stores','latitude','longtitude']
y_label = 'House Price'
for i in range(6):
plt.subplot(2,3,i+1)
plt.plot(data_test.iloc[:,i], y_test, 'ro')
plt.plot(data_test.iloc[:,i], y_test_pred, 'bo')
plt.plot((data_test.iloc[:,i],data_test.iloc[:,i]), (y_test,y_test_pred), c='grey')
plt.xlabel(x_labels[i])
plt.ylabel(y_label)
plt.rcParams["figure.figsize"] = (20,5)
plt.show()
<img src=”https://raw.githubusercontent.com/hadleyhzy34/pytorch/master/resources/linear_distance.png” alt=”drawing” width=100% height=100%/>