$\hat{y_i} = g(w_0+w_1x_{i1}+w_2x_{i2}+\dots+w_dx_{id})$
where $g(t) = \dfrac{1}{1+e^{-t}}$ is the sigmoid (logistic) function.
$L_{MSE} = \dfrac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y_i})^2$
Initialize $\mathbf{w}$
$\mathbf{w}_{t+1} = \mathbf{w}_{t} - \eta \nabla_{\mathbf{w}} L_{MSE} $
import numpy as np
# Sample the 1-D objective f(x) = 4 (x - 3)^2 on a uniform grid of 100 points
# spanning [-10, 20]; the curve is plotted in the next step.
x = np.linspace(start=-10, stop=20, num=100)
y = 4 * (x - 3) ** 2
import matplotlib.pyplot as plt
# Draw the sampled objective curve y against x on a fresh 120-dpi figure.
fig, ax = plt.subplots(dpi=120)
ax.plot(x, y)
plt.show()
# Gradient descent on f(x) = 4 (x - 3)^2, whose derivative is f'(x) = 8 (x - 3).
x = np.random.rand() * 20 - 10  # random starting point, uniform in [-10, 10)
print('Initial x:', x)
x0 = x  # remember the starting point so it can be compared with the result

eta = 0.1  # learning rate (step size)
max_iters = 1000  # hard cap on the number of updates
tol = 1e-12  # stop once a single update moves x by less than this
for i in range(max_iters):
    prev_x = x
    x = x - eta * 8 * (x - 3)  # step against the gradient 8 (x - 3)
    if np.abs(prev_x - x) < tol:
        print('break at:', i)
        break
print('Final x: ', x)
Initial x: 9.269400416260716 break at: 19 Final x: 3.0000000000000657
import matplotlib.pyplot as plt
# Redraw the objective f(x) = 4 (x - 3)^2 and mark where gradient descent
# started (x0) and where it ended (x).
fig, ax = plt.subplots(dpi=120)
xpts = np.linspace(-10, 20, 100)
ax.plot(xpts, 4 * (xpts - 3) ** 2)
ax.scatter(x, 4 * (x - 3) ** 2, c='r')    # final iterate, red
ax.scatter(x0, 4 * (x0 - 3) ** 2, c='y')  # initial point, yellow
plt.show()
# Build a toy two-class data set: 100 points drawn around (0, 0) followed by
# 100 points drawn around (10, 10), each with unit standard deviation.
cluster_a = np.random.normal(loc=[0, 0], scale=1, size=(100, 2))
cluster_b = np.random.normal(loc=[10, 10], scale=1, size=(100, 2))
X = np.vstack((cluster_a, cluster_b))

# Prepend a column of ones so that the first weight acts as the bias term.
n = X.shape[0]
bias_column = np.ones((n, 1))
X = np.hstack((bias_column, X))
print(X.shape)

# Labels: 0 for the first cluster, 1 for the second.
y = np.hstack((np.zeros(100), np.ones(100)))
# Scatter the two feature columns of X (column 0 is the constant bias column),
# coloured by the class label y.
fig, ax = plt.subplots(dpi=120)
ax.scatter(X[:, 1], X[:, 2], c=y, marker='x')
plt.show()
(200, 3)
$\hat{y_i} = g(w_0x_{i0}+w_1x_{i1}+w_2x_{i2}+\dots+w_dx_{id}) = g(\mathbf{w}^T\mathbf{x}_i)$, where $x_{i0} = 1$ for every sample.
$g(t) = \dfrac{1}{1+e^{-t}}$
$\frac{\partial}{\partial t} g(t) = \dfrac{(1+e^{-t})\times 0 - 1\times (-e^{-t})}{(1+e^{-t})^2} = \dfrac{e^{-t}}{(1+e^{-t})^2} $
$ = \dfrac{1}{(1+e^{-t})}.\dfrac{e^{-t}}{(1+e^{-t})} = \dfrac{1}{(1+e^{-t})}.\left(1 - \dfrac{1}{(1+e^{-t})}\right)$
$ = g(t).(1 - g(t))$
$L_{MSE} = \dfrac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y_i})^2$
$\nabla_{w_j} L_{MSE} = -\dfrac{2}{n} \sum_{i=1}^{n} \{(y_i - \hat{y_i}).g(\mathbf{w}^{T}\mathbf{x}_i).(1-g(\mathbf{w}^T \mathbf{x}_i)).x_{ij}\}$
$= -\dfrac{2}{n} \sum_{i=1}^{n} \{(y_i - \hat{y_i}).\hat{y_i}(1-\hat{y_i}).x_{ij}\}$
def g(t):
    """Return the sigmoid (logistic) function 1 / (1 + exp(-t)).

    Works element-wise on arrays and on plain scalars. The computation only
    ever exponentiates a non-positive number, so it cannot overflow for
    large-magnitude inputs (the naive form overflows in ``exp(-t)`` when
    ``t`` is very negative).

    Args:
        t: A scalar or array-like of real numbers.

    Returns:
        A NumPy float scalar for scalar input, otherwise an array with the
        same shape as ``t``, with every value in [0, 1].
    """
    t = np.asarray(t, dtype=float)
    decay = np.exp(-np.abs(t))  # always in (0, 1], never overflows
    # For t >= 0 this is the textbook form; for t < 0 it is the algebraically
    # equal form exp(t) / (1 + exp(t)).
    result = np.where(t >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Keep scalar-in / scalar-out behaviour instead of returning a 0-d array.
    return result[()] if result.ndim == 0 else result
# Fit the weights w by batch gradient descent on the MSE loss.
w = np.random.rand(3) * 2 - 1  # three weights, each uniform in [-1, 1)
# w[0] = -10  # disabled experiment: override the initial bias weight
w0 = np.array(w)  # independent copy of the starting weights
print('Initial w:', w0)

eta = 0.1  # learning rate (step size)
max_iters = 1000  # hard cap on the number of updates
tol = 1e-9  # stop once an update moves w by less than this (Euclidean norm)
for i in range(max_iters):
    prev_w = np.array(w)  # snapshot of the weights before this update
    est_y = g(X @ w)  # current prediction for every sample
    # dL/dw = -(2/n) * sum_i (y_i - yhat_i) * yhat_i * (1 - yhat_i) * x_i
    grad = -(2 / n) * (((y - est_y) * est_y * (1 - est_y)) @ X)
    w = w - eta * grad
    if np.linalg.norm(prev_w - w) < tol:
        print('break at:', i)
        break
print('Final w:', w)
Initial w: [-0.66223131 0.15781143 0.23639532] Final w: [-2.58184774 0.39762116 0.26054375]
# Show the points coloured by predicted class (threshold 0.5) together with
# the decision boundaries implied by the initial and the final weights.
est_y = g(X @ w)
fig, ax = plt.subplots(dpi=120)
ax.scatter(X[:, 1], X[:, 2], marker='x', c=(est_y >= 0.5))

# A boundary is the line w[0] + w[1]*x1 + w[2]*x2 = 0, which rearranges to
# x2 = -(w[0] + w[1]*x1) / w[2]; two x1 values are enough to draw it.
xlims1 = np.array([-2, 12])
xlims2_w0 = -(w0[0] + w0[1] * xlims1) / w0[2]
xlims2 = -(w[0] + w[1] * xlims1) / w[2]
ax.plot(xlims1, xlims2_w0, linestyle='--', c='y')  # initial weights, yellow
ax.plot(xlims1, xlims2, linestyle='--', c='r')     # final weights, red
plt.show()