Looking at the digits data set

<br>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
In [2]:
# Load the digits data set a single time and unpack it into the feature
# matrix X (one row per sample) and the label vector y.
X, y = load_digits(return_X_y=True)
print(X.shape)
print(y.shape)
print(y)
(1797, 64)
(1797,)
[0 1 2 ... 8 9 8]
In [3]:
# Largest and smallest pixel intensity found anywhere in the data set
print(np.max(X), np.min(X))
16.0 0.0
In [4]:
# Pick one sample, fold its 64 features back into an 8x8 grid and draw it
idx = 98
img = X[idx].reshape(8, 8)
fig, ax = plt.subplots(dpi=20)
ax.imshow(img, cmap='gray')
plt.show()

# Label that belongs to the sample shown above
print(y[idx])
3




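Principal Component Analysis:

  1. Mean-center the data: replace $X$ by $X - \bar{X}$, so that the new column means are $0$
  2. Calculate $\mathrm{Cov}(X) = \frac{1}{n} X^\top X$, where $X$ is the mean-centered data and $n$ is the number of samples
  3. Calculate the 2 eigenvectors $v_1, v_2$ corresponding to the top 2 eigenvalues of $\mathrm{Cov}(X)$ (the code below also keeps a third, $v_3$, for the 3-D plot)
  4. Calculate the projection $Z = X \, [v_1, v_2]$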


In [5]:
# Shift every feature so that its column mean becomes zero
Xm = X - X.mean(axis=0)

# Covariance matrix of the centered data, normalized by the number of samples
n_samples = Xm.shape[0]
C = ((1 / n_samples) * Xm.T) @ Xm

# eigh returns eigenvalues in ascending order, so the last three columns of
# eigvecs are the eigenvectors of the three largest eigenvalues
eigvals, eigvecs = np.linalg.eigh(C)
V = eigvecs[:, -3:]

# Coordinates of every sample along those three eigenvectors
Z = Xm @ V
In [6]:
# Raw feature rows of the first three samples labelled as the digit 3
print(X[y == 3][:3])
[[ 0.  0.  7. 15. 13.  1.  0.  0.  0.  8. 13.  6. 15.  4.  0.  0.  0.  2.
   1. 13. 13.  0.  0.  0.  0.  0.  2. 15. 11.  1.  0.  0.  0.  0.  0.  1.
  12. 12.  1.  0.  0.  0.  0.  0.  1. 10.  8.  0.  0.  0.  8.  4.  5. 14.
   9.  0.  0.  0.  7. 13. 13.  9.  0.  0.]
 [ 0.  2.  9. 15. 14.  9.  3.  0.  0.  4. 13.  8.  9. 16.  8.  0.  0.  0.
   0.  6. 14. 15.  3.  0.  0.  0.  0. 11. 14.  2.  0.  0.  0.  0.  0.  2.
  15. 11.  0.  0.  0.  0.  0.  0.  2. 15.  4.  0.  0.  1.  5.  6. 13. 16.
   6.  0.  0.  2. 12. 12. 13. 11.  0.  0.]
 [ 0.  1.  8. 12. 15. 14.  4.  0.  0.  3. 11.  8.  8. 12. 12.  0.  0.  0.
   0.  0.  2. 13.  7.  0.  0.  0.  0.  2. 15. 12.  1.  0.  0.  0.  0.  0.
  13.  5.  0.  0.  0.  0.  0.  0.  9. 13.  0.  0.  0.  0.  7.  8. 14. 15.
   0.  0.  0.  0. 14. 15. 11.  2.  0.  0.]]
In [7]:
# 2-D view of the projection. The columns of Z follow ascending eigenvalue
# order, so the last column is the first principal component and the
# second-to-last column is the second one.
plt.figure(dpi=200)
sc = plt.scatter(Z[:,-1], Z[:,-2], marker='x', c=y)
plt.xlabel('Principal component 1')
plt.ylabel('Principal component 2')
# The colour encodes the digit label; the colorbar makes that mapping readable
plt.colorbar(sc, label='Digit')
plt.show()
In [8]:
%matplotlib notebook

from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(dpi=150)
ax = fig.add_subplot(111, projection='3d')

for j in range(10):
    ax.scatter(Z[y==j,0], Z[y==j,1], Z[y==j,2], marker='x', label=str(j))

ax.legend(bbox_to_anchor=[1.3,1])
plt.show()