import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
# Load the digits data set
digits = load_digits()  # load once instead of calling load_digits() twice
X, y = digits.data, digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Show a random image
r_idx = np.random.randint(X_train.shape[0])
plt.figure(dpi=50)
plt.imshow(np.reshape(X_train[r_idx], (8,8)), cmap='gray')
plt.xlabel('Digit '+str(y_train[r_idx]), fontsize=24)
plt.xticks([],[])
plt.yticks([],[])
plt.show()
from sklearn.neighbors import KNeighborsClassifier
k = 5
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
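# Quick sanity check (an addition, not in the original walkthrough):
# knn.score reports mean accuracy on the held-out split.
print('Test accuracy: '+str(knn.score(X_test, y_test)))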
# Select a random test image
r_idx = np.random.randint(X_test.shape[0])
test_p = X_test[r_idx][None, :]  # keep a 2-D shape of (1, 64), as sklearn expects
y_pred = knn.predict(test_p)
dist, idx = knn.kneighbors(test_p)
print('Test Digit and Predicted Label:')
plt.figure(dpi=50)
plt.imshow(np.reshape(test_p, (8,8)), cmap='gray')
plt.xlabel('Predicted Digit: '+str(y_pred[0]), fontsize=24)
plt.title('Ground-Truth Label: '+str(y_test[r_idx]), fontsize=24)
plt.xticks([],[])
plt.yticks([],[])
plt.show()
print('\nThe '+str(k)+' nearest neighbours:')
for i in range(k):
    plt.figure(dpi=50)
    plt.imshow(np.reshape(X_train[idx[0, i], :], (8, 8)), cmap='gray')
    plt.title('Neighbour '+str(i+1)+':', fontsize=28)
    plt.xlabel('Digit '+str(y_train[idx[0, i]]), fontsize=24)
    plt.xticks([], [])
    plt.yticks([], [])
    plt.show()
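# Illustrative extension (the k values below are assumptions, not from the
# original): sweep a few neighbourhood sizes and compare held-out accuracy.
for k_try in [1, 3, 5, 9, 15]:
    acc = KNeighborsClassifier(n_neighbors=k_try).fit(X_train, y_train).score(X_test, y_test)
    print('k = '+str(k_try)+': accuracy = '+str(acc))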
from sklearn.datasets import load_sample_images
img = load_sample_images().images[1]
plt.figure(dpi=120)
plt.imshow(img)
plt.xticks([], [])
plt.yticks([], [])
plt.show()
# Flatten the image into an (n_pixels, 3) array of RGB values, one row per pixel
X = np.reshape(img, (img.shape[0]*img.shape[1], img.shape[2]))
from sklearn.cluster import KMeans
for k in [2, 3, 5, 10]:
    km = KMeans(n_clusters=k, max_iter=60, n_init=3).fit(X)
    # Replace every pixel with its cluster centre, then restore the image shape
    cluster_img = np.reshape(km.cluster_centers_[km.labels_], img.shape)
    cluster_img = np.array(cluster_img, dtype=np.uint8)  # imshow expects uint8 for RGB
    plt.figure(dpi=120)
    plt.imshow(cluster_img)
    plt.xlabel('k = '+str(k), fontsize=24)
    plt.xticks([], [])
    plt.yticks([], [])
    plt.show()
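# A minimal sketch of the usual elbow check (added here; the k grid is
# illustrative): KMeans exposes inertia_, the within-cluster sum of squares,
# which we can plot against k to judge how many colour clusters suffice.
k_values = [2, 3, 5, 10]
inertias = [KMeans(n_clusters=k, max_iter=60, n_init=3).fit(X).inertia_ for k in k_values]
plt.figure(dpi=50)
plt.plot(k_values, inertias, marker='o')
plt.xlabel('k', fontsize=24)
plt.ylabel('Inertia', fontsize=24)
plt.show()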
%matplotlib notebook
X = np.random.multivariate_normal(mean=[0,0,0], cov=[[2,1,1], [1,2,1],[1,1,2]], size=300)
from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection (needed on older Matplotlib)
fig = plt.figure(dpi=50)
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:,0], X[:,1], X[:,2])
plt.show()
from sklearn.decomposition import PCA
X_proj = PCA(n_components=2).fit_transform(X)
%matplotlib inline
plt.figure()
plt.scatter(X_proj[:,0], X_proj[:,1])
plt.show()
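# Optional check (an addition, not part of the original): the fitted PCA object
# exposes explained_variance_ratio_, showing how much variance the two kept
# components capture for this synthetic cloud.
pca = PCA(n_components=2).fit(X)
print('Explained variance ratio:', pca.explained_variance_ratio_)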
# 1. Mean-center the data
X_centered = X - np.mean(X, axis=0)
# 2. Form the Covariance Matrix of the data
cov_X = np.dot(X_centered.T, X_centered) / (X_centered.shape[0] - 1)
# 3. Find the Eigen-values and Eigen-vectors of the Covariance Matrix
eigvals, eigvecs = np.linalg.eigh(cov_X)
# 4. Project the data onto the Eigen-vectors corresponding to the top k Eigen-values
# (np.linalg.eigh returns Eigen-values in ascending order, so the top two are the last columns)
p_comps = eigvecs[:, [-1, -2]]
proj_X = np.dot(X_centered, p_comps)
plt.figure()
plt.scatter(proj_X[:,0], proj_X[:,1])
plt.show()
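# Sanity check (added as an assumption about intent): the manual projection
# should match sklearn's up to a per-component sign flip, since Eigen-vectors
# are only defined up to sign.
print('Manual PCA matches sklearn:', np.allclose(np.abs(proj_X), np.abs(X_proj)))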