Classifying handwritten digits using a Multi-Layer Perceptron
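In equation form, the network defined in the next cell flattens each 8×8 image into a 64-dimensional vector $x$, pushes it through one sigmoid hidden layer, and produces 10 sigmoid outputs that are trained with mean-squared error against the one-hot label vectors:

$$\hat{y} = \sigma\bigl(W_2\,\sigma(W_1 x + b_1) + b_2\bigr)$$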

In [1]:
import torch
import torch.optim as optim
import torch.nn as nn


class MLP(nn.Module):
    def __init__(self, input_dim, hidden_layer_size, n_classes):
        super(MLP, self).__init__()
        # Single hidden layer followed by the output layer
        self.fc1 = nn.Linear(input_dim, hidden_layer_size)
        self.fc2 = nn.Linear(hidden_layer_size, n_classes)

    def forward(self, X):
        # Sigmoid activations on both the hidden and the output layer
        z = torch.sigmoid(self.fc1(X))
        z = torch.sigmoid(self.fc2(z))
        return z


# Load in the digits data set from scikit-learn
from sklearn.datasets import load_digits
from sklearn.preprocessing import LabelBinarizer
import numpy as np
X = load_digits().data
X = X / X.max()
y = LabelBinarizer().fit_transform(load_digits().target) # Convert each label to one-hot vectors
# Split the data into training and test data sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True)
# Convert numpy arrays to torch tensors
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))


# Initialize network parameters
input_dim = X.shape[1]
hidden_layer_size = 200
n_classes = y.shape[1]


# Initialize network
mlp = MLP(input_dim=input_dim, hidden_layer_size=hidden_layer_size, n_classes=n_classes)


# Push everything on to the GPU (if available)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
X_train = X_train.to(device)
y_train = y_train.to(device)
mlp.to(device)


# Training parameters
max_epochs = 10000
learning_rate = 1

loss = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(mlp.parameters(), lr=learning_rate)

for n_epoch in range(max_epochs):
    optimizer.zero_grad()
    y_pred = mlp(X_train)            # forward pass on the full training set
    cost = loss(y_pred, y_train)     # mean-squared error against the one-hot targets
    cost.backward()                  # backpropagate
    optimizer.step()                 # SGD update

    if n_epoch % 1000 == 0 or (n_epoch+1) == max_epochs:
        print('epoch #'+str(n_epoch)+':', cost.item())
epoch #0: 0.27738213539123535
epoch #1000: 0.07225342094898224
epoch #2000: 0.036065150052309036
epoch #3000: 0.023496540263295174
epoch #4000: 0.017171038314700127
epoch #5000: 0.014018969610333443
epoch #6000: 0.012227539904415607
epoch #7000: 0.011036815121769905
epoch #8000: 0.01016143336892128
epoch #9000: 0.009475581347942352
epoch #9999: 0.008915649726986885

Find the training and test accuracy of the trained MLP

In [2]:
with torch.no_grad():
    # Predicted class = index of the largest network output
    z = mlp(X_train)
    y_train_pred = torch.argmax(z, 1)
    y_train_true = torch.argmax(y_train, 1)

    # Move the test set to the same device and repeat
    X_test = X_test.to(device)
    y_test = y_test.to(device)
    z = mlp(X_test)
    y_test_pred = torch.argmax(z, 1)
    y_test_true = torch.argmax(y_test, 1)


print('Training Accuracy =', (y_train_true == y_train_pred).sum().item() * 100 / y_train_true.shape[0], '%')
print('Test Accuracy =', (y_test_true == y_test_pred).sum().item() * 100 / y_test_true.shape[0], '%')
Training Accuracy = 96.89737470167064 %
Test Accuracy = 95.37037037037037 %
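If scikit-learn is preferred for the bookkeeping, the same figures can be reproduced with accuracy_score on the tensors computed above, e.g.:

from sklearn.metrics import accuracy_score

# Same accuracies via scikit-learn (move tensors back to the CPU first)
print('Training Accuracy =', 100 * accuracy_score(y_train_true.cpu().numpy(), y_train_pred.cpu().numpy()), '%')
print('Test Accuracy =', 100 * accuracy_score(y_test_true.cpu().numpy(), y_test_pred.cpu().numpy()), '%')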

For 10 randomly chosen test images, show the image, the predicted class label and the correct class label

In [3]:
import matplotlib.pyplot as plt

# Pick 10 random test images and predict their labels
r_idx = torch.randint(0, X_test.shape[0], (10,))
imgs = X_test[r_idx].cpu().numpy()
with torch.no_grad():
    z = mlp(X_test[r_idx].to(device))
    y_test_pred = torch.argmax(z, 1).cpu().numpy()
    y_test_true = torch.argmax(y_test[r_idx], 1).cpu().numpy()

# Show each 8x8 image with its predicted and true label
fig, ax = plt.subplots(2, 5, dpi=250)
for i in range(2):
    for j in range(5):
        ax[i,j].imshow(imgs[i*5+j].reshape(8,8), cmap='gray')
        ax[i,j].set_xlabel('Pred:'+str(y_test_pred[i*5+j])+'\nTrue:'+str(y_test_true[i*5+j]))
        ax[i,j].set_xticks([],[])
        ax[i,j].set_yticks([],[])
plt.show()

Train an MLP autoencoder for dimension reduction - project the handwritten digits to 2 dimensions and plot the reduced distribution
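The autoencoder in the next cells squeezes each 64-dimensional digit vector through two ReLU layers into a 2-dimensional linear bottleneck, and a mirror-image decoder tries to reconstruct the input from that code. Training minimises the mean-squared reconstruction error over all pixels,

$$\mathcal{L} = \frac{1}{ND}\sum_{i=1}^{N}\bigl\lVert x_i - \mathrm{dec}(\mathrm{enc}(x_i))\bigr\rVert^2, \qquad N = \text{samples},\ D = 64,$$

so the 2-D codes $z = \mathrm{enc}(x)$ have to retain enough information to redraw the digits, which is what makes them a useful low-dimensional representation for plotting.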




In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
In [5]:
class AE(nn.Module):
    def __init__(self, input_dim, h1_dim_size, h2_dim_size, h3_dim_size):
        super(AE, self).__init__()
        # Encoder: input -> h1 -> h2 -> h3 (the 2-D bottleneck)
        self.enc1 = nn.Linear(input_dim, h1_dim_size)
        self.enc2 = nn.Linear(h1_dim_size, h2_dim_size)
        self.enc3 = nn.Linear(h2_dim_size, h3_dim_size)
        # Decoder mirrors the encoder: h3 -> h2 -> h1 -> input
        self.dec1 = nn.Linear(h3_dim_size, h2_dim_size)
        self.dec2 = nn.Linear(h2_dim_size, h1_dim_size)
        self.dec3 = nn.Linear(h1_dim_size, input_dim)
        self.latent_dim = h3_dim_size

    def encoder(self, x):
        z = F.relu(self.enc1(x))
        z = F.relu(self.enc2(z))
        z = self.enc3(z)  # linear bottleneck, no activation
        return z

    def decoder(self, x):
        z = F.relu(self.dec1(x))
        z = F.relu(self.dec2(z))
        z = self.dec3(z)  # linear output layer
        return z

    def forward(self, x):
        # Return both the reconstruction and the 2-D latent code
        z = self.encoder(x)
        rz = self.decoder(z)
        return rz, z


# Load in the digits data set from scikit-learn
from sklearn.datasets import load_digits
from sklearn.preprocessing import LabelBinarizer
import numpy as np
X = load_digits().data
X = X / X.max()
y = LabelBinarizer().fit_transform(load_digits().target) # Convert each label to one-hot vectors
# Convert numpy arrays to torch tensors
X = torch.from_numpy(X.astype(np.float32))
y = torch.from_numpy(y.astype(np.float32))
    

# Initialize network parameters
input_dim = X.shape[1]
h1_dim_size = 100
h2_dim_size = 100
h3_dim_size = 2

autoencoder = AE(input_dim, h1_dim_size, h2_dim_size, h3_dim_size)

# Push everything on to the GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
X = X.to(device)
y = y.to(device)
autoencoder.to(device)
    
max_epochs = 10000
learning_rate = 5

loss = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(autoencoder.parameters(), lr=learning_rate)

for n_epoch in range(max_epochs):
    optimizer.zero_grad()
    rz, z = autoencoder(X)           # reconstruction and 2-D latent code
    cost = loss(rz, X)               # mean-squared reconstruction error
    cost.backward()
    optimizer.step()

    if n_epoch % 1000 == 0 or (n_epoch+1) == max_epochs:
        print('epoch #'+str(n_epoch)+':', cost.item())
        
epoch #0: 0.2427518665790558
epoch #1000: 0.03520079329609871
epoch #2000: 0.03090486116707325
epoch #3000: 0.027461538091301918
epoch #4000: 0.025334861129522324
epoch #5000: 0.024053173139691353
epoch #6000: 0.023674188181757927
epoch #7000: 0.023071520030498505
epoch #8000: 0.021803468465805054
epoch #9000: 0.02184228226542473
epoch #9999: 0.02089696004986763
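The next cell takes the 2-D code from the forward pass; equivalently, the encoder half of the network can be called on its own, e.g.:

with torch.no_grad():
    z = autoencoder.encoder(X).cpu().numpy()  # same 2-D codes as the second output of forward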
In [6]:
with torch.no_grad():
    rz, z = autoencoder(X)
    z = z.cpu().numpy()
    
# Scatter the 2-D codes, coloured by the true digit class
plt.figure(dpi=250)
plt.scatter(z[:,0], z[:,1], c=load_digits().target, marker='x')
plt.show()
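To tell the clusters apart, a colorbar mapping colour to digit class could be added before plt.show(), for instance:

plt.colorbar(label='digit class')  # optional: show which colour corresponds to which digit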



