Classifying handwritten digits using a Multi-Layer Perceptron
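In equation form, the network defined in the next cell flattens each 8×8 image into a 64-dimensional vector $x$, pushes it through one sigmoid hidden layer, and produces 10 sigmoid outputs that are trained with mean-squared error against the one-hot label vectors:

$$\hat{y} = \sigma\bigl(W_2\,\sigma(W_1 x + b_1) + b_2\bigr)$$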

In [1]:
import torch
import torch.optim as optim
import torch.nn as nn


class MLP(nn.Module):
    def __init__(self, input_dim, hidden_layer_size, n_classes):
        super(MLP, self).__init__()
        # Single hidden layer followed by the output layer
        self.fc1 = nn.Linear(input_dim, hidden_layer_size)
        self.fc2 = nn.Linear(hidden_layer_size, n_classes)

    def forward(self, X):
        # Sigmoid activations on both the hidden and the output layer
        z = torch.sigmoid(self.fc1(X))
        z = torch.sigmoid(self.fc2(z))
        return z


# Load in the digits data set from scikit-learn
from sklearn.datasets import load_digits
from sklearn.preprocessing import LabelBinarizer
import numpy as np
X = load_digits().data
X = X / X.max()
y = LabelBinarizer().fit_transform(load_digits().target) # Convert each label to one-hot vectors
# Split the data into training and test data sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True)
# Convert numpy arrays to torch tensors
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))


# Initialize network parameters
input_dim = X.shape[1]
hidden_layer_size = 200
n_classes = y.shape[1]


# Initialize network
mlp = MLP(input_dim=input_dim, hidden_layer_size=hidden_layer_size, n_classes=n_classes)


# Push everything on to the GPU (if available)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
X_train = X_train.to(device)
y_train = y_train.to(device)
mlp.to(device)


# Training parameters
max_epochs = 10000
learning_rate = 1

loss = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(mlp.parameters(), lr=learning_rate)

for n_epoch in range(max_epochs):
    optimizer.zero_grad()
    y_pred = mlp(X_train)            # forward pass on the full training set
    cost = loss(y_pred, y_train)     # mean-squared error against the one-hot targets
    cost.backward()                  # backpropagate
    optimizer.step()                 # SGD update

    if n_epoch % 1000 == 0 or (n_epoch+1) == max_epochs:
        print('epoch #'+str(n_epoch)+':', cost.item())
epoch #0: 0.27738213539123535
epoch #1000: 0.07225342094898224
epoch #2000: 0.036065150052309036
epoch #3000: 0.023496540263295174
epoch #4000: 0.017171038314700127
epoch #5000: 0.014018969610333443
epoch #6000: 0.012227539904415607
epoch #7000: 0.011036815121769905
epoch #8000: 0.01016143336892128
epoch #9000: 0.009475581347942352
epoch #9999: 0.008915649726986885

Find the training and test accuracy of the trained MLP

In [2]:
with torch.no_grad():
    # Predicted class = index of the largest network output
    z = mlp(X_train)
    y_train_pred = torch.argmax(z, 1)
    y_train_true = torch.argmax(y_train, 1)

    # Move the test set to the same device and repeat
    X_test = X_test.to(device)
    y_test = y_test.to(device)
    z = mlp(X_test)
    y_test_pred = torch.argmax(z, 1)
    y_test_true = torch.argmax(y_test, 1)


print('Training Accuracy =', (y_train_true == y_train_pred).sum().item() * 100 / y_train_true.shape[0], '%')
print('Test Accuracy =', (y_test_true == y_test_pred).sum().item() * 100 / y_test_true.shape[0], '%')
Training Accuracy = 96.89737470167064 %
Test Accuracy = 95.37037037037037 %
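If scikit-learn is preferred for the bookkeeping, the same figures can be reproduced with accuracy_score on the tensors computed above, e.g.:

from sklearn.metrics import accuracy_score

# Same accuracies via scikit-learn (move tensors back to the CPU first)
print('Training Accuracy =', 100 * accuracy_score(y_train_true.cpu().numpy(), y_train_pred.cpu().numpy()), '%')
print('Test Accuracy =', 100 * accuracy_score(y_test_true.cpu().numpy(), y_test_pred.cpu().numpy()), '%')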

For 10 randomly chosen test images, show the image, the predicted class label and the correct class label

In [3]:
import matplotlib.pyplot as plt

# Pick 10 random test images and predict their labels
r_idx = torch.randint(0, X_test.shape[0], (10,))
imgs = X_test[r_idx].cpu().numpy()
with torch.no_grad():
    z = mlp(X_test[r_idx].to(device))
    y_test_pred = torch.argmax(z, 1).cpu().numpy()
    y_test_true = torch.argmax(y_test[r_idx], 1).cpu().numpy()

# Show each 8x8 image with its predicted and true label
fig, ax = plt.subplots(2, 5, dpi=250)
for i in range(2):
    for j in range(5):
        ax[i,j].imshow(imgs[i*5+j].reshape(8,8), cmap='gray')
        ax[i,j].set_xlabel('Pred:'+str(y_test_pred[i*5+j])+'\nTrue:'+str(y_test_true[i*5+j]))
        ax[i,j].set_xticks([],[])
        ax[i,j].set_yticks([],[])
plt.show()

Train an MLP autoencoder for dimension reduction - project the handwritten digits to 2 dimensions and plot the reduced distribution
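The autoencoder in the next cells squeezes each 64-dimensional digit vector through two ReLU layers into a 2-dimensional linear bottleneck, and a mirror-image decoder tries to reconstruct the input from that code. Training minimises the mean-squared reconstruction error over all pixels,

$$\mathcal{L} = \frac{1}{ND}\sum_{i=1}^{N}\bigl\lVert x_i - \mathrm{dec}(\mathrm{enc}(x_i))\bigr\rVert^2, \qquad N = \text{samples},\ D = 64,$$

so the 2-D codes $z = \mathrm{enc}(x)$ have to retain enough information to redraw the digits, which is what makes them a useful low-dimensional representation for plotting.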




In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
In [5]:
class AE(nn.Module):
    def __init__(self, input_dim, h1_dim_size, h2_dim_size, h3_dim_size):
        super(AE, self).__init__()
        # Encoder: input -> h1 -> h2 -> h3 (the 2-D bottleneck)
        self.enc1 = nn.Linear(input_dim, h1_dim_size)
        self.enc2 = nn.Linear(h1_dim_size, h2_dim_size)
        self.enc3 = nn.Linear(h2_dim_size, h3_dim_size)
        # Decoder mirrors the encoder: h3 -> h2 -> h1 -> input
        self.dec1 = nn.Linear(h3_dim_size, h2_dim_size)
        self.dec2 = nn.Linear(h2_dim_size, h1_dim_size)
        self.dec3 = nn.Linear(h1_dim_size, input_dim)
        self.latent_dim = h3_dim_size

    def encoder(self, x):
        z = F.relu(self.enc1(x))
        z = F.relu(self.enc2(z))
        z = self.enc3(z)  # linear bottleneck, no activation
        return z

    def decoder(self, x):
        z = F.relu(self.dec1(x))
        z = F.relu(self.dec2(z))
        z = self.dec3(z)  # linear output layer
        return z

    def forward(self, x):
        # Return both the reconstruction and the 2-D latent code
        z = self.encoder(x)
        rz = self.decoder(z)
        return rz, z


# Load in the digits data set from scikit-learn
from sklearn.datasets import load_digits
from sklearn.preprocessing import LabelBinarizer
import numpy as np
X = load_digits().data
X = X / X.max()
y = LabelBinarizer().fit_transform(load_digits().target) # Convert each label to one-hot vectors
# Convert numpy arrays to torch tensors
X = torch.from_numpy(X.astype(np.float32))
y = torch.from_numpy(y.astype(np.float32))
    

# Initialize network parameters
input_dim = X.shape[1]
h1_dim_size = 100
h2_dim_size = 100
h3_dim_size = 2

autoencoder = AE(input_dim, h1_dim_size, h2_dim_size, h3_dim_size)

# Push everything on to the GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
X = X.to(device)
y = y.to(device)
autoencoder.to(device)
    
max_epochs = 10000
learning_rate = 5

loss = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(autoencoder.parameters(), lr=learning_rate)

for n_epoch in range(max_epochs):
    optimizer.zero_grad()
    rz, z = autoencoder(X)           # reconstruction and 2-D latent code
    cost = loss(rz, X)               # mean-squared reconstruction error
    cost.backward()
    optimizer.step()

    if n_epoch % 1000 == 0 or (n_epoch+1) == max_epochs:
        print('epoch #'+str(n_epoch)+':', cost.item())
        
epoch #0: 0.2427518665790558
epoch #1000: 0.03520079329609871
epoch #2000: 0.03090486116707325
epoch #3000: 0.027461538091301918
epoch #4000: 0.025334861129522324
epoch #5000: 0.024053173139691353
epoch #6000: 0.023674188181757927
epoch #7000: 0.023071520030498505
epoch #8000: 0.021803468465805054
epoch #9000: 0.02184228226542473
epoch #9999: 0.02089696004986763
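The next cell takes the 2-D code from the forward pass; equivalently, the encoder half of the network can be called on its own, e.g.:

with torch.no_grad():
    z = autoencoder.encoder(X).cpu().numpy()  # same 2-D codes as the second output of forward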
In [6]:
with torch.no_grad():
    rz, z = autoencoder(X)
    z = z.cpu().numpy()
    
# Scatter the 2-D codes, coloured by the true digit class
plt.figure(dpi=250)
plt.scatter(z[:,0], z[:,1], c=load_digits().target, marker='x')
plt.show()
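To tell the clusters apart, a colorbar mapping colour to digit class could be added before plt.show(), for instance:

plt.colorbar(label='digit class')  # optional: show which colour corresponds to which digit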



