import torch
import torch.optim as optim
import torch.nn as nn
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_layer_size, n_classes):
        super(MLP, self).__init__()
        # Two fully connected layers: input -> hidden -> output
        self.fc1 = nn.Linear(input_dim, hidden_layer_size)
        self.fc2 = nn.Linear(hidden_layer_size, n_classes)

    def forward(self, X):
        # Sigmoid activations on both the hidden and the output layer
        z = torch.sigmoid(self.fc1(X))
        z = torch.sigmoid(self.fc2(z))
        return z
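# Quick shape check (an illustrative sketch, not part of the original listing): a random
# batch of 4 samples through an untrained MLP should produce a (batch, n_classes) output.
_demo_mlp = MLP(input_dim=64, hidden_layer_size=200, n_classes=10)
print(_demo_mlp(torch.rand(4, 64)).shape)  # torch.Size([4, 10])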
# Load in the digits data set from scikit-learn
from sklearn.datasets import load_digits
from sklearn.preprocessing import LabelBinarizer
import numpy as np
X = load_digits().data
X = X / X.max()
y = LabelBinarizer().fit_transform(load_digits().target) # Convert the labels to one-hot vectors
# Split the data into training and test data sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True)
# Convert numpy arrays to torch tensors
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))
# Initialize network parameters
input_dim = X.shape[1]
hidden_layer_size = 200
n_classes = y.shape[1]
# Initialize network
mlp = MLP(input_dim=input_dim, hidden_layer_size=hidden_layer_size, n_classes=n_classes)
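# Optional (not in the original listing): count the learnable parameters of this MLP.
# For the digits data, input_dim = 64 and n_classes = 10, so the total is
# (64*200 + 200) + (200*10 + 10) = 15,010 parameters.
print(sum(p.numel() for p in mlp.parameters()))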
# Push everything on to the GPU (if available)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
X_train = X_train.to(device)
y_train = y_train.to(device)
mlp.to(device)
# Training parameters
max_epochs = 10000
learning_rate = 1
loss = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(mlp.parameters(), lr=learning_rate)
for n_epoch in range(max_epochs):
    optimizer.zero_grad()
    z = mlp(X_train)              # forward pass
    cost = loss(z, y_train)       # mean squared error against the one-hot labels
    cost.backward()               # backpropagate
    optimizer.step()              # gradient descent step
    if n_epoch % 1000 == 0 or (n_epoch+1) == max_epochs:
        print('epoch #'+str(n_epoch)+':', cost.item())
epoch #0: 0.27738213539123535
epoch #1000: 0.07225342094898224
epoch #2000: 0.036065150052309036
epoch #3000: 0.023496540263295174
epoch #4000: 0.017171038314700127
epoch #5000: 0.014018969610333443
epoch #6000: 0.012227539904415607
epoch #7000: 0.011036815121769905
epoch #8000: 0.01016143336892128
epoch #9000: 0.009475581347942352
epoch #9999: 0.008915649726986885
with torch.no_grad():
    # Predicted class = index of the largest output; true class = index of the one-hot entry
    z = mlp(X_train)
    y_train_pred = torch.argmax(z, 1)
    y_train_true = torch.argmax(y_train, 1)
    X_test = X_test.to(device)
    y_test = y_test.to(device)
    z = mlp(X_test)
    y_test_pred = torch.argmax(z, 1)
    y_test_true = torch.argmax(y_test, 1)
print('Training Accuracy =', (y_train_true == y_train_pred).sum().item() * 100 / y_train_true.shape[0], '%')
print('Test Accuracy =', (y_test_true == y_test_pred).sum().item() * 100 / y_test_true.shape[0], '%')
Training Accuracy = 96.89737470167064 %
Test Accuracy = 95.37037037037037 %
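# Optional (a sketch, not part of the original listing): a per-class view of the test
# errors via a confusion matrix, reusing y_test_true and y_test_pred from above.
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test_true.cpu().numpy(), y_test_pred.cpu().numpy()))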
import matplotlib.pyplot as plt
r_idx = torch.randint(0, X_test.shape[0], (10,))
imgs = X_test[r_idx].cpu().numpy()
with torch.no_grad():
    z = mlp(X_test[r_idx].to(device))
    y_test_pred = torch.argmax(z, 1).cpu().numpy()
    y_test_true = torch.argmax(y_test[r_idx], 1).cpu().numpy()
fig, ax = plt.subplots(2, 5, dpi=250)
for i in range(2):
    for j in range(5):
        ax[i,j].imshow(imgs[i*5+j].reshape(8,8), cmap='gray')
        ax[i,j].set_xlabel('Pred:'+str(y_test_pred[i*5+j])+'\nTrue:'+str(y_test_true[i*5+j]))
        ax[i,j].set_xticks([])
        ax[i,j].set_yticks([])
plt.show()
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
class AE(nn.Module):
    def __init__(self, input_dim, h1_dim_size, h2_dim_size, h3_dim_size):
        super(AE, self).__init__()
        # Encoder: input -> h1 -> h2 -> latent code
        self.enc1 = nn.Linear(input_dim, h1_dim_size)
        self.enc2 = nn.Linear(h1_dim_size, h2_dim_size)
        self.enc3 = nn.Linear(h2_dim_size, h3_dim_size)
        # Decoder: latent code -> h2 -> h1 -> reconstruction
        self.dec1 = nn.Linear(h3_dim_size, h2_dim_size)
        self.dec2 = nn.Linear(h2_dim_size, h1_dim_size)
        self.dec3 = nn.Linear(h1_dim_size, input_dim)
        self.latent_dim = h3_dim_size

    def encoder(self, x):
        z = F.relu(self.enc1(x))
        z = F.relu(self.enc2(z))
        z = self.enc3(z)
        return z

    def decoder(self, x):
        z = F.relu(self.dec1(x))
        z = F.relu(self.dec2(z))
        z = self.dec3(z)
        return z

    def forward(self, x):
        # Encode to the latent code, then decode back to a reconstruction
        z = self.encoder(x)
        rz = self.decoder(z)
        return rz, z
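# Quick shape check (an illustrative sketch, not part of the original listing): encoding a
# random batch of 4 samples should give a (4, h3_dim_size) code, and the reconstruction
# should have the same size as the input.
_demo_ae = AE(input_dim=64, h1_dim_size=100, h2_dim_size=100, h3_dim_size=2)
_rz, _z = _demo_ae(torch.rand(4, 64))
print(_z.shape, _rz.shape)  # torch.Size([4, 2]) torch.Size([4, 64])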
# Load in the digits data set from scikit-learn
from sklearn.datasets import load_digits
from sklearn.preprocessing import LabelBinarizer
import numpy as np
X = load_digits().data
X = X / X.max()
y = LabelBinarizer().fit_transform(load_digits().target) # Convert the labels to one-hot vectors
# Convert numpy arrays to torch tensors
X = torch.from_numpy(X.astype(np.float32))
y = torch.from_numpy(y.astype(np.float32))
# Initialize network parameters
input_dim = X.shape[1]
h1_dim_size = 100
h2_dim_size = 100
h3_dim_size = 2
autoencoder = AE(input_dim, h1_dim_size, h2_dim_size, h3_dim_size)
# Push everything on to the GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
X = X.to(device)
y = y.to(device)
autoencoder.to(device)
max_epochs = 10000
learning_rate = 5
loss = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(autoencoder.parameters(), lr=learning_rate)
for n_epoch in range(max_epochs):
    optimizer.zero_grad()
    rz, z = autoencoder(X)        # reconstruction and latent code
    cost = loss(rz, X)            # mean squared reconstruction error
    cost.backward()
    optimizer.step()
    if n_epoch % 1000 == 0 or (n_epoch+1) == max_epochs:
        print('epoch #'+str(n_epoch)+':', cost.item())
epoch #0: 0.2427518665790558
epoch #1000: 0.03520079329609871
epoch #2000: 0.03090486116707325
epoch #3000: 0.027461538091301918
epoch #4000: 0.025334861129522324
epoch #5000: 0.024053173139691353
epoch #6000: 0.023674188181757927
epoch #7000: 0.023071520030498505
epoch #8000: 0.021803468465805054
epoch #9000: 0.02184228226542473
epoch #9999: 0.02089696004986763
with torch.no_grad():
    rz, z = autoencoder(X)
    z = z.cpu().numpy()
plt.figure(dpi=250)
plt.scatter(z[:,0], z[:,1], c=load_digits().target, marker='x')
plt.show()
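# Optional visual check (a sketch, not part of the original listing): compare a few input
# digits with their reconstructions from the 2-dimensional latent code.
with torch.no_grad():
    rz, _ = autoencoder(X[:5])
fig, ax = plt.subplots(2, 5, dpi=250)
for j in range(5):
    ax[0, j].imshow(X[j].cpu().numpy().reshape(8, 8), cmap='gray')
    ax[1, j].imshow(rz[j].cpu().numpy().reshape(8, 8), cmap='gray')
    for a in (ax[0, j], ax[1, j]):
        a.set_xticks([])
        a.set_yticks([])
ax[0, 0].set_ylabel('Input')
ax[1, 0].set_ylabel('Reconstruction')
plt.show()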