# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Read the data
trainx_df = pd.read_csv("trainx.csv", header=None)
trainy_df = pd.read_csv("trainy.csv", header=None)
testx_df = pd.read_csv("testx.csv", header=None)
testy_df = pd.read_csv("testy.csv", header=None)
# Convert the DataFrames to NumPy arrays
testx = testx_df.values
testy = testy_df.values
trainx = trainx_df.values
trainy = trainy_df.values
# Check the data shapes
print(trainx.shape)
print(trainy.shape)
print(testx.shape)
print(testy.shape)
(60000, 784)
(60000, 1)
(10000, 784)
(10000, 1)
# Range of the input pixel values (grayscale)
print((trainx[0].min(), trainx[0].max()))
(0, 255)
# Visualize the data
x = testx[0]
y = testy[0][0]
plt.imshow(x.reshape(28, 28), cmap=plt.cm.binary)
plt.title(f"Label: {y}")
plt.show()
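# Optional (a minimal sketch): preview several test images in one grid to
# eyeball the data before training.
fig, axes = plt.subplots(1, 5, figsize=(10, 2))
for i, ax in enumerate(axes):
    ax.imshow(testx[i].reshape(28, 28), cmap=plt.cm.binary)
    ax.set_title(f"Label: {testy[i][0]}")
    ax.axis("off")
plt.show()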
# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
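# Quick sanity check (sketch): sigmoid maps 0 to 0.5 and saturates at the tails.
print(sigmoid(np.array([-10.0, 0.0, 10.0])))  # approx [4.5e-05, 0.5, 0.99995]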
# Feedforward
def feedforward(x, w1, b1, w2, b2):
    a1 = x / 255  # normalize pixel values from [0, 255] to [0, 1]
    z2 = np.dot(w1, a1) + b1
    a2 = sigmoid(z2)
    z3 = np.dot(w2, a2) + b2
    a3 = sigmoid(z3)
    return a1, a2, a3
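# Shape check (sketch, using hypothetical all-zero parameters): one 784-pixel
# input should yield a (10, 1) vector of class activations.
_w1, _b1 = np.zeros((132, 784)), np.zeros((132, 1))
_w2, _b2 = np.zeros((10, 132)), np.zeros((10, 1))
_, _, _a3 = feedforward(trainx[0].reshape(-1, 1), _w1, _b1, _w2, _b2)
print(_a3.shape)  # (10, 1)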
# Convert digit d into a one-hot encoded column vector
# e.g., 7 -> [0,0,0,0,0,0,0,1,0,0]
def digit2vector(d):
    vector = np.zeros((10, 1))
    vector[d] = 1
    return vector
# Sigmoid derivative, expressed in terms of the activation itself:
# if a = sigmoid(z), then sigmoid'(z) = a * (1 - a)
def sigmoid_derivative(a):
    return a * (1 - a)
# Compute gradients for backpropagation (squared-error loss)
def backpropagation(y, a1, a2, a3, w2):
    # Output layer: delta = -(target - a3) * sigmoid'(z3)
    dE_b2 = -(digit2vector(y) - a3) * sigmoid_derivative(a3)
    dE_w2 = np.outer(dE_b2, a2)
    # Hidden layer: backpropagate the output delta through w2
    dE_b1 = np.dot(w2.T, dE_b2) * sigmoid_derivative(a2)
    dE_w1 = np.outer(dE_b1, a1)
    return dE_w1, dE_b1, dE_w2, dE_b2
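# Optional finite-difference check (a sketch with small hypothetical shapes,
# not the real network): one analytic gradient entry from backpropagation
# should match a numerical estimate of d(loss)/d(w1[0, 0]).
def _sample_loss(x, y, w1, b1, w2, b2):
    _, _, a3 = feedforward(x, w1, b1, w2, b2)
    return 0.5 * np.sum((digit2vector(y) - a3) ** 2)

rng = np.random.default_rng(0)
_x = rng.integers(0, 256, size=(784, 1)).astype(float)
_y = 3
_w1, _b1 = rng.standard_normal((5, 784)), np.zeros((5, 1))
_w2, _b2 = rng.standard_normal((10, 5)), np.zeros((10, 1))
_a1, _a2, _a3 = feedforward(_x, _w1, _b1, _w2, _b2)
_dw1, _, _, _ = backpropagation(_y, _a1, _a2, _a3, _w2)
eps = 1e-6
_w1p, _w1m = _w1.copy(), _w1.copy()
_w1p[0, 0] += eps
_w1m[0, 0] -= eps
numeric = (_sample_loss(_x, _y, _w1p, _b1, _w2, _b2)
           - _sample_loss(_x, _y, _w1m, _b1, _w2, _b2)) / (2 * eps)
print(np.isclose(_dw1[0, 0], numeric))  # expect True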
# Calculate the classification accuracy of the neural network
def accuracy(x, y, w1, w2, b1, b2):
    correct_predictions = 0
    for i in range(len(x)):
        _, _, a3 = feedforward(x[i].reshape(-1, 1), w1, b1, w2, b2)
        predicted_label = np.argmax(a3)
        correct_predictions += int(predicted_label == y[i][0])
    return correct_predictions / len(x)
# Initialize weights and biases
w1 = np.random.randn(132, 784)
b1 = np.zeros((132, 1))
w2 = np.random.randn(10, 132)
b2 = np.zeros((10, 1))
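# Sanity check (sketch): with untrained random weights, accuracy should sit
# near chance level (about 0.10 for 10 balanced classes).
print(round(accuracy(testx, testy, w1, w2, b1, b2), 3))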
# Tuning parameters
epochs = 30
lr = 0.1
lmbda = 0.01
batch_size = 30
indx = np.arange(len(trainx))
prev_acc = 0.0
# List to store training accuracy for each epoch
accuracy_values = []
# Training loop: stochastic gradient descent with mini-batches
for epoch in range(epochs):
    np.random.shuffle(indx)
    for batch_start in range(0, len(trainx), batch_size):
        batch_end = batch_start + batch_size
        batch_indices = indx[batch_start:batch_end]
        # Accumulate gradients over the mini-batch
        sum_w2 = np.zeros_like(w2)
        sum_w1 = np.zeros_like(w1)
        sum_b2 = np.zeros_like(b2)
        sum_b1 = np.zeros_like(b1)
        for i in batch_indices:
            x = trainx[i].reshape(-1, 1)
            y = trainy[i]
            a1, a2, a3 = feedforward(x, w1, b1, w2, b2)
            dE_w1, dE_b1, dE_w2, dE_b2 = backpropagation(y, a1, a2, a3, w2)
            sum_w1 += dE_w1
            sum_b1 += dE_b1
            sum_w2 += dE_w2
            sum_b2 += dE_b2
        # Update weights (with L2 regularization) and biases, averaged over the batch
        w1 -= lr * (sum_w1 + lmbda * w1) / batch_size
        b1 -= lr * sum_b1 / batch_size
        w2 -= lr * (sum_w2 + lmbda * w2) / batch_size
        b2 -= lr * sum_b2 / batch_size
    new_acc = accuracy(trainx, trainy, w1, w2, b1, b2)
    accuracy_values.append(new_acc)
    print(f"Epoch {epoch + 1}: Accuracy = {round(new_acc, 3)}")
    # Early stopping: halt once training accuracy plateaus
    if epoch > 0 and abs(new_acc - prev_acc) < 0.001:
        print("Converged. The neural network stopped learning.")
        break
    prev_acc = new_acc
# Print final accuracy
final_acc = accuracy(trainx, trainy, w1, w2, b1, b2)
print(f"Final Accuracy: {round(final_acc, 4)}")
Epoch 1: Accuracy = 0.273
Epoch 2: Accuracy = 0.38
Epoch 3: Accuracy = 0.489
Epoch 4: Accuracy = 0.522
Epoch 5: Accuracy = 0.537
Epoch 6: Accuracy = 0.545
Epoch 7: Accuracy = 0.552
Epoch 8: Accuracy = 0.557
Epoch 9: Accuracy = 0.562
Epoch 10: Accuracy = 0.567
Epoch 11: Accuracy = 0.599
Epoch 12: Accuracy = 0.68
Epoch 13: Accuracy = 0.703
Epoch 14: Accuracy = 0.715
Epoch 15: Accuracy = 0.842
Epoch 16: Accuracy = 0.9
Epoch 17: Accuracy = 0.909
Epoch 18: Accuracy = 0.913
Epoch 19: Accuracy = 0.915
Epoch 20: Accuracy = 0.918
Epoch 21: Accuracy = 0.92
Epoch 22: Accuracy = 0.922
Epoch 23: Accuracy = 0.923
Epoch 24: Accuracy = 0.925
Epoch 25: Accuracy = 0.926
Converged. The neural network stopped learning.
Final Accuracy: 0.9259
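# Evaluate generalization on the held-out test set (a sketch; the accuracies
# printed above are measured on the training data only).
test_acc = accuracy(testx, testy, w1, w2, b1, b2)
print(f"Test Accuracy: {round(test_acc, 4)}")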
# Plotting
plt.figure(figsize=(12, 6))
plt.plot(range(1, len(accuracy_values) + 1), accuracy_values, marker='o')
plt.title('Training Accuracy over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.tight_layout()
plt.show()
# Pick a random test image and visualize the model's prediction
import random
idx = random.randint(0, len(testx) - 1)
x = testx[idx].reshape(-1, 1)
y_true = testy[idx][0]
_, _, a3 = feedforward(x, w1, b1, w2, b2)
predicted_label = np.argmax(a3)
# Display the image
plt.imshow(x.reshape(28, 28), cmap=plt.cm.binary)
plt.title(f"True Label: {y_true}, Predicted Label: {predicted_label}")
plt.show()