# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Read the data
trainx_df = pd.read_csv("trainx.csv", header=None)
trainy_df = pd.read_csv("trainy.csv", header=None)
testx_df = pd.read_csv("testx.csv", header=None)
testy_df = pd.read_csv("testy.csv", header=None)
# Convert the DataFrames to NumPy arrays
testx = testx_df.values
testy = testy_df.values
trainx = trainx_df.values
trainy = trainy_df.values
# Check the data shapes
print(trainx.shape)
print(trainy.shape)
print(testx.shape)
print(testy.shape)
(60000, 784)
(60000, 1)
(10000, 784)
(10000, 1)
# Range of the input pixel values (grayscale)
print((trainx[0].min(), trainx[0].max()))
(0, 255)
# Visualize the data
x = testx[0]
y = testy[0][0]
plt.imshow(x.reshape(28, 28), cmap=plt.cm.binary)
plt.title(f"Label: {y}")
plt.show()
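# Optional (a minimal sketch): preview several test images in one grid to
# eyeball the data before training.
fig, axes = plt.subplots(1, 5, figsize=(10, 2))
for i, ax in enumerate(axes):
    ax.imshow(testx[i].reshape(28, 28), cmap=plt.cm.binary)
    ax.set_title(f"Label: {testy[i][0]}")
    ax.axis("off")
plt.show()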
# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
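# Quick sanity check (sketch): sigmoid maps 0 to 0.5 and saturates at the tails.
print(sigmoid(np.array([-10.0, 0.0, 10.0])))  # approx [4.5e-05, 0.5, 0.99995]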
# Feedforward
def feedforward(x, w1, b1, w2, b2):
    a1 = x / 255  # normalize pixel values from [0, 255] to [0, 1]
    z2 = np.dot(w1, a1) + b1
    a2 = sigmoid(z2)
    z3 = np.dot(w2, a2) + b2
    a3 = sigmoid(z3)
    return a1, a2, a3
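# Shape check (sketch, using hypothetical all-zero parameters): one 784-pixel
# input should yield a (10, 1) vector of class activations.
_w1, _b1 = np.zeros((132, 784)), np.zeros((132, 1))
_w2, _b2 = np.zeros((10, 132)), np.zeros((10, 1))
_, _, _a3 = feedforward(trainx[0].reshape(-1, 1), _w1, _b1, _w2, _b2)
print(_a3.shape)  # (10, 1)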
# Convert digit d into a one-hot encoded column vector
# e.g., 7 -> [0,0,0,0,0,0,0,1,0,0]
def digit2vector(d):
    vector = np.zeros((10, 1))
    vector[d] = 1
    return vector
# Sigmoid derivative, expressed in terms of the activation itself:
# if a = sigmoid(z), then sigmoid'(z) = a * (1 - a)
def sigmoid_derivative(a):
    return a * (1 - a)
# Compute gradients for backpropagation (squared-error loss)
def backpropagation(y, a1, a2, a3, w2):
    # Output layer: delta = -(target - a3) * sigmoid'(z3)
    dE_b2 = -(digit2vector(y) - a3) * sigmoid_derivative(a3)
    dE_w2 = np.outer(dE_b2, a2)
    # Hidden layer: backpropagate the output delta through w2
    dE_b1 = np.dot(w2.T, dE_b2) * sigmoid_derivative(a2)
    dE_w1 = np.outer(dE_b1, a1)
    return dE_w1, dE_b1, dE_w2, dE_b2
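# Optional finite-difference check (a sketch with small hypothetical shapes,
# not the real network): one analytic gradient entry from backpropagation
# should match a numerical estimate of d(loss)/d(w1[0, 0]).
def _sample_loss(x, y, w1, b1, w2, b2):
    _, _, a3 = feedforward(x, w1, b1, w2, b2)
    return 0.5 * np.sum((digit2vector(y) - a3) ** 2)

rng = np.random.default_rng(0)
_x = rng.integers(0, 256, size=(784, 1)).astype(float)
_y = 3
_w1, _b1 = rng.standard_normal((5, 784)), np.zeros((5, 1))
_w2, _b2 = rng.standard_normal((10, 5)), np.zeros((10, 1))
_a1, _a2, _a3 = feedforward(_x, _w1, _b1, _w2, _b2)
_dw1, _, _, _ = backpropagation(_y, _a1, _a2, _a3, _w2)
eps = 1e-6
_w1p, _w1m = _w1.copy(), _w1.copy()
_w1p[0, 0] += eps
_w1m[0, 0] -= eps
numeric = (_sample_loss(_x, _y, _w1p, _b1, _w2, _b2)
           - _sample_loss(_x, _y, _w1m, _b1, _w2, _b2)) / (2 * eps)
print(np.isclose(_dw1[0, 0], numeric))  # expect True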
# Calculate the classification accuracy of the neural network
def accuracy(x, y, w1, w2, b1, b2):
    correct_predictions = 0
    for i in range(len(x)):
        _, _, a3 = feedforward(x[i].reshape(-1, 1), w1, b1, w2, b2)
        predicted_label = np.argmax(a3)
        correct_predictions += int(predicted_label == y[i][0])
    return correct_predictions / len(x)
# Initialize weights and biases
w1 = np.random.randn(132, 784)
b1 = np.zeros((132, 1))
w2 = np.random.randn(10, 132)
b2 = np.zeros((10, 1))
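# Sanity check (sketch): with untrained random weights, accuracy should sit
# near chance level (about 0.10 for 10 balanced classes).
print(round(accuracy(testx, testy, w1, w2, b1, b2), 3))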
# Tuning parameters
epochs = 30
lr = 0.1
lmbda = 0.01
batch_size = 30
indx = np.arange(len(trainx))
prev_acc = 0.0
# List to store training accuracy for each epoch
accuracy_values = []
# Training loop: stochastic gradient descent with mini-batches
for epoch in range(epochs):
    np.random.shuffle(indx)
    for batch_start in range(0, len(trainx), batch_size):
        batch_end = batch_start + batch_size
        batch_indices = indx[batch_start:batch_end]
        # Accumulate gradients over the mini-batch
        sum_w2 = np.zeros_like(w2)
        sum_w1 = np.zeros_like(w1)
        sum_b2 = np.zeros_like(b2)
        sum_b1 = np.zeros_like(b1)
        for i in batch_indices:
            x = trainx[i].reshape(-1, 1)
            y = trainy[i]
            a1, a2, a3 = feedforward(x, w1, b1, w2, b2)
            dE_w1, dE_b1, dE_w2, dE_b2 = backpropagation(y, a1, a2, a3, w2)
            sum_w1 += dE_w1
            sum_b1 += dE_b1
            sum_w2 += dE_w2
            sum_b2 += dE_b2
        # Update weights (with L2 regularization) and biases, averaged over the batch
        w1 -= lr * (sum_w1 + lmbda * w1) / batch_size
        b1 -= lr * sum_b1 / batch_size
        w2 -= lr * (sum_w2 + lmbda * w2) / batch_size
        b2 -= lr * sum_b2 / batch_size
    new_acc = accuracy(trainx, trainy, w1, w2, b1, b2)
    accuracy_values.append(new_acc)
    print(f"Epoch {epoch + 1}: Accuracy = {round(new_acc, 3)}")
    # Early stopping: halt once training accuracy plateaus
    if epoch > 0 and abs(new_acc - prev_acc) < 0.001:
        print("Converged. The neural network stopped learning.")
        break
    prev_acc = new_acc
# Print final accuracy
final_acc = accuracy(trainx, trainy, w1, w2, b1, b2)
print(f"Final Accuracy: {round(final_acc, 4)}")
Epoch 1: Accuracy = 0.273
Epoch 2: Accuracy = 0.38
Epoch 3: Accuracy = 0.489
Epoch 4: Accuracy = 0.522
Epoch 5: Accuracy = 0.537
Epoch 6: Accuracy = 0.545
Epoch 7: Accuracy = 0.552
Epoch 8: Accuracy = 0.557
Epoch 9: Accuracy = 0.562
Epoch 10: Accuracy = 0.567
Epoch 11: Accuracy = 0.599
Epoch 12: Accuracy = 0.68
Epoch 13: Accuracy = 0.703
Epoch 14: Accuracy = 0.715
Epoch 15: Accuracy = 0.842
Epoch 16: Accuracy = 0.9
Epoch 17: Accuracy = 0.909
Epoch 18: Accuracy = 0.913
Epoch 19: Accuracy = 0.915
Epoch 20: Accuracy = 0.918
Epoch 21: Accuracy = 0.92
Epoch 22: Accuracy = 0.922
Epoch 23: Accuracy = 0.923
Epoch 24: Accuracy = 0.925
Epoch 25: Accuracy = 0.926
Converged. The neural network stopped learning.
Final Accuracy: 0.9259
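# Evaluate generalization on the held-out test set (a sketch; the accuracies
# printed above are measured on the training data only).
test_acc = accuracy(testx, testy, w1, w2, b1, b2)
print(f"Test Accuracy: {round(test_acc, 4)}")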
# Plotting
plt.figure(figsize=(12, 6))
plt.plot(range(1, len(accuracy_values) + 1), accuracy_values, marker='o')
plt.title('Training Accuracy over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.tight_layout()
plt.show()
# Pick a random test image and visualize the model's prediction
import random
idx = random.randint(0, len(testx) - 1)
x = testx[idx].reshape(-1, 1)
y_true = testy[idx][0]
_, _, a3 = feedforward(x, w1, b1, w2, b2)
predicted_label = np.argmax(a3)
# Display the image
plt.imshow(x.reshape(28, 28), cmap=plt.cm.binary)
plt.title(f"True Label: {y_true}, Predicted Label: {predicted_label}")
plt.show()