diff --git a/.idea/MNIST.iml b/.idea/MNIST.iml
index 85c7612..3772c7c 100644
--- a/.idea/MNIST.iml
+++ b/.idea/MNIST.iml
@@ -4,7 +4,7 @@
-
+
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..cc52ec8
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 207156a..c38566b 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,5 +3,5 @@
-
+
\ No newline at end of file
diff --git a/GenericTorchMlpNetwork.py b/GenericTorchMlpNetwork.py
new file mode 100644
index 0000000..6145330
--- /dev/null
+++ b/GenericTorchMlpNetwork.py
@@ -0,0 +1,66 @@
+import torch
+import torch.nn as nn
+from typing import List, Generator
+from custom_types import TrainingBatch, EvaluationResults, DEVICE
+from tqdm import tqdm
+
+
+class GenericTorchMlpClassifier(nn.Module):
+    """Multi-layer perceptron classifier with sigmoid hidden activations.
+
+    Hidden layers apply sigmoid; the final layer emits raw logits, which is
+    what nn.CrossEntropyLoss expects (it applies log-softmax internally).
+    """
+
+    def __init__(self, dims_per_layer: List[int], learning_rate: float):
+        super(GenericTorchMlpClassifier, self).__init__()
+        # nn.ModuleList (not a plain Python list) registers the layers as
+        # submodules, so .to(DEVICE), .parameters() and state_dict() see them.
+        self.layers = nn.ModuleList(
+            nn.Linear(dims_per_layer[i - 1], layer_dims)
+            for i, layer_dims in enumerate(dims_per_layer[1:], 1)
+        )
+        self.loss_fn = nn.CrossEntropyLoss()
+        # Layers are registered submodules, so self.parameters() covers them.
+        self.optimiser = torch.optim.Adam(params=self.parameters(), lr=learning_rate)
+        self.to(torch.device(DEVICE))
+
+    def forward(self, input_batch: List[List[float]]) -> torch.Tensor:
+        # Build the input tensor directly on the model's device so CUDA works.
+        x = torch.tensor(input_batch, dtype=torch.float, device=torch.device(DEVICE))
+        for layer in self.layers[:-1]:
+            x = torch.sigmoid(layer(x))
+        return self.layers[-1](x)
+
+    def training_epoch(self, training_data: Generator[TrainingBatch, None, None]) -> None:
+        self.train(True)
+        for x, y in tqdm(training_data):
+            prediction_probs = self.forward(x)
+            targets = torch.tensor(y, dtype=torch.long, device=torch.device(DEVICE))
+            self.optimiser.zero_grad()
+            self.loss_fn(prediction_probs, targets).backward()
+            self.optimiser.step()
+
+    def evaluate(self, evaluation_data: Generator[TrainingBatch, None, None]) -> EvaluationResults:
+        self.train(False)
+        accumulated_loss = 0.0
+        total = 0
+        total_correctly_classified = 0
+        # no_grad stops autograd graph construction during evaluation, and
+        # .item() keeps the running totals as plain Python numbers instead of
+        # tensors that would retain graph/device memory across the loop.
+        with torch.no_grad():
+            for x, y in tqdm(evaluation_data):
+                prediction_probs = self.forward(x)
+                targets = torch.tensor(y, dtype=torch.long, device=torch.device(DEVICE))
+                predictions = torch.argmax(prediction_probs, dim=1)
+                total += len(targets)
+                total_correctly_classified += (predictions == targets).sum().item()
+                accumulated_loss += self.loss_fn(prediction_probs, targets).item()
+        return EvaluationResults(
+            total=total,
+            correct=total_correctly_classified,
+            accumulated_loss=accumulated_loss
+        )
+
+
diff --git a/multiclass_perceptron.py b/MlpNetwork.py
similarity index 85%
rename from multiclass_perceptron.py
rename to MlpNetwork.py
index 15f8200..df901a1 100644
--- a/multiclass_perceptron.py
+++ b/MlpNetwork.py
@@ -1,5 +1,5 @@
from typing import Tuple, List, Callable, Generator
-from import_data import test_x_y, train_x_y, IMAGE_SIZE, print_img_to_console, show_picture
+from import_data import get_test_data_generator, get_training_data_generator, IMAGE_SIZE, print_img_to_console
import numpy as np
@@ -44,15 +44,15 @@ class MulticlassPerceptron:
        return self.classifiers[classifier_index].get_normalised_weight_array()
-def train_and_test_multiclass_perceptron(iterations: int = 5, training_inputs: int = 5000, test_inputs: int = 1000):
+def train_and_test_multiclass_perceptron(iterations: int = 5, training_inputs: int = -1, test_inputs: int = -1):
    print("Loading data")
-    training_data_gen = train_x_y(training_inputs)
+    training_data_gen = get_training_data_generator(num=training_inputs)
    print("Begin training model!")
    model = MulticlassPerceptron(IMAGE_SIZE, 10)
    model.train(training_data_gen, iterations)
    print("Model successfully trained.")
    print("Testing model...")
-    test_data = list(test_x_y(test_inputs)())
+    test_data = list(get_test_data_generator(num=test_inputs)())
    n_correct = sum(model.prediction(x) == y for x, y in test_data)
    accuracy = n_correct / len(test_data)
    print(f"Accuracy: {accuracy} ({n_correct} correctly classified out of {len(test_data)} total test inputs.)")
@@ -60,8 +60,8 @@ def train_and_test_multiclass_perceptron(iterations: int = 5, training_inputs: i
        print_img_to_console(model.view_for_classifier(i))
-def get_trained_digit_model(iterations: int = 5, training_inputs: int = 5000, test_inputs: int = 1000):
-    training_data_gen = train_x_y(training_inputs)
+def make_trained_digit_model(iterations: int = 5, training_inputs: int = 5000, test_inputs: int = 1000):
+    training_data_gen = get_training_data_generator(num=training_inputs)
    model = MulticlassPerceptron(IMAGE_SIZE, 10)
    model.train(training_data_gen, iterations)
    return model
diff --git a/__pycache__/GenericTorchMlpNetwork.cpython-38.pyc b/__pycache__/GenericTorchMlpNetwork.cpython-38.pyc
new file mode 100644
index 0000000..94af414
Binary files /dev/null and b/__pycache__/GenericTorchMlpNetwork.cpython-38.pyc differ
diff --git a/__pycache__/MlpNetwork.cpython-38.pyc b/__pycache__/MlpNetwork.cpython-38.pyc
new file mode 100644
index 0000000..d1f2da2
Binary files /dev/null and b/__pycache__/MlpNetwork.cpython-38.pyc differ
diff --git a/__pycache__/custom_types.cpython-38.pyc b/__pycache__/custom_types.cpython-38.pyc
new file mode 100644
index 0000000..07ad83c
Binary files /dev/null and b/__pycache__/custom_types.cpython-38.pyc differ
diff --git a/__pycache__/import_data.cpython-38.pyc b/__pycache__/import_data.cpython-38.pyc
new file mode 100644
index 0000000..9ed9e07
Binary files /dev/null and b/__pycache__/import_data.cpython-38.pyc differ
diff --git a/__pycache__/mlp_network.cpython-38.pyc b/__pycache__/mlp_network.cpython-38.pyc
new file mode 100644
index 0000000..188d81c
Binary files /dev/null and b/__pycache__/mlp_network.cpython-38.pyc differ
diff --git a/__pycache__/multiclass_perceptron.cpython-36.pyc b/__pycache__/multiclass_perceptron.cpython-36.pyc
deleted file mode 100644
index 518a3e5..0000000
Binary files a/__pycache__/multiclass_perceptron.cpython-36.pyc and /dev/null differ
diff --git a/custom_types.py b/custom_types.py
new file mode 100644
index 0000000..90b583a
--- /dev/null
+++ b/custom_types.py
@@ -0,0 +1,22 @@
+import torch
+from typing import Tuple, NamedTuple, List, Callable
+import numpy as np
+
+TrainingBatch = Tuple[List[List[float]], List[int]]
+
+
+class LossFun(NamedTuple):
+    exec: Callable[[np.ndarray, np.ndarray], float]
+    deriv: Callable[[np.ndarray, np.ndarray], np.ndarray]
+
+
+class EvaluationResults(NamedTuple):
+    total: int
+    correct: int
+    accumulated_loss: float
+
+
+if torch.cuda.is_available():
+    DEVICE = "cuda:0"
+else:
+    DEVICE = "cpu"
diff --git a/import_data.py b/import_data.py
index 6270980..44c693f 100644
--- a/import_data.py
+++ b/import_data.py
@@ -1,5 +1,6 @@
from PIL import Image
-from typing import Union, List, Generator, Callable
+from typing import Tuple, Union, List, Generator, Callable
+from custom_types import TrainingBatch
BOX_SHADING = " ░▒▓██"
@@ -24,7 +25,7 @@ def print_img_to_console(img: Union[bytes, List[int]]):
        print()
-def read_labels(file_location: str):
+def read_labels(file_location: str) -> Generator[int, None, None]:
    with open(file_location, 'rb') as img_file:
        img_data = img_file.read()
        num_items = int.from_bytes(img_data[4:8], byteorder="big")
@@ -32,7 +33,7 @@ def read_labels(file_location: str):
        yield int.from_bytes(img_data[i:i + 1], byteorder="big")
-def read_imgs(file_location: str, as_bytes=False):
+def read_imgs(file_location: str, as_bytes=False) -> Generator[Union[bytes, List[int]], None, None]:
    with open(file_location, 'rb') as img_file:
        img_data = img_file.read()
        num_items = int.from_bytes(img_data[4:8], byteorder="big")
@@ -50,28 +51,42 @@ def read_imgs(file_location: str, as_bytes=False):
        start_byte = end_byte
-def read_img_lbl_pairs(imgs_file: str, lbls_file: str):
+def read_img_lbl_pairs(imgs_file: str, lbls_file: str) -> Generator[Tuple[List[int], int], None, None]:
    for img, label in zip(read_imgs(imgs_file), read_labels(lbls_file)):
        yield img, label
-def test_x_y(num: int = -1) -> Callable[[], Generator]:
+def get_test_data_generator(batch_size: int = 1, num: int = -1) -> Callable[[], Generator[TrainingBatch, None, None]]:
    if num == -1:
        num = 9992
    def generator():
-        for i, (img, lbl) in zip(range(num), read_img_lbl_pairs("t10k-images.idx3-ubyte", "t10k-labels.idx1-ubyte")):
-            yield img, lbl
+        accum_x, accum_y = [], []
+        for i, (img, lbl) in zip(range(num), read_img_lbl_pairs("data/t10k-images.idx3-ubyte", "data/t10k-labels.idx1-ubyte")):
+            accum_x.append(img)
+            accum_y.append(lbl)
+            if (i + 1) % batch_size == 0:
+                yield accum_x, accum_y
+                accum_x, accum_y = [], []
+        if accum_x:
+            yield accum_x, accum_y
    return generator
-def train_x_y(num: int = -1) -> Callable[[], Generator]:
+def get_training_data_generator(batch_size: int = 1, num: int = -1) -> Callable[[], Generator[TrainingBatch, None, None]]:
    if num == -1:
        num = 60000
    def generator():
-        for i, (img, lbl) in zip(range(num), read_img_lbl_pairs("train-images.idx3-ubyte", "train-labels.idx1-ubyte")):
-            yield img, lbl
+        accum_x, accum_y = [], []
+        for i, (img, lbl) in zip(range(num), read_img_lbl_pairs("data/train-images.idx3-ubyte", "data/train-labels.idx1-ubyte")):
+            accum_x.append(img)
+            accum_y.append(lbl)
+            if (i + 1) % batch_size == 0:
+                yield accum_x, accum_y
+                accum_x, accum_y = [], []
+        if accum_x:
+            yield accum_x, accum_y
    return generator
diff --git a/main.py b/main.py
index 7ed1437..535a762 100644
--- a/main.py
+++ b/main.py
@@ -1,5 +1,50 @@
-import torch
-from multiclass_perceptron import train_and_test_multiclass_perceptron
-from import_data import show_picture, test_x_y
+from MlpNetwork import train_and_test_multiclass_perceptron
+from mlp_network import train_and_test_neural_network
+from import_data import show_picture, get_test_data_generator, get_training_data_generator, IMAGE_SIZE
+from GenericTorchMlpNetwork import GenericTorchMlpClassifier
+import argparse
-train_and_test_multiclass_perceptron()
\ No newline at end of file
+
+def main():
+    args = get_args()
+    classifier = GenericTorchMlpClassifier(
+        dims_per_layer=[IMAGE_SIZE, 200, 80, 10],
+        learning_rate=args.learning_rate,
+    )
+    for i in range(args.num_epochs):
+        print(f"Begin training epoch {i + 1}.")
+        classifier.training_epoch(get_training_data_generator(20, args.num_training_samples)())
+        results = classifier.evaluate(get_test_data_generator(20)())
+        print(f"Evaluation results: {results.correct} / {results.total}",
+          f"Accumulated loss = {results.accumulated_loss:.3f}",
+          f"Average loss = {results.accumulated_loss / results.total:.3f}",
+          f"Accuracy = {100 * float(results.correct) / float(results.total):.2f}%",
+          sep="\n", end="\n\n")
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--num_epochs",
+        "-e",
+        type=int,
+        default=5,
+        help="Number of training epochs to undertake."
+    )
+    parser.add_argument(
+        "--learning_rate",
+        type=float,
+        default=0.001,
+        help="Learning rate for the optimiser."
+    )
+    parser.add_argument(
+        "--num_training_samples",
+        type=int,
+        default=-1,
+        help="Number of samples to train with (default = all)."
+    )
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mlp_network.py b/mlp_network.py
index a6d9ab9..6a14c7e 100644
--- a/mlp_network.py
+++ b/mlp_network.py
@@ -1,15 +1,11 @@
import numpy as np
from recordclass import recordclass
from typing import NamedTuple, Tuple, List, Callable, Generator
-from import_data import train_x_y, test_x_y
+from import_data import get_training_data_generator, get_test_data_generator
+from custom_types import LossFun
import sys
-class LossFun(NamedTuple):
-    exec: Callable[[np.array, np.array], float]
-    deriv: Callable[[np.array, np.array], np.array]
-
-
def sum_squares_loss_func(predicted: np.array, gold: np.array) -> float:
    return sum((predicted - gold) ** 2)
@@ -106,10 +102,12 @@ class FFNeuralNetwork:
def train_and_test_neural_network():
    model = FFNeuralNetwork([28**2, 100, 10], sum_squares_loss, 0.0001)
-    training_data_gen = train_x_y(1000)
+    training_data_gen = get_training_data_generator(num=1000)
-    test_data = test_x_y(10)()
+    test_data = get_test_data_generator(num=10)()
    model.train(training_data_gen, 5)
    for test_datum, label in test_data:
-        print(model.feed_forward(test_datum), label)
+        prediction = model.feed_forward(test_datum)
+        print(prediction, label, label == prediction)
    np.set_printoptions(threshold=sys.maxsize)
    print(model.layers[0].weights)