diff --git a/.idea/MNIST.iml b/.idea/MNIST.iml
index 85c7612..3772c7c 100644
--- a/.idea/MNIST.iml
+++ b/.idea/MNIST.iml
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..cc52ec8
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 207156a..c38566b 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
diff --git a/GenericTorchMlpNetwork.py b/GenericTorchMlpNetwork.py
new file mode 100644
index 0000000..6145330
--- /dev/null
+++ b/GenericTorchMlpNetwork.py
@@ -0,0 +1,50 @@
+import torch
+import torch.nn as nn
+from typing import List, Generator
+from custom_types import TrainingBatch, EvaluationResults, DEVICE
+from tqdm import tqdm
+
+
+class GenericTorchMlpClassifier(nn.Module):
+    def __init__(self, dims_per_layer: List[int], learning_rate: float):
+        super(GenericTorchMlpClassifier, self).__init__()
+        self.layers = nn.ModuleList()  # registered sub-modules, so .to(DEVICE) and state_dict() include them
+        for i, layer_dims in enumerate(dims_per_layer[1:], 1):
+            self.layers.append(nn.Linear(dims_per_layer[i - 1], layer_dims))
+        self.loss_fn = nn.CrossEntropyLoss()
+        self.optimiser = torch.optim.Adam(params=[{"params": layer.parameters()} for layer in self.layers], lr=learning_rate)
+        self.to(torch.device(DEVICE))
+
+    def forward(self, input_batch: List[int]) -> torch.Tensor:
+        x = torch.tensor(input_batch, dtype=torch.float, device=torch.device(DEVICE))  # keep inputs on the same device as the layers
+        for layer in self.layers[:-1]:
+            x = torch.sigmoid(layer(x))
+        x = self.layers[-1](x)
+        return x
+
+    def training_epoch(self, training_data: Generator[TrainingBatch, None, None]) -> None:
+        self.train(True)
+        for x, y in tqdm(training_data):
+            prediction_probs, targets = self.forward(x), torch.tensor(y, dtype=torch.long, device=torch.device(DEVICE))
+            self.optimiser.zero_grad()
+            self.loss_fn(prediction_probs, targets).backward()
+            self.optimiser.step()
+
+    def evaluate(self, evaluation_data: Generator[TrainingBatch, None, None]) -> EvaluationResults:
+        self.train(False)
+        accumulated_loss = 0.0
+        total = 0
+        total_correctly_classified = 0
+        for x, y in tqdm(evaluation_data):
+            prediction_probs, targets = self.forward(x), torch.tensor(y, device=torch.device(DEVICE))
+            predictions = torch.argmax(prediction_probs, dim=1)
+            total += len(targets)
+            total_correctly_classified += (predictions == targets).sum().item()
+            accumulated_loss += self.loss_fn(prediction_probs, targets).item()  # plain floats; avoids holding on to the autograd graph
+        return EvaluationResults(
+            total=total,
+            correct=total_correctly_classified,
+            accumulated_loss=accumulated_loss
+        )
+
+
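GenericTorchMlpClassifier keeps its linear layers in an `nn.ModuleList` so that `nn.Module` registers them as sub-modules; only registered modules are moved by `.to(torch.device(DEVICE))` and reported by `.parameters()`. A minimal sketch of that registration behaviour (hypothetical model names, not part of the patch):

```python
import torch.nn as nn

class PlainList(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = [nn.Linear(4, 2)]                   # plain Python list: nn.Module cannot see the layer

class Registered(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(4, 2)])    # ModuleList: the layer is a registered sub-module

print(sum(p.numel() for p in PlainList().parameters()))    # 0
print(sum(p.numel() for p in Registered().parameters()))   # 10 (4*2 weights + 2 biases)
```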
diff --git a/multiclass_perceptron.py b/MlpNetwork.py
similarity index 85%
rename from multiclass_perceptron.py
rename to MlpNetwork.py
index 15f8200..df901a1 100644
--- a/multiclass_perceptron.py
+++ b/MlpNetwork.py
@@ -1,5 +1,5 @@
 from typing import Tuple, List, Callable, Generator
-from import_data import test_x_y, train_x_y, IMAGE_SIZE, print_img_to_console, show_picture
+from import_data import get_test_data_generator, get_training_data_generator, IMAGE_SIZE, print_img_to_console
 import numpy as np
 
 
@@ -44,15 +44,15 @@
         return self.classifiers[classifier_index].get_normalised_weight_array()
 
 
-def train_and_test_multiclass_perceptron(iterations: int = 5, training_inputs: int = 5000, test_inputs: int = 1000):
+def train_and_test_multiclass_perceptron(iterations: int = 5, training_inputs: int = -1, test_inputs: int = -1):
     print("Loading data")
-    training_data_gen = train_x_y(training_inputs)
+    training_data_gen = get_training_data_generator(num=training_inputs)
     print("Begin training model!")
     model = MulticlassPerceptron(IMAGE_SIZE, 10)
     model.train(training_data_gen, iterations)
     print("Model successfully trained.")
     print("Testing model...")
-    test_data = list(test_x_y(test_inputs)())
+    test_data = list(get_test_data_generator(num=test_inputs)())
     n_correct = sum(model.prediction(x) == y for x, y in test_data)
     accuracy = n_correct / len(test_data)
     print(f"Accuracy: {accuracy} ({n_correct} correctly classified out of {len(test_data)} total test inputs.)")
@@ -60,8 +60,8 @@ def train_and_test_multiclass_perceptron(iterations: int = 5, training_inputs: i
         print_img_to_console(model.view_for_classifier(i))
 
 
-def get_trained_digit_model(iterations: int = 5, training_inputs: int = 5000, test_inputs: int = 1000):
-    training_data_gen = train_x_y(training_inputs)
+def make_trained_digit_model(iterations: int = 5, training_inputs: int = 5000, test_inputs: int = 1000):
+    training_data_gen = get_training_data_generator(num=training_inputs)
     model = MulticlassPerceptron(IMAGE_SIZE, 10)
     model.train(training_data_gen, iterations)
     return model
diff --git a/__pycache__/GenericTorchMlpNetwork.cpython-38.pyc b/__pycache__/GenericTorchMlpNetwork.cpython-38.pyc
new file mode 100644
index 0000000..94af414
Binary files /dev/null and b/__pycache__/GenericTorchMlpNetwork.cpython-38.pyc differ
diff --git a/__pycache__/MlpNetwork.cpython-38.pyc b/__pycache__/MlpNetwork.cpython-38.pyc
new file mode 100644
index 0000000..d1f2da2
Binary files /dev/null and b/__pycache__/MlpNetwork.cpython-38.pyc differ
diff --git a/__pycache__/custom_types.cpython-38.pyc b/__pycache__/custom_types.cpython-38.pyc
new file mode 100644
index 0000000..07ad83c
Binary files /dev/null and b/__pycache__/custom_types.cpython-38.pyc differ
diff --git a/__pycache__/import_data.cpython-38.pyc b/__pycache__/import_data.cpython-38.pyc
new file mode 100644
index 0000000..9ed9e07
Binary files /dev/null and b/__pycache__/import_data.cpython-38.pyc differ
diff --git a/__pycache__/mlp_network.cpython-38.pyc b/__pycache__/mlp_network.cpython-38.pyc
new file mode 100644
index 0000000..188d81c
Binary files /dev/null and b/__pycache__/mlp_network.cpython-38.pyc differ
diff --git a/__pycache__/multiclass_perceptron.cpython-36.pyc b/__pycache__/multiclass_perceptron.cpython-36.pyc
deleted file mode 100644
index 518a3e5..0000000
Binary files a/__pycache__/multiclass_perceptron.cpython-36.pyc and /dev/null differ
diff --git a/custom_types.py b/custom_types.py
new file mode 100644
index 0000000..90b583a
--- /dev/null
+++ b/custom_types.py
@@ -0,0 +1,22 @@
+import torch
+from typing import Tuple, NamedTuple, List, Callable
+import numpy as np
+
+TrainingBatch = Tuple[List[List[float]], List[int]]
+
+
+class LossFun(NamedTuple):
+    exec: Callable[[np.array, np.array], float]
+    deriv: Callable[[np.array, np.array], np.array]
+
+
+class EvaluationResults(NamedTuple):
+    total: int
+    correct: int
+    accumulated_loss: float
+
+
+if torch.cuda.is_available():
+    DEVICE = "cuda:0"
+else:
+    DEVICE = "cpu"
diff --git a/import_data.py b/import_data.py
index 6270980..44c693f 100644
--- a/import_data.py
+++ b/import_data.py
@@ -1,5 +1,6 @@
 from PIL import Image
-from typing import Union, List, Generator, Callable
+from typing import Tuple, Union, List, Generator, Callable
+from custom_types import TrainingBatch
 
 
 BOX_SHADING = " ░▒▓██"
@@ -24,7 +25,7 @@
         print()
 
 
-def read_labels(file_location: str):
+def read_labels(file_location: str) -> Generator[int, None, None]:
     with open(file_location, 'rb') as img_file:
         img_data = img_file.read()
         num_items = int.from_bytes(img_data[4:8], byteorder="big")
@@ -32,7 +33,7 @@
         yield int.from_bytes(img_data[i:i + 1], byteorder="big")
 
 
-def read_imgs(file_location: str, as_bytes=False):
+def read_imgs(file_location: str, as_bytes=False) -> Generator[Union[bytes, List[int]], None, None]:
     with open(file_location, 'rb') as img_file:
         img_data = img_file.read()
         num_items = int.from_bytes(img_data[4:8], byteorder="big")
@@ -50,28 +51,42 @@
             start_byte = end_byte
 
 
-def read_img_lbl_pairs(imgs_file: str, lbls_file: str):
+def read_img_lbl_pairs(imgs_file: str, lbls_file: str) -> Generator[Tuple[List[int], int], None, None]:
     for img, label in zip(read_imgs(imgs_file), read_labels(lbls_file)):
         yield img, label
 
 
-def test_x_y(num: int = -1) -> Callable[[], Generator]:
+def get_test_data_generator(batch_size: int = 1, num: int = -1) -> Callable[[], Generator[TrainingBatch, None, None]]:
     if num == -1:
         num = 9992
 
     def generator():
-        for i, (img, lbl) in zip(range(num), read_img_lbl_pairs("t10k-images.idx3-ubyte", "t10k-labels.idx1-ubyte")):
-            yield img, lbl
+        accum_x, accum_y = [], []
+        for i, (img, lbl) in zip(range(num), read_img_lbl_pairs("data/t10k-images.idx3-ubyte", "data/t10k-labels.idx1-ubyte")):
+            accum_x.append(img)
+            accum_y.append(lbl)
+            if (i + 1) % batch_size == 0:
+                yield accum_x, accum_y
+                accum_x, accum_y = [], []
+        if accum_x:  # flush the final partial batch
+            yield accum_x, accum_y
     return generator
 
 
-def train_x_y(num: int = -1) -> Callable[[], Generator]:
+def get_training_data_generator(batch_size: int = 1, num: int = -1) -> Callable[[], Generator[TrainingBatch, None, None]]:
     if num == -1:
         num = 60000
 
     def generator():
-        for i, (img, lbl) in zip(range(num), read_img_lbl_pairs("train-images.idx3-ubyte", "train-labels.idx1-ubyte")):
-            yield img, lbl
+        accum_x, accum_y = [], []
+        for i, (img, lbl) in zip(range(num), read_img_lbl_pairs("data/train-images.idx3-ubyte", "data/train-labels.idx1-ubyte")):
+            accum_x.append(img)
+            accum_y.append(lbl)
+            if (i + 1) % batch_size == 0:
+                yield accum_x, accum_y
+                accum_x, accum_y = [], []
+        if accum_x:  # flush the final partial batch
+            yield accum_x, accum_y
    return generator
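The two generator factories above collect (image, label) pairs into `batch_size`-sized batches and flush whatever remains once the input is exhausted. The same batching pattern in isolation, with dummy data (hypothetical helper, not part of the patch):

```python
from typing import Generator, Iterable, List, Tuple

def batched(pairs: Iterable[Tuple[List[int], int]], batch_size: int) -> Generator[Tuple[List[List[int]], List[int]], None, None]:
    accum_x, accum_y = [], []
    for img, lbl in pairs:
        accum_x.append(img)
        accum_y.append(lbl)
        if len(accum_x) == batch_size:
            yield accum_x, accum_y
            accum_x, accum_y = [], []
    if accum_x:  # final partial batch
        yield accum_x, accum_y

# Five dummy samples in batches of two -> batch sizes 2, 2, 1
print([len(ys) for _, ys in batched([([0], i) for i in range(5)], 2)])
```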
diff --git a/main.py b/main.py
index 7ed1437..535a762 100644
--- a/main.py
+++ b/main.py
@@ -1,5 +1,50 @@
-import torch
-from multiclass_perceptron import train_and_test_multiclass_perceptron
-from import_data import show_picture, test_x_y
+from MlpNetwork import train_and_test_multiclass_perceptron
+from mlp_network import train_and_test_neural_network
+from import_data import show_picture, get_test_data_generator, get_training_data_generator, IMAGE_SIZE
+from GenericTorchMlpNetwork import GenericTorchMlpClassifier
+import argparse
 
-train_and_test_multiclass_perceptron()
\ No newline at end of file
+
+
+def main():
+    args = get_args()
+    classifier = GenericTorchMlpClassifier(
+        dims_per_layer=[IMAGE_SIZE, 200, 80, 10],
+        learning_rate=args.learning_rate,
+    )
+    for i in range(args.num_epochs):
+        print(f"Begin training epoch {i + 1}.")
+        classifier.training_epoch(get_training_data_generator(20, args.num_training_samples)())
+        results = classifier.evaluate(get_test_data_generator(20)())
+        print(f"Evaluation results: {results.correct} / {results.total}",
+              f"Accumulated loss = {results.accumulated_loss:.3f}",
+              f"Average loss = {results.accumulated_loss / results.correct:.3f}",
+              f"Accuracy = {100 * float(results.correct) / float(results.total):.2f}%",
+              sep="\n", end="\n\n")
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--num_epochs",
+        "-e",
+        type=int,
+        default=5,
+        help="Number of training epochs to undertake."
+    )
+    parser.add_argument(
+        "--learning_rate",
+        type=float,
+        default=0.001,
+        help="Learning rate for the optimiser."
+    )
+    parser.add_argument(
+        "--num_training_samples",
+        type=int,
+        default=-1,
+        help="Number of samples to train with (default = all)."
+    )
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mlp_network.py b/mlp_network.py
index a6d9ab9..6a14c7e 100644
--- a/mlp_network.py
+++ b/mlp_network.py
@@ -1,15 +1,11 @@
 import numpy as np
 from recordclass import recordclass
 from typing import NamedTuple, Tuple, List, Callable, Generator
-from import_data import train_x_y, test_x_y
+from import_data import get_training_data_generator, get_test_data_generator
+from custom_types import LossFun
 import sys
 
 
-class LossFun(NamedTuple):
-    exec: Callable[[np.array, np.array], float]
-    deriv: Callable[[np.array, np.array], np.array]
-
-
 def sum_squares_loss_func(predicted: np.array, gold: np.array) -> float:
     return sum((predicted - gold) ** 2)
 
@@ -106,10 +102,11 @@
 def train_and_test_neural_network():
     model = FFNeuralNetwork([28**2, 100, 10], sum_squares_loss, 0.0001)
-    training_data_gen = train_x_y(1000)
-    test_data = test_x_y(10)()
+    training_data_gen = get_training_data_generator(num=1000)
+    test_data = get_test_data_generator(num=10)()
     model.train(training_data_gen, 5)
     for test_datum, label in test_data:
-        print(model.feed_forward(test_datum), label)
+        prediction = model.feed_forward(test_datum)
+        print(prediction, label, label == prediction)
     np.set_printoptions(threshold=sys.maxsize)
     print(model.layers[0].weights)
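Taken together, the patch routes the batched MNIST readers into the Torch classifier. A minimal end-to-end sketch of that flow (assumes the idx files sit under `data/` and that `torch` and `tqdm` are installed; a condensed variant of what `main.py` does, not a replacement for it):

```python
from GenericTorchMlpNetwork import GenericTorchMlpClassifier
from import_data import get_training_data_generator, get_test_data_generator, IMAGE_SIZE

classifier = GenericTorchMlpClassifier(dims_per_layer=[IMAGE_SIZE, 200, 80, 10], learning_rate=0.001)
classifier.training_epoch(get_training_data_generator(batch_size=20, num=2000)())  # small subset for a quick check
results = classifier.evaluate(get_test_data_generator(batch_size=20, num=1000)())
print(f"{results.correct} / {results.total} correct, accumulated loss {results.accumulated_loss:.3f}")
```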