In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[![View on GitHub][github-badge]][github-keras-v1] [![Open In Colab][colab-badge]][colab-keras-v1] [![Open in Binder][binder-badge]][binder-keras-v1]

[github-badge]: https://img.shields.io/badge/View-on%20GitHub-blue?logo=GitHub
[colab-badge]: https://colab.research.google.com/assets/colab-badge.svg
[binder-badge]: https://static.mybinder.org/badge_logo.svg

[github-keras-v1]: https://github.com/mbrukman/reimplementing-ml-papers/blob/main/lenet/keras/LeNet_Keras_v1_basic_implementation.ipynb
[colab-keras-v1]: https://colab.research.google.com/github/mbrukman/reimplementing-ml-papers/blob/main/lenet/keras/LeNet_Keras_v1_basic_implementation.ipynb
[binder-keras-v1]: https://mybinder.org/v2/gh/mbrukman/reimplementing-ml-papers/main?filepath=lenet/keras/LeNet_Keras_v1_basic_implementation.ipynb

In [None]:
%%bash

# Abort the cell on the first failing command, on use of an unset variable, or
# on a failure anywhere in a pipeline, so that a broken download is reported
# here rather than surfacing later as a confusing import error.
set -euo pipefail

readonly GH_USER="mbrukman"
readonly GH_REPO="reimplementing-ml-papers"
readonly GH_BRANCH="main"

# Download our library for processing MNIST dataset and the LeNet model.
#
# Each file is saved into the current directory under its base name, and is
# skipped if a file of that name is already present.
for path in datasets/mnist/mnist_keras.py \
            lenet/keras/lenet.py ; do
  module="$(basename "${path}")"
  if ! [ -f "${module}" ]; then
    url="https://raw.githubusercontent.com/${GH_USER}/${GH_REPO}/${GH_BRANCH}/${path}"
    # --fail makes curl return a non-zero status on HTTP errors instead of
    # saving the server's error page as if it were the Python module. The
    # download goes to a temporary name first so that a failed or partial
    # transfer never leaves behind a file that the existence check above would
    # later mistake for a good copy.
    if ! curl --fail --silent --show-error -o "${module}.tmp" "${url}"; then
      rm -f "${module}.tmp"
      echo "Failed to download ${url}" >&2
      exit 1
    fi
    mv "${module}.tmp" "${module}"
  fi
done

In [None]:
from tensorflow import keras

# Import our LeNet model constructor downloaded above.
from lenet import LeNet

# Import the MNIST dataset processor downloaded above.
from mnist_keras import MNIST

We will start with a very simple approximation of the network described in the paper and evolve it over time to more closely match the paper.

For one, there isn't a built-in Keras layer that matches the subsampling layer in the paper: neither `AveragePooling2D` nor `MaxPooling2D` has any trainable parameters, whereas the subsampling layer described in the paper does, so this is already one difference.

The activation function described in the paper is also more complex than the plain `tanh` we're using here, but `tanh` is a reasonable approximation, and even with these changes, we get quite good accuracy on both the training and test sets.

In [None]:
# Build the network, using max pooling for the subsampling layers and tanh as
# the activation function.
model = LeNet(
    subsampling=keras.layers.MaxPooling2D,
    activation=keras.activations.tanh,
)

# Print each layer together with its output shape and parameter count.
model.summary()

Model: "LeNet-5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 C1 (Conv2D)                 (None, 28, 28, 6)         156       
                                                                 
 S2 (MaxPooling2D)           (None, 14, 14, 6)         0         
                                                                 
 S2_act (Activation)         (None, 14, 14, 6)         0         
                                                                 
 C3 (Conv2D)                 (None, 10, 10, 16)        2416      
                                                                 
 S4 (MaxPooling2D)           (None, 5, 5, 16)          0         
                                                                 
 S4_act (Activation)         (None, 5, 5, 16)          0         
                                                                 
 C5 (Conv2D)                 (None, 1, 1, 120)         4812

In [None]:
# Configure training: categorical cross-entropy as the loss, Adam as the
# optimizer, and accuracy as the reported metric.
loss_fn = keras.losses.CategoricalCrossentropy()
opt = keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=opt,
    loss=loss_fn,
    metrics=['accuracy'],
)

For details on the MNIST dataset, including a data exploration, see the [MNIST directory in my repo](https://github.com/mbrukman/reimplementing-ml-papers/tree/main/datasets/mnist).

Here, we will import a shared library to process the MNIST dataset into the format that we need to use below for model training and testing.

In [None]:
%%capture --no-stderr

# Constructing MNIST() downloads the MNIST dataset via the Keras library, which
# writes to stdout. The %%capture magic at the top of this cell suppresses that
# extraneous output, while --no-stderr leaves stderr uncaptured so that
# anything written there still shows up in the notebook.
mnist_data = MNIST()

In [None]:
# Fit the model on the training split for 20 epochs.
#
# Inputs are the training images scaled into the range [0.0, 1.0]; targets are
# the training labels converted from their default numeric values into a
# categorical (one-hot) encoding.
#
# The test split used for evaluation below goes through the same
# transformations, for consistency.
model.fit(
    x=mnist_data.x_train_scale_0_1(),
    y=mnist_data.y_train_categorical(),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fa774b278e0>

In [None]:
# Measure the trained model on the held-out test split.
#
# The inputs and labels are prepared with the same range scaling and
# categorical encoding that were applied to the training data.
model.evaluate(
    x=mnist_data.x_test_scale_0_1(),
    y=mnist_data.y_test_categorical(),
)



[0.056457001715898514, 0.9861999750137329]