import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.manifold import TSNE
from skimage.transform import rotate
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder

# Plot aesthetics (the earlier 'custom' mathtext settings were overridden
# by the 'stix' setting below, so only the effective options are kept)
plt.rcParams['mathtext.fontset'] = 'stix'
plt.rcParams['font.family'] = 'STIXGeneral'
plt.rcParams['font.size'] = 16

Data preprocessing

We begin by preprocessing the data. For this, we will use TensorFlow itself to load it. Through the Keras API, we load the training and test data with the call

tf.keras.datasets.mnist.load_data()

In fact, TensorFlow bundles several datasets commonly used in machine learning. A complete list can be found at the following link
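For instance, Fashion-MNIST ships with the same interface, so it can be loaded with an identical call (an illustrative aside; the variable names here are our own):

(xf_train, yf_train), (xf_test, yf_test) = tf.keras.datasets.fashion_mnist.load_data()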

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

Note that we split the data into 4 arrays, corresponding to the training and test sets. The training data are the ones used during model optimization, while the test data are used to evaluate the model. We make this split for two reasons:

  1. We want to simulate the situation where our model is trained on a fixed dataset and then used in practice on new data, which the model did not see during the training phase. For MNIST, imagine that we train the network on a local database and then use the model for real-time digit prediction in a remote application. The data obtained in real time were never seen by the neural network during training.
  2. Statistics computed on the training data are usually more optimistic than those on unseen data. Imagine a person studying for an exam from a list of exercises. Who do you think would perform better: (1) a student whose exam consists of questions taken from that list, or (2) a student whose exam consists of entirely new questions?

Besides splitting the data into train/test, we will also split them between features (array X) and labels (array y), as the quick check below shows.
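As a quick sanity check (a minimal sketch), the feature and label arrays returned by load_data come already paired and split:

print("Train: X {}, y {}".format(x_train.shape, y_train.shape))
print("Test:  X {}, y {}".format(x_test.shape, y_test.shape))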

Data formatting

We will start by examining the data as they come in the tensorflow library's dataset. Since the applications are, as a rule, convolutional neural networks, the data come as matrices.

Visualizing images as matrices

fig, ax = plt.subplots()
ax.imshow(x_train[0], cmap='gray')
_ = ax.set_xticks([])
_ = ax.set_yticks([])

print("Formato da matriz de dados: {}".format(x_train.shape))
Formato da matriz de dados: (60000, 28, 28)

Note that the data are stored as images, so the pixel values lie in the range 0 to 255. Moreover, the labels are stored in categorical format, i.e., $y_{i} \in \{1, \cdots, K\}$, where $K$ is the number of classes. In particular, $K = 10$ for the MNIST dataset.

To convert the feature matrix, we take 2 steps:

  1. convert from int to float,
  2. rescale the range [0, 255] to [0, 1]

Note that we can apply the following transformation,

$$ x \leftarrow \dfrac{x - x_{min}}{x_{max}-x_{min}}, $$

As discussed earlier, $x_{min} = 0$ and $x_{max} = 255$; therefore,

$$ x \leftarrow \dfrac{x}{255} $$

Since this tutorial uses convolutional neural networks, the samples will remain in image format. We thus have train/test tensors of shape $(N, H, W)$, where $N_{tr} = 60000$, $H=28$ and $W=28$. In the code below we also append a channel axis with np.newaxis, yielding $(N, H, W, 1)$, the format expected by Conv2D layers.

Exercise 1: If a float occupies 32 bits in memory, how much space does the training tensor occupy? (One way to check your answer is sketched below.)
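A quick sketch of the check; note that the astype(float) used further down actually produces 64-bit floats, so here we cast explicitly to float32 to match the exercise's assumption:

print(x_train.astype(np.float32).nbytes)  # 60000 * 28 * 28 * 4 bytes = 188,160,000 (~188 MB)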

Xtr = x_train.astype(float)[..., np.newaxis] / 255.0
Xts = x_test.astype(float)[..., np.newaxis] / 255.0

print("Formato da matriz de dados: {}".format(Xtr.shape))
print("Nova faixa de valores de X: [{}, {}]".format(Xtr.min(), Xtr.max()))
Formato da matriz de dados: (60000, 28, 28, 1)
Nova faixa de valores de X: [0.0, 1.0]

We will also convert the categorical label encoding to one-hot encoding. This is straightforward with Python's scikit-learn library, through the OneHotEncoder class. The example below illustrates the case of 3 classes and 3 samples:

$$ y^{cat} = [1, 2, 3] \iff y^{OneHot} = \begin{bmatrix} 1 & 0 & 0\\ 0 & 1 & 0\\ 0 & 0 & 1 \end{bmatrix} $$
# NOTE 1: the OneHotEncoder object expects a 2-dimensional array.
#         However, y_train has only 1 dimension (see the prints
#         below). To convert it to a 2D array, we use the reshape
#         function, which changes the array's shape.
# NOTE 2: .reshape(-1, ...) makes numpy infer the appropriate value
#         for the dimension specified as -1. Since we use
#         .reshape(-1, 1), we get a shape transformation (N,) -> (N, 1)

print("Formato de y_train antes de usar .reshape: {}".format(y_train.shape))
print("Formato de y_train após usar .reshape: {}".format(y_train.reshape(-1, 1).shape))

enc = OneHotEncoder(sparse=False)
ytr = enc.fit_transform(y_train.reshape(-1, 1))
yts = enc.transform(y_test.reshape(-1, 1))  # reuse the encoder fitted on the training labels

print("Formato da matriz de rótulos após a aplicação da nova codificação: {}".format(ytr.shape))
Formato de y_train antes de usar .reshape: (60000,)
Formato de y_train após usar .reshape: (60000, 1)
Formato da matriz de rótulos após a aplicação da nova codificação: (60000, 10)
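As an aside, Keras provides the same conversion through tf.keras.utils.to_categorical, which accepts the 1-D label array directly (shown here only as an alternative to OneHotEncoder):

ytr_alt = tf.keras.utils.to_categorical(y_train, num_classes=10)  # shape (60000, 10)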

Model definitions

def network1(input_shape=(28, 28, 1), n_classes=10):
    x = tf.keras.layers.Input(shape=input_shape)

    # Convolutional block: Convolution -> Activation -> Pooling
    y = tf.keras.layers.Conv2D(filters=36, kernel_size=(3, 3), padding='same', activation='relu')(x)
    y = tf.keras.layers.MaxPool2D(pool_size=(2, 2), padding='same')(y)
    y = tf.keras.layers.Flatten()(y)

    y = tf.keras.layers.Dense(units=100, activation='relu')(y)
    y = tf.keras.layers.Dense(units=n_classes, activation='softmax')(y)

    return tf.keras.models.Model(x, y)


def network2(input_shape=(28, 28, 1), n_classes=10):
    x = tf.keras.layers.Input(shape=input_shape)

    # Convolutional block: Convolution -> Activation -> Pooling
    y = tf.keras.layers.Conv2D(filters=36,
                               kernel_size=(7, 7),
                               padding='same',
                               kernel_regularizer=tf.keras.regularizers.l2(1e-3),
                               activation='relu')(x)
    y = tf.keras.layers.MaxPool2D(pool_size=(2, 2), padding='same')(y)
    y = tf.keras.layers.Flatten()(y)

    y = tf.keras.layers.Dense(units=100, activation='relu',
                              kernel_regularizer=tf.keras.regularizers.l2(1e-3))(y)
    y = tf.keras.layers.Dense(units=n_classes, activation='softmax')(y)

    return tf.keras.models.Model(x, y)
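Besides the larger 7×7 kernel, network2 differs from network1 by the L2 penalties attached through kernel_regularizer. With tf.keras.regularizers.l2(1e-3), each regularized weight matrix $W_{j}$ contributes an extra term to the training objective (a sketch of the standard formulation, with $\lambda = 10^{-3}$ here):

$$ \mathcal{L} = \mathcal{L}_{CE} + \lambda \sum_{j} \lVert W_{j} \rVert_{2}^{2}, $$

which penalizes large weights during optimization.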

Training the non-regularized model

model1 = network1()
model1.summary()
Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_2 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 36)        360       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 36)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 7056)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               705700    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1010      
=================================================================
Total params: 707,070
Trainable params: 707,070
Non-trainable params: 0
_________________________________________________________________
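As a sanity check on the summary above, the parameter counts can be derived by hand from the layer shapes (a quick sketch):

# Conv2D: (kernel height * kernel width * input channels) * filters + one bias per filter
print(3 * 3 * 1 * 36 + 36)    # 360
# Dense(100) acts on the flattened 14 * 14 * 36 = 7056 features
print(7056 * 100 + 100)       # 705700
# Dense(10) output layer
print(100 * 10 + 10)          # 1010 -> total: 707070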
loss_obj = tf.keras.losses.CategoricalCrossentropy()
optimizer_obj = tf.keras.optimizers.Adam(learning_rate=0.01)

model1.compile(loss=loss_obj, optimizer=optimizer_obj, metrics=['accuracy'])
hist1 = model1.fit(x=Xtr, y=ytr, batch_size=1024, epochs=30, validation_data=(Xts, yts), validation_batch_size=128)

Epoch 1/30
59/59 [==============================] - 1s 21ms/step - loss: 0.3896 - accuracy: 0.8817 - val_loss: 0.0785 - val_accuracy: 0.9752
Epoch 2/30
59/59 [==============================] - 1s 18ms/step - loss: 0.0661 - accuracy: 0.9799 - val_loss: 0.0575 - val_accuracy: 0.9812
Epoch 3/30
59/59 [==============================] - 1s 18ms/step - loss: 0.0408 - accuracy: 0.9878 - val_loss: 0.0549 - val_accuracy: 0.9833
Epoch 4/30
59/59 [==============================] - 1s 18ms/step - loss: 0.0271 - accuracy: 0.9920 - val_loss: 0.0477 - val_accuracy: 0.9852
Epoch 5/30
59/59 [==============================] - 1s 18ms/step - loss: 0.0198 - accuracy: 0.9939 - val_loss: 0.0555 - val_accuracy: 0.9835
Epoch 6/30
59/59 [==============================] - 1s 18ms/step - loss: 0.0173 - accuracy: 0.9942 - val_loss: 0.0529 - val_accuracy: 0.9841
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
axes[0].plot(100 * np.array(hist1.history['accuracy']), label='Train')
axes[0].plot(100 * np.array(hist1.history['val_accuracy']), label='Test')
axes[0].set_ylabel('Accuracy (%)')
axes[0].set_xlabel('Epoch')
axes[0].legend()

axes[1].plot(np.array(hist1.history['loss']), label='Train')
axes[1].plot(np.array(hist1.history['val_loss']), label='Test')
axes[1].set_ylabel('Loss')
axes[1].set_xlabel('Epoch')
axes[1].legend()
filters = model1.weights[0].numpy()

fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
  ax.imshow(np.squeeze(filters[:, :, :, i]), cmap='gray')
  ax.set_yticks([])
  ax.set_xticks([])
# Build a model that returns the output of every layer, so we can
# inspect the intermediate representations of a given input
inp = model1.layers[0].input
outs = [layer.output for layer in model1.layers]

layerized_model1 = tf.keras.models.Model(inp, outs)
Omat = layerized_model1.predict(Xts[0, ...].reshape(1, 28, 28, 1))
plt.imshow(Omat[0][0, :, :, 0], cmap='gray')
plt.yticks([])
plt.xticks([])
fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
  ax.imshow(np.squeeze(Omat[1][0, :, :, i]), cmap='gray')
  ax.set_yticks([])
  ax.set_xticks([])
fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
  ax.imshow(np.squeeze(Omat[2][0, :, :, i]), cmap='gray')
  ax.set_yticks([])
  ax.set_xticks([])
x = Omat[0][0, :, :, 0]  # recover the input image from the input layer's output

fig, axes = plt.subplots(1, 2, figsize=(10, 5))

axes[0].imshow(x, cmap='gray')
axes[0].set_xticks([])
axes[0].set_yticks([])

axes[1].bar(np.arange(10), Omat[-1][0, :])
_ = axes[1].set_xticks([i for i in range(10)])
x = Omat[0][0, :, :, 0]
xrot = rotate(x, angle=15)
fig, axes = plt.subplots(1, 2, figsize=(10, 5))

axes[0].imshow(x, cmap='gray')
axes[0].set_xticks([])
axes[0].set_yticks([])

axes[1].imshow(xrot, cmap='gray')
axes[1].set_xticks([])
axes[1].set_yticks([])
tmp = layerized_model1.predict(xrot.reshape(1, 28, 28, 1))
fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
    ax.imshow(np.squeeze(tmp[1][0, :, :, i]), cmap='gray')
    ax.set_yticks([])
    ax.set_xticks([])
fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
    ax.imshow(np.squeeze(tmp[2][0, :, :, i]), cmap='gray')
    ax.set_yticks([])
    ax.set_xticks([])
fig, axes = plt.subplots(2, 2, figsize=(10, 5))

axes[0, 0].imshow(x, cmap='gray')
axes[0, 0].set_xticks([])
axes[0, 0].set_yticks([])

axes[0, 1].imshow(xrot, cmap='gray')
axes[0, 1].set_xticks([])
axes[0, 1].set_yticks([])

axes[1, 0].bar(np.arange(10), Omat[-1][0, :])
axes[1, 1].bar(np.arange(10), tmp[-1][0, :])
x = Omat[0][0, :, :, 0]
xrot = rotate(x, angle=45)
fig, axes = plt.subplots(1, 2, figsize=(10, 5))

axes[0].imshow(x, cmap='gray')
axes[0].set_xticks([])
axes[0].set_yticks([])

axes[1].imshow(xrot, cmap='gray')
axes[1].set_xticks([])
axes[1].set_yticks([])
tmp = layerized_model1.predict(xrot.reshape(1, 28, 28, 1))
fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
    ax.imshow(np.squeeze(tmp[1][0, :, :, i]), cmap='gray')
    ax.set_yticks([])
    ax.set_xticks([])
fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
    ax.imshow(np.squeeze(tmp[2][0, :, :, i]), cmap='gray')
    ax.set_yticks([])
    ax.set_xticks([])
fig, axes = plt.subplots(2, 2, figsize=(10, 5))

axes[0, 0].imshow(x, cmap='gray')
axes[0, 0].set_xticks([])
axes[0, 0].set_yticks([])

axes[0, 1].imshow(xrot, cmap='gray')
axes[0, 1].set_xticks([])
axes[0, 1].set_yticks([])

axes[1, 0].bar(np.arange(10), Omat[-1][0, :])
axes[1, 0].set_ylim([0, 1.1])
_ = axes[1, 0].set_xticks([i for i in range(0, 10)])
axes[1, 1].bar(np.arange(10), tmp[-1][0, :])
axes[1, 1].set_ylim([0, 1.1])
_ = axes[1, 1].set_xticks([i for i in range(0, 10)])
x = Omat[0][0, :, :, 0]
noise = 0.5 * np.random.randn(*x.shape)
xnoise = np.clip(x + noise, 0.0, 1.0)
fig, axes = plt.subplots(1, 3, figsize=(10, 5))

axes[0].imshow(x, cmap='gray')
axes[0].set_xticks([])
axes[0].set_yticks([])

axes[1].imshow(xnoise, cmap='gray')
axes[1].set_xticks([])
axes[1].set_yticks([])

axes[2].imshow(noise, cmap='gray')
axes[2].set_xticks([])
axes[2].set_yticks([])
tmp = layerized_model1.predict(xnoise.reshape(1, 28, 28, 1))
fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
    ax.imshow(np.squeeze(tmp[1][0, :, :, i]), cmap='gray')
    ax.set_yticks([])
    ax.set_xticks([])
fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
    ax.imshow(np.squeeze(tmp[2][0, :, :, i]), cmap='gray')
    ax.set_yticks([])
    ax.set_xticks([])
fig, axes = plt.subplots(2, 2, figsize=(10, 5))

axes[0, 0].imshow(x, cmap='gray')
axes[0, 0].set_xticks([])
axes[0, 0].set_yticks([])

axes[0, 1].imshow(xnoise, cmap='gray')
axes[0, 1].set_xticks([])
axes[0, 1].set_yticks([])

axes[1, 0].bar(np.arange(10), Omat[-1][0, :])
axes[1, 0].set_ylim([0, 1.1])
_ = axes[1, 0].set_xticks([i for i in range(0, 10)])
axes[1, 1].bar(np.arange(10), tmp[-1][0, :])
axes[1, 1].set_ylim([0, 1.1])
_ = axes[1, 1].set_xticks([i for i in range(0, 10)])
# Blend the current image x with a second test image y
y = Xts[1, ...].reshape(28, 28)

alpha = 0.25
interpol_xy = np.clip((1 - alpha) * x + alpha * y, 0.0, 1.0)

fig, axes = plt.subplots(1, 2, figsize=(10, 5))

axes[0].imshow(x, cmap='gray')
axes[0].set_xticks([])
axes[0].set_yticks([])

axes[1].imshow(interpol_xy, cmap='gray')
axes[1].set_xticks([])
axes[1].set_yticks([])
tmp = layerized_model1.predict(interpol_xy.reshape(1, 28, 28, 1))
fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
  ax.imshow(np.squeeze(tmp[1][0, :, :, i]), cmap='gray')
  ax.set_yticks([])
  ax.set_xticks([])
fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
  ax.imshow(np.squeeze(tmp[2][0, :, :, i]), cmap='gray')
  ax.set_yticks([])
  ax.set_xticks([])
fig, axes = plt.subplots(2, 2, figsize=(10, 5))

axes[0, 0].imshow(x, cmap='gray')
axes[0, 0].set_xticks([])
axes[0, 0].set_yticks([])

axes[0, 1].imshow(interpol_xy, cmap='gray')
axes[0, 1].set_xticks([])
axes[0, 1].set_yticks([])

axes[1, 0].bar(np.arange(10), Omat[-1][0, :])
axes[1, 0].set_ylim([0, 1.1])
_ = axes[1, 0].set_xticks([i for i in range(0, 10)])
axes[1, 1].bar(np.arange(10), tmp[-1][0, :])
axes[1, 1].set_ylim([0, 1.1])
_ = axes[1, 1].set_xticks([i for i in range(0, 10)])

Training the regularized model

model2 = network2()
model2.summary()
Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_2 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 36)        1800      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 36)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 7056)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               705700    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1010      
=================================================================
Total params: 708,510
Trainable params: 708,510
Non-trainable params: 0
_________________________________________________________________
loss_obj = tf.keras.losses.CategoricalCrossentropy()
optimizer_obj = tf.keras.optimizers.Adam(learning_rate=0.01)

model2.compile(loss=loss_obj, optimizer=optimizer_obj, metrics=['accuracy'])
hist2 = model2.fit(x=Xtr, y=ytr, batch_size=1024, epochs=30, validation_data=(Xts, yts), validation_batch_size=128)

Epoch 1/30
59/59 [==============================] - 1s 22ms/step - loss: 0.6784 - accuracy: 0.8953 - val_loss: 0.2166 - val_accuracy: 0.9735
Epoch 2/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1922 - accuracy: 0.9717 - val_loss: 0.1650 - val_accuracy: 0.9763
Epoch 3/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1672 - accuracy: 0.9746 - val_loss: 0.1539 - val_accuracy: 0.9772
Epoch 4/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1580 - accuracy: 0.9770 - val_loss: 0.1367 - val_accuracy: 0.9819
Epoch 5/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1517 - accuracy: 0.9775 - val_loss: 0.1408 - val_accuracy: 0.9799
Epoch 6/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1460 - accuracy: 0.9791 - val_loss: 0.1364 - val_accuracy: 0.9827
Epoch 7/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1401 - accuracy: 0.9804 - val_loss: 0.1364 - val_accuracy: 0.9778
Epoch 8/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1428 - accuracy: 0.9794 - val_loss: 0.1384 - val_accuracy: 0.9777
Epoch 9/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1334 - accuracy: 0.9815 - val_loss: 0.1343 - val_accuracy: 0.9805
Epoch 10/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1312 - accuracy: 0.9815 - val_loss: 0.1217 - val_accuracy: 0.9823
Epoch 11/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1380 - accuracy: 0.9801 - val_loss: 0.1439 - val_accuracy: 0.9765
Epoch 12/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1320 - accuracy: 0.9817 - val_loss: 0.1237 - val_accuracy: 0.9818
Epoch 13/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1326 - accuracy: 0.9808 - val_loss: 0.1493 - val_accuracy: 0.9763
Epoch 14/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1283 - accuracy: 0.9819 - val_loss: 0.1145 - val_accuracy: 0.9840
Epoch 15/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1305 - accuracy: 0.9812 - val_loss: 0.1307 - val_accuracy: 0.9805
Epoch 16/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1255 - accuracy: 0.9827 - val_loss: 0.1200 - val_accuracy: 0.9832
Epoch 17/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1242 - accuracy: 0.9823 - val_loss: 0.1394 - val_accuracy: 0.9755
Epoch 18/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1242 - accuracy: 0.9825 - val_loss: 0.1146 - val_accuracy: 0.9857
Epoch 19/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1223 - accuracy: 0.9827 - val_loss: 0.1157 - val_accuracy: 0.9829
Epoch 20/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1233 - accuracy: 0.9825 - val_loss: 0.1206 - val_accuracy: 0.9837
Epoch 21/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1189 - accuracy: 0.9840 - val_loss: 0.1135 - val_accuracy: 0.9849
Epoch 22/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1234 - accuracy: 0.9819 - val_loss: 0.1268 - val_accuracy: 0.9810
Epoch 23/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1202 - accuracy: 0.9830 - val_loss: 0.1265 - val_accuracy: 0.9802
Epoch 24/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1194 - accuracy: 0.9830 - val_loss: 0.1245 - val_accuracy: 0.9803
Epoch 25/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1224 - accuracy: 0.9825 - val_loss: 0.1144 - val_accuracy: 0.9826
Epoch 26/30
59/59 [==============================] - 1s 18ms/step - loss: 0.1149 - accuracy: 0.9842 - val_loss: 0.1087 - val_accuracy: 0.9829
Epoch 27/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1193 - accuracy: 0.9827 - val_loss: 0.1279 - val_accuracy: 0.9803
Epoch 28/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1157 - accuracy: 0.9837 - val_loss: 0.1053 - val_accuracy: 0.9868
Epoch 29/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1167 - accuracy: 0.9826 - val_loss: 0.1192 - val_accuracy: 0.9820
Epoch 30/30
59/59 [==============================] - 1s 19ms/step - loss: 0.1134 - accuracy: 0.9840 - val_loss: 0.1102 - val_accuracy: 0.9831
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
axes[0].plot(100 * np.array(hist2.history['accuracy']), label='Train')
axes[0].plot(100 * np.array(hist2.history['val_accuracy']), label='Test')
axes[0].set_ylabel('Accuracy (%)')
axes[0].set_xlabel('Epoch')
axes[0].legend()

axes[1].plot(np.array(hist2.history['loss']), label='Train')
axes[1].plot(np.array(hist2.history['val_loss']), label='Test')
axes[1].set_ylabel('Loss')
axes[1].set_xlabel('Epoch')
axes[1].legend()
filters = model2.weights[0].numpy()

fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
  ax.imshow(np.squeeze(filters[:, :, :, i]), cmap='gray')
  ax.set_yticks([])
  ax.set_xticks([])
# As before, build a model exposing every layer's output, now for model2
inp = model2.layers[0].input
outs = [layer.output for layer in model2.layers]

layerized_model2 = tf.keras.models.Model(inp, outs)
Omat = layerized_model2.predict(Xts[0, ...].reshape(1, 28, 28, 1))
plt.imshow(Omat[0][0, :, :, 0], cmap='gray')
plt.yticks([])
plt.xticks([])
fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
  ax.imshow(np.squeeze(Omat[1][0, :, :, i]), cmap='gray')
  ax.set_yticks([])
  ax.set_xticks([])
fig, axes = plt.subplots(6, 6, figsize=(8, 8))

for i, ax in enumerate(axes.flatten()):
  ax.imshow(np.squeeze(Omat[2][0, :, :, i]), cmap='gray')
  ax.set_yticks([])
  ax.set_xticks([])
fig, axes = plt.subplots(1, 2, figsize=(10, 5))

axes[0].imshow(x, cmap='gray')
axes[0].set_xticks([])
axes[0].set_yticks([])

axes[1].bar(np.arange(10), Omat[-1][0, :])
_ = axes[1].set_xticks([i for i in range(10)])

Comparing the two models

Accuracy

fig, axes = plt.subplots(2, 1, figsize=(15, 5))

axes[0].plot(hist1.history['accuracy'], label='Model 1')
axes[0].plot(hist2.history['accuracy'], label='Model 2')
axes[0].set_ylabel('Train accuracy')
axes[0].legend()

axes[1].plot(hist1.history['val_accuracy'], label='Model 1')
axes[1].plot(hist2.history['val_accuracy'], label='Model 2')
axes[1].set_ylabel('Test accuracy')
axes[1].legend()

Loss

fig, axes = plt.subplots(2, 1, figsize=(15, 5))

axes[0].plot(hist1.history['loss'], label='Model 1')
axes[0].plot(hist2.history['loss'], label='Model 2')
axes[0].set_ylabel('Train loss')
axes[0].legend()

axes[1].plot(hist1.history['val_loss'], label='Model 1')
axes[1].plot(hist2.history['val_loss'], label='Model 2')
axes[1].set_ylabel('Test loss')
axes[1].legend()

Metrics

yp1 = model1.predict(Xts).argmax(axis=1)
yp2 = model2.predict(Xts).argmax(axis=1)
print("Taxa de precisão:                {}".format(100 * accuracy_score(y_test, yp1)))
print("Taxa de precisão (Regularizado): {}".format(100 * accuracy_score(y_test, yp2)))
Taxa de precisão:                98.98
Taxa de precisão (Regularizado): 98.31

Visualizing the representations

Omat = layerized_model2.predict(Xts)
# Select the first 100 test samples of each digit (1000 samples in total)
sample_inds = []
for i in np.unique(y_test):
  sample_inds.append(np.where(y_test == i)[0][:100])
raw_rep = Omat[0][sample_inds, :]
print(raw_rep.shape)

tsne = TSNE(n_components=2, init='pca', verbose=True)
tsne.fit(raw_rep.reshape(-1, 784))

embedding = tsne.embedding_
(1000, 28, 28, 1)
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 1000 samples in 0.056s...
[t-SNE] Computed neighbors for 1000 samples in 1.707s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1000
[t-SNE] Mean sigma: 2.611868
[t-SNE] KL divergence after 250 iterations with early exaggeration: 70.110123
[t-SNE] KL divergence after 1000 iterations: 1.031477
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
for i in np.unique(y_test):
  ax.scatter(embedding[100 * i: 100 * (i + 1), 0],
             embedding[100 * i: 100 * (i + 1), 1],
             label='Digit {}'.format(i))
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0))
conv_rep = Omat[3][sample_inds, :]
print(conv_rep.shape)

tsne = TSNE(n_components=2, init='pca', verbose=True)
tsne.fit(conv_rep)

embedding = tsne.embedding_
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 1000 samples in 0.508s...
[t-SNE] Computed neighbors for 1000 samples in 15.771s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1000
[t-SNE] Mean sigma: 2.780777
[t-SNE] KL divergence after 250 iterations with early exaggeration: 71.441025
[t-SNE] KL divergence after 1000 iterations: 0.994165
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
for i in np.unique(y_test):
  ax.scatter(embedding[100 * i: 100 * (i + 1), 0],
             embedding[100 * i: 100 * (i + 1), 1],
             label='Digit {}'.format(i))
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0))
dense_rep = Omat[-2][sample_inds, :]

tsne = TSNE(n_components=2, init='pca', verbose=True)
tsne.fit(dense_rep)

embedding = tsne.embedding_
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 1000 samples in 0.007s...
[t-SNE] Computed neighbors for 1000 samples in 0.217s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1000
[t-SNE] Mean sigma: 1.311268
[t-SNE] KL divergence after 250 iterations with early exaggeration: 54.055824
[t-SNE] KL divergence after 1000 iterations: 0.571683
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
for i in np.unique(y_test):
  ax.scatter(embedding[100 * i: 100 * (i + 1), 0],
             embedding[100 * i: 100 * (i + 1), 1],
             label='Digit {}'.format(i))
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0))