In [3]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
In [4]:
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
170498071/170498071 ━━━━━━━━━━━━━━━━━━━━ 200s 1us/step
170498071/170498071 [==============================] - 2s 0us/step
In [30]:
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
'dog', 'frog', 'horse', 'ship', 'truck']
plt.figure(figsize=(10,10))
for i in range(25):
plt.subplot(5,5,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(train_images[i])
# The CIFAR labels happen to be arrays,
# which is why you need the extra index
plt.xlabel(class_names[train_labels[i][0]])
plt.show()
In [31]:
model = models.Sequential()
# First Convolutional Layer
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))
# Second Convolutional Layer
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
# Third Convolutional Layer
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# Flatten the results to feed into a dense layer
model.add(layers.Flatten())
# Dense Layers
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))
In [32]:
model.summary()
Model: "sequential_6"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_22 (Conv2D) │ (None, 30, 30, 32) │ 896 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_16 (MaxPooling2D) │ (None, 15, 15, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_23 (Conv2D) │ (None, 13, 13, 64) │ 18,496 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_17 (MaxPooling2D) │ (None, 6, 6, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_24 (Conv2D) │ (None, 4, 4, 64) │ 36,928 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_6 (Flatten) │ (None, 1024) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_12 (Dense) │ (None, 64) │ 65,600 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_13 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 122,570 (478.79 KB)
Trainable params: 122,570 (478.79 KB)
Non-trainable params: 0 (0.00 B)
conv2d (Conv2D) (None, 30, 30, 32) 896
max_pooling2d (MaxPooling2 (None, 15, 15, 32) 0
D)
conv2d_1 (Conv2D) (None, 13, 13, 64) 18496
max_pooling2d_1 (MaxPoolin (None, 6, 6, 64) 0
g2D)
conv2d_2 (Conv2D) (None, 4, 4, 64) 36928
=================================================================
Total params: 56320 (220.00 KB)
Trainable params: 56320 (220.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Parameters We Can Change and Their Effects
1. Number of Convolutional Layers
2. Number of Filters in Each Convolutional Layer
3. Filter Size (Kernel Size)
4. Activation Functions
5. Pooling Layers
6. Number of Neurons in Dense Layers
7. Batch Size
8. Epochs
9. Learning Rate
Changes were made for each parameter and examples were given to show how each change affected the
model.
In [42]:
# Increasing Convolutional Layers ( 3 to 4 )
model2 = models.Sequential([
layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(128, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(128, (3, 3), activation='relu'),
layers.Flatten(),
layers.Dense(64, activation='relu'),
layers.Dense(10, activation='softmax')
])
In [43]:
model2.summary()
Model: "sequential_12"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_45 (Conv2D) │ (None, 30, 30, 32) │ 896 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_33 (MaxPooling2D) │ (None, 15, 15, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_46 (Conv2D) │ (None, 13, 13, 64) │ 18,496 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_34 (MaxPooling2D) │ (None, 6, 6, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_47 (Conv2D) │ (None, 4, 4, 128) │ 73,856 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_35 (MaxPooling2D) │ (None, 2, 2, 128) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_48 (Conv2D) │ (None, 0, 0, 128) │ 147,584 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_12 (Flatten) │ (None, 0) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_24 (Dense) │ (None, 64) │ 64 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_25 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 241,546 (943.54 KB)
Trainable params: 241,546 (943.54 KB)
Non-trainable params: 0 (0.00 B)
Adding more convolutional layers (e.g., from 3 to 4) can help the model learn more complex features, but
may increase the risk of overfitting and computational cost.
In [56]:
# Increasing Number of Filters in Each Convolutional Layer
model3 = models.Sequential([
layers.Conv2D(64, (3, 3), activation='relu', input_shape=(32, 32, 3)),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(128, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(128, (3, 3), activation='relu'),
layers.Flatten(),
layers.Dense(64, activation='relu'),
layers.Dense(10, activation='softmax')
])
In [57]:
model3.summary()
Model: "sequential_19"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_67 (Conv2D) │ (None, 30, 30, 64) │ 1,792 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_46 (MaxPooling2D) │ (None, 15, 15, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_68 (Conv2D) │ (None, 13, 13, 128) │ 73,856 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_47 (MaxPooling2D) │ (None, 6, 6, 128) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_69 (Conv2D) │ (None, 4, 4, 128) │ 147,584 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_19 (Flatten) │ (None, 2048) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_38 (Dense) │ (None, 64) │ 131,136 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_39 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 355,018 (1.35 MB)
Trainable params: 355,018 (1.35 MB)
Non-trainable params: 0 (0.00 B)
Non-trainable params: 0 (0.00 B)
Increasing the number of filters helps capture more features but also increases the model size and
computational demands.
In [58]:
# Increasing Filter Size ( 3x3 to 5x5 )
model4 = models.Sequential([
layers.Conv2D(32, (5, 5), activation='relu', input_shape=(32, 32, 3)),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (5, 5), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (5, 5), activation='relu'),
layers.Flatten(),
layers.Dense(64, activation='relu'),
layers.Dense(10, activation='softmax')
])
In [59]:
model4.summary()
Model: "sequential_20"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_70 (Conv2D) │ (None, 28, 28, 32) │ 2,432 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_48 (MaxPooling2D) │ (None, 14, 14, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_71 (Conv2D) │ (None, 10, 10, 64) │ 51,264 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_49 (MaxPooling2D) │ (None, 5, 5, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_72 (Conv2D) │ (None, 1, 1, 64) │ 102,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_20 (Flatten) │ (None, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_40 (Dense) │ (None, 64) │ 4,160 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_41 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 160,970 (628.79 KB)
Trainable params: 160,970 (628.79 KB)
Non-trainable params: 0 (0.00 B)
Larger filters (5x5) can capture broader patterns but may miss finer details that smaller filters (3x3) can
catch.
In [60]:
# Changing activation functions ( relu to tanh )
model5 = models.Sequential([
layers.Conv2D(32, (5, 5), activation='tanh', input_shape=(32, 32, 3)),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (5, 5), activation='tanh'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (5, 5), activation='tanh'),
layers.Flatten(),
layers.Dense(64, activation='tanh'),
layers.Dense(10, activation='tanh')
])
In [61]:
model5.summary()
Model: "sequential_21"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_73 (Conv2D) │ (None, 28, 28, 32) │ 2,432 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_50 (MaxPooling2D) │ (None, 14, 14, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_74 (Conv2D) │ (None, 10, 10, 64) │ 51,264 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_51 (MaxPooling2D) │ (None, 5, 5, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_75 (Conv2D) │ (None, 1, 1, 64) │ 102,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_21 (Flatten) │ (None, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_42 (Dense) │ (None, 64) │ 4,160 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_43 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 160,970 (628.79 KB)
Trainable params: 160,970 (628.79 KB)
Non-trainable params: 0 (0.00 B)
Different activation functions can affect the ability of the model to learn complex patterns. ReLU is
commonly used, but tanh can also be effective depending on the problem.
In [66]:
# Remove pooling layers
model6 = models.Sequential([
layers.Conv2D(32, (5, 5), activation='relu', input_shape=(32, 32, 3)),
layers.Conv2D(64, (5, 5), activation='relu'),
layers.Conv2D(64, (5, 5), activation='relu'),
layers.Flatten(),
layers.Dense(64, activation='relu'),
layers.Dense(10, activation='relu')
])
In [67]:
model6.summary()
Model: "sequential_24"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_82 (Conv2D) │ (None, 28, 28, 32) │ 2,432 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_83 (Conv2D) │ (None, 24, 24, 64) │ 51,264 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_84 (Conv2D) │ (None, 20, 20, 64) │ 102,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_24 (Flatten) │ (None, 25600) │ 0 │
│ flatten_24 (Flatten) │ (None, 25600) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_48 (Dense) │ (None, 64) │ 1,638,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_49 (Dense) │ (None, 10) │ 650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 1,795,274 (6.85 MB)
Trainable params: 1,795,274 (6.85 MB)
Non-trainable params: 0 (0.00 B)
Pooling layers reduce the spatial dimensions and help prevent overfitting. Without pooling, the model might
capture too much detail, leading to overfitting.
In [68]:
# Increasing Number of Neurons in Dense Layers
model7 = models.Sequential([
layers.Conv2D(32, (5, 5), activation='relu', input_shape=(32, 32, 3)),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (5, 5), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (5, 5), activation='relu'),
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dense(10, activation='relu')
])
In [69]:
model7.summary()
Model: "sequential_25"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_85 (Conv2D) │ (None, 28, 28, 32) │ 2,432 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_54 (MaxPooling2D) │ (None, 14, 14, 32) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_86 (Conv2D) │ (None, 10, 10, 64) │ 51,264 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_55 (MaxPooling2D) │ (None, 5, 5, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_87 (Conv2D) │ (None, 1, 1, 64) │ 102,464 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_25 (Flatten) │ (None, 64) │ 0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_50 (Dense) │ (None, 128) │ 8,320 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_51 (Dense) │ (None, 10) │ 1,290 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 165,770 (647.54 KB)
Trainable params: 165,770 (647.54 KB)
Non-trainable params: 0 (0.00 B)
More neurons can help the model learn more complex patterns but also increase the risk of overfitting and
computational cost.
In [75]:
In [75]:
# Main model
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
metrics=['accuracy'])
history = model.fit(train_images, train_labels, epochs=10, batch_size=32,
validation_data=(test_images, test_labels))
Epoch 1/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9235 - loss: 0.2167 - val_accur
acy: 0.6948 - val_loss: 1.5697
Epoch 2/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9302 - loss: 0.1978 - val_accur
acy: 0.6915 - val_loss: 1.6489
Epoch 3/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9336 - loss: 0.1839 - val_accur
acy: 0.6915 - val_loss: 1.6949
Epoch 4/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9376 - loss: 0.1721 - val_accur
acy: 0.6889 - val_loss: 1.7276
Epoch 5/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9420 - loss: 0.1607 - val_accur
acy: 0.6890 - val_loss: 1.7500
Epoch 6/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9444 - loss: 0.1591 - val_accur
acy: 0.6897 - val_loss: 1.8458
Epoch 7/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9413 - loss: 0.1585 - val_accur
acy: 0.6941 - val_loss: 1.8822
Epoch 8/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9470 - loss: 0.1469 - val_accur
acy: 0.6868 - val_loss: 2.0302
Epoch 9/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 10ms/step - accuracy: 0.9491 - loss: 0.1458 - val_accu
racy: 0.6926 - val_loss: 1.9869
Epoch 10/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9533 - loss: 0.1328 - val_accur
acy: 0.6862 - val_loss: 2.0189
In [76]:
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
313/313 - 1s - 3ms/step - accuracy: 0.6862 - loss: 2.0189
In [77]:
print(test_acc)
0.6862000226974487
In [78]:
# Increasing batch size
history2 = model.fit(train_images, train_labels, epochs=10, batch_size=64,
validation_data=(test_images, test_labels))
Epoch 1/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9721 - loss: 0.0830 - val_accura
cy: 0.6997 - val_loss: 2.2044
Epoch 2/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 16ms/step - accuracy: 0.9914 - loss: 0.0342 - val_accura
cy: 0.6977 - val_loss: 2.2941
Epoch 3/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9837 - loss: 0.0496 - val_accura
cy: 0.6930 - val_loss: 2.3404
Epoch 4/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9755 - loss: 0.0696 - val_accura
cy: 0.6911 - val_loss: 2.4313
Epoch 5/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9776 - loss: 0.0660 - val_accura
cy: 0.6899 - val_loss: 2.5113
Epoch 6/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9747 - loss: 0.0721 - val_accura
cy: 0.6919 - val_loss: 2.5785
Epoch 7/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9789 - loss: 0.0605 - val_accura
cy: 0.6881 - val_loss: 2.6383
Epoch 8/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9786 - loss: 0.0608 - val_accura
cy: 0.6819 - val_loss: 2.7417
Epoch 9/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9732 - loss: 0.0755 - val_accura
cy: 0.6898 - val_loss: 2.8422
Epoch 10/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.9790 - loss: 0.0590 - val_accura
cy: 0.6864 - val_loss: 2.9127
In [79]:
plt.plot(history2.history['accuracy'], label='accuracy')
plt.plot(history2.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
313/313 - 1s - 3ms/step - accuracy: 0.6864 - loss: 2.9127
In [80]:
print(test_acc)
0.6863999962806702
Larger batch sizes can speed up training but might reduce the generalization ability. Smaller batch sizes
usually offer better generalization.
In [81]:
# Increasing batch size
history3 = model.fit(train_images, train_labels, epochs=20, validation_data=(test_images
, test_labels))
Epoch 1/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9264 - loss: 0.2437 - val_accur
acy: 0.6814 - val_loss: 2.5280
Epoch 2/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9628 - loss: 0.1067 - val_accu
racy: 0.6946 - val_loss: 2.5628
Epoch 3/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 17s 11ms/step - accuracy: 0.9572 - loss: 0.1222 - val_accu
racy: 0.6743 - val_loss: 2.7162
Epoch 4/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9606 - loss: 0.1153 - val_accu
racy: 0.6823 - val_loss: 2.7181
Epoch 5/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9628 - loss: 0.1038 - val_accu
racy: 0.6873 - val_loss: 2.6969
Epoch 6/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 10ms/step - accuracy: 0.9617 - loss: 0.1090 - val_accu
racy: 0.6854 - val_loss: 2.6625
Epoch 7/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 17s 11ms/step - accuracy: 0.9641 - loss: 0.1058 - val_accu
racy: 0.6900 - val_loss: 2.6380
Epoch 8/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9649 - loss: 0.1016 - val_accu
racy: 0.6850 - val_loss: 2.6855
Epoch 9/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 16s 10ms/step - accuracy: 0.9657 - loss: 0.1038 - val_accu
racy: 0.6824 - val_loss: 2.7350
Epoch 10/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9667 - loss: 0.0965 - val_accur
acy: 0.6781 - val_loss: 2.7364
Epoch 11/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9645 - loss: 0.1027 - val_accur
acy: 0.6857 - val_loss: 2.6774
Epoch 12/20
Epoch 12/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9660 - loss: 0.0973 - val_accur
acy: 0.6903 - val_loss: 2.7796
Epoch 13/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9663 - loss: 0.1006 - val_accur
acy: 0.6790 - val_loss: 3.0427
Epoch 14/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9628 - loss: 0.1122 - val_accur
acy: 0.6880 - val_loss: 2.8659
Epoch 15/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9695 - loss: 0.0924 - val_accur
acy: 0.6828 - val_loss: 2.8592
Epoch 16/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9667 - loss: 0.0948 - val_accur
acy: 0.6825 - val_loss: 2.7363
Epoch 17/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9695 - loss: 0.0881 - val_accur
acy: 0.6870 - val_loss: 2.8467
Epoch 18/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9693 - loss: 0.0877 - val_accur
acy: 0.6827 - val_loss: 2.9090
Epoch 19/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9724 - loss: 0.0858 - val_accur
acy: 0.6836 - val_loss: 2.9486
Epoch 20/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9689 - loss: 0.0924 - val_accur
acy: 0.6797 - val_loss: 2.8446
In [84]:
plt.plot(history3.history['accuracy'], label='accuracy')
plt.plot(history3.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
313/313 - 1s - 3ms/step - accuracy: 0.7017 - loss: 4.0073
In [88]:
print(test_acc)
0.70169997215271
0.70169997215271
More epochs allow for more training iterations, which can improve accuracy but also increase the risk of
overfitting.
In [83]:
# Reducing learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='sparse_cat
egorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_images, train_labels, epochs=10, batch_size=32,
validation_data=(test_images, test_labels))
Epoch 1/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 0.9905 - loss: 0.0298 - val_accur
acy: 0.6990 - val_loss: 2.9388
Epoch 2/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 0.9991 - loss: 0.0073 - val_accur
acy: 0.6992 - val_loss: 3.0310
Epoch 3/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 10ms/step - accuracy: 0.9999 - loss: 0.0039 - val_accu
racy: 0.6999 - val_loss: 3.1500
Epoch 4/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 25s 16ms/step - accuracy: 1.0000 - loss: 0.0027 - val_accu
racy: 0.6998 - val_loss: 3.2499
Epoch 5/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 18s 11ms/step - accuracy: 1.0000 - loss: 0.0018 - val_accu
racy: 0.6997 - val_loss: 3.3865
Epoch 6/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 17s 11ms/step - accuracy: 1.0000 - loss: 0.0012 - val_accu
racy: 0.7018 - val_loss: 3.4865
Epoch 7/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 1.0000 - loss: 8.0826e-04 - val_a
ccuracy: 0.7022 - val_loss: 3.6365
Epoch 8/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 10ms/step - accuracy: 1.0000 - loss: 5.5039e-04 - val_
accuracy: 0.7027 - val_loss: 3.7390
Epoch 9/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 14s 9ms/step - accuracy: 1.0000 - loss: 4.0029e-04 - val_a
ccuracy: 0.7015 - val_loss: 3.8755
Epoch 10/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 15s 9ms/step - accuracy: 1.0000 - loss: 2.8834e-04 - val_a
ccuracy: 0.7017 - val_loss: 4.0073
In [89]:
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
313/313 - 1s - 3ms/step - accuracy: 0.7017 - loss: 4.0073
In [90]:
print(test_acc)
0.70169997215271
A lower learning rate ensures a more thorough learning process but requires more training time. A higher
learning rate can speed up training but may lead to suboptimal convergence.