DeepLearning.AI
DeepLearning.AI
DeepLearning.AI makes these slides available for educational purposes. You may not use or
distribute these slides for commercial purposes. You may make copies of these slides and
use or distribute them for educational purposes as long as you cite DeepLearning.AI as the
source of the slides.
Before NMS
Non-maximum suppression
(NMS)
NMS
1. Region proposal
Feature Extraction
SVM
Classification
ConvNet
Feature Extraction
SVM
Classification
ConvNet
Feature Extraction
Regression
Bounding Boxes
Transfer Learning for R-CNN
Slow
memory
Fast R-CNN
Feature Extraction
ConvNet Pool Dense Feature
Vector
Dense Softmax Classification
Feature
Vector
Faster R-CNN
Feature Extraction
ConvNet
Feature Extraction
Dense Softmax Classification
module_handle = ...
detector = ...
import tensorflow as tf
import tensorflow_hub as hub
module_handle = ...
detector = ...
Go to:
https://www.tensorflow.org/hub
https://tfhub.dev/s?module-type=image-object-detection
import tensorflow as tf
import tensorflow_hub as hub
module_handle = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"
detector = ...
import tensorflow as tf
import tensorflow_hub as hub
module_handle = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"
detector = ...
import tensorflow as tf
import tensorflow_hub as hub
module_handle = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"
detector = hub.load(module_handle).signatures['default']
image_url = "https://upload.wikimedia.org/wikipedia/commons/f/fb/20130807_dublin014.JPG"
downloaded_image_path = ...
image_url = "https://upload.wikimedia.org/wikipedia/commons/f/fb/20130807_dublin014.JPG"
downloaded_image_path = download_and_resize_image(url=image_url,
                                                  new_width=3872,
                                                  new_height=2592)
img = tf.io.read_file(downloaded_image_path)
img = tf.image.decode_jpeg(img, channels=3)
converted_img = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
result = detector(converted_img)
img = tf.io.read_file(downloaded_image_path)
img = tf.image.decode_jpeg(img, channels=3)
converted_img = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
result = detector(converted_img)
img = tf.io.read_file(downloaded_image_path)
img = tf.image.decode_jpeg(img, channels=3)
converted_img = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
result = detector(converted_img)
img = tf.io.read_file(downloaded_image_path)
img = tf.image.decode_jpeg(img, channels=3)
converted_img = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
result = detector(converted_img)
Found 100 objects.
[0.43670595 0.34758776 0.2438663 0.23315561 0.22782972 0.21416378
0.2057755 0.20488328 0.20278934 0.19843656 0.18925622 0.18167153
...]
[b'Person' b'Footwear' b'Footwear' b'Building' b'Person' b'Footwear'
b'Window' b'Building' b'Person' b'Window' b'Window' b'Window' b'Window'
...]
[[0.5130533 0.9170097 0.82187796 0.99240506]
[0.80095136 0.954444 0.83115625 0.98134536]
[0.79767334 0.94279504 0.8265182 0.9654046 ]
Found 100 objects.
[0.43670595 0.34758776 0.2438663 0.23315561 0.22782972 0.21416378 Probability
0.2057755 0.20488328 0.20278934 0.19843656 0.18925622 0.18167153
...]
[b'Person' b'Footwear' b'Footwear' b'Building' b'Person' b'Footwear' Class
b'Window' b'Building' b'Person' b'Window' b'Window' b'Window' b'Window'
...]
[[0.5130533 0.9170097 0.82187796 0.99240506] Bounding box
[0.80095136 0.954444 0.83115625 0.98134536]
[0.79767334 0.94279504 0.8265182 0.9654046 ]
# Clone the tensorflow models repository
!git clone --depth 1 https://github.com/tensorflow/models
# Clone the tensorflow models repository
!git clone --depth 1 https://github.com/tensorflow/models
# Clone the tensorflow models repository
!git clone --depth 1 https://github.com/tensorflow/models
# Clone the tensorflow models repository
!git clone --depth 1 https://github.com/tensorflow/models
# Clone the tensorflow models repository
!git clone --depth 1 https://github.com/tensorflow/models
%%bash
sudo apt install -y protobuf-compiler
cd models/research/
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .
%%bash
sudo apt install -y protobuf-compiler
cd models/research/
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .
%%bash
sudo apt install -y protobuf-compiler
cd models/research/
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .
%%bash
sudo apt install -y protobuf-compiler
cd models/research/
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .
%%bash
sudo apt install -y protobuf-compiler
cd models/research/
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops
pbtxt
item {
name: "/m/04_sv"
id: 4
display_name: "motorcycle"
}
item {
name: "/m/05czz6l"
id: 5
display_name: "airplane"
}
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops
pbtxt
item {
name: "/m/04_sv"
id: 4
display_name: "motorcycle"
}
item {
name: "/m/05czz6l"
id: 5
display_name: "airplane"
}
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops
PATH_TO_LABELS = './models/research/object_detection/data/mscoco_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(
PATH_TO_LABELS, use_display_name=True)
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops
PATH_TO_LABELS = './models/research/object_detection/data/mscoco_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(
PATH_TO_LABELS, use_display_name=True)
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops
PATH_TO_LABELS = './models/research/object_detection/data/mscoco_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(
PATH_TO_LABELS, use_display_name=True)
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops
viz_utils.visualize_boxes_and_labels_on_image_array(
image=...,
boxes=...,
classes=...,
scores=...,
...)
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops
result = hub_model(image_np)
detection_scores
detection_keypoint_scores
detection_classes
detection_keypoints
num_detections
detection_boxes
result.keys()
detection_scores
detection_keypoint_scores
Standard
detection_classes
detection_keypoints
num_detections
detection_boxes
result.keys()
detection_scores
detection_keypoint_scores
Standard
detection_classes
detection_keypoints
num_detections
detection_boxes
result.keys()
detection_scores
detection_keypoint_scores
Standard
detection_classes
detection_keypoints
num_detections
detection_boxes
viz_utils.visualize_boxes_and_labels_on_image_array(
image=
boxes=
classes=
scores=
category_index=
use_normalized_coordinates=
min_score_thresh=
)
viz_utils.visualize_boxes_and_labels_on_image_array(
image=image_np_with_detections[0],
boxes=
classes=
scores=
category_index=
use_normalized_coordinates=
min_score_thresh=
)
viz_utils.visualize_boxes_and_labels_on_image_array(
image=image_np_with_detections[0],
boxes=result['detection_boxes'][0],
classes=
scores=
category_index=
use_normalized_coordinates=
min_score_thresh=
)
viz_utils.visualize_boxes_and_labels_on_image_array(
image=image_np_with_detections[0],
boxes=result['detection_boxes'][0],
classes=(result['detection_classes'][0] +
label_id_offset).astype(int),
scores=
category_index=
use_normalized_coordinates=
min_score_thresh=
)
viz_utils.visualize_boxes_and_labels_on_image_array(
image=image_np_with_detections[0],
boxes=result['detection_boxes'][0],
classes=(result['detection_classes'][0] +
label_id_offset).astype(int),
scores=result['detection_scores'][0],
category_index=
use_normalized_coordinates=
min_score_thresh=
)
viz_utils.visualize_boxes_and_labels_on_image_array(
image=image_np_with_detections[0],
boxes=result['detection_boxes'][0],
classes=(result['detection_classes'][0] +
label_id_offset).astype(int),
scores=result['detection_scores'][0],
category_index=category_index,
use_normalized_coordinates=
min_score_thresh=
)
viz_utils.visualize_boxes_and_labels_on_image_array(
image=image_np_with_detections[0],
boxes=result['detection_boxes'][0],
classes=(result['detection_classes'][0] +
label_id_offset).astype(int),
scores=result['detection_scores'][0],
category_index=category_index,
use_normalized_coordinates=True,
min_score_thresh=
)
viz_utils.visualize_boxes_and_labels_on_image_array(
image=image_np_with_detections[0],
boxes=result['detection_boxes'][0],
classes=(result['detection_classes'][0] +
label_id_offset).astype(int),
scores=result['detection_scores'][0],
category_index=category_index,
use_normalized_coordinates=True,
min_score_thresh=
)
Normalized
0.5
1.0
viz_utils.visualize_boxes_and_labels_on_image_array(
image=image_np_with_detections[0],
boxes=result['detection_boxes'][0],
classes=(result['detection_classes'][0] +
label_id_offset).astype(int),
scores=result['detection_scores'][0],
category_index=category_index,
use_normalized_coordinates=True,
min_score_thresh=
)
Normalized  Denormalized
0.5         128
1.0         256
viz_utils.visualize_boxes_and_labels_on_image_array(
image=image_np_with_detections[0],
boxes=result['detection_boxes'][0],
classes=(result['detection_classes'][0] +
label_id_offset).astype(int),
scores=result['detection_scores'][0],
category_index=category_index,
use_normalized_coordinates=True,
min_score_thresh=.40
)
“Focal Loss for Dense Object Detection”
By: Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He, Piotr Dollár
https://arxiv.org/abs/1708.02002
https://arxiv.org/abs/1708.02002
https://arxiv.org/abs/1708.02002
https://arxiv.org/abs/1708.02002
Model configuration
Checkpoint (weights)
Model configuration
Checkpoint (weights)
Model configuration
!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz
https://github.com/tensorflow/models
!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz
checkpoint_path = 'models/research/object_detection/test_data/checkpoint/ckpt-0'
pipeline_config = 'models/research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config'
checkpoint_path = 'models/research/object_detection/test_data/checkpoint/ckpt-0'
pipeline_config = 'models/research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config'
checkpoint_path = 'models/research/object_detection/test_data/checkpoint/ckpt-0'
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(
model_config=model_config, is_training=True)
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(
model_config=model_config, is_training=True)
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(
model_config=model_config, is_training=True)
Model configuration
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(
model_config=model_config, is_training=True)
Model configuration
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(
model_config=model_config, is_training=True)
Model configuration
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(
model_config=model_config, is_training=True)
Model configuration
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(
model_config=model_config, is_training=True)
Model configuration
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(
model_config=model_config, is_training=True)
Model configuration
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(
model_config=model_config, is_training=True)
Model configuration
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(
model_config=model_config, is_training=True)
Model configuration
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(
model_config=model_config, is_training=True)
Model configuration
https://arxiv.org/abs/1708.02002
https://arxiv.org/abs/1708.02002
https://arxiv.org/abs/1708.02002
Restoring weights
Restoring weights RetinaNet model
Base layers
Box predictor
Box prediction head
Restoring weights RetinaNet model
Base layers
Box predictor Classification head
Box prediction head
Restoring weights RetinaNet model
Base layers
Box predictor Classification head
Box prediction head
Box predictor
model
Feature extractor
Restoring weights RetinaNet model
Base layers
Box predictor Classification head
Box prediction head
Box predictor
model
Feature extractor
checkpoint model
fake_box_predictor = tf.compat.v2.train.Checkpoint(
_base_tower_layers_for_heads=detection_model._box_predictor._base_tower_layers_for_heads,
_box_prediction_head=detection_model._box_predictor._box_prediction_head,
)
fake_model = tf.compat.v2.train.Checkpoint(
_feature_extractor=detection_model._feature_extractor,
_box_predictor=fake_box_predictor)
ckpt = tf.compat.v2.train.Checkpoint(model=fake_model)
ckpt.restore(checkpoint_path).expect_partial()
fake_box_predictor = tf.compat.v2.train.Checkpoint(
_base_tower_layers_for_heads=detection_model._box_predictor._base_tower_layers_for_heads,
_box_prediction_head=detection_model._box_predictor._box_prediction_head,
)
fake_model = tf.compat.v2.train.Checkpoint(
_feature_extractor=detection_model._feature_extractor,
_box_predictor=fake_box_predictor)
ckpt = tf.compat.v2.train.Checkpoint(model=fake_model)
ckpt.restore(checkpoint_path).expect_partial()
Box predictor
fake_box_predictor = tf.compat.v2.train.Checkpoint(
_base_tower_layers_for_heads=detection_model._box_predictor._base_tower_layers_for_heads,
_box_prediction_head=detection_model._box_predictor._box_prediction_head,
)
Checkpoint (weights)
fake_model = tf.compat.v2.train.Checkpoint(
_feature_extractor=detection_model._feature_extractor,
_box_predictor=fake_box_predictor)
ckpt = tf.compat.v2.train.Checkpoint(model=fake_model)
RetinaNet model
ckpt.restore(checkpoint_path).expect_partial()
fake_model = tf.compat.v2.train.Checkpoint(
_feature_extractor=detection_model._feature_extractor,
_box_predictor=fake_box_predictor)
ckpt = tf.compat.v2.train.Checkpoint(model=fake_model)
RetinaNet (detection_model)
ckpt.restore(checkpoint_path).expect_partial()
fake_model = tf.compat.v2.train.Checkpoint(
_feature_extractor=detection_model._feature_extractor,
_box_predictor=fake_box_predictor)
ckpt = tf.compat.v2.train.Checkpoint(model=fake_model)
RetinaNet (detection_model)
ckpt.restore(checkpoint_path).expect_partial()
Box predictor
model
Feature extractor
fake_box_predictor = tf.compat.v2.train.Checkpoint(
_base_tower_layers_for_heads=detection_model._box_predictor._base_tower_layers_for_heads,
_box_prediction_head=detection_model._box_predictor._box_prediction_head,
)
fake_model = tf.compat.v2.train.Checkpoint(
_feature_extractor=detection_model._feature_extractor,
_box_predictor=fake_box_predictor)
ckpt = tf.compat.v2.train.Checkpoint(model=fake_model)
RetinaNet (detection_model)
ckpt.restore(checkpoint_path).expect_partial()
Box predictor
model
Feature extractor
fake_box_predictor = tf.compat.v2.train.Checkpoint(
_base_tower_layers_for_heads=detection_model._box_predictor._base_tower_layers_for_heads,
_box_prediction_head=detection_model._box_predictor._box_prediction_head,
)
fake_model = tf.compat.v2.train.Checkpoint(
_feature_extractor=detection_model._feature_extractor,
_box_predictor=fake_box_predictor)
ckpt = tf.compat.v2.train.Checkpoint(model=fake_model)
RetinaNet (detection_model)
ckpt.restore(checkpoint_path).expect_partial()
Box predictor
model
Feature extractor
checkpoint model
fake_box_predictor = tf.compat.v2.train.Checkpoint(
_base_tower_layers_for_heads=detection_model._box_predictor._base_tower_layers_for_heads,
_box_prediction_head=detection_model._box_predictor._box_prediction_head,
)
fake_model = tf.compat.v2.train.Checkpoint(
_feature_extractor=detection_model._feature_extractor,
_box_predictor=fake_box_predictor)
ckpt = tf.compat.v2.train.Checkpoint(model=fake_model)
RetinaNet (detection_model)
ckpt.restore(checkpoint_path).expect_partial()
Box predictor
model
Feature extractor
checkpoint model
fake_box_predictor = tf.compat.v2.train.Checkpoint(
_base_tower_layers_for_heads=detection_model._box_predictor._base_tower_layers_for_heads,
_box_prediction_head=detection_model._box_predictor._box_prediction_head,
)
fake_model = tf.compat.v2.train.Checkpoint(
_feature_extractor=detection_model._feature_extractor,
_box_predictor=fake_box_predictor)
ckpt = tf.compat.v2.train.Checkpoint(model=fake_model)
RetinaNet (detection_model)
ckpt.restore(checkpoint_path).expect_partial()
Box predictor
model
Feature extractor
checkpoint model
# Run dummy image through the model so that variables are created
image, shapes = detection_model.preprocess(tf.zeros([1, 640, 640, 3]))
prediction_dict = detection_model.predict(image, shapes)
_ = detection_model.postprocess(prediction_dict, shapes)
print('Weights restored!')
# Run a dummy image through the model so that variables are created
image, shapes = detection_model.preprocess(tf.zeros([1, 640, 640, 3]))
prediction_dict = detection_model.predict(image, shapes)
_ = detection_model.postprocess(prediction_dict, shapes)
print('Weights restored!')
# Run a dummy image through the model so that variables are created
image, shapes = detection_model.preprocess(tf.zeros([1, 640, 640, 3]))
prediction_dict = detection_model.predict(image, shapes)
_ = detection_model.postprocess(prediction_dict, shapes)
print('Weights restored!')
# Run a dummy image through the model so that variables are created
image, shapes = detection_model.preprocess(tf.zeros([1, 640, 640, 3]))
prediction_dict = detection_model.predict(image, shapes)
_ = detection_model.postprocess(prediction_dict, shapes)
print('Weights restored!')
Prepare data for training
Trainable variables
Model
Custom Training Loop
Trainable variables
Model
Fine tune
Custom Training Loop For all training images:
Trainable variables
Model
Fine tune
Custom Training Loop For all training images:
Model
Fine tune
Custom Training Loop For all training images:
Model Predict
Fine tune
Custom Training Loop For all training images:
Model Predict
Loss
Fine tune
Custom Training Loop For all training images:
Model Predict
Loss
Fine tune
Gradient
Custom Training Loop For all training images:
Model Predict
Loss
Fine tune
Gradient
Optimize
for var in detection_model.trainable_variables:
print(var.name)
for var in detection_model.trainable_variables:
print(var.name)
for var in detection_model.trainable_variables:
print(var.name)
WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalBoxHead/BoxPredictor/kernel:0
WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalBoxHead/BoxPredictor/bias:0
WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalClassHead/ClassPredictor/kernel:0
WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalClassHead/ClassPredictor/bias:0
WeightSharedConvolutionalBoxPredictor/BoxPredictionTower/conv2d_0/kernel:0
WeightSharedConvolutionalBoxPredictor/BoxPredictionTower/conv2d_0/BatchNorm/feature_0/gamma:0
WeightSharedConvolutionalBoxPredictor/BoxPredictionTower/conv2d_0/BatchNorm/feature_0/beta:0
WeightSharedConvolutionalBoxPredictor/BoxPredictionTower/conv2d_1/kernel:0
prefixes_to_train = [
'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalBoxHead',
'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalClassHead']