Kevin Richmond
Published © LGPL

Grad-CAM for your Machine Learning projects

Apply Grad-CAM to gain a better understanding of your Machine Learning image classification projects

Intermediate · Full instructions provided · 1.5 hours · 709

Things used in this project

Software apps and online services

MicroPython


Schematics

model

CNN model

Code

Images location and classes.py

Python
Program that creates an Excel file with image file locations and their classes
#!/usr/bin/env python
# coding: utf-8

# https://www.kaggle.com/code/roy2004/cnn-waste-classification-from-jpg-op-3

import os

import pandas as pd

rows = [] # Collect rows in a list; DataFrame.append was removed in pandas 2.0

cd = os.getcwd()
folders = os.listdir(cd)

for fd in folders:
    folderpath = os.path.join(cd, fd) # os.path.join keeps the script portable across operating systems
    if os.path.isdir(folderpath): # Only descend into the category folders, skipping loose .py/.h5/.xlsx files
        files = os.listdir(folderpath)
        for fl in files:
            filepath = os.path.join(folderpath, fl)
            rows.append({'filename': filepath, 'category': fd})

df = pd.DataFrame(rows, columns=['filename', 'category']) # Two columns: image file path and waste category

df = df.dropna()
print("Number of images = {}".format(df.size))
print (df['category'].value_counts()) # Quick look of the dataframe

with pd.ExcelWriter('Image Classification database.xlsx', mode='w') as writer:
    df.to_excel(writer)
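
To sanity-check the output before training, you can read the spreadsheet back (a minimal sketch; it only assumes the file name written above):

import os
import pandas as pd

# Reload the spreadsheet written above and confirm every listed file still exists
df = pd.read_excel('Image Classification database.xlsx', index_col=0)
missing = [f for f in df['filename'] if not os.path.isfile(f)]
print("{} rows, {} missing files".format(len(df), len(missing)))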

Define, train and test model - h5.py

Python
Define, train, and test a CNN model for image classification, then save it as an .h5 file
#!/usr/bin/env python
# coding: utf-8

# https://www.kaggle.com/code/roy2004/cnn-waste-classification-from-jpg-op-3

import os
from random import randint

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

from PIL import Image

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential # Importing models from Keras
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.layers import Dense, InputLayer, Dropout, Conv1D, Conv2D, Flatten, Reshape, MaxPooling1D, MaxPooling2D, BatchNormalization, TimeDistributed
from tensorflow.keras.optimizers import Adam

df = pd.read_excel('Image Classification database.xlsx', index_col=0) # Import the table with the location and category of each image

df['category'].value_counts().plot.bar()
print (df['category'].value_counts())

df_train=df.sample(frac=0.8,replace=False) # Randomly sample 80% of the data for training. replace=False prevents repeat sampling
df_valid=df.drop(df_train.index.values) # The remaining 20% are the validation images

df_train['category'].value_counts().plot.bar()
print (df_train['category'].value_counts())

df_valid['category'].value_counts().plot.bar()
print(df_valid['category'].value_counts())

#Image.open(random.choice(df_train['filename'])).show()

FAST_RUN = False # True to quickly test your model (training for 3 epochs). False for a full training run (100 epochs).
epochs = 3 if FAST_RUN else 100

IMAGE_WIDTH = 400 # Image dimensions. Note that Keras's target_size is (height, width),
IMAGE_HEIGHT = 225 # so IMAGE_SIZE below yields 400-row by 225-column images; the Grad-CAM script uses the same convention
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)

IMAGE_CHANNELS = 1 # 3 if RGB. 1 if Grayscale
batch_size = 32
d = 0.1 # Dropout rate

# Use when training on pre-trained weights
START_EPOCH = 0 # if fresh train, enter 0
Transfer = False
Pretrained_Link = os.getcwd() + "/model.h5"

#rdm = randint(0,len(df_train['filename']))
#sample = df_train['filename'].iloc[rdm]
#pic = Image.open(sample)
#pic.show()

classes_values = ["background", "fork", "knife", "spoon" ]
classes = len(classes_values)

# Create Keras Sequential Model
model = Sequential()
model.add(InputLayer(input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS))) # Declare the input shape so the model is built before training
model.add(Conv2D(32, kernel_size=3, activation='relu', kernel_constraint=tf.keras.constraints.MaxNorm(1), padding='same'))
model.add(MaxPooling2D(pool_size=2, strides=2, padding='same'))
model.add(Conv2D(16, kernel_size=3, activation='relu', kernel_constraint=tf.keras.constraints.MaxNorm(1), padding='same', name = "last_conv2d"))
model.add(MaxPooling2D(pool_size=2, strides=2, padding='same'))
model.add(Flatten())
model.add(Dropout(0.25))
model.add(Dense(classes, activation='softmax', name='y_pred'))
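
# Shape check (a sketch of the arithmetic, given 400x225 grayscale inputs): each
# 'same'-padded convolution keeps the spatial size and each 2x2 pooling halves it
# (rounding up), so 400x225 -> 200x113 -> 100x57. Flatten then yields
# 100*57*16 = 91200 features feeding the 4-unit softmax layer.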

if Transfer:
    model.load_weights(Pretrained_Link)

opt = Adam(learning_rate=0.0005, beta_1=0.9, beta_2=0.999)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

#model.summary()

earlystop = EarlyStopping(patience=10,restore_best_weights=True)

LR_START = .001 # Learning rate (LR) schedule for TPU, GPU and CPU
LR_MIN = 1e-6
LR_EXP_DECAY = .94

# Define a Learning Rate function on epoch that will decrease exponentially.
def lrfn(epoch):
    lr = (LR_START - LR_MIN) * LR_EXP_DECAY ** (epoch + START_EPOCH) + LR_MIN
    return lr
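
# Worked example (a sketch): at epoch 0 the schedule gives (0.001 - 1e-6) * 0.94**0 + 1e-6 = 0.001;
# by epoch 50 it has decayed to about (0.001 - 1e-6) * 0.94**50 + 1e-6 ≈ 4.6e-5.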

lr_callback = LearningRateScheduler(lrfn, verbose=True)

rng = [i for i in range(START_EPOCH, epochs + START_EPOCH)] # Visualize the change in learning rate
y = [lrfn(x) for x in rng]
#plt.plot(rng, y)
#plt.show()
print("Learning rate schedule: {:.3g} to {:.3g}".format(y[0], y[-1]))

total_train = df_train.shape[0] # Total number of images for training
total_validate = df_valid.shape[0] # Total number of images for validation

print("Training: {}, Validation: {}".format(total_train,total_validate))

train_datagen = ImageDataGenerator(rotation_range=15, rescale=1./255, shear_range=0.1, horizontal_flip=True, vertical_flip=True)

train_generator = train_datagen.flow_from_dataframe(df_train, "", x_col='filename', y_col='category', target_size=IMAGE_SIZE, class_mode='categorical', batch_size=batch_size, color_mode = "grayscale") # According to the dataframe, pull images one by one from image directory

validation_datagen = ImageDataGenerator(rescale=1./255) # Validation needs no augmentation beyond rescaling
validation_generator = validation_datagen.flow_from_dataframe(df_valid, "", x_col='filename', y_col='category', target_size=IMAGE_SIZE, class_mode='categorical', batch_size=batch_size, color_mode = "grayscale") # According to the dataframe, pull images one by one from image directory

history = model.fit(train_generator, epochs=epochs, validation_data=validation_generator, validation_steps=total_validate//batch_size, steps_per_epoch=total_train//batch_size, callbacks=[earlystop, lr_callback]) # batch_size is set by the generators and must not be passed to fit()

model.save("model.h5") # Save Model in h5 format (old one). New models are saved as SaveModel
#model.save("model_raw") # Save Model

test_df = df.sample(frac = 0.3) # Randomly select 30% of the data (note: this overlaps the training set, so it is an optimistic check rather than a true hold-out test)
nb_samples = test_df.shape[0] # Number of testing samples

test_gen = ImageDataGenerator(rescale=1./255) # Test generator in the same fashion of the train/validation generators
test_generator = test_gen.flow_from_dataframe(test_df, "", x_col='filename', y_col='category', class_mode=None, target_size=IMAGE_SIZE, batch_size=batch_size, shuffle=False, color_mode = "grayscale")

predict = model.predict(test_generator, steps=int(np.ceil(nb_samples/batch_size))) # predict_generator is deprecated; predict accepts generators directly

test_df['pred_category'] = np.argmax(predict, axis=-1)

label_map = dict((v,k) for k,v in train_generator.class_indices.items())
test_df['pred_category'] = test_df['pred_category'].replace(label_map)

test_df["background"] = predict[:, [0]]
test_df["fork"] = predict[:, [1]]
test_df["knife"] = predict[:, [2]]
test_df["spoon"] = predict[:, [3]]

submission_df = test_df.copy()

with pd.ExcelWriter('Summary.xlsx', mode='w') as writer:
    submission_df.to_excel(writer)
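
As a quick check of the exported results, you can compare the true and predicted categories (a minimal sketch; the column names follow the script above):

import pandas as pd

# Reload the summary written above and compute a simple accuracy score
summary = pd.read_excel('Summary.xlsx', index_col=0)
accuracy = (summary['category'] == summary['pred_category']).mean()
print("Test accuracy: {:.1%}".format(accuracy))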

JPG Image editor - Resizing.py

Python
Code to reduce image sizes to make the ML pipeline lighter to run
#!/usr/bin/env python
# coding: utf-8

import os
from PIL import Image
from PIL import ImageOps

# Iterate over every file in the current directory
for i in os.listdir():

    name, ext = os.path.splitext(i) # Split "photo.jpg" into "photo" and ".jpg"

    if ext.lower() == ".jpg": # Only process JPEG files

        orig = Image.open(i)
        width, height = orig.size

        print(i, width, height)

        newsize = (width//10, height//10) # Width and height reduced by a factor of 10
        orig = orig.resize(newsize)

        raw = ImageOps.grayscale(orig)
        ty = "raw "
        fn = ty + name + ext
        raw.save(fn)
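
If your source photos vary in resolution, capping the longest side can be more predictable than a fixed divisor. A minimal sketch using PIL's thumbnail method, which preserves aspect ratio and never upscales (the 400-pixel cap and the function name are assumptions, not part of the original script):

from PIL import Image, ImageOps

def shrink_to_max_side(path, max_side=400):
    # thumbnail resizes in place, keeping aspect ratio and never enlarging
    img = Image.open(path)
    img.thumbnail((max_side, max_side))
    return ImageOps.grayscale(img)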

Grad-CAM for CNN model - h5.py

Python
Plot Grad-CAM images to see the model's insights into its class decisions
#https://github.com/ShawnHymel/ei-workshop-image-data-augmentation/blob/master/workshop_01_saliency_and_grad_cam.ipynb

import PIL
import cv2
import numpy as np
import tensorflow as tf
import os

from tensorflow import keras
from skimage.transform import resize

import matplotlib.pyplot as plt

LABELS = ["background", "fork", "knife", "spoon"] # Labels

IMAGE_PATH = r"your\image\path" # Change this based on your image sample
TRUE_LABEL = "yourimageclass"

# If you trained your own model, these should match your image size
# If you are importing from Edge Impulse, check the image resolution (Edge Impulse project > Impulse design > Image data)
WIDTH = 400
HEIGHT = 225

true_idx = LABELS.index(TRUE_LABEL) # Find index of true label in label list

model = tf.keras.models.load_model("model.h5") # Load model file
model.summary()

img = PIL.Image.open(IMAGE_PATH) # Load image
img = img.convert('L') # Convert the image to grayscale
img = np.asarray(img) # Convert the image to a Numpy array

img = resize(img, (WIDTH, HEIGHT), anti_aliasing=True) # Resize to (rows, cols) and normalize values to 0.0-1.0; the channel axis is added below

print("Actual label:", TRUE_LABEL) # Show the ground-truth label

plt.imshow(img, cmap='gray', vmin=0.0, vmax=1.0) # Display image (make sure we're looking at the right thing)
plt.show()

# The Keras model expects images in a 4D array with dimensions (sample, height, width, channel)

img_0 = img.reshape(img.shape + (1,)) # Add extra dimension to the image (placeholder for color channels)
images = np.array([img_0]) # Keras expects more than one image (in Numpy array), so convert image(s) to such array
print(images.shape) # Print dimensions of inference input

preds = model.predict(images) # Inference

# Print out predictions
for i, pred in enumerate(preds[0]):
  print(LABELS[i] + ": " + str(pred))

model.layers[-1].activation = None # For either algorithm, we need to remove the Softmax activation function of the last layer

# Based on: https://github.com/keisen/tf-keras-vis/blob/master/tf_keras_vis/saliency.py
def get_saliency_map(img_array, model, class_idx):

  img_tensor = tf.convert_to_tensor(img_array) # Gradient calculation requires input to be a tensor

  # Do a forward pass of model with image and track the computations on the "tape"
  with tf.GradientTape(watch_accessed_variables=False, persistent=True) as tape:

    tape.watch(img_tensor) # Compute (non-softmax) outputs of model with given image
    outputs = model(img_tensor, training=False)

    score = outputs[:, class_idx] # Get score (predicted value) of the requested class

  grads = tape.gradient(score, img_tensor) # Compute gradients of the loss with respect to the input image

  grads_disp = [np.max(g, axis=-1) for g in grads] # Finds max value in each color channel of the gradient (should be grayscale for this demo)

  grad_disp = grads_disp[0] # There should be only one gradient heatmap for this demo

  grad_disp = tf.abs(grad_disp) # The absolute value of the gradient shows the effect of change at each pixel. Source: https://christophm.github.io/interpretable-ml-book/pixel-attribution.html

  heatmap_min = np.min(grad_disp) # Normalize to between 0 and 1 (use epsilon, a very small float, to prevent divide-by-zero error)
  heatmap_max = np.max(grad_disp)
  heatmap = (grad_disp - heatmap_min) / (heatmap_max - heatmap_min + tf.keras.backend.epsilon())

  return heatmap.numpy()


saliency_map = get_saliency_map(images, model, true_idx) # Generate saliency map for the given input image

plt.imshow(saliency_map, cmap='magma', vmin=0.0, vmax=1.0) # Draw map
plt.show()

idx = 0 # Overlay the saliency map on top of the original input image
ax = plt.subplot()
ax.imshow(images[idx,:,:,0], cmap='gray', vmin=0.0, vmax=1.0)
ax.imshow(saliency_map, cmap='magma', alpha=0.25)
plt.show()

### This function comes from https://keras.io/examples/vision/grad_cam/
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):

  grad_model = tf.keras.models.Model([model.inputs], [model.get_layer(last_conv_layer_name).output, model.output]) # First, we create a model that maps the input image to the activations of the last conv layer as well as the output predictions

  # Then, we compute the gradient of the top predicted class for our input image with respect to the activations of the last conv layer
  with tf.GradientTape() as tape:
      last_conv_layer_output, preds = grad_model(img_array)
      if pred_index is None:
          pred_index = tf.argmax(preds[0])
      class_channel = preds[:, pred_index]

  grads = tape.gradient(class_channel, last_conv_layer_output) # This is the gradient of the output neuron (top predicted or chosen) with regard to the output feature map of the last conv layer

  pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2)) # This is a vector where each entry is the mean intensity of the gradient over a specific feature map channel

  last_conv_layer_output = last_conv_layer_output[0] # We multiply each channel in the feature map array by "how important this channel is" with regard to the top predicted class then sum all the channels to obtain the heatmap class activation
  heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
  heatmap = tf.squeeze(heatmap)

  heatmap = tf.abs(heatmap) # The absolute value of the gradient shows the effect of change at each pixel. Source: https://christophm.github.io/interpretable-ml-book/pixel-attribution.html

  heatmap_min = np.min(heatmap) # Normalize to between 0 and 1 (use epsilon, a very small float, to prevent divide-by-zero error)
  heatmap_max = np.max(heatmap)
  heatmap = (heatmap - heatmap_min) / (heatmap_max - heatmap_min + tf.keras.backend.epsilon())

  return heatmap.numpy()

# We need to tell Grad-CAM where to find the last convolution layer

#for layer in model.layers:
#  print(layer, layer.name) # Print out the layers in the model

last_conv_layer = None # Go backwards through the model to find the last convolution layer
for layer in reversed(model.layers):
    if 'conv' in layer.name:
        last_conv_layer = layer.name
        break

if last_conv_layer is not None:
  print("Last convolution layer found:", last_conv_layer)
else:
  print("ERROR: Last convolution layer could not be found. Do not continue.") # Warn if no convolution layer was found

heatmap = make_gradcam_heatmap(images, model, last_conv_layer) # Generate class activation heatmap

plt.imshow(heatmap, cmap='magma', vmin=0.0, vmax=1.0) # Draw map
plt.show()

# Overlay the Grad-CAM heatmap on top of the original input image

big_heatmap = cv2.resize(heatmap, dsize=(HEIGHT, WIDTH), interpolation=cv2.INTER_CUBIC) # The heatmap is a lot smaller than the original image, so we upsample it

idx = 0 # Draw original image with heatmap superimposed over it
ax = plt.subplot()
ax.imshow(images[idx,:,:,0], cmap='gray', vmin=0.0, vmax=1.0)
ax.imshow(big_heatmap, cmap='magma', alpha=0.25)
plt.show()
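
To inspect several samples at once, the same steps can be looped over a folder of images. A minimal sketch reusing make_gradcam_heatmap, last_conv_layer, WIDTH and HEIGHT from above; the "samples" input folder and "gradcam_out" output folder are assumptions:

os.makedirs("gradcam_out", exist_ok=True) # Hypothetical output folder
for fname in os.listdir("samples"): # Hypothetical folder of JPG samples
    if not fname.lower().endswith(".jpg"):
        continue
    img = PIL.Image.open(os.path.join("samples", fname)).convert('L')
    img = resize(np.asarray(img), (WIDTH, HEIGHT), anti_aliasing=True)
    batch = np.array([img.reshape(img.shape + (1,))])
    hm = make_gradcam_heatmap(batch, model, last_conv_layer)
    big = cv2.resize(hm, dsize=(HEIGHT, WIDTH), interpolation=cv2.INTER_CUBIC)
    plt.imshow(img, cmap='gray', vmin=0.0, vmax=1.0)
    plt.imshow(big, cmap='magma', alpha=0.25)
    plt.savefig(os.path.join("gradcam_out", fname + ".png"))
    plt.clf() # Clear the figure before drawing the next overlay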

Credits

Kevin Richmond
Engineer passionate about automation, IoT, Machine Learning and sustainability.
Thanks to Shawn Hymel.
