Gabriel Choong
Published © GPL3+

Facial Expression and Object Tracking for Education AI

Using ML and OpenCV to improve the efficiency of learning through facial expression analysis and by limiting background distractions.

Intermediate • Work in progress • 27
Facial Expression and Object Tracking for Education AI

Things used in this project

Software apps and online services

Apple macOS 13.5
Any computer that runs Python is perfectly fine. The Python version I use is 3.9, with TensorFlow version 2.11.1.
Microsoft Visual Studio Code
Used for a better viewing and interactive experience with the notebook (.ipynb). Alternatively, running the jupyter-notebook command also works.

Story

Read more

Code

education-ai.ipynb

Python
This is the notebook that I'm currently working on to train the facial expression classification model. The model is still in the early stages and I'm still developing it.

To use it, download the file as .ipynb, then use the interactive user interface of the notebook to run each individual cell. Alternatively, you can convert it to a Python file using nbconvert, or follow the VS Code instructions at https://code.visualstudio.com/docs/python/jupyter-support-py.
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# libraries\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\nUse print if the data is found to be missing\\n'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the FER2013 CSV and split it on the 'Usage' column:\n",
    "# rows marked 'Training' become the training set, rows marked 'PrivateTest' the test set.\n",
    "data = pd.read_csv('fer2013.csv')\n",
    "train = data.loc[data['Usage'] == 'Training']\n",
    "test = data.loc[data['Usage'] == 'PrivateTest']\n",
    "\n",
    "# Fail early if either split is empty. (A bare string literal left as the last\n",
    "# expression of the cell would simply be echoed as the cell's output.)\n",
    "assert len(train) > 0 and len(test) > 0, 'fer2013.csv has no Training or PrivateTest rows'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns\n",
      "Wall time: 3.1 s\n",
      "Preprocessing Done\n",
      "Number of Training Samples:  28709\n",
      "Number of Test Samples:  3589\n",
      "Training Label Counts:  [3995  436 4097 7215 4830 3171 4965]\n",
      "Training Data Shape:  (28709, 48, 48, 1)\n",
      "Training Label Shape:  (28709,)\n",
      "Training Label Counts:  [491  55 528 879 594 416 626]\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Prepare training and test arrays.\n",
    "# NOTE: the %%time cell magic has to be the very first line of the cell so that it\n",
    "# times the whole cell; a standalone %time line only times an empty statement.\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "width, height = 48, 48\n",
    "\n",
    "\n",
    "def pixels_to_images(pixel_strings):\n",
    "    \"\"\"Convert space-separated pixel strings into float32 images with a trailing channel axis.\n",
    "\n",
    "    Each string is parsed into integers, reshaped to (width, height) and cast to\n",
    "    float32; the stacked result gets one extra last axis via np.expand_dims.\n",
    "    \"\"\"\n",
    "    images = [\n",
    "        np.asarray([int(px) for px in seq.split(' ')]).reshape(width, height).astype('float32')\n",
    "        for seq in pixel_strings\n",
    "    ]\n",
    "    return np.expand_dims(np.asarray(images), -1)\n",
    "\n",
    "\n",
    "# getting features for training and testing (same conversion for both splits)\n",
    "train_datapoints = train['pixels'].tolist()\n",
    "test_datapoints = test['pixels'].tolist()\n",
    "X_train = pixels_to_images(train_datapoints)\n",
    "X_test = pixels_to_images(test_datapoints)\n",
    "\n",
    "# getting labels for training and testing\n",
    "y_train = np.asarray(train['emotion'])\n",
    "y_test = np.asarray(test['emotion'])\n",
    "\n",
    "print(\"Preprocessing Done\")\n",
    "print(\"Number of Training Samples: \", X_train.shape[0])\n",
    "print(\"Number of Test Samples: \", X_test.shape[0])\n",
    "print(\"Training Label Counts: \", np.bincount(y_train))\n",
    "print(\"Training Data Shape: \", X_train.shape)\n",
    "print(\"Training Label Shape: \", y_train.shape)\n",
    "# This line reports the test split, so it is labelled as such.\n",
    "print(\"Test Label Counts: \", np.bincount(y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# visualise training data\n",
    "\n",
    "visualise = False\n",
    "\n",
    "if visualise:\n",
    "\n",
    "    import random\n",
    "\n",
    "    # Mapping emotion label with expression\n",
    "    labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']\n",
    "\n",
    "    # Create a figure holding a grid of 5 rows and 4 columns.\n",
    "    # The names width/height are deliberately not reassigned here: they hold the\n",
    "    # 48x48 image size used by the preprocessing cell and must not be clobbered.\n",
    "    fig = plt.figure(figsize=(10, 15))\n",
    "    columns = 4\n",
    "    rows = 5\n",
    "\n",
    "    # For each grid position, randomly select an image from the dataset and display it\n",
    "    for i in range(1, columns * rows + 1):\n",
    "        fig.add_subplot(rows, columns, i)\n",
    "        # randrange excludes its upper bound, so every result is a valid index.\n",
    "        # randint(1, len(X_train)) could return len(X_train) (IndexError) and never index 0.\n",
    "        sample_idx = random.randrange(len(X_train))\n",
    "        plt.imshow(X_train[sample_idx].reshape(48, 48), cmap='gray')\n",
    "        plt.title(labels[y_train[sample_idx]])\n",
    "    plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential\"\n",
      "_________________________________________________________________\n",
      " Layer (type)                Output Shape              Param #   \n",
      "=================================================================\n",
      " conv2d (Conv2D)             (None, 46, 46, 128)       1280      \n",
      "                                                                 \n",
      " max_pooling2d (MaxPooling2D  (None, 23, 23, 128)      0         \n",
      " )                                                               \n",
      "                                                                 \n",
      " dropout (Dropout)           (None, 23, 23, 128)       0         \n",
      "                                                                 \n",
      " conv2d_1 (Conv2D)           (None, 19, 19, 256)       819456    \n",
      "                                                                 \n",
      " max_pooling2d_1 (MaxPooling  (None, 9, 9, 256)        0         \n",
      " 2D)                                                             \n",
      "                                                                 \n",
      " dropout_1 (Dropout)         (None, 9, 9, 256)         0         \n",
      "                                                                 \n",
      " conv2d_2 (Conv2D)           (None, 7, 7, 512)         1180160   \n",
      "                                                                 \n",
      " max_pooling2d_2 (MaxPooling  (None, 3, 3, 512)        0         \n",
      " 2D)                                                             \n",
      "                                                                 \n",
      " dropout_2 (Dropout)         (None, 3, 3, 512)         0         \n",
      "                                                                 \n",
      " conv2d_3 (Conv2D)           (None, 1, 1, 1024)        4719616   \n",
      "                                                                 \n",
      " dropout_3 (Dropout)         (None, 1, 1, 1024)        0         \n",
      "                                                                 \n",
      " flatten (Flatten)           (None, 1024)              0         \n",
      "                                                                 \n",
      " dense (Dense)               (None, 256)               262400    \n",
      "                                                                 \n",
      " dropout_4 (Dropout)         (None, 256)               0         \n",
      "                                                                 \n",
      " dense_1 (Dense)             (None, 512)               131584    \n",
      "                                                                 \n",
      " dropout_5 (Dropout)         (None, 512)               0         \n",
      "                                                                 \n",
      " dense_2 (Dense)             (None, 7)                 3591      \n",
      "                                                                 \n",
      "=================================================================\n",
      "Total params: 7,118,087\n",
      "Trainable params: 7,118,087\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "# Normalise the data\n",
    "# Divide by 255 (pixel values are assumed to be 8-bit, 0-255), then standardise each\n",
    "# pixel position. The mean and standard deviation are computed on the training set\n",
    "# only and reused for the test set, so both splits get the identical transformation.\n",
    "X_train /= 255.0\n",
    "X_test /= 255.0\n",
    "pixel_mean = np.mean(X_train, axis=0)\n",
    "pixel_std = np.std(X_train, axis=0)\n",
    "X_train = (X_train - pixel_mean) / pixel_std\n",
    "X_test = (X_test - pixel_mean) / pixel_std\n",
    "\n",
    "import tensorflow as tf\n",
    "\n",
    "# Convolutional block 1 (also creates the Sequential model)\n",
    "model = tf.keras.models.Sequential([\n",
    "    tf.keras.layers.Conv2D(128, (3, 3), activation='relu', input_shape=(48, 48, 1)),\n",
    "    tf.keras.layers.MaxPooling2D(2, 2),\n",
    "    tf.keras.layers.Dropout(0.25),\n",
    "])\n",
    "\n",
    "# Convolutional block 2\n",
    "model.add(tf.keras.layers.Conv2D(256, (5, 5), activation='relu'))\n",
    "model.add(tf.keras.layers.MaxPooling2D(2, 2))\n",
    "model.add(tf.keras.layers.Dropout(0.25))\n",
    "\n",
    "# Convolutional block 3\n",
    "model.add(tf.keras.layers.Conv2D(512, (3, 3), activation='relu'))\n",
    "model.add(tf.keras.layers.MaxPooling2D(2, 2))\n",
    "model.add(tf.keras.layers.Dropout(0.25))\n",
    "\n",
    "# Convolutional block 4 (no pooling after this convolution)\n",
    "model.add(tf.keras.layers.Conv2D(1024, (3, 3), activation='relu'))\n",
    "model.add(tf.keras.layers.Dropout(0.25))\n",
    "\n",
    "# Classifier head: flatten, two dense layers with dropout, then a 7-way softmax\n",
    "model.add(tf.keras.layers.Flatten())\n",
    "model.add(tf.keras.layers.Dense(256, activation='relu'))\n",
    "model.add(tf.keras.layers.Dropout(0.25))\n",
    "model.add(tf.keras.layers.Dense(512, activation='relu'))\n",
    "model.add(tf.keras.layers.Dropout(0.25))\n",
    "model.add(tf.keras.layers.Dense(7, activation='softmax'))\n",
    "\n",
    "# Compile the model\n",
    "model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/100\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2023-08-24 17:23:30.528140: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "898/898 [==============================] - 401s 446ms/step - loss: 1.7023 - accuracy: 0.3076 - val_loss: 1.5260 - val_accuracy: 0.4032\n",
      "Epoch 2/100\n",
      "898/898 [==============================] - 411s 457ms/step - loss: 1.5013 - accuracy: 0.4121 - val_loss: 1.4014 - val_accuracy: 0.4609\n",
      "Epoch 3/100\n",
      "898/898 [==============================] - 413s 459ms/step - loss: 1.4200 - accuracy: 0.4492 - val_loss: 1.3395 - val_accuracy: 0.4806\n",
      "Epoch 4/100\n",
      "898/898 [==============================] - 414s 461ms/step - loss: 1.3729 - accuracy: 0.4686 - val_loss: 1.3020 - val_accuracy: 0.5032\n",
      "Epoch 5/100\n",
      "898/898 [==============================] - 414s 461ms/step - loss: 1.3317 - accuracy: 0.4889 - val_loss: 1.2610 - val_accuracy: 0.5230\n",
      "Epoch 6/100\n",
      "588/898 [==================>...........] - ETA: 2:20 - loss: 1.3050 - accuracy: 0.5041"
     ]
    }
   ],
   "source": [
    "# Train the model and keep the History object returned by fit();\n",
    "# the plotting cell reads its per-epoch metrics from history.history.\n",
    "history = model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw the training and validation accuracy curves per epoch, save the figure, then show it\n",
    "fig, ax = plt.subplots()\n",
    "for metric in ('accuracy', 'val_accuracy'):\n",
    "    ax.plot(history.history[metric])\n",
    "ax.set_title('Model Accuracy')\n",
    "ax.set_ylabel('Accuracy')\n",
    "ax.set_xlabel('Epoch')\n",
    "ax.legend(['Training', 'Testing'], loc='upper left')\n",
    "fig.savefig(\"initial_accuracy.png\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the trained model to an .h5 file in the working directory\n",
    "model_path = \"initial_model.h5\"\n",
    "model.save(model_path)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "machine-learning",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

Credits

Gabriel Choong

Gabriel Choong

1 project • 0 followers
Thanks to Packt.

Comments