Noah Jillson

Automatic Emotion Journal

Using an emotion recognition model, a Raspberry Pi camera journals my emotions throughout the day.

Beginner · Full instructions provided · 20 hours · 1,631 views

Things used in this project

Hardware components

Raspberry Pi 3 Model B+
×1
Raspberry Pi Camera Module
×1
Generic mouse and keyboard
×1

Software apps and online services

PyCharm CE
This is my preferred Python IDE on my Mac
Raspberry Pi Raspbian
Raspberry Pi Thonny
The default Python IDE on my Raspberry Pi 3 B+
Face-Recognition
This library allowed me to locate facial landmarks
TensorFlow
I used TensorFlow to create and train my emotion recognition model
TensorFlow Lite
I used TensorFlow Lite to run my emotion recognition model on the Raspberry Pi 3 B+

Story


Code

Automatic Emotion Journal

Python
This script loads and makes predictions with the pre-trained emotion recognition .tflite model. It is meant to be run on a Raspberry Pi.
import csv
import os
import sys

from time import sleep
from datetime import datetime

import face_recognition as fr
from picamera import PiCamera

import numpy as np
import tflite_runtime.interpreter as tflite


def establish_camera_connection() -> object:
    camera = PiCamera()
    camera.rotation = 180  # this rights the image since the camera hangs upside down
    return camera
    
    
def take_photo(camera: object) -> str:
    save_path = "/home/pi/Desktop/Automatic Emotion Journal/Face Repository/unknown.jpg"
    camera.start_preview()
    print("prepare for photo")
    sleep(5)
    camera.capture(save_path)
    camera.stop_preview()
    print("photo taken")
    return save_path


def load_image(image_path: str) -> list:
    image_as_numpy = fr.load_image_file(image_path)
    # fr.face_landmarks returns one dict per detected face, keyed by feature name
    # ("top_lip", "bottom_lip", etc.); [0] takes the first face and raises an
    # IndexError when no face is found, which the main loop catches
    landmarks: list = fr.face_landmarks(image_as_numpy)
    top_lip = landmarks[0]["top_lip"]
    bottom_lip = landmarks[0]["bottom_lip"]

    # top_lip and bottom_lip are plain lists of (x, y) tuples, so concatenation merges them
    mouth_landmarks = top_lip + bottom_lip
    return mouth_landmarks


def locate_maxs_and_mins(land: list) -> tuple:
    maximum_x, maximum_y = -1 * sys.maxsize, -1 * sys.maxsize
    minimum_x, minimum_y = sys.maxsize, sys.maxsize
    for point in land:
        maximum_x = max(maximum_x, point[0])
        minimum_x = min(minimum_x, point[0])
        maximum_y = max(maximum_y, point[1])
        minimum_y = min(minimum_y, point[1])
    coordinates: tuple = (minimum_x, minimum_y, maximum_x, maximum_y)
    return coordinates


def standardize(mouth_landmarks: list) -> list:
    buffer = 1
    extrema = locate_maxs_and_mins(mouth_landmarks)
    # the actual width and height are increased by twice the buffer so that the loop below does not divide by zero for
    # the minimum x and y points
    actual_width = (extrema[2] + buffer) - (extrema[0] - buffer)
    actual_height = (extrema[3] + buffer) - (extrema[1] - buffer)
    desired_width = 40
    desired_height = 20
    standardized_mouth_landmarks = []

    for point in mouth_landmarks:
        ax = point[0] - extrema[0]
        ay = point[1] - extrema[1]
        bx = actual_width - ax
        by = actual_height - ay

        ratio_x = ax / bx
        ratio_y = ay / by

        # Note: b_prime_x can round to 0, so a_prime_x can equal desired_width (40);
        # this is why format_data uses a grid 41 columns wide
        b_prime_x = round(desired_width / (ratio_x + 1))
        a_prime_x = desired_width - b_prime_x

        b_prime_y = round(desired_height / (ratio_y + 1))
        a_prime_y = desired_height - b_prime_y

        # The same calculation works for y: a is always the distance from the minimum,
        # which matches the usual convention of measuring coordinates from 0
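        # Worked example (hypothetical numbers): extrema = (10, 30, 50, 50) with buffer = 1
        # gives actual_width = 42 and actual_height = 22. The point (30, 40) then has
        # ax = 20, bx = 22, ratio_x ~ 0.909, b_prime_x = round(40 / 1.909) = 21, so
        # a_prime_x = 40 - 21 = 19; likewise a_prime_y = 20 - round(20 / 1.833) = 9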
        standardized_mouth_landmarks.append((a_prime_x, a_prime_y))

    # the idea is to then take these points and convert them to an 'image' which would really just be a
    # two dimensional numpy array with the first axis being the desired_height and the second axis being the
    # desired_width
    return standardized_mouth_landmarks


def format_data(mouth_landmarks: list) -> object:
    formatted_data = np.zeros((1, 20, 41), dtype=np.float32)
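    # each standardized landmark (x, y) is plotted into a 20 x 41 binary grid; row 19 - y flips
    # the points vertically so the mouth appears right side up (row 0 is the top of the image).
    # A y of 20 maps to row -1, which NumPy wraps around to the bottom row; the training script
    # fills its array the same way, so the model sees consistently formatted data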
    
    for point in mouth_landmarks:
        x = point[0]
        y = point[1]
        formatted_data[0, (19 - y), x] = 1.0
    return formatted_data


def rename_photo(prediction: str, time_stamp: str):
    os.rename(r'/home/pi/Desktop/Automatic Emotion Journal/Face Repository/unknown.jpg',
              r'/home/pi/Desktop/Automatic Emotion Journal/Face Repository/' + prediction + "_" + time_stamp + '.jpg')

def append_to_csv(prediction: str, time_stamp: str):
    with open("emotion_journal.txt", mode='a') as journal_file:
        csv_w = csv.writer(journal_file, delimiter=";", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_w.writerow([prediction, time_stamp])


def generate_time_stamp() -> str:
    current_time = datetime.now()
    time_stamp = current_time.strftime("%H-%M-%S")
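    # note: this stamp only encodes the time of day; including the date (e.g. "%Y-%m-%d-%H-%M-%S")
    # would keep photos taken on different days from colliding in rename_photo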
    return str(time_stamp)


if __name__ == "__main__":
    model_path = "/home/pi/Desktop/Automatic Emotion Journal/erm_v4.tflite"
    input_data_path = "/home/pi/Desktop/Automatic Emotion Journal/mouth_landmarks_unknown.txt"
    labels = ["happy", "sad"]

    # Loading tflite model
    interpreter = tflite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    
    # Allows access to input and output layers
    input_tensors = interpreter.get_input_details()
    output_tensors = interpreter.get_output_details()
    
    # creates a connection to the camera port
    camera = establish_camera_connection()
    
    for i in range(5):
        # Preparing and formatting the data that will be fed to the input layer
        photo = take_photo(camera)
        try:
            landmarks = load_image(photo)
            landmarks = standardize(landmarks)
            formatted_data = format_data(landmarks)
        except IndexError:
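            # no face was found in the photo; skip this capture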
            continue
        
        # Making a prediction and time stamp
        interpreter.set_tensor(input_tensors[0]['index'], formatted_data)
        interpreter.invoke()
        time_stamp = generate_time_stamp()
        
        # Housekeeping
        output_data = interpreter.get_tensor(output_tensors[0]['index'])
        prediction = labels[np.argmax(output_data[0])]
        rename_photo(prediction, time_stamp)
        append_to_csv(prediction, time_stamp)
        sleep(10)
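
After a run, the journal can be read back with the csv module. A minimal sketch (the delimiter and column order follow append_to_csv above; the printed tally is illustrative, not real output):

import csv
from collections import Counter

with open("emotion_journal.txt") as journal_file:
    rows = list(csv.reader(journal_file, delimiter=";"))

print(Counter(row[0] for row in rows))  # e.g. Counter({'happy': 4, 'sad': 1})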
    

Mouth Focused Data Organizer

Python
This script appends top and bottom lip facial landmarks to a .csv file that is used to train the emotion recognition TensorFlow model. This script is not actually part of the final product, but I am including it here because aggregating data and training the model is half of the battle. The function convert_to_images() is not mine; I got it from the tutorial linked here and in the sources (https://medium.com/@iKhushPatel/convert-video-to-images-images-to-video-using-opencv-python-db27a128a481). This script is meant to be run on a normal computer.
import cv2
import csv
import sys
import face_recognition as fr
from PIL import Image


def main():
    convert_to_images()
    count = 1
    path = "image" + str(count) + ".jpg"
    # Yes, I could do some math here to know how many frames will be converted to images, which
    # would really speed up this process; definitely something to tackle if performance ever
    # becomes an issue.
    while image_exists(path):
        try:
            # load_image raises an IndexError when no face is found in the frame
            landmarks = load_image(path)
            landmarks = standardize(landmarks)
            append_to_csv(landmarks)
        except IndexError:
            print("no face found in " + path)
        count += 1
        path = "image" + str(count) + ".jpg"


def image_exists(path: str) -> bool:
    try:
        # the context manager closes the file handle as soon as the check is done
        with Image.open(path):
            return True
    except OSError:
        # covers both a missing file and a file PIL cannot read
        return False


def append_to_csv(land: list):
    with open("training_set_expanded.txt", mode='a') as training_data:
        csv_w = csv.writer(training_data, delimiter=";", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_w.writerow(land)


def standardize(mouth_landmarks: list) -> list:
    buffer = 1
    extrema = locate_maxs_and_mins_from_list(mouth_landmarks)
    # the actual width and height are increased by twice the buffer so that the loop below does not divide by zero for
    # the minimum x and y points
    actual_width = (extrema[2] + buffer) - (extrema[0] - buffer)
    actual_height = (extrema[3] + buffer) - (extrema[1] - buffer)
    desired_width = 40
    desired_height = 20
    standardized_mouth_landmarks = []

    for point in mouth_landmarks:
        ax = point[0] - extrema[0]
        ay = point[1] - extrema[1]
        bx = actual_width - ax
        by = actual_height - ay

        ratio_x = ax / bx
        ratio_y = ay / by

        # Note: b_prime_x can round to 0, so a_prime_x can equal desired_width (40);
        # this is why the training array is 41 columns wide
        b_prime_x = round(desired_width / (ratio_x + 1))
        a_prime_x = desired_width - b_prime_x

        b_prime_y = round(desired_height / (ratio_y + 1))
        a_prime_y = desired_height - b_prime_y

        # The same calculation works for y: a is always the distance from the minimum,
        # which matches the usual convention of measuring coordinates from 0
        standardized_mouth_landmarks.append((a_prime_x, a_prime_y))

    # the idea is to then take these points and convert them to an 'image' which would really just be a
    # two dimensional numpy array with the first axis being the desired_height and the second axis being the
    # desired_width
    return standardized_mouth_landmarks


def load_image(path: str) -> list:
    image_as_numpy = fr.load_image_file(path)
    # fr.face_landmarks returns one dict per detected face; [0] takes the first face and
    # raises an IndexError when no face is found, which main catches
    landmarks: list = fr.face_landmarks(image_as_numpy)
    top_lip = landmarks[0]["top_lip"]
    bottom_lip = landmarks[0]["bottom_lip"]

    # top_lip and bottom_lip are plain lists of (x, y) tuples, so concatenation merges them
    mouth_landmarks = top_lip + bottom_lip
    return mouth_landmarks


def locate_maxs_and_mins_from_list(land: list) -> tuple:
    maximum_x, maximum_y = -1 * sys.maxsize, -1 * sys.maxsize
    minimum_x, minimum_y = sys.maxsize, sys.maxsize
    for point in land:
        maximum_x = max(maximum_x, point[0])
        minimum_x = min(minimum_x, point[0])
        maximum_y = max(maximum_y, point[1])
        minimum_y = min(minimum_y, point[1])
    coordinates: tuple = (minimum_x, minimum_y, maximum_x, maximum_y)
    return coordinates


# Not my code here; this function was borrowed from a video-to-images tutorial
def convert_to_images():
    vidcap = cv2.VideoCapture("/Users/noahjillson/Desktop/sad_2.mov")

    def getFrame(sec):
        vidcap.set(cv2.CAP_PROP_POS_MSEC, sec * 1000)
        has_frames, image = vidcap.read()
        if has_frames:
            cv2.imwrite("image" + str(count) + ".jpg", image)  # save frame as JPG file
        return has_frames

    sec = 0
    frame_rate = 0.2  # capture one frame every 0.2 seconds
    count = 1
    success = getFrame(sec)
    while success:
        count = count + 1
        sec = sec + frame_rate
        sec = round(sec, 2)
        success = getFrame(sec)


if __name__ == "__main__":
    main()
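
Each row of training_set_expanded.txt holds one face's standardized mouth points as "(x, y)" strings separated by semicolons. A quick sanity check of the collected data might look like this (a minimal sketch; the printed values are illustrative, not real output):

import csv

with open("training_set_expanded.txt") as f:
    rows = list(csv.reader(f, delimiter=";"))

print(len(rows), "faces collected")  # the training script below expects 814
print(rows[0][:3], "...")  # e.g. ['(19, 9)', '(14, 12)', '(9, 13)']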

Emotion Model Creation and Training

Python
This script creates and trains the emotion recognition model using TensorFlow Keras. This script is not necessarily part of the final project, but it is necessary for recreating it and following along. For the model part of this project, I followed the TensorFlow tutorial linked here and in the sources, making my own changes to better fit the model to my project's purposes (https://www.youtube.com/watch?v=cvNtZqphr6A). This script is meant to be run on a normal computer.
import csv

import numpy as np
from tensorflow import keras


def load_data(path: str):
    csv_data = []
    arr = np.zeros((814, 20, 41))
    with open(path, mode='r') as training_data:
        csv_r = csv.reader(training_data, delimiter=";", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in csv_r:
            csv_data.append(row)

    face_index = 0
    for face in csv_data:
        for coord in face:
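            # each coord is a string like "(12, 5)"; strip the parentheses and spaces,
            # then split on the comma to recover the two integers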
            usable = coord.replace('(', '').replace(')', '').replace(' ', '')
            x = int(usable[:usable.index(',')])
            y = int(usable[(usable.index(',') + 1):])
            try:
                arr[face_index, (19 - y), x] = 1
            except IndexError:
                print(str(face_index) + "  " + str(19-y) + "  " + str(x))

        face_index += 1
    # Yes, I could streamline this into one loop inside the CSV reading, but I don't want to
    # accidentally break something before I test the model
    return arr


if __name__ == "__main__":
    train_i = load_data("training_set_expanded.txt")
    train_l = np.zeros(814)
    train_l[208:411] = 1
    train_l[598:] = 1

    model = keras.Sequential([
        keras.layers.Flatten(input_shape=(20, 41)),
        keras.layers.Dense(160, activation="relu"),
        keras.layers.Dense(2, activation="softmax")
    ])
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    model.fit(train_i, train_l, epochs=6)
    model.save('emotion_recognition_model_v4')
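
Since this script has no held-out test set, a quick sanity check appended at the end of the __main__ block can at least confirm the model fits its training data (a minimal sketch; evaluating on the training arrays overstates real-world accuracy):

    # re-score the fitted model on the data it was trained on
    loss, acc = model.evaluate(train_i, train_l, verbose=0)
    print("training accuracy:", acc)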

TensorFlow to TensorFlow Lite Model Converter

Python
This script converts a TensorFlow 2 model into the .tflite file format. It follows the conversion example on TensorFlow's documentation site, linked here and in the sources (https://www.tensorflow.org/lite/convert).
import tensorflow as tf

if __name__ == "__main__":
    converter = tf.lite.TFLiteConverter.from_saved_model("/Users/noahjillson/Desktop/facialrecognition/emotion_recognition_model_v4")
    tflite_model = converter.convert()

    with open("erm_v4.tflite", "wb") as file:
        file.write(tflite_model)
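
Before copying erm_v4.tflite to the Pi, it is worth confirming the converted model's tensor shapes on the desktop (a minimal sketch; tf.lite.Interpreter mirrors the tflite_runtime interpreter used on the Pi):

import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="erm_v4.tflite")
interpreter.allocate_tensors()
print(interpreter.get_input_details()[0]['shape'])  # should show the (1, 20, 41) input
print(interpreter.get_output_details()[0]['shape'])  # should show the two-class output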

Credits

Noah Jillson
3 projects • 5 followers