donutsorelse
Published © GPL3+

Gaze Driven Mouse Movement

Move your mouse with your eyes to make gaming easier for those with disabilities

Advanced · Work in progress · 20 hours · 78


Code

GazeMouseMover

Python
import tkinter as tk
from threading import Thread
import cv2
import pyautogui
import time
from screeninfo import get_monitors
import numpy as np
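
# Third-party dependencies: opencv-python (cv2), pyautogui, screeninfo and
# numpy; tkinter ships with most Python installs.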



# Global flags for various functionalities
display_webcam = True
click_on_blink = False
mouse_control_enabled = True
program_running = True
calibration_data = []

# Global variable for ongoing gaze position
ongoing_gaze_position = {'x': 0, 'y': 0}
optimal_threshold = 55
# The Haar cascade XML files are expected alongside this script;
# cv2.data.haarcascades also bundles copies if you prefer loading them
# from the OpenCV install.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

detector_params = cv2.SimpleBlobDetector_Params()
detector_params.filterByArea = True
detector_params.maxArea = 1500
detector = cv2.SimpleBlobDetector_create(detector_params)
logging_enabled = False
gaze_log = []
video_capture = cv2.VideoCapture(0)

def toggle_webcam_display():
    global display_webcam
    display_webcam = not display_webcam
    print(f"Webcam display toggled. Now display_webcam = {display_webcam}")


def toggle_click_on_blink(*args):
    global click_on_blink
    click_on_blink = not click_on_blink


def toggle_mouse_control(*args):
    global mouse_control_enabled
    mouse_control_enabled = not mouse_control_enabled


def run_calibration(video_capture, detector):
    global optimal_threshold, calibration_data
    optimal_threshold = find_best_threshold(video_capture, detector)
    print(f"Optimal threshold: {optimal_threshold}")
    calibration_data = run_calibration_sequence(video_capture, optimal_threshold)
    print("Calibration Results:", calibration_data)


def close_application(*args):
    global program_running
    program_running = False


def find_best_threshold(cap, detector):
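    """Sweep blob thresholds 50-65 (about four frames each) and return the
    threshold - or the mean of tied thresholds - that yields the most pupil
    keypoints. The search window extends itself if nothing is detected."""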
    global face_cascade, eye_cascade


    optimal_threshold = 50
    best_keypoints_count = 0
    threshold = 50
    thresholdInc = 0
    start_time = time.time()
    threshold_seconds = 5
    tied_thresholds = []


    while True:
        ret, frame = cap.read()
        if not ret:
            print("No ret break")
            break


        # Display the frame during threshold calibration
        cv2.imshow('Threshold Calibration', frame)


        if (time.time() - start_time) < threshold_seconds:
            # Threshold calibration logic
            face_frame = detect_faces(frame, face_cascade)
            if face_frame is not None:
                current_max_count = 0
                for eye in detect_eyes(face_frame, eye_cascade):
                    if eye is not None:
                        eye = cut_eyebrows(eye)
                        keypoints = blob_process(eye, threshold, detector)
                        keypoints_count = len(keypoints)
                        if keypoints_count > current_max_count:
                            current_max_count = keypoints_count
                            if keypoints_count > best_keypoints_count:
                                best_keypoints_count = keypoints_count
                                tied_thresholds = [threshold]
                            elif keypoints_count == best_keypoints_count:
                                tied_thresholds.append(threshold)
                print(f"Current threshold: {threshold}, Keypoints count: {current_max_count}")
            thresholdInc += 1
            if thresholdInc > 3:
                thresholdInc = 0
                threshold += 1
                if threshold > 65:
                    threshold = 50
        elif best_keypoints_count == 0:
            # No pupil blobs found yet; extend the search window
            threshold_seconds += 5
        else:
            break


        # Check for user input to break the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break


    if tied_thresholds:
        optimal_threshold = int(sum(tied_thresholds) / len(tied_thresholds))
    cv2.destroyWindow('Threshold Calibration')

    print(f"Optimal threshold: {optimal_threshold}")
    
    return optimal_threshold


def move_mouse_to_gaze_position(gaze_position):
    # Map the normalized gaze position onto the screen via the calibration data
    screen_coordinates = translate_gaze_to_screen(gaze_position, calibration_data)
    pyautogui.moveTo(screen_coordinates['x'], screen_coordinates['y'])
   
def find_gaze_position(eye, threshold):
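    """Blob-detect the pupil in an eye crop and return its position as a
    percentage (0-100) of the eye frame, or None if no blob is found."""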
    global detector
    keypoints = blob_process(eye, threshold, detector)
    if keypoints:
        # Assuming the first keypoint is the pupil
        pupil = keypoints[0]
        eye_width = eye.shape[1]
        eye_height = eye.shape[0]


        # Normalize the pupil position within the eye frame
        normalized_x = max(0, min(100, (pupil.pt[0] / eye_width) * 100))
        normalized_y = max(0, min(100, (pupil.pt[1] / eye_height) * 100))


        gaze_position = {'x': normalized_x, 'y': normalized_y}
        print(f"Gaze Position: {gaze_position}")
        # Log the gaze position if logging is enabled
        if logging_enabled:
            gaze_log.append(gaze_position)

        return gaze_position
    return None

def translate_gaze_to_screen(gaze_position, calibration_data):
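    """Map a gaze position to screen coordinates by inverse-distance
    weighting over the calibration samples: calibration points whose
    recorded gaze lies closer to the current gaze pull harder."""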
    screen_width, screen_height = get_screen_size()

    # Variables to store the sum of distances and weighted positions
    sum_distances = 0
    weighted_x = 0
    weighted_y = 0

    # Iterate over calibration points
    for calib_point, calib_gaze in calibration_data:
        # Calculate the distance between the current gaze position and this calibration gaze position
        distance = ((gaze_position['x'] - calib_gaze['x'])**2 + (gaze_position['y'] - calib_gaze['y'])**2)**0.5

        if distance == 0:  # If exactly at a calibration point, return the corresponding screen point
            return {'x': calib_point[0] * screen_width, 'y': calib_point[1] * screen_height}

        weight = 1 / distance
        sum_distances += weight
        weighted_x += calib_point[0] * weight
        weighted_y += calib_point[1] * weight

    # Calculate the average position weighted by the inverse of the distance to each calibration point
    avg_x = (weighted_x / sum_distances) * screen_width
    avg_y = (weighted_y / sum_distances) * screen_height

    return {'x': avg_x, 'y': avg_y}

def get_screen_size():
    # Primary monitor resolution via screeninfo
    monitor = get_monitors()[0]
    return monitor.width, monitor.height


def run_calibration_sequence(cap, optimal_threshold):
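    """Show a fullscreen red dot at nine grid positions; the user looks at
    each dot and presses space, and the next fresh gaze sample is paired
    with that point. Returns a list of (point, gaze) tuples."""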
    global ongoing_gaze_position
    calibration_points = [(0.1, 0.1), (0.5, 0.1), (0.9, 0.1),
                          (0.1, 0.5), (0.5, 0.5), (0.9, 0.5),
                          (0.1, 0.9), (0.5, 0.9), (0.9, 0.9)]
    calibration_results = []
    awaitingNewResult = False  # Don't reuse a stale sample; only capture the next fresh result after space is pressed
    proceed = False
    print("calibration started")
    cv2.namedWindow('Calibration', cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty('Calibration', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    for point in calibration_points:
        while True:
            ret, frame = cap.read()
            if not ret:
                continue


            frame_width = frame.shape[1]
            frame_height = frame.shape[0]
            point_position = (int(point[0] * frame_width), int(point[1] * frame_height))
            cv2.circle(frame, point_position, 10, (0, 0, 255), -1)


            flipped_frame = cv2.flip(frame, 1)  # Mirror the frame so it reads naturally
            cv2.imshow('Calibration', flipped_frame)

            key = cv2.waitKey(1) & 0xFF
            if key == ord(' '):
                awaitingNewResult = True
                print("Space pressed - awaiting result")
            if key == ord('q'):  # Skip the current calibration point
                break

            face_frame = detect_faces(frame, face_cascade)
            if face_frame is not None:
                eyes = detect_eyes(face_frame, eye_cascade)
                for eye in eyes:
                    if eye is not None:
                        eye = cut_eyebrows(eye)
                        gaze_position = find_gaze_position(eye, optimal_threshold)
                        if gaze_position:
                            ongoing_gaze_position = gaze_position
                            if awaitingNewResult:
                                # Pair the first fresh sample after the space press with this point
                                captured_gaze = ongoing_gaze_position
                                calibration_results.append((point, captured_gaze))
                                awaitingNewResult = False
                                proceed = True
                                break
            if proceed:
                proceed = False
                break

    cv2.destroyWindow('Calibration')
    return calibration_results
   
def capture_gaze(cap, optimal_threshold):
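    """Block until a gaze sample that differs from the last one is found,
    then return it."""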
    global ongoing_gaze_position
    ongoing_gaze_position = {'x': 0, 'y': 0}  # Reset to default


    while True:
        ret, frame = cap.read()
        if not ret:
            break


        face_frame = detect_faces(frame, face_cascade)
        if face_frame is not None:
            eyes = detect_eyes(face_frame, eye_cascade)
            for eye in eyes:
                if eye is not None:
                    eye = cut_eyebrows(eye)
                    new_gaze_position = find_gaze_position(eye, optimal_threshold)
                    if new_gaze_position and new_gaze_position != ongoing_gaze_position:
                        ongoing_gaze_position = new_gaze_position
                        return new_gaze_position
                   
def detect_faces(img, cascade):
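    """Gray-scale the frame, run the Haar cascade, and return a crop of the
    largest detected face (or None if no face is found)."""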
    gray_frame = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    coords = cascade.detectMultiScale(gray_frame, 1.3, 5)
    if len(coords) > 1:
        biggest = max(coords, key=lambda i: i[3])
        biggest = np.array([biggest], np.int32)
    elif len(coords) == 1:
        biggest = coords
    else:
        return None
    for (x, y, w, h) in biggest:
        frame = img[y:y + h, x:x + w]
    return frame




def detect_eyes(img, cascade):
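    """Detect eyes inside a face crop, ignoring hits in the lower half of
    the face (usually false positives), and classify them as left or right
    by their x-center. Returns (left_eye, right_eye); either may be None."""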
    gray_frame = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    eyes = cascade.detectMultiScale(gray_frame, 1.3, 5)
    width, height = np.size(img, 1), np.size(img, 0)
    left_eye, right_eye = None, None
    for (x, y, w, h) in eyes:
        if y > height / 2:
            continue
        eyecenter = x + w / 2
        if eyecenter < width * 0.5:
            left_eye = img[y:y + h, x:x + w]
        else:
            right_eye = img[y:y + h, x:x + w]
    return left_eye, right_eye


def cut_eyebrows(img):
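    """Trim the top quarter of the eye crop, where the eyebrow sits."""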
    height, width = img.shape[:2]
    eyebrow_h = int(height / 4)
    return img[eyebrow_h:height, 0:width]




def blob_process(img, threshold, detector):
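    """Binarize the eye crop, clean it up with erode/dilate/median blur,
    and run the blob detector; the dark pupil survives as a blob."""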
    gray_frame = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, img = cv2.threshold(gray_frame, threshold, 255, cv2.THRESH_BINARY)
    img = cv2.erode(img, None, iterations=2)
    img = cv2.dilate(img, None, iterations=4)
    img = cv2.medianBlur(img, 5)
    return detector.detect(img)


def eye_aspect_ratio(eye):
    # Assuming eye is a list of points (x, y) marking the eye corners and midpoints of the top and bottom eyelids
    # This function calculates and returns the eye aspect ratio


    # Compute the distances between the vertical eye landmarks
    vertical_1 = np.linalg.norm(eye[1] - eye[5])
    vertical_2 = np.linalg.norm(eye[2] - eye[4])


    # Compute the distance between the horizontal eye landmarks
    horizontal = np.linalg.norm(eye[0] - eye[3])


    # Calculate the eye aspect ratio
    ear = (vertical_1 + vertical_2) / (2.0 * horizontal)


    return ear


# ear = eye aspect ratio (lol)
def calculate_ear(eye_region):
    # Calculate the eye aspect ratio based on the height and width of the eye region
    height, width = eye_region.shape[:2]
    ear = height / width
    return ear



def start_main_loop():
    # Initialization
    global face_cascade, eye_cascade, detector
    global ongoing_gaze_position
    global display_webcam
    global video_capture
    # Reuse the already-open module-level capture instead of grabbing the camera twice
    if not video_capture.isOpened():
        video_capture = cv2.VideoCapture(0)
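    # Per-frame pipeline below: detect the face and eyes, estimate the gaze,
    # move the mouse when enabled, and click on sustained blinks when enabled.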


    blink_threshold = 0.20  # Threshold value might need adjustment
    blink_frames_threshold = 3  # Number of consecutive frames with low EAR to consider as a blink
    blink_frames = 0
    run_calibration(video_capture, detector)
    # Main loop
    while program_running:
        ret, frame = video_capture.read()
        if not ret:
            break
        # Flip the frame horizontally
        frame = cv2.flip(frame, 1)

        if display_webcam and ongoing_gaze_position is not None:
            # Draw a red circle around the ongoing gaze position
            gaze_x = int(ongoing_gaze_position['x'] * frame.shape[1] / 100)
            gaze_y = int(ongoing_gaze_position['y'] * frame.shape[0] / 100)
            cv2.circle(frame, (gaze_x, gaze_y), 5, (0, 0, 255), -1)


        # Gaze tracking logic
        face_frame = detect_faces(frame, face_cascade)
        if face_frame is not None:
            eyes = detect_eyes(face_frame, eye_cascade)
            for eye in eyes:
                if eye is not None:
                    eye = cut_eyebrows(eye)
                    gaze_position = find_gaze_position(eye, optimal_threshold)
                    if gaze_position:
                        ongoing_gaze_position = gaze_position
                        if mouse_control_enabled:
                            move_mouse_to_gaze_position(gaze_position)


        # Blink detection for mouse click (requires a detected face)
        if click_on_blink and face_frame is not None:
            left_eye, right_eye = detect_eyes(face_frame, eye_cascade)
            if left_eye is not None and right_eye is not None:
                left_ear = calculate_ear(left_eye)
                right_ear = calculate_ear(right_eye)
                ear = (left_ear + right_ear) / 2.0


                if ear < blink_threshold:
                    blink_frames += 1
                    if blink_frames >= blink_frames_threshold:
                        pyautogui.click()
                        blink_frames = 0
                else:
                    blink_frames = 0

        
        # Only display the frame if display_webcam is True
        if display_webcam:
            cv2.imshow('Gaze Tracking', frame)
        else:
            # Close the preview window if it was left open
            try:
                if cv2.getWindowProperty('Gaze Tracking', cv2.WND_PROP_VISIBLE) >= 1:
                    cv2.destroyWindow('Gaze Tracking')
            except cv2.error:
                pass


        if cv2.waitKey(1) & 0xFF == ord('q'):  # Quit on 'q' key press
            break


    video_capture.release()
    cv2.destroyAllWindows()

#Used for debugging gaze to mouse movement logic
def start_logging():
    global logging_enabled
    logging_enabled = True
    print("Logging started.")

def stop_logging():
    global logging_enabled, gaze_log
    logging_enabled = False
    print("Logging stopped. Data collected:")
    print(gaze_log)
    gaze_log = []  # Clear the log after printing


def run_in_thread(fn):
    # Daemon thread so the OpenCV loop dies with the Tkinter window
    thread = Thread(target=fn, daemon=True)
    thread.start()
    return thread
    
if __name__ == "__main__":
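    # Tkinter control panel; the OpenCV loop runs in a background thread so
    # the buttons stay responsive.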
    app = tk.Tk()
    app.title("Gaze Tracking Control")
    
    toggle_webcam_button = tk.Button(app, text="Toggle Webcam Display", command=toggle_webcam_display)
    toggle_webcam_button.pack()

    toggle_click_on_blink_button = tk.Button(app, text="Toggle Click on Blink", command=toggle_click_on_blink)
    toggle_click_on_blink_button.pack()

    toggle_mouse_control_button = tk.Button(app, text="Toggle Mouse Control", command=toggle_mouse_control)
    toggle_mouse_control_button.pack()

    rerun_calibration_button = tk.Button(app, text="Rerun Calibration", command=lambda: run_calibration(video_capture, detector))
    rerun_calibration_button.pack()

    close_application_button = tk.Button(app, text="Close Application", command=close_application)
    close_application_button.pack()
    start_log_button = tk.Button(app, text="Start Logging", command=start_logging)
    start_log_button.pack()

    stop_log_button = tk.Button(app, text="Stop Logging", command=stop_logging)
    stop_log_button.pack()
    run_in_thread(start_main_loop)

    app.mainloop()

Credits

donutsorelse

16 projects • 15 followers
I make all kinds of different stuff every week. Usually I make funny yet useful inventions.
