Daksh Sambhare
Created August 30, 2024

Haptic-Navigator and Visual Information Scanner

A device that assists navigation using binocular vision and reads visual information aloud through audio

Advanced · Full instructions provided · Over 1 day · 98 views

Things used in this project

Story
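The description above mentions reading visual information aloud. As a minimal sketch of how that audio-reading step might work, assuming pytesseract for OCR and pyttsx3 for offline text-to-speech (neither library appears in the posted code, and the helper name and camera index are illustrative):

import cv2
import pytesseract
import pyttsx3

# Hypothetical helper: capture one frame and read any printed text aloud.
# The camera index and voice settings are assumptions, not project code.
def speak_visible_text(camera_index=0):
    cap = cv2.VideoCapture(camera_index)
    ret, frame = cap.read()
    cap.release()
    if not ret:
        return
    # Tesseract expects RGB input; OpenCV captures frames in BGR order
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    text = pytesseract.image_to_string(rgb).strip()
    if text:
        engine = pyttsx3.init()
        engine.say(text)
        engine.runAndWait()

if __name__ == '__main__':
    speak_visible_text()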


Schematics

Circuit Diagrams

Code

Depth estimation

Python
Raw script for metric depth estimation on two webcams, with SSD MobileNet object detection helpers
import argparse
import os

import cv2
import matplotlib.cm
import numpy as np
import torch

from depth_anything_v2.dpt import DepthAnythingV2

# Load the COCO class names for the SSD MobileNet object detector
classNames = []
classFile = "/home/dsay/Documents/hackster/depthanything/Depth-Anything-V2/metric_depth/cocoobject/Object_Detection_Files/coco.names"
with open(classFile, "rt") as f:
    classNames = f.read().rstrip("\n").split("\n")

configPath = "/home/dsay/Documents/hackster/depthanything/Depth-Anything-V2/metric_depth/cocoobject/Object_Detection_Files/ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt"
weightsPath = "/home/dsay/Documents/hackster/depthanything/Depth-Anything-V2/metric_depth/cocoobject/Object_Detection_Files/frozen_inference_graph.pb"

net = cv2.dnn_DetectionModel(weightsPath, configPath)
net.setInputSize(320, 320)
net.setInputScale(1.0 / 127.5)
net.setInputMean((127.5, 127.5, 127.5))
net.setInputSwapRB(True)

def getObjects(img, thres, nms, draw=True, objects=None):
    # Default to every known class (avoids the mutable-default-argument pitfall)
    if objects is None:
        objects = classNames
    classIds, confs, bbox = net.detect(img, confThreshold=thres, nmsThreshold=nms)
    objectInfo = []
    if len(classIds) != 0:
        for classId, confidence, box in zip(classIds.flatten(), confs.flatten(), bbox):
            className = classNames[classId - 1]
            if className in objects:
                objectInfo.append([box, className])
                if draw:
                    cv2.rectangle(img, box, color=(0, 255, 0), thickness=2)
                    cv2.putText(img, classNames[classId-1].upper(), (box[0]-10, box[1]-30), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
                    cv2.putText(img, str(round(confidence*100, 2)), (box[0]-200, box[1]-30), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
    return img, objectInfo

def process_frame(raw_image, depth_anything, args, frame_count):
    # Perform metric depth estimation on the entire frame
    depth = depth_anything.infer_image(raw_image, args.input_size)
    
    # Normalize and convert depth to visual representation
    depth_visual = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
    depth_visual = depth_visual.astype(np.uint8)
    
    # The metric model already outputs depth in meters; keep it for downstream use
    depth_meters = depth
    
    # Apply colormap or grayscale
    if args.grayscale:
        depth_visual = cv2.cvtColor(depth_visual, cv2.COLOR_GRAY2BGR)
    else:
        cmap = matplotlib.cm.get_cmap('Spectral')
        depth_visual = (cmap(depth_visual)[:, :, :3] * 255).astype(np.uint8)
    
    # Combine original frame with depth visualization
    split_region = np.ones((raw_image.shape[0], 50, 3), dtype=np.uint8) * 255
    combined_result = np.hstack([raw_image, split_region, depth_visual])
    
    return combined_result


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Depth Anything V2 Metric Depth Estimation on Webcam')
    parser.add_argument('--video-path1', type=int, default=2, help='Webcam index for video capture (camera 1)')
    parser.add_argument('--video-path2', type=int, default=1, help='Webcam index for video capture (camera 2)')
    parser.add_argument('--input-size', type=int, default=518, help='Input size for image processing')
    parser.add_argument('--outdir', type=str, default='./vis_depth', help='Output directory')
    parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl'], help='Model encoder size')
    parser.add_argument('--load-from', type=str, required=True, help='Path to the pretrained metric depth checkpoint')
    parser.add_argument('--max-depth', type=float, default=20, help='Maximum depth of the metric model in meters')
    parser.add_argument('--grayscale', action='store_true', help='Render the depth map in grayscale instead of a colormap')
    args = parser.parse_args()

    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Encoder configurations from the Depth Anything V2 repository
    model_configs = {
        'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
        'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
        'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 2048]},
    }
    # Initialize DepthAnythingV2 model
    depth_anything = DepthAnythingV2(**{**model_configs[args.encoder], 'max_depth': args.max_depth})
    depth_anything.load_state_dict(torch.load(args.load_from, map_location='cpu'))
    depth_anything = depth_anything.to(DEVICE).eval()
    
    # Open webcam captures for both cameras
    cap1 = cv2.VideoCapture(args.video_path1)
    cap2 = cv2.VideoCapture(args.video_path2)
    frame_count = 0
    
    # Create output directory if not exists
    os.makedirs(args.outdir, exist_ok=True)
    
    while True:
        # Read frames from both cameras
        ret1, frame1 = cap1.read()
        ret2, frame2 = cap2.read()
        
        if not ret1 or not ret2:
            break
        
        frame_count += 1
        
        # Process frames from both cameras
        combined_result1 = process_frame(frame1, depth_anything, args, frame_count)
        combined_result2 = process_frame(frame2, depth_anything, args, frame_count)
        
        # Display the processed frames from both cameras
        cv2.imshow('Camera 1 Depth Estimation', combined_result1)
        cv2.imshow('Camera 2 Depth Estimation', combined_result2)
        
        # Press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    
    # Release webcam captures and close all windows
    cap1.release()
    cap2.release()
    cv2.destroyAllWindows()
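
The title promises haptic navigation, but the script above only displays the two depth maps. Below is one hypothetical way the metric depth output could drive a vibration motor, assuming a Raspberry Pi with gpiozero and a motor driver on GPIO 18; the pin, distance thresholds, and helper name are illustrative, not part of the project code:

import numpy as np
from gpiozero import PWMOutputDevice

# Hypothetical vibration motor on GPIO 18 (assumed wiring, not from the project)
motor = PWMOutputDevice(18)

def depth_to_haptics(depth_meters, near=0.5, far=4.0):
    # Look only at the central third of the frame, roughly straight ahead
    h, w = depth_meters.shape
    center = depth_meters[h // 3: 2 * h // 3, w // 3: 2 * w // 3]
    nearest = float(np.min(center))
    # Closer obstacles vibrate harder; anything beyond `far` meters stays silent
    strength = float(np.clip((far - nearest) / (far - near), 0.0, 1.0))
    motor.value = strength

Calling such a helper once per frame in the main loop would require process_frame to also return its depth_meters array.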

Credits

Daksh Sambhare

1 project • 3 followers
