Story
In this lesson, we’ll enhance your BallyBot’s camera capabilities by adding real-time face detection to the video stream. Using machine learning models, your robot will detect human faces and highlight them in the video feed. This is a foundational step towards future interactive robot projects.
Prerequisites
- Complete Lesson 7 (Video Streaming)
- Basic understanding of C++
Rather than creating this project from scratch, we will use the code from Lesson 7: Video Stream With the BallyBots Camera as a base.
#include "esp_camera.h"
#include <WiFi.h>
#include <WebServer.h>
#include <ArduinoWebsockets.h>
#define CAMERA_MODEL_AI_THINKER
#define PWDN_GPIO_NUM 32
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM 0
#define SIOD_GPIO_NUM 26
#define SIOC_GPIO_NUM 27
#define Y9_GPIO_NUM 35
#define Y8_GPIO_NUM 34
#define Y7_GPIO_NUM 39
#define Y6_GPIO_NUM 36
#define Y5_GPIO_NUM 21
#define Y4_GPIO_NUM 19
#define Y3_GPIO_NUM 18
#define Y2_GPIO_NUM 5
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM 23
#define PCLK_GPIO_NUM 22
/* Settings */
const char* ssid = "BallyBot_AP";
const char* password = "12345678";
/* WebSocket variables */
using namespace websockets;
WebsocketsServer wserver;
WebsocketsClient wclient;
bool ws_client_connected = false;
const uint16_t ws_server_port = 65080;
/* Normal Server */
WebServer server(80);
void setup() {
Serial.begin(115200);
Serial.setDebugOutput(true);
Serial.println("hello world");
CameraSetup();
WiFi.mode(WIFI_AP);
WiFi.softAP(ssid, password);
Serial.print("AP IP address: "); Serial.println(WiFi.softAPIP());
wserver.listen(ws_server_port);
server.on("/", handle_OnConnect);
server.begin();
while(!ws_client_connected){
server.handleClient();
}
/* WebsocketsServer.accept() will delay everything until receiving a connection */
wclient = wserver.accept();
Serial.print("wclient: ");
Serial.println(wclient.available());
Serial.println("Websocket Connected!");
Serial.print(WiFi.localIP());
}
void loop() {
camera_fb_t *fb = esp_camera_fb_get();
if(!fb){
Serial.println("Camera capture failed");
esp_camera_fb_return(fb);
return;
}
if(fb->format != PIXFORMAT_JPEG){
Serial.println("Non-JPEG data not implemented");
esp_camera_fb_return(fb); /* return the frame buffer before bailing out */
return;
}
wclient.sendBinary((const char*) fb->buf, fb->len);
esp_camera_fb_return(fb);
}
void handle_OnConnect() {
Serial.println("a client connected to website url");
server.send(200, "text/html", SendHTML());
ws_client_connected = true;
}
String SendHTML(){
const char index_html[] PROGMEM = R"rawliteral(
<!DOCTYPE HTML>
<html><head>
<title>ESP Input Form</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
Hello Client
<img id="stream" src="PlaceHolder.jpg">
<script>
const WS_IP = '192.168.4.1'
const WS_PORT = '65080'
const img = document.getElementById("stream")
const WS_URL = `ws://${WS_IP}:${WS_PORT}`;
const ws = new WebSocket(WS_URL);
let urlObject;
ws.onopen = () => {
};
ws.onclose = () => {
};
ws.onmessage = message => {
const arrayBuffer = message.data;
if(urlObject){
URL.revokeObjectURL(urlObject);
}
urlObject = URL.createObjectURL(new Blob([arrayBuffer]));
img.src = urlObject;
}
</script>
</body></html>
)rawliteral";
return index_html;
}
void CameraSetup(){
camera_config_t config;
config.ledc_channel = LEDC_CHANNEL_0;
config.ledc_timer = LEDC_TIMER_0;
config.pin_d0 = Y2_GPIO_NUM;
config.pin_d1 = Y3_GPIO_NUM;
config.pin_d2 = Y4_GPIO_NUM;
config.pin_d3 = Y5_GPIO_NUM;
config.pin_d4 = Y6_GPIO_NUM;
config.pin_d5 = Y7_GPIO_NUM;
config.pin_d6 = Y8_GPIO_NUM;
config.pin_d7 = Y9_GPIO_NUM;
config.pin_xclk = XCLK_GPIO_NUM;
config.pin_pclk = PCLK_GPIO_NUM;
config.pin_vsync = VSYNC_GPIO_NUM;
config.pin_href = HREF_GPIO_NUM;
config.pin_sscb_sda = SIOD_GPIO_NUM;
config.pin_sscb_scl = SIOC_GPIO_NUM;
config.pin_pwdn = PWDN_GPIO_NUM;
config.pin_reset = RESET_GPIO_NUM;
config.xclk_freq_hz = 10000000;
config.pixel_format = PIXFORMAT_JPEG;
/* init with high specs to pre-allocate larger buffers */
if(psramFound()){
config.frame_size = FRAMESIZE_VGA;
config.jpeg_quality = 40;
config.fb_count = 2;
} else {
config.frame_size = FRAMESIZE_SVGA;
config.jpeg_quality = 12;
config.fb_count = 1;
}
/* camera init */
esp_err_t err = esp_camera_init(&config);
if (err != ESP_OK) {
Serial.printf("Camera init failed with error 0x%x", err);
return;
}
}
Step 1: Add Face Detection Libraries
There are two models we will be using for face detection:
- HumanFaceDetectMSR01: Detects face regions.
- HumanFaceDetectMNP01: Refines detection accuracy.
Add these lines after your existing includes:
#include "human_face_detect_msr01.hpp"
#include "human_face_detect_mnp01.hpp"
#include "fb_gfx.h" /* For drawing rectangle on the frame buffer */
Step 2: Create Face Detection Objects
To use the face detection libraries, we need to make an object from each library's class. Let's name them s1 and s2. These objects expose the libraries' functions, such as infer(), which performs the facial detection.
s1 is configured for high sensitivity (a low confidence threshold) but may generate more false positives. s2 is stricter (a higher confidence threshold) and refines the results from s1.
Create these instances of the facial detection classes before the setup() function:
HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F); /* score threshold, NMS threshold, top_k, resize scale */
HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5); /* score threshold, NMS threshold, top_k */
#define FACE_COLOR_YELLOW 0x00FF00 /* Color for bounding boxes */
Step 3: Modify the Camera Configuration
The camera currently captures images in JPEG format, but we will need RGB images for infer() to work. We can fix this by updating the camera settings to capture images in RGB565 format instead of JPEG.
Specifically, update CameraSetup() to output frames in RGB565 pixel format:
/* In CameraSetup(), change this line: */
config.pixel_format = PIXFORMAT_RGB565; /* Was PIXFORMAT_JPEG */
config.frame_size = FRAMESIZE_QVGA; /* QVGA will make it run faster */
For more information on what makes JPEG unique, here is an interesting video: https://www.youtube.com/watch?v=qMcw-uNT21I
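One knock-on effect of this change: the loop() from Lesson 7 skips frames whose format is not JPEG, and that guard would now reject every frame. Here is a minimal sketch of the adjusted check, assuming you kept the guard; you can also simply delete it, as the full code at the end of this lesson does.
/* In loop(): the Lesson 7 guard rejected non-JPEG frames. With the camera now
   configured for RGB565, either remove that check or adjust it like this: */
if(fb->format != PIXFORMAT_RGB565){
Serial.println("Unexpected pixel format");
esp_camera_fb_return(fb); /* always hand the frame buffer back */
return;
}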
Step 4: Integrate Detection into the Video Loop
Now we can add the infer() calls from s1 and s2 that perform the facial detection. In the loop, we will call infer() on the camera's frame-buffer variable fb:
camera_fb_t *fb = esp_camera_fb_get();
if(!fb){
Serial.println("Camera capture failed");
esp_camera_fb_return(fb);
return;
}
/* Detect faces */
std::list<dl::detect::result_t> results = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
results = s2.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}, results);
Once this is done, we can add a Serial.println() that, for now, simply tells us whether any faces were detected:
if(!results.empty()) {
Serial.println("I see faces");
}
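If you want more detail while testing, you can also walk the results list and print each detected box. This is only a debugging sketch; it reads the same box fields (x1, y1, x2, y2) that the drawing code in Step 5 uses.
/* Optional debugging sketch: print one line per detected face */
for(auto &face : results) {
Serial.printf("Face box: (%d, %d) to (%d, %d)\n", face.box[0], face.box[1], face.box[2], face.box[3]);
}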
Then we need to convert our RGB565 frame buffer into JPEG format to stream it to the client's browser. JPEG is more efficient here because its file sizes are much smaller, so there is less data to send.
void loop() {
camera_fb_t *fb = esp_camera_fb_get();
if (!fb) {
Serial.println("Camera capture failed");
return;
}
// Detect faces
std::list<dl::detect::result_t> results = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
results = s2.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}, results);
// Convert RGB565 to JPEG for streaming
size_t jpg_len = 0;
uint8_t *jpg_buf = NULL;
fmt2jpg(fb->buf, fb->len, fb->width, fb->height, PIXFORMAT_RGB565, 80, &jpg_buf, &jpg_len);
if (jpg_buf && jpg_len > 0) {
wclient.sendBinary((const char*)jpg_buf, jpg_len);
free(jpg_buf);
}
esp_camera_fb_return(fb);
}
Step 5: Highlight Faces in the Frame Buffer
The previous code will detect faces, but there is no noticeable difference in the video being sent! We need to draw a box around each detected face in the frames we send.
To draw rectangles around the detected faces in the frame buffer, we'll use the fb_gfx library's fb_gfx_drawFastHLine() and fb_gfx_drawFastVLine() functions.
/* Draw face boxes */
if(!results.empty()) {
fb_data_t rfb;
rfb.width = fb->width;
rfb.height = fb->height;
rfb.data = fb->buf;
rfb.bytes_per_pixel = 2;
rfb.format = FB_RGB565;
for(auto &face : results) {
int x = face.box[0];
int y = face.box[1];
int w = face.box[2] - x;
int h = face.box[3] - y;
/* Draw rectangle */
fb_gfx_drawFastHLine(&rfb, x, y, w, FACE_COLOR_YELLOW);
fb_gfx_drawFastHLine(&rfb, x, y+h, w, FACE_COLOR_YELLOW);
fb_gfx_drawFastVLine(&rfb, x, y, h, FACE_COLOR_YELLOW);
fb_gfx_drawFastVLine(&rfb, x+w, y, h, FACE_COLOR_YELLOW);
}
}
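If you prefer, those four line-drawing calls can be wrapped in a small helper. The sketch below is a hypothetical drawFaceRect() that also clamps the box to the frame before drawing, on the assumption that a detected box can occasionally touch or cross the frame edge; verify that behaviour on your own board before relying on it.
/* Hypothetical helper: draw one rectangle outline, clamped to the frame */
static void drawFaceRect(fb_data_t *rfb, int x, int y, int w, int h, uint32_t color){
if(x < 0){ w += x; x = 0; }
if(y < 0){ h += y; y = 0; }
if(x + w >= (int)rfb->width){ w = (int)rfb->width - x - 1; }
if(y + h >= (int)rfb->height){ h = (int)rfb->height - y - 1; }
if(w <= 0 || h <= 0){ return; }
fb_gfx_drawFastHLine(rfb, x, y, w, color);
fb_gfx_drawFastHLine(rfb, x, y + h, w, color);
fb_gfx_drawFastVLine(rfb, x, y, h, color);
fb_gfx_drawFastVLine(rfb, x + w, y, h, color);
}
Inside the for loop you would then replace the four fb_gfx_* calls with a single drawFaceRect(&rfb, x, y, w, h, FACE_COLOR_YELLOW);.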
Step 6: Test the Face Detection
- Upload the code.
- Connect to the BallyBot’s WiFi.
- Open 192.168.4.1 in a browser.
- Faces in the camera’s view should now have yellow bounding boxes!
Here is the full code of this lesson for reference in future projects:
#include <ArduinoWebsockets.h>
#include <WebServer.h>
#include <WiFi.h>
#include "esp_camera.h"
#include "fb_gfx.h"
#include "human_face_detect_mnp01.hpp" /* for face detection */
#include "human_face_detect_msr01.hpp" /* Add these two lines */
#define CAMERA_MODEL_AI_THINKER
#define PWDN_GPIO_NUM 32
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM 0
#define SIOD_GPIO_NUM 26
#define SIOC_GPIO_NUM 27
#define Y9_GPIO_NUM 35
#define Y8_GPIO_NUM 34
#define Y7_GPIO_NUM 39
#define Y6_GPIO_NUM 36
#define Y5_GPIO_NUM 21
#define Y4_GPIO_NUM 19
#define Y3_GPIO_NUM 18
#define Y2_GPIO_NUM 5
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM 23
#define PCLK_GPIO_NUM 22
/* Add face detection objects */
HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F);
HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5);
#define FACE_COLOR_YELLOW 0x00FF00
/* Settings */
const char *ssid = "BallyBot_AP";
const char *password = "12345678";
/* WebSocket variables */
using namespace websockets;
WebsocketsServer wserver;
WebsocketsClient wclient;
bool ws_client_connected = false;
const uint16_t ws_server_port = 65080;
/* Normal Server */
WebServer server(80);
void setup() {
pinMode(14, OUTPUT); /* set this pin as an output because it is on by default */
Serial.begin(115200);
Serial.setDebugOutput(true);
Serial.println("hello world");
CameraSetup();
WiFi.mode(WIFI_AP);
WiFi.softAP(ssid, password);
Serial.print("AP IP address: ");
Serial.println(WiFi.softAPIP());
wserver.listen(ws_server_port);
server.on("/", handle_OnConnect);
server.begin();
while (!ws_client_connected) {
server.handleClient();
}
/* WebsocketsServer.accept() will delay everything until receiving a
* connection */
wclient = wserver.accept();
Serial.print("wclient: ");
Serial.println(wclient.available());
Serial.println("Websocket Connected!");
Serial.print(WiFi.localIP());
}
void loop() {
camera_fb_t *fb = esp_camera_fb_get();
if (!fb) {
Serial.println("Camera capture failed");
esp_camera_fb_return(fb);
return;
}
/* Add face detection */
std::list<dl::detect::result_t> results =
s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
results = s2.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3},
results);
/* Draw face boxes */
if (!results.empty()) {
fb_data_t rfb;
rfb.width = fb->width;
rfb.height = fb->height;
rfb.data = fb->buf;
rfb.bytes_per_pixel = 2;
rfb.format = FB_RGB565;
for (auto &face : results) {
int x = face.box[0];
int y = face.box[1];
int w = face.box[2] - x;
int h = face.box[3] - y;
/* Draw rectangle */
fb_gfx_drawFastHLine(&rfb, x, y, w, FACE_COLOR_YELLOW);
fb_gfx_drawFastHLine(&rfb, x, y + h, w, FACE_COLOR_YELLOW);
fb_gfx_drawFastVLine(&rfb, x, y, h, FACE_COLOR_YELLOW);
fb_gfx_drawFastVLine(&rfb, x + w, y, h, FACE_COLOR_YELLOW);
}
}
/* Convert to JPEG before sending */
size_t jpg_len = 0;
uint8_t *jpg_buf = NULL;
fmt2jpg(fb->buf, fb->len, fb->width, fb->height, PIXFORMAT_RGB565, 80,
&jpg_buf, &jpg_len);
if (jpg_buf && jpg_len > 0) {
wclient.sendBinary((const char *)jpg_buf, jpg_len);
free(jpg_buf);
}
esp_camera_fb_return(fb);
}
void handle_OnConnect() {
Serial.println("a client connected to website url");
server.send(200, "text/html", SendHTML());
ws_client_connected = true;
}
String SendHTML() {
const char index_html[] PROGMEM = R"rawliteral(
<!DOCTYPE HTML>
<html><head>
<title>ESP Input Form</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
Hello Client
<img id="stream" src="PlaceHolder.jpg">
<script>
const WS_IP = '192.168.4.1'
const WS_PORT = '65080'
const img = document.getElementById("stream")
const WS_URL = `ws://${WS_IP}:${WS_PORT}`;
const ws = new WebSocket(WS_URL);
let urlObject;
ws.onopen = () => {
};
ws.onclose = () => {
};
ws.onmessage = message => {
const arrayBuffer = message.data;
if(urlObject){
URL.revokeObjectURL(urlObject);
}
urlObject = URL.createObjectURL(new Blob([arrayBuffer]));
img.src = urlObject;
}
</script>
</body></html>
)rawliteral";
return index_html;
}
void CameraSetup() {
camera_config_t config;
config.ledc_channel = LEDC_CHANNEL_0;
config.ledc_timer = LEDC_TIMER_0;
config.pin_d0 = Y2_GPIO_NUM;
config.pin_d1 = Y3_GPIO_NUM;
config.pin_d2 = Y4_GPIO_NUM;
config.pin_d3 = Y5_GPIO_NUM;
config.pin_d4 = Y6_GPIO_NUM;
config.pin_d5 = Y7_GPIO_NUM;
config.pin_d6 = Y8_GPIO_NUM;
config.pin_d7 = Y9_GPIO_NUM;
config.pin_xclk = XCLK_GPIO_NUM;
config.pin_pclk = PCLK_GPIO_NUM;
config.pin_vsync = VSYNC_GPIO_NUM;
config.pin_href = HREF_GPIO_NUM;
config.pin_sscb_sda = SIOD_GPIO_NUM;
config.pin_sscb_scl = SIOC_GPIO_NUM;
config.pin_pwdn = PWDN_GPIO_NUM;
config.pin_reset = RESET_GPIO_NUM;
config.xclk_freq_hz = 10000000;
config.pixel_format = PIXFORMAT_RGB565;
/* init with high specs to pre-allocate larger buffers */
if (psramFound()) {
config.frame_size = FRAMESIZE_QVGA;
config.jpeg_quality = 40;
config.fb_count = 2;
} else {
config.frame_size = FRAMESIZE_QVGA;
config.jpeg_quality = 12;
config.fb_count = 1;
}
/* camera init */
esp_err_t err = esp_camera_init(&config);
if (err != ESP_OK) {
Serial.printf("Camera init failed with error 0x%x", err);
return;
}
}
Troubleshooting
- No boxes visible: Ensure the camera’s pixel format is set to RGB565.
- Laggy stream: Reduce the frame size to FRAMESIZE_QVGA in CameraSetup().
- False positives: Adjust the detection parameters from Step 2 (see the example below this list).
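For example, raising the score threshold (the first constructor argument, as noted in Step 2) makes both stages more conservative and should reduce false positives. The exact values below are only a starting point to experiment with, not recommended settings.
/* Stricter detection thresholds (tune to taste) */
HumanFaceDetectMSR01 s1(0.3F, 0.5F, 10, 0.2F);
HumanFaceDetectMNP01 s2(0.6F, 0.3F, 5);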
Your BallyBot now sees faces! This opens doors to:
- Security: Trigger alarms or log detections.
- Interaction: Make the robot wave or speak when a face appears.
- Navigation: Follow faces autonomously.
What’s Next? In Lesson 9, we’ll use the facial detection to react to the environment, turning on LEDs when there is a face visible!
Resources
Previous Lesson: Lesson 7: Video Stream With the BallyBots Camera
Next Lesson: Lesson 9: Making BallyBot react to face detection