As we start to rely on more and more video conferencing software, it is crucial to be able to run AI and AR on top of the video itself. Since MixPose is a yoga live streaming app with AI pose detection, this guide focuses on how to overlay AI inference on top of WebRTC video. You can try out a live demo of a yoga class at https://mixpose.com
What you will need:
- Node.js
- Firebase Functions
- Agora Web SDK 3.1.0 or above
- Firebase Hosting
Step 1: Sign up for an Agora account
Agora offers 10,000 free minutes per month of SDK usage, which is plenty for testing out the application. To sign up for an account, go to https://sso.agora.io/v2/signup and you can get started there.
After the account is created, simply create an App ID (and App Certificate) for your project.
Step 2: Run the Agora Web SDK example
In this example we will focus on the JavaScript SDK for the web; for more detail you can follow the guide at https://docs.agora.io/en/Video/start_call_web?platform=Web
I've used Firebase Functions as the backend that serves the token needed to start the class; the client calls the generateRtcToken function below, which builds the token from your App ID and App Certificate. Any Node.js server would do.
const functions = require('firebase-functions');
const RtcTokenBuilder = require('./src/RtcTokenBuilder').RtcTokenBuilder;
const RtcRole = require('./src/RtcTokenBuilder').Role;
const RtmTokenBuilder = require('./src/RtmTokenBuilder').RtmTokenBuilder;
const RtmRole = require('./src/RtmTokenBuilder').Role;
const generateRtcToken = (className, accountName, roleValue) => {
  // Rtc Examples
  const appID = 'test';            // replace with your Agora App ID
  const appCertificate = 'test';   // replace with your Agora App Certificate
  const channelName = className;
  //const uid = 100;
  const account = accountName;
  const role = roleValue;
  const expirationTimeInSeconds = 7200;
  const currentTimestamp = Math.floor(Date.now() / 1000);
  const privilegeExpiredTs = currentTimestamp + expirationTimeInSeconds;
  // IMPORTANT! Build the token with either the uid or with the user account.
  // Comment out the option you do not want to use below.
  // Build token with uid
  //const tokenA = RtcTokenBuilder.buildTokenWithUid(appID, appCertificate, channelName, uid, role, privilegeExpiredTs);
  //console.log("Token With Integer Number Uid: " + tokenA);
  // Build token with user account
  const token = RtcTokenBuilder.buildTokenWithAccount(appID, appCertificate, channelName, account, role, privilegeExpiredTs);
  return token;
};
exports.generateRtcToken = functions.https.onCall((data, context) => {
  var token = generateRtcToken(data.className, data.userName, RtcRole.PUBLISHER);
  return token;
});
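On the client side, once the function is deployed, the token can be fetched before joining a class. Here is a minimal sketch, assuming the Firebase v8 client SDK is initialized on the page; the className and userName values are placeholders.
// Fetch an RTC token from the callable Cloud Function defined above
const generateRtcToken = firebase.functions().httpsCallable('generateRtcToken');
async function fetchToken(className, userName) {
  // result.data is the token string returned by the Cloud Function
  const result = await generateRtcToken({ className: className, userName: userName });
  return result.data;
}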
Alternatively, if you are building a quick POC, you can also use a temporary token for the audio/video call generated from the Agora dashboard, but this is not recommended for production.
There is also a step-by-step guide that can help you build your first video chat web app on Agora at https://www.agora.io/en/blog/building-a-group-video-chat-web-app/?_ga=2.184722720.864757430.1597829478-674087565.1593581567
The video joining process is fairly standard across providers.
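For reference, a typical join-and-publish flow with the Agora Web SDK 3.x looks roughly like the sketch below; the element ids ('local-player', 'remote-player'), the channel name, and the token variable are placeholders rather than MixPose code.
// Rough sketch of the standard Agora Web SDK 3.x join flow
const client = AgoraRTC.createClient({ mode: 'rtc', codec: 'vp8' });
client.init('YOUR_APP_ID', () => {
  // token comes from the generateRtcToken function above
  client.join(token, 'yoga-class', null, (uid) => {
    const localStream = AgoraRTC.createStream({ streamID: uid, audio: true, video: true });
    localStream.init(() => {
      localStream.play('local-player');   // renders into <div id="local-player">
      client.publish(localStream);
    });
  });
});
// Subscribe to and play remote streams as they arrive
client.on('stream-added', (evt) => client.subscribe(evt.stream));
client.on('stream-subscribed', (evt) => evt.stream.play('remote-player'));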
Step 3: Redraw the video element into a canvas
We can't really run TensorFlow.js on the video element from the Agora SDK directly, but we can create two canvas elements and redraw the live video feed into them:
<canvas id="output" >
<canvas id="image" />
Then we can transfer the live video feed into the canvas with the following code:
// imagectx is the 2D context of the #image canvas; video is the <video>
// element showing the live stream
imagectx.clearRect(0, 0, $('#output').width(), $('#output').height());
imagectx.save();
// mirror the frame horizontally so it matches what the user sees
imagectx.scale(-1, 1);
imagectx.translate(-$('#output').width(), 0);
imagectx.drawImage(video, 0, 0, $('#output').width(), $('#output').height());
imagectx.restore();
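The video element and the 2D contexts used above are not shown in the snippet; one way to obtain them, assuming the Agora stream is played into a container div with a placeholder id of agora-player, is:
// Hypothetical setup: grab the canvases, their 2D contexts, and the <video>
// element that the Agora SDK injects when stream.play('agora-player') is called
const image = document.getElementById('image');
const imagectx = image.getContext('2d');
const ctx = document.getElementById('output').getContext('2d');
const video = document.querySelector('#agora-player video');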
Step 4: Run TensorFlow.js on the canvas itself
Now that we have an element we can run AI inference on, we can finally use TFJS. At the time of writing we are doing this via TensorFlow.js 2.0:
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/posenet"></script>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-wasm@2.0.0/dist/tf-backend-wasm.js"></script>
And we can now run the inference on top of the output canvas
async function poseDetectionFrame() {
  // Skip inference if AI is turned off for this page
  if (!JSON.parse(getMeta("useai"))) {
    return;
  }
  let poses = [];
  let minPoseConfidence;
  let minPartConfidence;
  // Redraw the current video frame (mirrored) into the #image canvas
  imagectx.clearRect(0, 0, $('#output').width(), $('#output').height());
  imagectx.save();
  imagectx.scale(-1, 1);
  imagectx.translate(-$('#output').width(), 0);
  imagectx.drawImage(video, 0, 0, $('#output').width(), $('#output').height());
  imagectx.restore();
  // Run PoseNet on the #image canvas
  const pose = await net.estimatePoses(image, {
    flipHorizontal: false,
    decodingMethod: 'single-person'
  });
  // Redraw the same frame into the #output canvas that the user sees
  ctx.clearRect(0, 0, $('#output').width(), $('#output').height());
  ctx.save();
  ctx.scale(-1, 1);
  ctx.translate(-$('#output').width(), 0);
  ctx.drawImage(video, 0, 0, $('#output').width(), $('#output').height());
  ctx.restore();
  poses = poses.concat(pose);
  minPoseConfidence = 0.15;
  minPartConfidence = 0.1;
  // For each pose (i.e. person) detected in the frame, draw the resulting
  // skeleton and keypoints if over the confidence thresholds -- see Step 5
  requestAnimationFrame(poseDetectionFrame);
}
poseDetectionFrame();
setTimeout(function () {
  // refresh the page carousel once the video is running (site-specific UI)
  $owlCarouselNew.trigger('refresh.owl.carousel');
}, 1500);
Step 5: Getting the AR stick figure
AR is actually quite easy once you have the keypoints: we can draw stick figures that represent the real-time inference. We now have AR on top of the AI data to give users a better representation of their poses.
poses.forEach(({score, keypoints}) => {
  if (score >= minPoseConfidence) {
    drawKeypoints(keypoints, minPartConfidence, ctx);
    drawSkeleton(keypoints, minPartConfidence, ctx);
    /*
    if (guiState.output.showBoundingBox) {
      drawBoundingBox(keypoints, ctx);
    }*/
  }
});
via the following helper functions:
// Drawing constants (the values here are illustrative; they are not shown
// in the original snippet)
const color = 'aqua';
const boundingBoxColor = 'red';
const lineWidth = 2;

function drawPoint(ctx, y, x, r, color) {
  ctx.beginPath();
  ctx.arc(x, y, r, 0, 2 * Math.PI);
  ctx.fillStyle = color;
  ctx.fill();
}
/**
 * Draw the bounding box of a pose. For example, for a whole person standing
 * in an image, the bounding box will begin at the nose and extend to one of
 * the ankles.
 */
function drawBoundingBox(keypoints, ctx) {
  const boundingBox = posenet.getBoundingBox(keypoints);
  ctx.rect(
      boundingBox.minX, boundingBox.minY, boundingBox.maxX - boundingBox.minX,
      boundingBox.maxY - boundingBox.minY);
  ctx.strokeStyle = boundingBoxColor;
  ctx.stroke();
}
/**
 * Draws a line on a canvas, i.e. a joint
 */
function drawSegment([ay, ax], [by, bx], color, scale, ctx) {
  ctx.beginPath();
  ctx.moveTo(ax * scale, ay * scale);
  ctx.lineTo(bx * scale, by * scale);
  ctx.lineWidth = lineWidth;
  ctx.strokeStyle = color;
  ctx.stroke();
}
/**
 * Draws a pose skeleton by looking up all adjacent keypoints/joints
 */
function drawSkeleton(keypoints, minConfidence, ctx, scale = 1) {
  const adjacentKeyPoints =
      posenet.getAdjacentKeyPoints(keypoints, minConfidence);
  adjacentKeyPoints.forEach((keypoints) => {
    drawSegment(
        toTuple(keypoints[0].position), toTuple(keypoints[1].position), color,
        scale, ctx);
  });
}
/**
 * Draw pose keypoints onto a canvas
 */
function drawKeypoints(keypoints, minConfidence, ctx, scale = 1) {
  for (let i = 0; i < keypoints.length; i++) {
    const keypoint = keypoints[i];
    if (keypoint.score < minConfidence) {
      continue;
    }
    const {y, x} = keypoint.position;
    drawPoint(ctx, y * scale, x * scale, 3, color);
  }
}
function toTuple({y, x}) {
  return [y, x];
}
Final Step: Test it live and integrate it into production
After all the hard work is done, test it through, then you can integrate it into a similar type of application of your own.
To try out this live in a yoga class visit https://mixpose.com