Hackster is hosting Hackster Holidays, Ep. 6: Livestream & Giveaway Drawing. Watch previous episodes or stream live on Monday! Stream Hackster Holidays, Ep. 6 on Monday!
JuanVi
Published

A.I. Assistant Pendant

The A.I. Pendant helps the visually impaired navigate their surroundings by taking photos and providing real-time descriptions through A.I.

Expert · Full instructions provided · 200
A.I. Assistant Pendant

Things used in this project

Hardware components

Seeed Studio XIAO ESP32S3 Sense
Seeed Studio XIAO ESP32S3 Sense
×1
PCBWay Custom PCB
PCBWay Custom PCB
×1
Speaker 3W 4R 40mm
×1
Li-Po Battery 3.7V 400mAh
×1
SMD 0805 100K Resistor
×1
SMD 0805 10K Resistor
×1
SMD 0805 100nF Capacitor
×1
SMD Button 6x6x15mm
×1
SPDT Sliding Switch MSS22D18 2.5mm
×1
MAX98357AETE_T (1)
×1
Ferrite Bead 0805: BLM21AG121SN1D (1)
×2
SMD 0805 220pF Capacitor (1)
×2
SMD 0805 10uF Capacitor (1)
×1
SMD 0805 100nF Capacitor (1)
×1
JST 2.0mm Pitch THT 2 Pos, Right Angle
×1
JST 2.0mm Pitch Female 2 Pos (1 for Speaker, 1 for Battery)
×2
Pin Header 2.54mm 7 Pos
×2
Pin Socket 2.54mm 7 Pos
×2

Software apps and online services

Arduino IDE
Arduino IDE

Hand tools and fabrication machines

3D Printer (generic)
3D Printer (generic)
Soldering iron (generic)
Soldering iron (generic)
Solder Wire, Lead Free
Solder Wire, Lead Free
Solder Flux, Soldering
Solder Flux, Soldering
Solder Paste, Rework
Solder Paste, Rework
Hot Air Station, Industrial
Hot Air Station, Industrial
Hot Plate, Programmable
Hot Plate, Programmable
Wire Stripper / Crimper, 10AWG - 20AWG Strip
Wire Stripper / Crimper, 10AWG - 20AWG Strip

Story

Read more

Custom parts and enclosures

SolidEdge Files

ESP32 FRONT

ESP32 BACK

BUTTON CAP 2

BUTTON CAP 1

BUTTON EXTENDER (x2)

Schematics

Schematic PDF

Front PCB

Back_PCB

PCB_MOUNT_F

PCB_MOUNT_B

Schematic Kicad

PCB Kicad

Code

BUILD2GETHER_ESP32.ino

C/C++
main
#include "network_param.h"
#include "AudioMic.h"
#include "CloudSpeechClient.h"
#include "Audio.h"
#include <WiFiClientSecure.h>
#include <ArduinoJson.h>
#include <ChatGPT.hpp>
#include "Base64ImageData.h"

#include "esp_camera.h"
#include "SPIFFS.h"
#include <WiFi.h>
#include "base64.h"
#include "CameraConfig.h"

// I2S output pins feeding the audio amplifier (passed to audio.setPinout() in setup()).
#define I2S_LRC D3
#define I2S_DOUT D1
#define I2S_BCLK D2
// NOTE(review): I2S_MCLK is not referenced in this sketch; setup() passes -1 as the fourth setPinout() argument.
#define I2S_MCLK 0

// Audio player instance. The original author's note: it uses I2S port 1 because the
// PDM microphone (I2S_MODE_PDM) is only available on I2S port 0.
Audio audio(false, 3, 1); // USE I2S 1 (I2S0 FOR MICROPHONE AS I2S_MODE_PDM IS ONLY IN PIN I2S0)


// TLS client handed to the ChatGPT client; setup() calls setInsecure() on it.
WiFiClientSecure client_audio;
// ChatGPT client using chatgpt_token from network_param.h.
// NOTE(review): "v1" and 60000 are presumably the API version and a timeout in ms — confirm against the ChatGPT library.
ChatGPT<WiFiClientSecure> chatGPT_Client(&client_audio, "v1", chatgpt_token, 60000);


// Button-press flags: set to 1 by the button ISRs and cleared by loop().
// They are shared between interrupt context and the main loop, so they must be
// volatile; otherwise the compiler may cache the value and loop() can miss a press.
volatile int button_pressed_camera = 0;
volatile int button_pressed_microphone = 0;

// GPIOs the two push buttons are wired to; configured as interrupt inputs in setup().
const int button_camera = D8;
const int button_microphone = D9;

// Latest speech-to-text result, copied from the global `transcript` in loop().
String audio_transcript;

// Variables for camera
const char *photoPath = "/photo.jpg"; // SPIFFS path where capturePhoto() stores the JPEG
// Data-URI form of the last photo; assigned by verifyImage() and read by ChatGPT_Vision().
// Stays nullptr until verifyImage() has successfully encoded an image.
const char *base64ImageDataChar = nullptr;

// Interrupt handler for the camera button: latches a pending press for loop().
void button_camera_trigger() {
  // A press that is already pending is left untouched.
  if (button_pressed_camera != 0) {
    return;
  }
  button_pressed_camera = 1;
}

// Interrupt handler for the microphone button: latches a pending press for loop().
void button_microphone_trigger() {
  // A press that is already pending is left untouched.
  if (button_pressed_microphone != 0) {
    return;
  }
  button_pressed_microphone = 1;
}

void capturePhoto() {
  camera_fb_t *photo = esp_camera_fb_get();
  if (!photo) {
    Serial.println("Error capturing photo");
    return;
  }

  // Save the photo in SPIFFS
  File photoFile = SPIFFS.open(photoPath, FILE_WRITE);
  if (!photoFile) {
    Serial.println("Error opening file for writing");
    return;
  }

  photoFile.write(photo->buf, photo->len);
  photoFile.close();

  Serial.println("Photo saved at: " + String(photoPath));

  esp_camera_fb_return(photo);
  //verifyImage(photoPath);
}

void verifyImage(const char *filePath) {
  File file = SPIFFS.open(filePath, FILE_READ);
  if (!file) {
    Serial.println("Error opening file for reading");
    return;
  }

  size_t fileSize = file.size();
  uint8_t *buffer = new uint8_t[fileSize];
  file.read(buffer, fileSize);
  file.close();

  String base64Data = base64::encode(buffer, fileSize);

  delete[] buffer;

  Serial.println("Image saved in Base64:\n");
  Serial.println(base64Data);

  // Optimized creation of base64DataWithPrefix
  String base64DataWithPrefix = "data:image/jpeg;base64," + base64Data;

  base64ImageDataChar = base64DataWithPrefix.c_str();

  /* String result;
    Serial.println("\n\n[ChatGPT] - Asking a Vision Question");
    if (chatGPT_Client.vision_question("gpt-4o", "user", "text", "Whats in this image?", "image_url", base64ImageDataChar, "auto", 500, true, result)) {
      Serial.print("[ChatGPT] Response: ");
      Serial.println(result);
    } else {
      Serial.print("[ChatGPT] Error: ");
      Serial.println(result);
    }
    Serial.println("END");*/
}

void ChatGPT_Vision(const String& question) {
  /*
  model: Model to use for generating the response (e.g., "gpt-4o").
  role: Role of the message sender (e.g., "user" or "assistant").
  type: Type of content (e.g., "text").
  text: Text content of the message.
  image_type: Type of the image (e.g., "image_url").
  image_url: URL of the image or Base64 Image Data
  detail: Detail level of the image (e.g., "high", "low", "auto").
  max_tokens: Maximum number of tokens to generate in the response.
  content_only: Flag indicating whether to extract only the content of the response. (e.g., true - answer only, false - full response)
  result: Reference to a String variable to store the result of the API call.
  */

  String result;
  Serial.println("\n\n[ChatGPT] - Asking a Vision Question");
  if (chatGPT_Client.vision_question("gpt-4o", "user", "text", question.c_str(), "image_url", base64ImageDataChar, "auto", 500, true, result)) {
    Serial.print("[ChatGPT] Response: ");
    Serial.println(result);
    audio.openai_speech(chatgpt_token, "tts-1", result, "shimmer", "mp3", "1");
  } else {
    Serial.print("[ChatGPT] Error: ");
    Serial.println(result);
  }
}

// One-time initialisation: serial port, Wi-Fi, button interrupts, audio output,
// camera and SPIFFS, in that order. Returns early if the camera or SPIFFS fails
// to initialise, after queueing a spoken error message.
void setup() {
  Serial.begin(115200);
  //Serial.setDebugOutput(true);
  //while(!Serial);
  Serial.println("START");
  WiFi.disconnect();
  WiFi.mode(WIFI_STA);
  WiFi.begin(ssid, password);
  // Blocks until the station is connected; there is no timeout.
  while (WiFi.status() != WL_CONNECTED) delay(1500);
  // BUTTONS
  // Plain INPUT (no internal pull resistor) with a FALLING-edge interrupt.
  // NOTE(review): presumably relies on external pull-ups on the PCB — confirm against the schematic.
  pinMode(button_camera, INPUT);
  attachInterrupt(digitalPinToInterrupt(button_camera), button_camera_trigger, FALLING);

  pinMode(button_microphone, INPUT);
  attachInterrupt(digitalPinToInterrupt(button_microphone), button_microphone_trigger, FALLING);
  // Audio
  audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT, -1);
  audio.setVolume(7);

  // Disables server certificate verification for the ChatGPT TLS client.
  client_audio.setInsecure();

  // CAMERA
  camera_config_t config = getCameraConfig();

  esp_err_t err = esp_camera_init(&config);
  if (err != ESP_OK) {
    Serial.printf("Error initializing the camera: 0x%x", err);
    audio.connecttospeech("ERROR CAMERA", "en");
    return;
  }

  // Mount the SPIFFS file system (the `true` argument requests formatting if mounting fails)
  if (!SPIFFS.begin(true)) {
    Serial.println("Error mounting SPIFFS");
    audio.connecttospeech("ERROR SPIFFS", "en");
    return;
  }

  Serial.println("Camera ready");
  //audio.connecttospeech("CAMERA READY", "en");

  // CHATGPT CLIENT
}

// Main loop: services one pending button press per pass, then pumps the audio player.
//  - Camera button: capture a photo and store it in SPIFFS.
//  - Microphone button: encode the stored photo, record a voice question,
//    transcribe it and send question + image to ChatGPT_Vision().
void loop() {
  // The camera press takes priority; a pending microphone press waits for a later pass.
  if (button_pressed_camera == 1) {
    capturePhoto();
    button_pressed_camera = 0;
  } else if (button_pressed_microphone == 1) {
    // Refresh base64ImageDataChar from the photo currently stored at photoPath.
    verifyImage(photoPath);
    //Serial.println(base64ImageDataChar);

    Serial.println("\r\nRecord start!\r\n");
    // Heap-allocated and deleted below, once transcription is done.
    AudioMic* audio_Mic = new AudioMic(SENSE);

    audio_Mic->Record();
    Serial.println("Recording Completed. Now Processing...");
    //audio.connecttospeech("RECORD FINISH", "en");

    CloudSpeechClient* cloudSpeechClient = new CloudSpeechClient(USE_APIKEY);
    cloudSpeechClient->Transcribe(audio_Mic);

    // `transcript` is the global declared in CloudSpeechClient.h.
    Serial.print("Transcript: ");
    Serial.println(transcript);
    //audio.connecttospeech("TRANSCRIPT", "en");

    // Copy the text into a String before the speech client is destroyed.
    audio_transcript = String(transcript);

    delete cloudSpeechClient;
    delete audio_Mic;

    Serial.print("Transcript AUDIO: ");
    Serial.println(audio_transcript);

    //audio.connecttospeech(transcript, "es");
    // Cleared before the ChatGPT request, so a press during that request is latched for the next pass.
    button_pressed_microphone = 0;
    ChatGPT_Vision(audio_transcript);
  }
  // Called on every pass to service the audio player.
  audio.loop();
}

// Logs each info message and restarts the chip once the message
// "End of Stream." is received.
// NOTE(review): presumably invoked by the Audio library as a status callback — confirm.
void audio_info(const char *info) {
  Serial.print("audio_info: ");
  Serial.println(info);

  const bool streamEnded = (strcmp(info, "End of Stream.") == 0);
  if (!streamEnded) {
    return;
  }

  Serial.println("FINISH");
  esp_restart();
}

network_param.h

C Header File
#ifndef _NETWORK_PARAM_H
#define _NETWORK_PARAM_H

// Wi-Fi credentials passed to WiFi.begin() in the sketch; replace the placeholders.
inline const char *ssid = "ssid";
inline const char *password = "password";

// Host name of the Google Cloud Speech API.
// NOTE(review): presumably used by CloudSpeechClient when connecting — confirm in CloudSpeechClient.cpp.
inline const char*  server = "speech.googleapis.com";
// OpenAI API key used by the ChatGPT client and the text-to-speech call; replace the placeholder.
inline const char* chatgpt_token = "your-api-key";//CHATGPT API KEY


// PEM-encoded certificate for speech.googleapis.com.
// To get the certificate for your region run:
//   openssl s_client -showcerts -connect speech.googleapis.com:443
// Copy the certificate (all lines between and including -----BEGIN CERTIFICATE-----
// and -----END CERTIFICATE-----) into the root_ca variable below, one quoted
// line per certificate line, each ending in "\n".
inline const char* root_ca= 

"-----BEGIN CERTIFICATE-----\n"
"MIIFCzCCAvOgAwIBAgIQf/AFoHxM3tEArZ1mpRB7mDANBgkqhkiG9w0BAQsFADBH\n"
"MQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExM\n"
"QzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMjMxMjEzMDkwMDAwWhcNMjkwMjIw\n"
"MTQwMDAwWjA7MQswCQYDVQQGEwJVUzEeMBwGA1UEChMVR29vZ2xlIFRydXN0IFNl\n"
"cnZpY2VzMQwwCgYDVQQDEwNXUjIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK\n"
"AoIBAQCp/5x/RR5wqFOfytnlDd5GV1d9vI+aWqxG8YSau5HbyfsvAfuSCQAWXqAc\n"
"+MGr+XgvSszYhaLYWTwO0xj7sfUkDSbutltkdnwUxy96zqhMt/TZCPzfhyM1IKji\n"
"aeKMTj+xWfpgoh6zySBTGYLKNlNtYE3pAJH8do1cCA8Kwtzxc2vFE24KT3rC8gIc\n"
"LrRjg9ox9i11MLL7q8Ju26nADrn5Z9TDJVd06wW06Y613ijNzHoU5HEDy01hLmFX\n"
"xRmpC5iEGuh5KdmyjS//V2pm4M6rlagplmNwEmceOuHbsCFx13ye/aoXbv4r+zgX\n"
"FNFmp6+atXDMyGOBOozAKql2N87jAgMBAAGjgf4wgfswDgYDVR0PAQH/BAQDAgGG\n"
"MB0GA1UdJQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjASBgNVHRMBAf8ECDAGAQH/\n"
"AgEAMB0GA1UdDgQWBBTeGx7teRXUPjckwyG77DQ5bUKyMDAfBgNVHSMEGDAWgBTk\n"
"rysmcRorSCeFL1JmLO/wiRNxPjA0BggrBgEFBQcBAQQoMCYwJAYIKwYBBQUHMAKG\n"
"GGh0dHA6Ly9pLnBraS5nb29nL3IxLmNydDArBgNVHR8EJDAiMCCgHqAchhpodHRw\n"
"Oi8vYy5wa2kuZ29vZy9yL3IxLmNybDATBgNVHSAEDDAKMAgGBmeBDAECATANBgkq\n"
"hkiG9w0BAQsFAAOCAgEARXWL5R87RBOWGqtY8TXJbz3S0DNKhjO6V1FP7sQ02hYS\n"
"TL8Tnw3UVOlIecAwPJQl8hr0ujKUtjNyC4XuCRElNJThb0Lbgpt7fyqaqf9/qdLe\n"
"SiDLs/sDA7j4BwXaWZIvGEaYzq9yviQmsR4ATb0IrZNBRAq7x9UBhb+TV+PfdBJT\n"
"DhEl05vc3ssnbrPCuTNiOcLgNeFbpwkuGcuRKnZc8d/KI4RApW//mkHgte8y0YWu\n"
"ryUJ8GLFbsLIbjL9uNrizkqRSvOFVU6xddZIMy9vhNkSXJ/UcZhjJY1pXAprffJB\n"
"vei7j+Qi151lRehMCofa6WBmiA4fx+FOVsV2/7R6V2nyAiIJJkEd2nSi5SnzxJrl\n"
"Xdaqev3htytmOPvoKWa676ATL/hzfvDaQBEcXd2Ppvy+275W+DKcH0FBbX62xevG\n"
"iza3F4ydzxl6NJ8hk8R+dDXSqv1MbRT1ybB5W0k8878XSOjvmiYTDIfyc9acxVJr\n"
"Y/cykHipa+te1pOhv7wYPYtZ9orGBV5SGOJm4NrB3K1aJar0RfzxC3ikr7Dyc6Qw\n"
"qDTBU39CluVIQeuQRgwG3MuSxl7zRERDRilGoKb8uY45JzmxWuKxrfwT/478JuHU\n"
"/oTxUFqOl2stKnn7QGTq8z29W+GgBLCXSBxC9epaHM0myFH/FJlniXJfHeytWt0=\n"
"-----END CERTIFICATE-----\n";

// Getting an access token:
// First obtain a service account key (JSON file).
// Then run the commands below in Google Cloud Shell to get an access token:
// $ gcloud auth activate-service-account --key-file=KEY_FILE   (KEY_FILE is your service account key file)
// $ gcloud auth print-access-token
// The access token expires after one hour.
// Google recommends using an access token.
//const String AccessToken = "";

// It is also possible to use an API key instead of an access token. It has no time limit.
// Replace the placeholder below with your Google Speech-to-Text API key.
inline const String ApiKey = "Googles Speech To Texy Api Key";

// see https://cloud.google.com/docs/authentication?hl=ja#getting_credentials_for_server-centric_flow
// see https://qiita.com/basi/items/3623a576b754f738138e (Japanese)

#endif  // _NETWORK_PARAM_H

AudioMic.cpp

C/C++
#include "AudioMic.h"

// Allocates the recording storage as separate fixed-size chunks and creates the
// I2S reader for the given microphone type.
AudioMic::AudioMic(MicType micType) {
  const int chunkCount = wavDataSize/dividedWavDataSize;

  // Table of chunk pointers first, then one buffer of dividedWavDataSize bytes per entry.
  wavData = new char*[chunkCount];
  int chunk = 0;
  while (chunk < chunkCount) {
    wavData[chunk] = new char[dividedWavDataSize];
    ++chunk;
  }

  i2s = new I2S(micType);
}

// Frees every recording chunk, the chunk table, and the I2S reader.
AudioMic::~AudioMic() {
  const int chunkCount = wavDataSize/dividedWavDataSize;
  int chunk = 0;
  while (chunk < chunkCount) {
    delete[] wavData[chunk];
    ++chunk;
  }
  delete[] wavData;
  delete i2s;
}

// Fills header[0..43] with a RIFF/WAVE header describing 16-bit mono linear PCM
// at 16000 Hz whose data chunk is waveDataSize bytes long.
//
// header:       destination buffer, at least 44 bytes.
// waveDataSize: size of the audio payload in bytes.
void AudioMic::CreateWavHeader(byte* header, int waveDataSize){
  // Writes a 32-bit value at header[offset..offset+3], least significant byte first.
  auto putLittleEndian32 = [header](int offset, unsigned int value) {
    for (int k = 0; k < 4; ++k) {
      header[offset + k] = (byte)((value >> (8 * k)) & 0xFF);
    }
  };

  // Bytes 8..39 never change for this format.
  static const byte fixedFields[32] = {
    'W', 'A', 'V', 'E',
    'f', 'm', 't', ' ',
    0x10, 0x00, 0x00, 0x00,   // fmt chunk size: 16 (linear PCM)
    0x01, 0x00,               // format tag: linear PCM
    0x01, 0x00,               // channels: mono
    0x80, 0x3E, 0x00, 0x00,   // sampling rate: 16000
    0x00, 0x7D, 0x00, 0x00,   // bytes per second: 16000 x 2 x 1 = 32000
    0x02, 0x00,               // block size: 16-bit mono
    0x10, 0x00,               // bits per sample: 16
    'd', 'a', 't', 'a'
  };

  header[0] = 'R';
  header[1] = 'I';
  header[2] = 'F';
  header[3] = 'F';

  // RIFF chunk size = payload + 44-byte header - 8 bytes for "RIFF" and the size field.
  unsigned int fileSizeMinus8 = waveDataSize + 44 - 8;
  putLittleEndian32(4, fileSizeMinus8);

  for (int k = 0; k < 32; ++k) {
    header[8 + k] = fixedFields[k];
  }

  putLittleEndian32(40, (unsigned int)waveDataSize);
}

// Writes the WAV header into paddedHeader, then fills every wavData chunk with
// 16-bit samples read from the I2S microphone. For each frame of the I2S buffer
// the two bytes at offset 2 are kept; the frame is 4 bytes wide in 16-bit mode
// and 8 bytes wide in 32-bit mode. Any other sample width records nothing.
void AudioMic::Record() {
  CreateWavHeader(paddedHeader, wavDataSize);

  int bytesPerRead = 0;
  int frameStride = 0;
  switch (i2s->GetBitPerSample()) {
    case 16:
      bytesPerRead = i2sBufferSize/2;
      frameStride = 4;
      break;
    case 32:
      bytesPerRead = i2sBufferSize;
      frameStride = 8;
      break;
    default:
      return;
  }

  const int chunkCount = wavDataSize/dividedWavDataSize;
  const int samplesPerChunk = i2sBufferSize/8;

  for (int chunk = 0; chunk < chunkCount; ++chunk) {
    i2s->Read(i2sBuffer, bytesPerRead);
    char* out = wavData[chunk];
    for (int s = 0; s < samplesPerChunk; ++s) {
      const char* in = i2sBuffer + frameStride*s + 2;
      *out++ = in[0];
      *out++ = in[1];
    }
  }
}

AudioMic.h

C Header File
#ifndef _AUDIOMIC_H
#define _AUDIOMIC_H

#include <Arduino.h>
#include "I2S.h"

// Records 16-bit, mono, 16000 Hz linear PCM audio from an I2S microphone into
// heap buffers, together with a WAV header.
//
// The object owns raw heap allocations (wavData, i2s) that its destructor frees,
// so copying is disabled: an implicit copy would free the same memory twice.
class AudioMic {
  I2S* i2s;                                               // Owned; created in the constructor, deleted in the destructor.
  static const int headerSize = 44;
  static const int i2sBufferSize = 12000;
  char i2sBuffer[i2sBufferSize];                          // Scratch buffer for raw I2S reads.
  void CreateWavHeader(byte* header, int waveDataSize);

public:
  static const int wavDataSize = 90000;                   // It must be multiple of dividedWavDataSize. At 32000 bytes/s (16 kHz, 16-bit, mono) this is about 2.8 seconds.
  static const int dividedWavDataSize = i2sBufferSize/4;
  char** wavData;                                         // It's divided. Because large continuous memory area can't be allocated in esp32.
  byte paddedHeader[headerSize + 4] = {0};                // The size must be multiple of 3 for Base64 encoding. Additional byte size must be even because wave data is 16bit.

  AudioMic(MicType micType);
  ~AudioMic();
  AudioMic(const AudioMic&) = delete;
  AudioMic& operator=(const AudioMic&) = delete;
  void Record();
};

#endif // _AUDIOMIC_H

Base64ImageData.h

C Header File
Example of base64 encoded Image of an Orange Cat
#ifndef BASE64_IMAGE_DATA_H
#define BASE64_IMAGE_DATA_H

// Base64 sample image (described by the author as an orange cat); the payload is empty here.
// `inline` gives the variable a single definition even when this header is
// included from several translation units, matching network_param.h.
inline const char* base64ImageData = "";

#endif

camera_pins.h

C Header File
#if defined(CAMERA_MODEL_WROVER_KIT)
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM  21
#define SIOD_GPIO_NUM  26
#define SIOC_GPIO_NUM  27

#define Y9_GPIO_NUM    35
#define Y8_GPIO_NUM    34
#define Y7_GPIO_NUM    39
#define Y6_GPIO_NUM    36
#define Y5_GPIO_NUM    19
#define Y4_GPIO_NUM    18
#define Y3_GPIO_NUM    5
#define Y2_GPIO_NUM    4
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM  23
#define PCLK_GPIO_NUM  22

#elif defined(CAMERA_MODEL_ESP_EYE)
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM  4
#define SIOD_GPIO_NUM  18
#define SIOC_GPIO_NUM  23

#define Y9_GPIO_NUM    36
#define Y8_GPIO_NUM    37
#define Y7_GPIO_NUM    38
#define Y6_GPIO_NUM    39
#define Y5_GPIO_NUM    35
#define Y4_GPIO_NUM    14
#define Y3_GPIO_NUM    13
#define Y2_GPIO_NUM    34
#define VSYNC_GPIO_NUM 5
#define HREF_GPIO_NUM  27
#define PCLK_GPIO_NUM  25

#define LED_GPIO_NUM 22

#elif defined(CAMERA_MODEL_M5STACK_PSRAM)
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM 15
#define XCLK_GPIO_NUM  27
#define SIOD_GPIO_NUM  25
#define SIOC_GPIO_NUM  23

#define Y9_GPIO_NUM    19
#define Y8_GPIO_NUM    36
#define Y7_GPIO_NUM    18
#define Y6_GPIO_NUM    39
#define Y5_GPIO_NUM    5
#define Y4_GPIO_NUM    34
#define Y3_GPIO_NUM    35
#define Y2_GPIO_NUM    32
#define VSYNC_GPIO_NUM 22
#define HREF_GPIO_NUM  26
#define PCLK_GPIO_NUM  21

#elif defined(CAMERA_MODEL_M5STACK_V2_PSRAM)
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM 15
#define XCLK_GPIO_NUM  27
#define SIOD_GPIO_NUM  22
#define SIOC_GPIO_NUM  23

#define Y9_GPIO_NUM    19
#define Y8_GPIO_NUM    36
#define Y7_GPIO_NUM    18
#define Y6_GPIO_NUM    39
#define Y5_GPIO_NUM    5
#define Y4_GPIO_NUM    34
#define Y3_GPIO_NUM    35
#define Y2_GPIO_NUM    32
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM  26
#define PCLK_GPIO_NUM  21

#elif defined(CAMERA_MODEL_M5STACK_WIDE)
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM 15
#define XCLK_GPIO_NUM  27
#define SIOD_GPIO_NUM  22
#define SIOC_GPIO_NUM  23

#define Y9_GPIO_NUM    19
#define Y8_GPIO_NUM    36
#define Y7_GPIO_NUM    18
#define Y6_GPIO_NUM    39
#define Y5_GPIO_NUM    5
#define Y4_GPIO_NUM    34
#define Y3_GPIO_NUM    35
#define Y2_GPIO_NUM    32
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM  26
#define PCLK_GPIO_NUM  21

#define LED_GPIO_NUM 2

#elif defined(CAMERA_MODEL_M5STACK_ESP32CAM)
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM 15
#define XCLK_GPIO_NUM  27
#define SIOD_GPIO_NUM  25
#define SIOC_GPIO_NUM  23

#define Y9_GPIO_NUM    19
#define Y8_GPIO_NUM    36
#define Y7_GPIO_NUM    18
#define Y6_GPIO_NUM    39
#define Y5_GPIO_NUM    5
#define Y4_GPIO_NUM    34
#define Y3_GPIO_NUM    35
#define Y2_GPIO_NUM    17
#define VSYNC_GPIO_NUM 22
#define HREF_GPIO_NUM  26
#define PCLK_GPIO_NUM  21

#elif defined(CAMERA_MODEL_M5STACK_UNITCAM)
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM 15
#define XCLK_GPIO_NUM  27
#define SIOD_GPIO_NUM  25
#define SIOC_GPIO_NUM  23

#define Y9_GPIO_NUM    19
#define Y8_GPIO_NUM    36
#define Y7_GPIO_NUM    18
#define Y6_GPIO_NUM    39
#define Y5_GPIO_NUM    5
#define Y4_GPIO_NUM    34
#define Y3_GPIO_NUM    35
#define Y2_GPIO_NUM    32
#define VSYNC_GPIO_NUM 22
#define HREF_GPIO_NUM  26
#define PCLK_GPIO_NUM  21

#elif defined(CAMERA_MODEL_M5STACK_CAMS3_UNIT)
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM 21
#define XCLK_GPIO_NUM  11
#define SIOD_GPIO_NUM  17
#define SIOC_GPIO_NUM  41

#define Y9_GPIO_NUM    13
#define Y8_GPIO_NUM    4
#define Y7_GPIO_NUM    10
#define Y6_GPIO_NUM    5
#define Y5_GPIO_NUM    7
#define Y4_GPIO_NUM    16
#define Y3_GPIO_NUM    15
#define Y2_GPIO_NUM    6
#define VSYNC_GPIO_NUM 42
#define HREF_GPIO_NUM  18
#define PCLK_GPIO_NUM  12

#define LED_GPIO_NUM 14

#elif defined(CAMERA_MODEL_AI_THINKER)
#define PWDN_GPIO_NUM  32
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM  0
#define SIOD_GPIO_NUM  26
#define SIOC_GPIO_NUM  27

#define Y9_GPIO_NUM    35
#define Y8_GPIO_NUM    34
#define Y7_GPIO_NUM    39
#define Y6_GPIO_NUM    36
#define Y5_GPIO_NUM    21
#define Y4_GPIO_NUM    19
#define Y3_GPIO_NUM    18
#define Y2_GPIO_NUM    5
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM  23
#define PCLK_GPIO_NUM  22

// 4 for flash led or 33 for normal led
#define LED_GPIO_NUM   4

#elif defined(CAMERA_MODEL_TTGO_T_JOURNAL)
#define PWDN_GPIO_NUM  0
#define RESET_GPIO_NUM 15
#define XCLK_GPIO_NUM  27
#define SIOD_GPIO_NUM  25
#define SIOC_GPIO_NUM  23

#define Y9_GPIO_NUM    19
#define Y8_GPIO_NUM    36
#define Y7_GPIO_NUM    18
#define Y6_GPIO_NUM    39
#define Y5_GPIO_NUM    5
#define Y4_GPIO_NUM    34
#define Y3_GPIO_NUM    35
#define Y2_GPIO_NUM    17
#define VSYNC_GPIO_NUM 22
#define HREF_GPIO_NUM  26
#define PCLK_GPIO_NUM  21

#elif defined(CAMERA_MODEL_XIAO_ESP32S3)
// GPIO assignments for the camera when CAMERA_MODEL_XIAO_ESP32S3 is defined.
// NOTE(review): -1 presumably marks a signal with no GPIO assigned (power-down, reset) — confirm against the esp32-camera docs.
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM  10
#define SIOD_GPIO_NUM  40
#define SIOC_GPIO_NUM  39

// Parallel data lines Y9..Y2 (mapped to pin_d7..pin_d0 in getCameraConfig()).
#define Y9_GPIO_NUM    48
#define Y8_GPIO_NUM    11
#define Y7_GPIO_NUM    12
#define Y6_GPIO_NUM    14
#define Y5_GPIO_NUM    16
#define Y4_GPIO_NUM    18
#define Y3_GPIO_NUM    17
#define Y2_GPIO_NUM    15
#define VSYNC_GPIO_NUM 38
#define HREF_GPIO_NUM  47
#define PCLK_GPIO_NUM  13

#elif defined(CAMERA_MODEL_ESP32_CAM_BOARD)
// The 18 pin header on the board has Y5 and Y3 swapped
#define USE_BOARD_HEADER 0
#define PWDN_GPIO_NUM    32
#define RESET_GPIO_NUM   33
#define XCLK_GPIO_NUM    4
#define SIOD_GPIO_NUM    18
#define SIOC_GPIO_NUM    23

#define Y9_GPIO_NUM 36
#define Y8_GPIO_NUM 19
#define Y7_GPIO_NUM 21
#define Y6_GPIO_NUM 39
#if USE_BOARD_HEADER
#define Y5_GPIO_NUM 13
#else
#define Y5_GPIO_NUM 35
#endif
#define Y4_GPIO_NUM 14
#if USE_BOARD_HEADER
#define Y3_GPIO_NUM 35
#else
#define Y3_GPIO_NUM 13
#endif
#define Y2_GPIO_NUM    34
#define VSYNC_GPIO_NUM 5
#define HREF_GPIO_NUM  27
#define PCLK_GPIO_NUM  25

#elif defined(CAMERA_MODEL_ESP32S3_CAM_LCD)
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM  40
#define SIOD_GPIO_NUM  17
#define SIOC_GPIO_NUM  18

#define Y9_GPIO_NUM    39
#define Y8_GPIO_NUM    41
#define Y7_GPIO_NUM    42
#define Y6_GPIO_NUM    12
#define Y5_GPIO_NUM    3
#define Y4_GPIO_NUM    14
#define Y3_GPIO_NUM    47
#define Y2_GPIO_NUM    13
#define VSYNC_GPIO_NUM 21
#define HREF_GPIO_NUM  38
#define PCLK_GPIO_NUM  11

#elif defined(CAMERA_MODEL_ESP32S2_CAM_BOARD)
// The 18 pin header on the board has Y5 and Y3 swapped
#define USE_BOARD_HEADER 0
#define PWDN_GPIO_NUM    1
#define RESET_GPIO_NUM   2
#define XCLK_GPIO_NUM    42
#define SIOD_GPIO_NUM    41
#define SIOC_GPIO_NUM    18

#define Y9_GPIO_NUM 16
#define Y8_GPIO_NUM 39
#define Y7_GPIO_NUM 40
#define Y6_GPIO_NUM 15
#if USE_BOARD_HEADER
#define Y5_GPIO_NUM 12
#else
#define Y5_GPIO_NUM 13
#endif
#define Y4_GPIO_NUM 5
#if USE_BOARD_HEADER
#define Y3_GPIO_NUM 13
#else
#define Y3_GPIO_NUM 12
#endif
#define Y2_GPIO_NUM    14
#define VSYNC_GPIO_NUM 38
#define HREF_GPIO_NUM  4
#define PCLK_GPIO_NUM  3

#elif defined(CAMERA_MODEL_ESP32S3_EYE)
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM  15
#define SIOD_GPIO_NUM  4
#define SIOC_GPIO_NUM  5

#define Y2_GPIO_NUM 11
#define Y3_GPIO_NUM 9
#define Y4_GPIO_NUM 8
#define Y5_GPIO_NUM 10
#define Y6_GPIO_NUM 12
#define Y7_GPIO_NUM 18
#define Y8_GPIO_NUM 17
#define Y9_GPIO_NUM 16

#define VSYNC_GPIO_NUM 6
#define HREF_GPIO_NUM  7
#define PCLK_GPIO_NUM  13

#elif defined(CAMERA_MODEL_DFRobot_FireBeetle2_ESP32S3) || defined(CAMERA_MODEL_DFRobot_Romeo_ESP32S3)
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM  45
#define SIOD_GPIO_NUM  1
#define SIOC_GPIO_NUM  2

#define Y9_GPIO_NUM    48
#define Y8_GPIO_NUM    46
#define Y7_GPIO_NUM    8
#define Y6_GPIO_NUM    7
#define Y5_GPIO_NUM    4
#define Y4_GPIO_NUM    41
#define Y3_GPIO_NUM    40
#define Y2_GPIO_NUM    39
#define VSYNC_GPIO_NUM 6
#define HREF_GPIO_NUM  42
#define PCLK_GPIO_NUM  5

#else
#error "Camera model not selected"
#endif

CameraConfig.cpp

C/C++
// CameraConfig.cpp
#include "CameraConfig.h"
#include "esp_camera.h"          // For the `camera_config_t` type and the camera functions


// Builds the camera driver configuration for the board selected in camera_pins.h:
// JPEG output, VGA frames, two frame buffers in PSRAM, always grabbing the
// latest frame. Without PSRAM the frame buffer is moved to DRAM.
//
// Returns the populated configuration, ready for esp_camera_init().
camera_config_t getCameraConfig() {
    // Zero-initialise so any field not assigned below has a defined value
    // instead of whatever happened to be on the stack.
    camera_config_t config = {};
    config.ledc_channel = LEDC_CHANNEL_0;
    config.ledc_timer = LEDC_TIMER_0;
    config.pin_d0 = Y2_GPIO_NUM;
    config.pin_d1 = Y3_GPIO_NUM;
    config.pin_d2 = Y4_GPIO_NUM;
    config.pin_d3 = Y5_GPIO_NUM;
    config.pin_d4 = Y6_GPIO_NUM;
    config.pin_d5 = Y7_GPIO_NUM;
    config.pin_d6 = Y8_GPIO_NUM;
    config.pin_d7 = Y9_GPIO_NUM;
    config.pin_xclk = XCLK_GPIO_NUM;
    config.pin_pclk = PCLK_GPIO_NUM;
    config.pin_vsync = VSYNC_GPIO_NUM;
    config.pin_href = HREF_GPIO_NUM;
    config.pin_sccb_sda = SIOD_GPIO_NUM;
    config.pin_sccb_scl = SIOC_GPIO_NUM;
    config.pin_pwdn = PWDN_GPIO_NUM;
    config.pin_reset = RESET_GPIO_NUM;
    config.xclk_freq_hz = 20000000;
    config.pixel_format = PIXFORMAT_JPEG;  // for streaming
    config.grab_mode = CAMERA_GRAB_LATEST;
    config.fb_location = CAMERA_FB_IN_PSRAM;
    config.jpeg_quality = 5;
    config.fb_count = 2;
    config.frame_size = FRAMESIZE_VGA;

    if (config.pixel_format == PIXFORMAT_JPEG) {
        if (psramFound()) {
            // Additional PSRAM configurations (if needed)
        } else {
            // No PSRAM: buffers must live in internal DRAM, so keep a single
            // frame buffer rather than two.
            // NOTE(review): SVGA is larger than the VGA default above; confirm
            // this fallback size is intended for a DRAM-only board.
            config.frame_size = FRAMESIZE_SVGA;
            config.fb_location = CAMERA_FB_IN_DRAM;
            config.fb_count = 1;
        }
    } else {
        config.frame_size = FRAMESIZE_240X240;
    }

#if defined(CAMERA_MODEL_XIAO_ESP32S3)
    // GPIO 13 and 14 are PCLK and Y6 in the XIAO ESP32S3 pin map.
    pinMode(13, INPUT_PULLUP);
    pinMode(14, INPUT_PULLUP);
#endif

    return config;
}

CloudSpeechClient.h

C Header File
#ifndef _CLOUDSPEECHCLIENT_H
#define _CLOUDSPEECHCLIENT_H
#include <WiFiClientSecure.h>
#include "AudioMic.h"

// Global holding the latest transcription text; defined outside this header.
// The sketch reads it right after calling CloudSpeechClient::Transcribe().
extern const char* transcript;

// Selects how CloudSpeechClient authenticates.
// NOTE(review): presumably maps to the AccessToken / ApiKey values in network_param.h — confirm in CloudSpeechClient.cpp.
enum Authentication {
  USE_ACCESSTOKEN,
  USE_APIKEY
};

// Client that transcribes a recording held by an AudioMic.
// NOTE(review): the implementation is not visible here; given `server` in network_param.h
// it presumably talks to the Google Cloud Speech API over TLS — confirm.
class CloudSpeechClient {
  WiFiClientSecure client;                    // TLS connection owned by this object.
  void PrintHttpBody2(AudioMic* audio_Mic);   // NOTE(review): presumably writes the request body built from the recording — confirm.
  Authentication authentication;              // Authentication mode chosen at construction.

public:
  CloudSpeechClient(Authentication authentication);
  ~CloudSpeechClient();
  // Transcribes the audio recorded by audio_Mic; the sketch reads the result from the global `transcript`.
  void Transcribe(AudioMic* audio_Mic);
  void client_stop();
  void client_begin();
};

#endif // _CLOUDSPEECHCLIENT_H

I2S.cpp

C/C++
#include "I2S.h"
// Sample rate passed to i2s_set_clk() in the constructor.
#define SAMPLE_RATE (16000)
// NOTE(review): the four PIN_I2S_* macros below are not referenced in this file;
// the SENSE configuration uses literal GPIO numbers 42 and 41 instead.
#define PIN_I2S_BCLK 26
#define PIN_I2S_LRC 22
#define PIN_I2S_DIN 34
#define PIN_I2S_DOUT 25

// I2S peripheral used for the microphone.
#define I2S_PORT I2S_NUM_0
// This I2S specification : 
//  -   LRC high is channel 2 (right).
//  -   LRC signal transitions once each word.
//  -   DATA is valid on the CLOCK rising edge.
//  -   Data bits are MSB first.
//  -   DATA bits are left-aligned with respect to LRC edge.
//  -   DATA bits are right-shifted by one with respect to LRC edges.


// Installs and configures the I2S driver on I2S_PORT for the given microphone.
//
// micType: microphone to configure. Only SENSE (PDM microphone, 16-bit, 16 kHz)
//          is implemented; for any other value no driver is installed and
//          GetBitPerSample() returns 0.
//
// Driver errors are reported on Serial; configuration continues after an error.
I2S::I2S(MicType micType) {
  // 0 is not a valid sample width: it marks "not configured", so that
  // GetBitPerSample() never returns an uninitialised value and callers that
  // test for 16 or 32 do nothing.
  BITS_PER_SAMPLE = static_cast<i2s_bits_per_sample_t>(0);

  if (micType != SENSE) {
    Serial.println("I2S: unsupported microphone type, driver not installed");
    return;
  }

  BITS_PER_SAMPLE = I2S_BITS_PER_SAMPLE_16BIT;

  i2s_config_t i2s_config = {
      .mode                 = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM ),
      .sample_rate          = 16000U,
      .bits_per_sample      = I2S_BITS_PER_SAMPLE_16BIT,
      .channel_format       = I2S_CHANNEL_FMT_ONLY_RIGHT,     // Also works
      //.channel_format       = I2S_CHANNEL_FMT_ONLY_LEFT,
      .communication_format = I2S_COMM_FORMAT_PCM,            // Also works
      //.communication_format = I2S_COMM_FORMAT_I2S,
      .intr_alloc_flags     = ESP_INTR_FLAG_LEVEL1,
      .dma_buf_count        = 4,
      .dma_buf_len          = 1024,
      .use_apll             = false,
      .tx_desc_auto_clear   = false,
      .fixed_mclk           = 0
  };

  // Only the word-select and data-in lines are assigned (GPIO 42 and GPIO 41).
  i2s_pin_config_t i2s_mic_pins = {
      .bck_io_num     = I2S_PIN_NO_CHANGE,
      .ws_io_num      = 42,
      .data_out_num   = I2S_PIN_NO_CHANGE,
      .data_in_num    = 41
  };

  // Report failures instead of silently ignoring them. Later steps still run,
  // as they did before, so the existing behaviour is kept.
  esp_err_t err = i2s_driver_install(I2S_PORT, &i2s_config, 0, NULL);
  if (err != ESP_OK) {
    Serial.printf("I2S: i2s_driver_install failed: 0x%x\n", err);
  }

  err = i2s_set_pin(I2S_PORT, &i2s_mic_pins);
  if (err != ESP_OK) {
    Serial.printf("I2S: i2s_set_pin failed: 0x%x\n", err);
  }

  err = i2s_set_clk(I2S_PORT, SAMPLE_RATE, BITS_PER_SAMPLE, I2S_CHANNEL_STEREO);
  if (err != ESP_OK) {
    Serial.printf("I2S: i2s_set_clk failed: 0x%x\n", err);
  }
}

int I2S::Read(char* data, int numData) {
    size_t bytes_read = 0;
    esp_err_t result = i2s_read(I2S_PORT, (void *)data, numData, &bytes_read, portMAX_DELAY);
    if (result == ESP_OK) {
        return bytes_read;
    } else {
        Serial.print("Error reading I2S data: ");
        Serial.println(result);
        return 0;
    }


    
}

// Returns the configured sample width in bits as a plain integer.
int I2S::GetBitPerSample() {
  const int bits = static_cast<int>(BITS_PER_SAMPLE);
  return bits;
}

I2S.h

C Header File
#ifndef _I2S_H
#define _I2S_H
#include <Arduino.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "driver/i2s.h"
#include "esp_system.h"

// Microphone variants accepted by the I2S and AudioMic constructors.
// NOTE(review): the I2S constructor in I2S.cpp only configures the driver for SENSE;
// the other values are accepted but not set up there.
enum MicType {
  ADMP441,
  ICS43434,
  M5GO,
  M5STACKFIRE,
  SENSE
};

// Thin wrapper around the ESP-IDF I2S driver used to read microphone samples.
class I2S {
  i2s_bits_per_sample_t BITS_PER_SAMPLE;  // Sample width chosen by the constructor.
public:
  // Configures the I2S driver for the given microphone type.
  I2S(MicType micType);
  // Reads up to numData bytes into data; returns the number of bytes read, or 0 on error.
  int Read(char* data, int numData);
  // Returns the sample width in bits.
  int GetBitPerSample();
};

#endif // _I2S_H

Credits

JuanVi

JuanVi

3 projects • 5 followers

Comments