// Wokwi - Online ESP32, STM32, Arduino Simulator


// --- Deepgram Speech-to-Text API call with ESP32 & SD card [Arduino IDE, no additional library needed]
// --- Workflow: read the AUDIO .wav file from the SD (SPI) card, send it to the Deepgram server, receive the transcription response,
// --- repeating endlessly in loop()

#include <WiFi.h>               // all 3 libraries are part of Arduino/ESP32 library (no install needed)
#include <WiFiClientSecure.h>
#include <SD.h>
  

// WiFi credentials: the Wokwi simulator provides an open virtual access point (no password needed)
const char* ssid = "Wokwi-GUEST";
const char* password = "";      

const char* deepgramApiKey =    "...";                   // Deepgram API key placeholder - add your credentials here!

const char* audio_filename =    "/AudioGerman.wav";      // path of the AUDIO .wav file on the SD card to transcribe

WiFiClientSecure client;        // TLS client shared by setup() (connects) and loop() (sends requests, reads responses)


// ----------------------------------------------------------------------------------------------------------------------------

// One-time initialisation: serial console, WiFi, SD card and the TLS connection to Deepgram.
// Every failure is reported on the serial console and aborts the remaining setup steps.
void setup()
{
  Serial.begin(115200);

  // Join the WiFi network; poll once per second until the link is up.
  WiFi.begin(ssid, password);
  for (;;)
  {
    if (WiFi.status() == WL_CONNECTED)
    {
      break;
    }
    delay(1000);
    Serial.println("Connecting to WiFi...");
  }
  Serial.println("Connected to WiFi");

  // Mount the SD card on the default SPI pins.
  // (Use SD.begin(SD_CS_PIN) instead when a dedicated CS pin is wired, beyond the SD VSPI default pins 5,18,19,23.)
  const bool sdMounted = SD.begin();
  if (!sdMounted)
  {
    Serial.println("ERROR - Card Mount Failed");
    return;
  }

  if (SD.cardType() == CARD_NONE)
  {
    Serial.println("ERROR - No SD card attached");
    return;
  }
  Serial.println("SD card initialized.");

  // Connect to Deepgram once and stay connected:
  // the connection is kept open for best performance, because loop() sends repeated AUDIO requests.
  client.setInsecure();                                // encrypted HTTPS connection without certificate validation
  const bool serverReached = client.connect("api.deepgram.com", 443);
  if (!serverReached)
  {
    Serial.println("ERROR - Connection failed");
    return;
  }
  Serial.println("> Connected to Deepgram Server." );
}


// ----------------------------------------------------------------------------------------------------------------------------

void loop() 
{
  long start_time = millis();   
  
  File audioFile = SD.open( audio_filename );    
  if (!audioFile) {
    Serial.println("ERROR - Failed to open file for reading");
    return;
  }
  size_t audio_size = audioFile.size();
  audioFile.close();
  Serial.println("\n> Audio File [" + (String) audio_filename + "] found, size: " + (String) audio_size );

  // ---------- Send HTTPS request header
  /*client.println("POST /v1/listen HTTP/1.1");*/
  client.println("POST /v1/listen?model=nova-2-general&detect_language=true HTTP/1.1"); 
  client.println("Host: api.deepgram.com");
  client.println("Authorization: Token " + String(deepgramApiKey));
  client.println("Content-Type: audio/wav");
  client.println("Content-Length: " + String(audio_size));
  client.println();
  Serial.println("> POST Request to Deepgram Server started, sending WAV data now ..." );

  // ---------- Reading the AUDIO wav file, sending in CHUNKS (closing file after done)
  File file = SD.open( audio_filename, FILE_READ );
  const size_t bufferSize = 1024; // we use a 1KB buffer 
  uint8_t buffer[bufferSize];
  size_t bytesRead;
  while (file.available()) 
  { bytesRead = file.read(buffer, sizeof(buffer));
    if (bytesRead > 0) client.write(buffer, bytesRead);   // sending WAV AUDIO data    
  }
  file.close();
  Serial.println("> All bytes sent, waiting Deepgram transcription ... \n-----------------" );

  // ---------- Receiving Deepgram Server response 
  String response = "";
  while ( response == "" )    
  { while (client.available())    
    { char c = client.read();
      response += String(c);      
    }       
  } 
  
  // ---------- Printing complete Deepgram RESPONSE)
  Serial.println("Response: " + response ); 


  // ---------- closing connection to Deepgram 
  // we keep open for best performance on next request in loop()
  /* client.stop();     // end connection */


  // ---------- Addon (optionally):
  // Extracting and printing summary:  Transcription + Detected language + Total duration [sec],
  // using String operations only (no library needed), alternatively: using json.h libraries 
  
  int pos_start, pos_end;
  String transcription, language;
  String json_Transcript_Start = "\"transcript\":";
  String json_Transcript_End   = "\"confidence\":";
  String json_DetectLang_Start = "\"detected_language\":";
  String json_DetectLang_End   = "\"language_confidence\":";
  
  pos_start = response.indexOf(json_Transcript_Start);      
  if (pos_start > 0) 
  {  pos_start += json_Transcript_Start.length()+1;      
     pos_end = response.indexOf(json_Transcript_End, pos_start);   
  }  transcription = response.substring(pos_start, pos_end-2);

  pos_start = response.indexOf(json_DetectLang_Start);      
  if (pos_start > 0) 
  {  pos_start += json_DetectLang_Start.length()+1;      
     pos_end = response.indexOf(json_DetectLang_End, pos_start);   
  }  language = response.substring(pos_start, pos_end-2);
  
  Serial.println("=> Transcription: [" + transcription + "]" );
  Serial.println("=> Detected Language: [" + language + "]" ); 
  Serial.print(  "=> Total Duration (from local SD.open(file) to Transcription complete [sec]: "); 
  Serial.println( float ((millis()-start_time))/1000, 2 );   // print 2 decimal digits 
  Serial.println("-----------------\n");
  
}