Project Notes: Whisper + APEX + Python+React+ LEARN DUTCH APP

here is the semi finished outcome.

https://learn.chaoyu.nl/

The Why: I am learning Dutch, I listen to a lot of BNR news on my 2.5 to 3 hours drive from home to office and back. I feel like that I understand a lot of the content but at same time miss a lot of the context. I really wanna have a transcript of the podcast so I can read and listen at the same time.

The What: Ideally a Mobile App that I can use to note words, linking words from these audios and play the sentences where the words are used. more .. and more ..

The How: a lot of patience

On Ubuntu Whisper Server

Miniconda, TensorFlow, Pytorch, Nvidia gtx 1080ti
OCI CLI

Python Codes

getListofAudiosToTranscript.py

import requests
from oauthlib.oauth2 import BackendApplicationClient
from requests_oauthlib import OAuth2Session
import os


client_id = ""
client_secret = ""
token_url = "https://apex.chaoyu.nl/ords/dev_apex/oauth/token" 


# Create an OAuth2 session
client = BackendApplicationClient(client_id=client_id)
oauth = OAuth2Session(client=client)
token = oauth.fetch_token(token_url=token_url, client_id=client_id, client_secret=client_secret)

# Make a REST call with OAuth2 authentication
url = 'https://apex.chaoyu.nl/ords/dev_apex/learndutchforfree/listfilestocreatetranscripts'  # Replace with your API endpoint

response = oauth.get(url)

if response.status_code == 200:
    data = response.json()  # If the response contains JSON data
    # print(data)
    for items in data:
        print(items['audio_name'])
        cmd = "oci os object get -bn public --name "+items['audio_name']+ " --file ./AudioFilesToProcess/" + items['audio_name']+ " --no-multipart"
        os.system(cmd)

    os.system('conda run -n tf python transcriptAudios.py') # I activate the conda environment tf (tensorflow) and run the python script
    
else:
    print("Error:", response.status_code)

transcriptAudios.py

import whisper
# import required module
import os
import torch
# assign directory
directory = './AudioFilesToProcess/'
model = whisper.load_model("large")

for filename in os.listdir(directory):
    torch.cuda.empty_cache()

    result = model.transcribe(directory+filename)

    clobText = ''
    for items in result["segments"]:
        clobText += ' {"Id":"' + str(items["id"]) + '","starts":"' + str(round(items["start"], 2)) + '","text":"' + items["text"] + "\"}\n" 
    # print(clobText)
    from updateTranscriptColumn import updateTranscriptColumn
    updateTranscriptColumn(filename, clobText)

for filename in os.listdir(directory):
    os.remove(directory+filename)

updateTranscriptColumn.py

import requests
from oauthlib.oauth2 import BackendApplicationClient
from requests_oauthlib import OAuth2Session
import json

# OAuth2 client credentials
client_id = ""
client_secret = ""
token_url = "https://apex.chaoyu.nl/ords/dev_apex/oauth/token" 

# Create an OAuth2 session
client = BackendApplicationClient(client_id=client_id)
oauth = OAuth2Session(client=client)
token = oauth.fetch_token(token_url=token_url, client_id=client_id, client_secret=client_secret)

# Make a POST REST call with OAuth2 authentication and JSON data
url = 'https://apex.chaoyu.nl/ords/dev_apex/learndutchforfree/uploadtranscript'   # Replace with your API endpoint


def updateTranscriptColumn(audio_name, transcript):
    data = {
        "file_name": audio_name,
        "transcript": transcript
    }
    headers = {
        'Content-Type': 'application/json'
    }
    # print(json.dumps(data))
    response = oauth.post(url, data=json.dumps(data), headers=headers)

    if response.status_code == 200:
        result = response.json()  # If the response contains JSON data
        print(result)
    else:
        result = response.json()
        print("Error:", response.status_code)
        print("Error:", result)

On APEX

create or replace package ldff_utility_pck is

   -- Author  : CHAOY
   -- Created : 19/05/2023 21:39:38
   -- Purpose : 

   procedure pr_ajax_fileuploadhandler;

   procedure pr_insert_audio(i_audio_name in audios_ldff.audio_name%type
                            ,i_mime       in audios_ldff.audio_mime%type
                            ,i_blob       in audios_ldff.file_content%type);

   procedure pr_oci_file_upload(i_file_name in audios_ldff.audio_name%type
                               ,i_blob      in audios_ldff.file_content%type
                               ,i_mime      in audios_ldff.audio_mime%type);

   procedure pr_list_files_for_transcripts;

   procedure pr_post_req_audio_transcript(i_requst_body in blob);

   function fn_return_audio_html(i_file_name in audios_ldff.audio_name%type) return clob;

end ldff_utility_pck;

create or replace package body ldff_utility_pck is

   c_oci_base_url         constant varchar2(100) := 'https://objectstorage.eu-frankfurt-1.oraclecloud.com';
   c_oci_bucket_namespace constant varchar2(100) := '/n/frpnibrn7ulj';
   c_oci_bucket_name      constant varchar2(100) := '/b/public/o/';
   c_oci_auth             constant varchar2(10) := 'OCI_AUTH';

   procedure pr_ajax_fileuploadhandler is
   
      l_collection_name constant apex_collections.collection_name%type := 'AJAX_FILE_UPLOADED';
      l_blob        blob;
      l_filename    varchar2(200);
      l_mime_type   varchar2(200);
      l_multi_parts varchar2(32000);
      l_count       number;
      e_exits       exception;
   begin
      l_filename  := apex_application.g_x01;
      l_mime_type := nvl(apex_application.g_x02
                        ,'application/octet-stream');
   
      -- build BLOB from f01 30k array (base64 encoded)
      dbms_lob.createtemporary(l_blob
                              ,false
                              ,dbms_lob.session);
   
      for i in 1 .. apex_application.g_f01.count
      loop
         l_multi_parts := wwv_flow.g_f01(i);
         if length(l_multi_parts) > 0
         then
            dbms_lob.append(dest_lob => l_blob
                           ,src_lob  => to_blob(utl_encode.base64_decode(utl_raw.cast_to_raw(l_multi_parts))));
         end if;
      end loop;
   
      apex_collection.create_or_truncate_collection(p_collection_name => l_collection_name);
      -- add collection member (only if BLOB is not null)
      if dbms_lob.getlength(l_blob) is not null
      then
         select count(1)
         into   l_count
         from   audios_ldff t
         where  t.audio_name = l_filename;
      
         if l_count <> 0
         then
            raise e_exits;
         end if;
      
         pr_insert_audio(i_audio_name => l_filename
                        ,i_mime       => l_mime_type
                        ,i_blob       => l_blob);
      
         pr_oci_file_upload(i_file_name => l_filename
                           ,i_blob      => l_blob
                           ,i_mime      => l_mime_type);
         /*
         apex_collection.add_member(p_collection_name => l_collection_name
                                   ,p_c001            => l_filename
                                   ,p_c002            => l_mime_type
                                   ,p_blob001         => l_blob);
                                   
                                   */
      end if;
   
      apex_json.open_object;
      apex_json.write(p_name  => 'result'
                     ,p_value => 'success');
      apex_json.close_object;
   
   exception
      when e_exits then
         apex_json.open_object;
         apex_json.write(p_name  => 'result'
                        ,p_value => 'fail');
         apex_json.write(p_name  => 'errormsg'
                        ,p_value => 'File name taken');
         apex_json.close_object;
      when others then
         apex_json.open_object;
         apex_json.write(p_name  => 'result'
                        ,p_value => 'fail');
         apex_json.write(p_name  => 'errormsg'
                        ,p_value => sqlerrm);
         apex_json.close_object;
   end pr_ajax_fileuploadhandler;

   procedure pr_insert_audio(i_audio_name in audios_ldff.audio_name%type
                            ,i_mime       in audios_ldff.audio_mime%type
                            ,i_blob       in audios_ldff.file_content%type) is
   begin
   
      insert into audios_ldff
         (audio_name
         ,audio_mime
          --,file_content
         ,file_size)
      values
         (i_audio_name
         ,i_mime
          --,i_blob
         ,round(dbms_lob.getlength(i_blob) / 1024 / 1024));
   
      ---
   
   end pr_insert_audio;

   procedure pr_oci_file_upload(i_file_name in audios_ldff.audio_name%type
                               ,i_blob      in audios_ldff.file_content%type
                               ,i_mime      in audios_ldff.audio_mime%type) is
   
      l_response clob;
   begin
      apex_web_service.g_request_headers(1).name := 'Content-Type';
      apex_web_service.g_request_headers(1).value := i_mime;
   
      l_response := apex_web_service.make_rest_request(p_url                  => c_oci_base_url ||
                                                                                 c_oci_bucket_namespace ||
                                                                                 c_oci_bucket_name || i_file_name
                                                      ,p_http_method          => 'PUT'
                                                      ,p_body_blob            => i_blob
                                                      ,p_credential_static_id => c_oci_auth);
   
   end pr_oci_file_upload;
   ---
   procedure pr_list_files_for_transcripts is
      c_audios sys_refcursor;
   
   begin
      /*
         open c_audios for
            select t.audio_name
            from   audios_ldff t
            where  t.audio_transcript is null;
         apex_json.write('data'
                        ,rec.audio_name);
      */
      apex_json.open_array;
   
      for rec in (select t.audio_name
                  from   audios_ldff t
                  where  t.audio_transcript is null)
      loop
         apex_json.open_object;
         apex_json.write('audio_name'
                        ,rec.audio_name);
         apex_json.close_object; 
      end loop;
      apex_json.close_all;
   
   end pr_list_files_for_transcripts;

   ---------

   procedure pr_post_req_audio_transcript(i_requst_body in blob) is
   
      l_json_object json_object_t;
      l_file_name   audios_ldff.audio_name%type;
      l_transcript  audios_ldff.audio_transcript%type;
   begin
      l_json_object := json_object_t.parse(i_requst_body);
      l_file_name   := l_json_object.get_string(key => 'file_name');
      l_transcript  := l_json_object.get_clob(key => 'transcript');
   
      update audios_ldff t
      set    t.audio_transcript = l_transcript
      where  t.audio_name = l_file_name;
   
      apex_json.open_object;
      apex_json.write(p_name  => 'result'
                     ,p_value => 'success');
      apex_json.write(p_name  => 'file_name'
                     ,p_value => l_file_name);
      apex_json.write(p_name  => 'transcripsSize'
                     ,p_value => length(l_transcript));
      apex_json.close_object;
   
   exception
      when others then
         apex_json.open_object;
         apex_json.write(p_name  => 'result'
                        ,p_value => 'fail');
         apex_json.write(p_name  => 'errormsg'
                        ,p_value => sqlerrm);
         apex_json.close_object;
      
   end pr_post_req_audio_transcript;

   -------
   function fn_return_audio_html(i_file_name in audios_ldff.audio_name%type) return clob is
      l_json_payload clob;
      l_response     clob;
   
      l_base_url         constant varchar2(100) := 'https://objectstorage.eu-frankfurt-1.oraclecloud.com';
      l_bucket_namespace constant varchar2(100) := '/n/frpnibrn7ulj/b/public';
      l_json_obj json_object_t;
   
   begin
      apex_json.initialize_clob_output(p_preserve => true);
      apex_json.open_object;
      apex_json.write('accessType'
                     ,'ObjectRead');
      apex_json.write('name'
                     ,'myRequest' || to_char(systimestamp at time zone 'UTC'
                                            ,'yyyyddmmhh24miss')); -- required, but arbitrary. must be unqiue
      apex_json.write('objectName'
                     ,i_file_name); -- required if access type is ObjectRead, Duhhhhh..
   
      apex_json.write('timeExpires'
                     ,to_char(systimestamp at time zone 'UTC' + numtodsinterval(1
                                                     ,'HOUR')
                             ,'yyyy-mm-dd"T"hh24:mi:ss"Z"')); -- required,  why UTC, because OCI works UTC. What is T and Z, T is just a sperator where Z is UTC 0 ( PAR expiration must conform to RFC 3339: 2023-05-2000:42:59)
      apex_json.close_object;
      l_json_payload := apex_json.get_clob_output;
      l_response     := apex_web_service.make_rest_request(p_url                  => l_base_url || l_bucket_namespace ||
                                                                                     '/p/'
                                                          ,p_http_method          => 'POST'
                                                          ,p_body                 => l_json_payload
                                                          ,p_credential_static_id => 'OCI_AUTH');
   
      l_json_obj := json_object_t.parse(l_response);
   
      if apex_web_service.g_status_code = 200
      then
         return '<audio controls id="p5-audio-player" autoplay> ' || '<source src="' || l_base_url || l_json_obj.get_string('accessUri') || '" type="audio/mp3">Your browser does not support the audio element.</audio>';
      end if;
   
      return '<span>Error</span>';
   exception
      when others then
         return '<span>Error</span>';
   end fn_return_audio_html;
end ldff_utility_pck;

Cookie	Duration	Description
cookielawinfo-checkbox-analytics	11 months	This cookie is set by GDPR Cookie Consent plugin. The cookie is used to store the user consent for the cookies in the category "Analytics".
cookielawinfo-checkbox-functional	11 months	The cookie is set by GDPR cookie consent to record the user consent for the cookies in the category "Functional".
cookielawinfo-checkbox-necessary	11 months	This cookie is set by GDPR Cookie Consent plugin. The cookies is used to store the user consent for the cookies in the category "Necessary".
cookielawinfo-checkbox-others	11 months	This cookie is set by GDPR Cookie Consent plugin. The cookie is used to store the user consent for the cookies in the category "Other.
cookielawinfo-checkbox-performance	11 months	This cookie is set by GDPR Cookie Consent plugin. The cookie is used to store the user consent for the cookies in the category "Performance".
viewed_cookie_policy	11 months	The cookie is set by the GDPR Cookie Consent plugin and is used to store whether or not user has consented to the use of cookies. It does not store any personal data.

here is the semi finished outcome.

On Ubuntu Whisper Server

On APEX

Leave a Reply Cancel reply