Here is the semi-finished outcome.
The Why: I am learning Dutch, and I listen to a lot of BNR news during my 2.5- to 3-hour drive from home to the office and back. I feel that I understand a lot of the content, but at the same time I miss a lot of the context. I would really like to have a transcript of each podcast so that I can read and listen at the same time.
The What: Ideally, a mobile app that I can use to note down words, link those words to the audio files, and play back the sentences in which the words are used. More... and more...
The How: a lot of patience
On the Ubuntu Whisper server
- Miniconda, TensorFlow, PyTorch, NVIDIA GTX 1080 Ti
- OCI CLI
Python code
getListofAudiosToTranscript.py
import requests
from oauthlib.oauth2 import BackendApplicationClient
from requests_oauthlib import OAuth2Session
import os
import subprocess  # local to this script: replaces the shell-string os.system calls below

# OAuth2 client credentials for the ORDS REST API (fill in before running).
client_id = ""
client_secret = ""
token_url = "https://apex.chaoyu.nl/ords/dev_apex/oauth/token"

# Folder the audio files are downloaded into; transcriptAudios.py reads the same folder.
download_dir = "./AudioFilesToProcess/"

# Create an OAuth2 session (client-credentials grant) and fetch an access token.
client = BackendApplicationClient(client_id=client_id)
oauth = OAuth2Session(client=client)
token = oauth.fetch_token(token_url=token_url, client_id=client_id, client_secret=client_secret)

# Ask the API which audio files still have no transcript.
url = 'https://apex.chaoyu.nl/ords/dev_apex/learndutchforfree/listfilestocreatetranscripts'  # Replace with your API endpoint
response = oauth.get(url)
if response.status_code == 200:
    data = response.json()  # JSON array of objects, each carrying an 'audio_name' key
    os.makedirs(download_dir, exist_ok=True)
    for item in data:
        audio_name = item['audio_name']
        print(audio_name)
        # Arguments are passed as a list (no shell), so a file name containing
        # spaces, quotes or shell metacharacters is handed to the OCI CLI verbatim
        # instead of being interpreted by the shell.
        download = subprocess.run([
            "oci", "os", "object", "get",
            "-bn", "public",
            "--name", audio_name,
            "--file", download_dir + audio_name,
            "--no-multipart",
        ])
        if download.returncode != 0:
            print("Error: download failed for", audio_name, "- exit code", download.returncode)
    # Run the transcription script inside the conda environment "tf".
    # It processes every file in the download folder, so it is started once,
    # after all downloads have been attempted.
    subprocess.run(["conda", "run", "-n", "tf", "python", "transcriptAudios.py"])
else:
    print("Error:", response.status_code)
transcriptAudios.py
import whisper
# import required module
import os
import torch
import json  # local to this script: used to emit correctly escaped JSON lines

# Directory holding the downloaded audio files that still need a transcript.
directory = './AudioFilesToProcess/'
model = whisper.load_model("large")

for filename in os.listdir(directory):
    torch.cuda.empty_cache()
    result = model.transcribe(directory + filename)

    # One JSON object per segment, one segment per line. json.dumps escapes
    # quotes, backslashes and control characters inside the recognised text,
    # which plain string concatenation did not; for ordinary text the output is
    # byte-identical to the previous format (compact separators, leading space,
    # non-ASCII characters kept as-is).
    lines = []
    for segment in result["segments"]:
        record = {
            "Id": str(segment["id"]),
            "starts": str(round(segment["start"], 2)),
            "text": segment["text"],
        }
        lines.append(' ' + json.dumps(record, ensure_ascii=False, separators=(',', ':')) + "\n")
    clobText = ''.join(lines)
    # print(clobText)

    # Imported lazily on purpose: importing updateTranscriptColumn runs its
    # module-level OAuth token fetch, so doing it right before the first upload
    # keeps that token as fresh as possible. Later iterations hit the import cache.
    from updateTranscriptColumn import updateTranscriptColumn
    updateTranscriptColumn(filename, clobText)

# Clean up: remove every file from the working directory once all were processed.
for filename in os.listdir(directory):
    os.remove(directory + filename)
updateTranscriptColumn.py
import requests
from oauthlib.oauth2 import BackendApplicationClient
from requests_oauthlib import OAuth2Session
import json
# OAuth2 client credentials (left blank here; fill in before running).
# NOTE: everything in this section runs once, when the module is imported.
client_id = ""
client_secret = ""
token_url = "https://apex.chaoyu.nl/ords/dev_apex/oauth/token"
# Create an OAuth2 session using the client-credentials ("backend application") flow
# and fetch an access token for it. `oauth` is the session used by
# updateTranscriptColumn() below for its POST request.
client = BackendApplicationClient(client_id=client_id)
oauth = OAuth2Session(client=client)
token = oauth.fetch_token(token_url=token_url, client_id=client_id, client_secret=client_secret)
# Endpoint that receives the transcript as a JSON body (POST, authenticated via `oauth`).
url = 'https://apex.chaoyu.nl/ords/dev_apex/learndutchforfree/uploadtranscript' # Replace with your API endpoint
def updateTranscriptColumn(audio_name, transcript):
    """Upload the transcript of one audio file to the REST endpoint in `url`.

    The payload is a JSON object with the keys "file_name" and "transcript",
    sent with the module-level OAuth2 session `oauth`.

    Args:
        audio_name: Name of the audio file the transcript belongs to.
        transcript: Transcript text to store for that file.

    Returns:
        True when the server answered HTTP 200 and did not report
        ``"result": "fail"`` in its JSON body, otherwise False. The response
        (or the error) is printed either way.
    """
    # Local import: oauthlib is already a dependency of this module.
    from oauthlib.oauth2 import TokenExpiredError

    data = {
        "file_name": audio_name,
        "transcript": transcript
    }
    headers = {
        'Content-Type': 'application/json'
    }
    body = json.dumps(data)
    # print(body)

    try:
        response = oauth.post(url, data=body, headers=headers)
    except TokenExpiredError:
        # The token is fetched once at import time; a long transcription run
        # can outlive it. Fetch a fresh token and retry exactly once.
        oauth.fetch_token(token_url=token_url, client_id=client_id, client_secret=client_secret)
        response = oauth.post(url, data=body, headers=headers)

    # The body is not guaranteed to be JSON (e.g. an HTML error page), so fall
    # back to the raw text instead of raising while reporting the outcome.
    try:
        result = response.json()
    except ValueError:
        result = response.text

    if response.status_code == 200:
        print(result)
        return not (isinstance(result, dict) and result.get("result") == "fail")

    print("Error:", response.status_code)
    print("Error:", result)
    return False
On APEX
create or replace package ldff_utility_pck is

  -- Author  : CHAOY
  -- Created : 19/05/2023 21:39:38
  -- Purpose : Server-side helpers around the AUDIOS_LDFF table: receiving
  --           uploaded audio files, storing them in OCI Object Storage,
  --           exchanging transcripts as JSON and rendering an audio player.

  -- AJAX callback for file uploads. Reads the file name (g_x01), the MIME type
  -- (g_x02) and the base64-encoded content chunks (g_f01) from the request,
  -- stores the file and writes a JSON result object to the response.
  procedure pr_ajax_fileuploadhandler;

  -- Inserts one row into AUDIOS_LDFF with the name, MIME type and size of the
  -- file; the size is derived from the length of i_blob.
  procedure pr_insert_audio(i_audio_name in audios_ldff.audio_name%type
                           ,i_mime       in audios_ldff.audio_mime%type
                           ,i_blob       in audios_ldff.file_content%type);

  -- Sends i_blob to the OCI Object Storage bucket with an HTTP PUT, using
  -- i_file_name as the object name and i_mime as the Content-Type.
  procedure pr_oci_file_upload(i_file_name in audios_ldff.audio_name%type
                              ,i_blob      in audios_ldff.file_content%type
                              ,i_mime      in audios_ldff.audio_mime%type);

  -- Writes a JSON array with the names of all audios that have no transcript yet.
  procedure pr_list_files_for_transcripts;

  -- Parses a JSON request body containing "file_name" and "transcript",
  -- updates the transcript of the named audio and writes a JSON result object.
  procedure pr_post_req_audio_transcript(i_requst_body in blob);

  -- Requests a temporary read link for the stored object i_file_name from OCI
  -- and returns an HTML <audio> element pointing at it, or an error <span>.
  function fn_return_audio_html(i_file_name in audios_ldff.audio_name%type) return clob;

end ldff_utility_pck;
create or replace package body ldff_utility_pck is
-- Base URL of the OCI Object Storage REST API (eu-frankfurt-1 region endpoint).
c_oci_base_url constant varchar2(100) := 'https://objectstorage.eu-frankfurt-1.oraclecloud.com';
-- URL path segment selecting the Object Storage namespace.
c_oci_bucket_namespace constant varchar2(100) := '/n/frpnibrn7ulj';
-- URL path segment selecting the bucket "public" and its objects; the object
-- name is appended directly after the trailing slash.
c_oci_bucket_name constant varchar2(100) := '/b/public/o/';
-- Static ID of the APEX web credential passed as p_credential_static_id on the upload request.
c_oci_auth constant varchar2(10) := 'OCI_AUTH';
procedure pr_ajax_fileuploadhandler is
  -- AJAX callback that receives one uploaded file.
  --
  -- Request inputs (APEX AJAX globals):
  --   g_x01 : file name
  --   g_x02 : MIME type (defaults to application/octet-stream when empty)
  --   g_f01 : array of base64-encoded content chunks
  --
  -- Response: a JSON object {"result":"success"} or
  --           {"result":"fail","errormsg":"..."}.
  --
  -- The file is rejected when it is empty or when its name already exists in
  -- AUDIOS_LDFF. Otherwise it is uploaded to object storage and then
  -- registered in AUDIOS_LDFF.
  l_collection_name constant apex_collections.collection_name%type := 'AJAX_FILE_UPLOADED';
  l_blob            blob;
  l_filename        varchar2(200);
  l_mime_type       varchar2(200);
  l_multi_parts     varchar2(32000);
  l_count           number;
  e_exists          exception;
  e_empty           exception;

  -- Writes the JSON result object; errormsg is only written when supplied.
  procedure write_result(i_result   in varchar2
                        ,i_errormsg in varchar2 default null) is
  begin
    apex_json.open_object;
    apex_json.write(p_name  => 'result'
                   ,p_value => i_result);
    if i_errormsg is not null
    then
      apex_json.write(p_name  => 'errormsg'
                     ,p_value => i_errormsg);
    end if;
    apex_json.close_object;
  end write_result;

  -- Releases the session-duration temporary LOB so it does not linger in the
  -- (pooled) database session after the request.
  procedure free_blob is
  begin
    if l_blob is not null
       and dbms_lob.istemporary(l_blob) = 1
    then
      dbms_lob.freetemporary(l_blob);
    end if;
  end free_blob;
begin
  l_filename  := apex_application.g_x01;
  l_mime_type := nvl(apex_application.g_x02
                    ,'application/octet-stream');

  -- build BLOB from f01 30k array (base64 encoded)
  dbms_lob.createtemporary(l_blob
                          ,false
                          ,dbms_lob.session);
  for i in 1 .. apex_application.g_f01.count
  loop
    l_multi_parts := apex_application.g_f01(i);
    if length(l_multi_parts) > 0
    then
      dbms_lob.append(dest_lob => l_blob
                     ,src_lob  => to_blob(utl_encode.base64_decode(utl_raw.cast_to_raw(l_multi_parts))));
    end if;
  end loop;

  -- Kept for backward compatibility: the collection is reset on every upload
  -- even though no member is added to it here.
  apex_collection.create_or_truncate_collection(p_collection_name => l_collection_name);

  -- getlength() of an empty temporary LOB is 0, not null, so test for 0 to
  -- detect an upload without content.
  if nvl(dbms_lob.getlength(l_blob), 0) = 0
  then
    raise e_empty;
  end if;

  select count(1)
    into l_count
    from audios_ldff t
   where t.audio_name = l_filename
     and rownum = 1;
  if l_count <> 0
  then
    raise e_exists;
  end if;

  -- Upload first, register afterwards: if the upload raises, no row is left
  -- behind that points to an object which does not exist in the bucket.
  pr_oci_file_upload(i_file_name => l_filename
                    ,i_blob      => l_blob
                    ,i_mime      => l_mime_type);
  pr_insert_audio(i_audio_name => l_filename
                 ,i_mime       => l_mime_type
                 ,i_blob       => l_blob);

  free_blob;
  write_result(i_result => 'success');
exception
  when e_exists then
    free_blob;
    write_result(i_result   => 'fail'
                ,i_errormsg => 'File name taken');
  when e_empty then
    free_blob;
    write_result(i_result   => 'fail'
                ,i_errormsg => 'No file content received');
  when others then
    free_blob;
    write_result(i_result   => 'fail'
                ,i_errormsg => sqlerrm);
end pr_ajax_fileuploadhandler;
procedure pr_insert_audio(i_audio_name in audios_ldff.audio_name%type
                         ,i_mime       in audios_ldff.audio_mime%type
                         ,i_blob       in audios_ldff.file_content%type) is
  -- Registers an audio file in AUDIOS_LDFF.
  --
  --   i_audio_name : value for AUDIO_NAME
  --   i_mime       : value for AUDIO_MIME
  --   i_blob       : file content; only its length is used here, the content
  --                  itself is not written to the table by this insert
  --
  -- FILE_SIZE receives the blob length divided by 1024 twice and rounded to a
  -- whole number.
  l_size_rounded number;
begin
  l_size_rounded := round(dbms_lob.getlength(i_blob) / 1024 / 1024);

  insert into audios_ldff
    (audio_name
    ,audio_mime
    ,file_size)
  values
    (i_audio_name
    ,i_mime
    ,l_size_rounded);
end pr_insert_audio;
procedure pr_oci_file_upload(i_file_name in audios_ldff.audio_name%type
                            ,i_blob      in audios_ldff.file_content%type
                            ,i_mime      in audios_ldff.audio_mime%type) is
  -- Uploads a file to the OCI Object Storage bucket with an HTTP PUT.
  --
  --   i_file_name : object name in the bucket
  --   i_blob      : file content sent as the request body
  --   i_mime      : sent as the Content-Type header
  --
  -- Raises ORA-20001 when the service answers with a non-2xx HTTP status, so
  -- callers no longer mistake a rejected upload for a successful one.
  l_response clob;
  l_url      varchar2(4000);
begin
  -- Escape the object name so that spaces and reserved URL characters in a
  -- file name do not produce a malformed or different request URL.
  l_url := c_oci_base_url || c_oci_bucket_namespace || c_oci_bucket_name ||
           utl_url.escape(url                   => i_file_name
                         ,escape_reserved_chars => true
                         ,url_charset           => 'UTF-8');

  -- Start from a clean header list: g_request_headers is session state and
  -- may still hold headers from an earlier request.
  apex_web_service.g_request_headers.delete;
  apex_web_service.g_request_headers(1).name  := 'Content-Type';
  apex_web_service.g_request_headers(1).value := i_mime;

  l_response := apex_web_service.make_rest_request(p_url                  => l_url
                                                  ,p_http_method          => 'PUT'
                                                  ,p_body_blob            => i_blob
                                                  ,p_credential_static_id => c_oci_auth);

  -- Do not leak this request's Content-Type into later web service calls.
  apex_web_service.g_request_headers.delete;

  if nvl(apex_web_service.g_status_code, 0) not between 200 and 299
  then
    raise_application_error(-20001
                           ,'OCI upload of "' || i_file_name || '" failed with HTTP status ' ||
                            apex_web_service.g_status_code || ': ' ||
                            dbms_lob.substr(l_response, 1000, 1));
  end if;
end pr_oci_file_upload;
---
procedure pr_list_files_for_transcripts is
  -- Writes a JSON array to the APEX_JSON output; each element is an object
  -- {"audio_name": "..."} for one AUDIOS_LDFF row whose AUDIO_TRANSCRIPT is null.
  cursor cur_without_transcript is
    select t.audio_name
      from audios_ldff t
     where t.audio_transcript is null;
begin
  apex_json.open_array;
  for l_audio in cur_without_transcript
  loop
    apex_json.open_object;
    apex_json.write('audio_name'
                   ,l_audio.audio_name);
    apex_json.close_object;
  end loop;
  -- close_all closes whatever is still open, i.e. the array opened above
  apex_json.close_all;
end pr_list_files_for_transcripts;
---------
procedure pr_post_req_audio_transcript(i_requst_body in blob) is
  -- Stores the transcript sent by the transcription client.
  --
  --   i_requst_body : JSON document with the keys "file_name" and "transcript"
  --
  -- Response: a JSON object with "result" = 'success' plus the file name and
  -- transcript length, or "result" = 'fail' plus "errormsg". A request whose
  -- file name matches no row in AUDIOS_LDFF is reported as a failure instead
  -- of a success that changed nothing.
  l_json_object  json_object_t;
  l_file_name    audios_ldff.audio_name%type;
  l_transcript   audios_ldff.audio_transcript%type;
  e_unknown_file exception;
begin
  l_json_object := json_object_t.parse(i_requst_body);
  l_file_name   := l_json_object.get_string(key => 'file_name');
  l_transcript  := l_json_object.get_clob(key => 'transcript');

  update audios_ldff t
     set t.audio_transcript = l_transcript
   where t.audio_name = l_file_name;

  -- Also covers a missing/null file_name, which can never match a row.
  if sql%rowcount = 0
  then
    raise e_unknown_file;
  end if;

  apex_json.open_object;
  apex_json.write(p_name  => 'result'
                 ,p_value => 'success');
  apex_json.write(p_name  => 'file_name'
                 ,p_value => l_file_name);
  apex_json.write(p_name  => 'transcripsSize'
                 ,p_value => length(l_transcript));
  apex_json.close_object;
exception
  when e_unknown_file then
    apex_json.open_object;
    apex_json.write(p_name  => 'result'
                   ,p_value => 'fail');
    apex_json.write(p_name  => 'errormsg'
                   ,p_value => 'No audio found with file name: ' || l_file_name);
    apex_json.close_object;
  when others then
    apex_json.open_object;
    apex_json.write(p_name  => 'result'
                   ,p_value => 'fail');
    apex_json.write(p_name  => 'errormsg'
                   ,p_value => sqlerrm);
    apex_json.close_object;
end pr_post_req_audio_transcript;
-------
function fn_return_audio_html(i_file_name in audios_ldff.audio_name%type) return clob is
  -- Builds an HTML <audio> player for a stored audio file.
  --
  -- Posts an "ObjectRead" request for object i_file_name, expiring one hour
  -- from now, to the bucket's "/p/" endpoint and uses the "accessUri" of the
  -- response as the player source.
  --
  --   i_file_name : object name of the audio file in the bucket
  --
  -- Returns the <audio> element on HTTP 200, otherwise '<span>Error</span>'
  -- (also on any exception, which is additionally written to the APEX debug log).
  c_par_endpoint constant varchar2(300) := c_oci_base_url || c_oci_bucket_namespace || '/b/public/p/';
  l_json_payload     clob;
  l_response         clob;
  l_json_obj         json_object_t;
  l_clob_output_open boolean := false;
begin
  -- Build the request body in a temporary CLOB. p_preserve => true keeps the
  -- caller's APEX_JSON output; it is only restored by free_output, so every
  -- exit path below has to call it.
  apex_json.initialize_clob_output(p_preserve => true);
  l_clob_output_open := true;
  apex_json.open_object;
  apex_json.write('accessType'
                 ,'ObjectRead');
  -- required, but arbitrary; milliseconds are included so that two requests
  -- in the same second do not get the same name
  apex_json.write('name'
                 ,'myRequest' || to_char(systimestamp at time zone 'UTC'
                                        ,'yyyymmddhh24missff3'));
  -- required when the access type is ObjectRead
  apex_json.write('objectName'
                 ,i_file_name);
  -- required; OCI works in UTC and the expiry must be an RFC 3339 timestamp
  -- such as 2023-05-20T00:42:59Z ("T" separates date and time, "Z" means UTC)
  apex_json.write('timeExpires'
                 ,to_char(systimestamp at time zone 'UTC' + numtodsinterval(1
                                                                           ,'HOUR')
                         ,'yyyy-mm-dd"T"hh24:mi:ss"Z"'));
  apex_json.close_object;
  l_json_payload := apex_json.get_clob_output;
  apex_json.free_output;
  l_clob_output_open := false;

  -- g_request_headers is session state: drop whatever an earlier call left
  -- behind (e.g. the audio Content-Type of an upload) and declare the JSON body.
  apex_web_service.g_request_headers.delete;
  apex_web_service.g_request_headers(1).name  := 'Content-Type';
  apex_web_service.g_request_headers(1).value := 'application/json';

  l_response := apex_web_service.make_rest_request(p_url                  => c_par_endpoint
                                                  ,p_http_method          => 'POST'
                                                  ,p_body                 => l_json_payload
                                                  ,p_credential_static_id => c_oci_auth);
  apex_web_service.g_request_headers.delete;

  if apex_web_service.g_status_code = 200
  then
    -- Only parse once the status says there is a JSON answer to parse.
    l_json_obj := json_object_t.parse(l_response);
    -- The access URI ends up inside an HTML attribute, so escape it for that context.
    return '<audio controls id="p5-audio-player" autoplay> ' || '<source src="' ||
           apex_escape.html_attribute(c_oci_base_url || l_json_obj.get_string('accessUri')) ||
           '" type="audio/mp3">Your browser does not support the audio element.</audio>';
  end if;
  return '<span>Error</span>';
exception
  when others then
    if l_clob_output_open
    then
      apex_json.free_output;
    end if;
    apex_debug.error(p_message => 'fn_return_audio_html failed: %s'
                    ,p0        => sqlerrm);
    return '<span>Error</span>';
end fn_return_audio_html;
end ldff_utility_pck;