Generate Subtitles Using AI Tool

from vosk import Model, KaldiRecognizer, SetLogLevel
import sys
import os
import wave
import subprocess
import codecs
import datetime
import json
 
 
def format_sec(sec):
	"""Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

	The last field of an SRT timestamp is milliseconds (000-999), not a
	video frame number, so the fractional part of ``sec`` is emitted as
	milliseconds.

	Args:
		sec: Offset from the start of the media, in seconds (int or float).
			Negative values are clamped to zero.

	Returns:
		The timestamp string, e.g. ``"01:01:01,250"`` for ``3661.25``.
	"""
	# Work from a single rounded millisecond total so that float noise
	# (e.g. 59.999 * 1000 == 59998.99999...) cannot drop a millisecond and
	# so that a carry from rounding propagates into seconds/minutes/hours.
	total_ms = max(0, int(round(sec * 1000)))
	total_sec, ms = divmod(total_ms, 1000)
	total_min, s = divmod(total_sec, 60)
	h, m = divmod(total_min, 60)
	return "{0:02d}:{1:02d}:{2:02d},{3:03d}".format(h, m, s, ms)
 
 
class Stt:
	"""Speech-to-text helper that writes an SRT subtitle file for a media file.

	On construction it loads a Vosk model, creates a recognizer with
	word-level timing enabled, and starts an ffmpeg child process that
	decodes the input to raw 16 kHz mono signed 16-bit little-endian PCM on
	its stdout. ``transcribe()`` then feeds that audio to the recognizer and
	writes the subtitles to ``<mp4_path>.srt``.
	"""

	# Number of recognized words placed on one subtitle entry. A trailing
	# remainder shorter than this is merged into the preceding entry.
	WORDS_PER_LINE = 7

	def __init__(self, mp4_path, model_path=None):
		"""Load the model and start decoding ``mp4_path`` with ffmpeg.

		Args:
			mp4_path: Path of the media file to transcribe.
			model_path: Directory of the Vosk model to load. When omitted,
				the built-in default path is used.
		"""
		# Running counter of subtitle entries written so far (SRT numbering).
		self.index = 0

		self.model_path = model_path or r'/path/to/vosk-model-en-us-0.42-gigaspeech'
		self.mp4_path = mp4_path

		self.ffmpeg_path = 'ffmpeg'
		self.sample_rate = 16000

		# Set the log level before the model is created so that it is
		# already in effect while the model loads.
		SetLogLevel(-1)

		self.model = Model(self.model_path)
		self.rec = KaldiRecognizer(self.model, self.sample_rate)
		# Ask the recognizer for per-word entries ("result" list with
		# "word"/"start"/"end"), which recv_text() relies on.
		self.rec.SetWords(True)

		self.process = subprocess.Popen([self.ffmpeg_path, '-loglevel', 'quiet', '-i',
						self.mp4_path,
						'-ar', str(self.sample_rate), '-ac', '1', '-f', 's16le', '-'],
						stdout=subprocess.PIPE)

	def output_one_line(self, result, frm, to, f):
		"""Write one SRT entry built from the words ``result[frm:to]``.

		Args:
			result: List of word dicts, each with "word", "start" and "end".
			frm: Index of the first word of the entry (inclusive).
			to: Index one past the last word of the entry (exclusive).
			f: Writable text file object receiving the SRT entry.

		An empty range writes nothing and does not consume an entry number.
		"""
		words = result[frm:to]
		if not words:
			return

		start_sec = min(float(word["start"]) for word in words)
		end_sec = max(float(word["end"]) for word in words)
		sentence = " ".join(word["word"] for word in words)

		self.index += 1

		f.write(str(self.index) + "\n")
		f.write(format_sec(start_sec) + " --> " + format_sec(end_sec) + "\n")
		f.write(sentence + "\n")
		f.write("\n")
		# Flush per entry so the partial .srt is usable while a long
		# transcription is still running.
		f.flush()

		# Progress indicator: timestamp reached so far.
		sys.stdout.write(format_sec(end_sec) + "\n")

	def recv_text(self, subtitle, f):
		"""Parse one recognizer JSON result and write it as SRT entries.

		Args:
			subtitle: JSON document (str or UTF-8 bytes) as returned by the
				recognizer. Documents without a "result" key are reported
				on stdout and skipped.
			f: Writable text file object receiving the SRT entries.
		"""
		js = json.loads(subtitle)
		if "result" not in js:
			sys.stdout.write("no result in json.\n")
			return
		result = js["result"]

		per_line = self.WORDS_PER_LINE
		length = len(result)
		frm = 0
		while frm < length:
			to = frm + per_line
			# If fewer than a full line would remain after this entry
			# (or this entry would overrun the list), take everything
			# that is left so no short orphan line is produced.
			if length - to < per_line:
				to = length
			self.output_one_line(result, frm, to, f)
			frm = to

		sys.stdout.flush()

	def transcribe(self):
		"""Run recognition over the ffmpeg output and write ``<mp4_path>.srt``.

		Reads the decoded audio in chunks, writes an SRT entry group each
		time the recognizer reports a completed result, and writes the final
		result at end of stream. The ffmpeg process is always reaped; a
		non-zero ffmpeg exit status is reported on stderr.
		"""
		# SRT text may contain non-ASCII words; do not depend on the
		# platform default encoding.
		with open(self.mp4_path + ".srt", 'w', encoding='utf-8') as f:
			try:
				while True:
					data = self.process.stdout.read(4000)
					if not data:
						# End of audio: collect whatever is still buffered.
						self.recv_text(self.rec.FinalResult(), f)
						break
					if self.rec.AcceptWaveform(data):
						self.recv_text(self.rec.Result(), f)
			finally:
				# Close our end of the pipe first so ffmpeg cannot block on
				# a full pipe if we stopped early, then reap the child.
				self.process.stdout.close()
				returncode = self.process.wait()

		if returncode != 0:
			sys.stderr.write(
				"ffmpeg exited with code {0}; subtitles may be incomplete.\n".format(returncode))
 
# Script entry point: transcribe the media file given as the first
# command-line argument. Guarded so that importing this module does not
# start a transcription.
if __name__ == "__main__":
	if len(sys.argv) < 2:
		# sys.exit with a string prints it to stderr and exits with status 1.
		sys.exit("usage: {0} <media-file>".format(sys.argv[0]))
	stt = Stt(sys.argv[1])
	stt.transcribe()

References:
kdenlive-23.04.1/bin/data/kdenlive/scripts/speechtotext.py
https://docs.kdenlive.org/en/effects_and_compositions/subtitles.html
https://realpython.com/python-formatted-output/