mirror of
https://github.com/zebrajr/whisper.git
synced 2025-12-06 00:19:50 +01:00
55 lines
1.4 KiB
Python
55 lines
1.4 KiB
Python
import zlib
|
|
from typing import Iterator, TextIO
|
|
|
|
|
|
def exact_div(x, y):
    """Return ``x // y``, asserting that ``y`` divides ``x`` with no remainder.

    An ``AssertionError`` is raised when ``x % y`` is non-zero. Because the
    check is an ``assert``, it is skipped when Python runs with ``-O``.
    """
    remainder = x % y
    assert remainder == 0
    quotient = x // y
    return quotient
|
|
|
|
|
|
def str2bool(string):
    """Convert the exact strings ``"True"`` / ``"False"`` to their boolean value.

    The match is case-sensitive. Any other value raises ``ValueError`` with a
    message listing the accepted spellings.
    """
    str2val = {"True": True, "False": False}
    # Reject unknown spellings up front so the happy path is a plain lookup.
    if string not in str2val:
        raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")
    return str2val[string]
|
|
|
|
|
|
def optional_int(string):
    """Parse ``string`` with ``int``, mapping the literal ``"None"`` to ``None``.

    Anything other than ``"None"`` is passed to ``int`` unchanged, so the
    usual ``ValueError`` propagates for non-numeric text.
    """
    if string == "None":
        return None
    return int(string)
|
|
|
|
|
|
def optional_float(string):
    """Parse ``string`` with ``float``, mapping the literal ``"None"`` to ``None``.

    Anything other than ``"None"`` is passed to ``float`` unchanged, so the
    usual ``ValueError`` propagates for non-numeric text.
    """
    if string == "None":
        return None
    return float(string)
|
|
|
|
|
|
def compression_ratio(text) -> float:
    """Return how much ``text`` shrinks under zlib compression.

    The result is the UTF-8 encoded size of ``text`` divided by the size of
    its zlib-compressed form, so higher values mean more redundant text.

    Both sizes are measured in bytes. Using the character count for the
    numerator would understate the ratio for text containing multi-byte
    characters, because the compressor operates on the encoded bytes.
    """
    text_bytes = text.encode("utf-8")
    return len(text_bytes) / len(zlib.compress(text_bytes))
|
|
|
|
|
|
def format_timestamp(seconds: float):
    """Render a non-negative number of seconds as ``[H:]MM:SS.mmm``.

    The value is rounded to the nearest millisecond before being split into
    fields. Minutes and seconds are zero-padded to two digits and
    milliseconds to three; the hours field (unpadded) is emitted only when it
    is greater than zero.

    A negative ``seconds`` trips an ``assert`` (skipped under ``-O``).
    """
    assert seconds >= 0, "non-negative timestamp expected"
    total_ms = round(seconds * 1000.0)

    # Peel off each unit in turn, carrying the remainder down to the next one.
    hours, rest = divmod(total_ms, 3_600_000)
    minutes, rest = divmod(rest, 60_000)
    secs, millis = divmod(rest, 1_000)

    hours_prefix = f"{hours}:" if hours > 0 else ""
    return hours_prefix + f"{minutes:02d}:{secs:02d}.{millis:03d}"
|
|
|
|
|
|
def write_vtt(transcript: Iterator[dict], file: TextIO):
    """Write ``transcript`` to ``file`` as a WebVTT document.

    A ``WEBVTT`` header followed by a blank line is written first. Each
    segment is then emitted as a timing line and a text line followed by a
    blank line. Every segment must provide ``'start'``, ``'end'`` and
    ``'text'`` keys; the start and end values are rendered with
    ``format_timestamp``.

    The file is flushed after each segment.
    """
    print("WEBVTT\n", file=file)
    for segment in transcript:
        start = format_timestamp(segment['start'])
        end = format_timestamp(segment['end'])
        # "-->" separates the two timestamps on the timing line, so it is
        # rewritten inside the cue text.
        text = segment['text'].replace('-->', '->')
        print(f"{start} --> {end}\n{text}\n", file=file, flush=True)
|