Added resync function to align text with the correct timestamps.

This commit is contained in:
- 2025-02-18 01:27:55 -08:00
parent a538387345
commit a2ebf77fcb

View File

@ -26,14 +26,35 @@ def parse_vtt(file_path):
def remove_duplicates(subtitles): def remove_duplicates(subtitles):
no_dupes = list() no_dupes = list()
prev = subtitles[0] prev = subtitles[0]
# print(type(prev), ":", prev)
for caption in subtitles: for caption in subtitles:
if prev["text"] != str() and caption["text"] != prev["text"]: if prev["text"] != str() and caption["text"] != prev["text"]:
# print(prev)
no_dupes.append(prev) no_dupes.append(prev)
prev = caption prev = caption
return no_dupes return no_dupes
def resync(subtitles):
    """Shift caption text one cue later so it pairs with the previous timing.

    The first output entry pairs the first caption's text with a fixed
    placeholder timing line. Every following output entry ``i`` pairs the
    timing of input caption ``i - 1`` with the text of input caption ``i``.
    The timing of the last input caption is therefore not used, and the
    output has the same number of entries as the input.

    Args:
        subtitles: Sequence of caption dicts, each with a ``"time"`` and a
            ``"text"`` key.

    Returns:
        A new list of ``{"time": ..., "text": ...}`` dicts. The input is not
        modified. An empty input yields an empty list.
    """
    # Nothing to re-align; avoids an IndexError on subtitles[0] below.
    if not subtitles:
        return []

    # Placeholder timing for the first caption, which has no earlier cue to
    # borrow a timing from. WebVTT requires two-digit seconds, so the end
    # time is written as 00:00:05.000.
    init = "00:00:00.000 --> 00:00:05.000 align:start position:0%"
    new_subs = [{"time": init, "text": subtitles[0]["text"]}]

    # Walk adjacent pairs: keep the earlier cue's timing and the later cue's
    # text. The first cue's text was already emitted above.
    for curr, next_ in zip(subtitles, subtitles[1:]):
        new_subs.append({"time": curr["time"], "text": next_["text"]})
    return new_subs
def export_to_vtt(subtitles, output_file_path): def export_to_vtt(subtitles, output_file_path):
with open(output_file_path, 'w', encoding='utf-8') as file: with open(output_file_path, 'w', encoding='utf-8') as file:
# Write the WebVTT header # Write the WebVTT header
@ -55,9 +76,9 @@ if __name__ == "__main__":
print(vtt_file_path) print(vtt_file_path)
export_to_vtt( export_to_vtt(
remove_duplicates( resync(remove_duplicates(
parse_vtt(vtt_file_path) parse_vtt(vtt_file_path)
), "output.vtt" )), "output.vtt"
) )