Removed duplicates but the timing is off.

This commit is contained in:
- 2025-02-17 23:34:36 -08:00
parent bfb0a135d1
commit a538387345

View File

@ -23,6 +23,30 @@ def parse_vtt(file_path):
return parsed_captions
def remove_duplicates(subtitles):
    """Collapse consecutive captions that carry the same text.

    Walks ``subtitles`` in order and, for every run of adjacent captions
    whose ``"text"`` values are equal, keeps only the last caption of that
    run. Runs whose text is the empty string are dropped entirely.

    Args:
        subtitles: Iterable of caption mappings, each with a ``"text"`` key.

    Returns:
        A new list holding the surviving caption objects (not copies), in
        their original order. An empty input yields an empty list.
    """
    # NOTE(review): the survivor of a run is its *last* caption, so it keeps
    # only that caption's own timing rather than spanning the whole run.
    # This may be the source of the "timing is off" symptom -- confirm
    # against the structure produced by parse_vtt before changing it.
    no_dupes = []
    prev = None  # no caption seen yet; also makes empty input safe
    for caption in subtitles:
        if (
            prev is not None
            and prev["text"] != ""
            and caption["text"] != prev["text"]
        ):
            no_dupes.append(prev)
        prev = caption

    # The loop only emits a caption when the *next* one differs, so the
    # final run has to be flushed explicitly or it would be lost.
    if prev is not None and prev["text"] != "":
        no_dupes.append(prev)
    return no_dupes
def export_to_vtt(subtitles, output_file_path):
    """Write captions to ``output_file_path`` in WebVTT layout.

    The file is opened for writing as UTF-8 (truncating any existing
    content). It starts with the ``WEBVTT`` header and a blank line, then
    holds one entry per caption: the caption's ``'time'`` value on one
    line, its ``'text'`` value on the next, and a blank separator line.

    Args:
        subtitles: Iterable of mappings with ``'time'`` and ``'text'`` keys.
        output_file_path: Destination path of the file to write.
    """
    with open(output_file_path, mode="w", encoding="utf-8") as out:
        # Header first, so it is on disk even if a later caption is malformed.
        out.write("WEBVTT\n\n")
        out.writelines(
            f"{cue['time']}\n{cue['text']}\n\n" for cue in subtitles
        )
if __name__ == "__main__":
import sys
@ -30,7 +54,10 @@ if __name__ == "__main__":
# The input .vtt path is taken from the first command-line argument.
vtt_file_path = sys.argv[1]
print(vtt_file_path)

# Parse once; the same captions feed both the debug dump and the export
# (previously the file was parsed a second time just for the export).
parsed_data = parse_vtt(vtt_file_path)
for caption in parsed_data:
    print(caption)

# Collapse repeated captions and write them to "output.vtt", a path
# relative to the current working directory.
export_to_vtt(remove_duplicates(parsed_data), "output.vtt")