Removed duplicates but the timing is off.
This commit is contained in:
parent
bfb0a135d1
commit
a538387345
@ -23,6 +23,30 @@ def parse_vtt(file_path):
|
||||
|
||||
return parsed_captions
|
||||
|
||||
def _extend_cue_end(kept, duplicate):
    """Stretch *kept*'s timing so it ends where *duplicate* ends.

    Only acts when both ``'time'`` values are strings containing the WebVTT
    ``-->`` separator; any other shape is left untouched, so the kept cue
    simply retains the timing of the first caption in its run.
    """
    first_time = kept.get("time")
    last_time = duplicate.get("time")
    if not (isinstance(first_time, str) and isinstance(last_time, str)):
        return
    start, first_arrow, _ = first_time.partition("-->")
    _, last_arrow, end = last_time.partition("-->")
    if first_arrow and last_arrow:
        # Anything after the end timestamp (cue settings) comes from the
        # last duplicate, since that is the line the end time is taken from.
        kept["time"] = f"{start.strip()} --> {end.strip()}"


def remove_duplicates(subtitles):
    """Collapse runs of consecutive captions that carry the same text.

    Each run of adjacent captions with identical, non-empty ``'text'`` is
    reduced to a single caption that starts when the first one starts and
    ends when the last one ends. Captions whose text is the empty string are
    dropped, and they also break a run (``a, "", a`` yields two ``a`` cues).

    Args:
        subtitles: Sequence of caption mappings with a ``'text'`` key and,
            presumably, a ``'time'`` key holding a ``"start --> end"`` cue
            timing line (that is how ``export_to_vtt`` writes it out).

    Returns:
        A new list of shallow-copied captions; the input is not modified.
        An empty input yields an empty list.
    """
    no_dupes = []
    prev_text = None
    for caption in subtitles:
        text = caption["text"]
        if text != "":
            if no_dupes and text == prev_text:
                # Same text as the immediately preceding caption: keep the
                # cue already stored and only push its end time forward.
                _extend_cue_end(no_dupes[-1], caption)
            else:
                # Copy so later timing merges never touch the caller's data.
                no_dupes.append(dict(caption))
        prev_text = text
    return no_dupes
|
||||
|
||||
def export_to_vtt(subtitles, output_file_path):
    """Write *subtitles* to *output_file_path* as a WebVTT text file.

    The file is opened for writing as UTF-8 (replacing any existing content)
    and starts with the ``WEBVTT`` header line followed by a blank line.
    Each subtitle then contributes its ``'time'`` value, its ``'text'``
    value, and a blank separator line.

    Args:
        subtitles: Iterable of mappings providing ``'time'`` and ``'text'``.
        output_file_path: Destination path of the generated file.
    """
    with open(output_file_path, 'w', encoding='utf-8') as vtt_out:
        # Header first: signature line plus the mandatory blank line.
        vtt_out.write("WEBVTT\n\n")
        # One cue per entry: timing line, payload, blank separator.
        vtt_out.writelines(
            f"{cue['time']}\n{cue['text']}\n\n" for cue in subtitles
        )
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
@ -30,7 +54,10 @@ if __name__ == "__main__":
|
||||
vtt_file_path = sys.argv[1]
|
||||
print(vtt_file_path)
|
||||
|
||||
parsed_data = parse_vtt(vtt_file_path)
|
||||
for caption in parsed_data:
|
||||
print(caption)
|
||||
export_to_vtt(
|
||||
remove_duplicates(
|
||||
parse_vtt(vtt_file_path)
|
||||
), "output.vtt"
|
||||
)
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user