Removed duplicates but the timing is off.
This commit is contained in:
parent
bfb0a135d1
commit
a538387345
@ -23,6 +23,30 @@ def parse_vtt(file_path):
|
|||||||
|
|
||||||
return parsed_captions
|
return parsed_captions
|
||||||
|
|
||||||
|
def remove_duplicates(subtitles):
    """Collapse runs of consecutive captions that share the same text.

    For every run of adjacent captions whose ``"text"`` values are equal,
    only the last caption of the run is kept. Runs whose text is the empty
    string are discarded entirely.

    Args:
        subtitles: Iterable of caption mappings, each with a ``"text"`` key.

    Returns:
        A new list holding the retained caption objects in their original
        order. An empty input yields an empty list.
    """
    # NOTE(review): the kept caption carries the "time" of the *last*
    # duplicate in its run, not a range spanning the whole run. That is
    # presumably why the timing looks off after de-duplication -- confirm
    # against the "time" format produced by parse_vtt before merging ranges.
    no_dupes = []
    prev = None  # No caption seen yet; also makes empty input safe.
    for caption in subtitles:
        if (
            prev is not None
            and prev["text"] != ""
            and caption["text"] != prev["text"]
        ):
            no_dupes.append(prev)
        prev = caption

    # Flush the final run: no later caption exists to trigger the append
    # inside the loop, so without this the last caption would be lost.
    if prev is not None and prev["text"] != "":
        no_dupes.append(prev)
    return no_dupes
|
||||||
|
|
||||||
|
def export_to_vtt(subtitles, output_file_path):
    """Write captions to ``output_file_path`` as a WebVTT document.

    The file is opened for writing as UTF-8. It starts with the ``WEBVTT``
    header followed by a blank line, then one cue per caption: the
    caption's ``'time'`` value, its ``'text'`` value, and a blank
    separator line.

    Args:
        subtitles: Iterable of mappings, each with ``'time'`` and ``'text'``
            keys.
        output_file_path: Path of the file to create or overwrite.
    """
    with open(output_file_path, 'w', encoding='utf-8') as vtt_out:
        # Header line required at the top of the output.
        vtt_out.write("WEBVTT\n\n")
        # Emit each cue lazily: timing line, caption text, blank line.
        vtt_out.writelines(
            f"{cue['time']}\n{cue['text']}\n\n" for cue in subtitles
        )
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
@ -30,7 +54,10 @@ if __name__ == "__main__":
|
|||||||
vtt_file_path = sys.argv[1]
|
vtt_file_path = sys.argv[1]
|
||||||
print(vtt_file_path)
|
print(vtt_file_path)
|
||||||
|
|
||||||
parsed_data = parse_vtt(vtt_file_path)
|
export_to_vtt(
|
||||||
for caption in parsed_data:
|
remove_duplicates(
|
||||||
print(caption)
|
parse_vtt(vtt_file_path)
|
||||||
|
), "output.vtt"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user