Added resync function to line up text with the correct timestamps.
This commit is contained in:
parent
a538387345
commit
a2ebf77fcb
@ -26,14 +26,35 @@ def parse_vtt(file_path):
|
||||
def remove_duplicates(subtitles):
    """Collapse runs of consecutive captions that share the same text.

    For every run of adjacent captions with identical ``"text"``, only the
    last caption of the run is kept (so the kept entry carries that last
    caption's ``"time"``). Captions whose text is the empty string are
    dropped entirely.

    Args:
        subtitles: Sequence of caption dicts, each with at least a
            ``"text"`` key. The input is not modified.

    Returns:
        A new list containing the surviving caption dicts, in order.
        An empty input yields an empty list.
    """
    no_dupes = []
    prev = None
    for caption in subtitles:
        # A change of text closes the previous run; emit its last caption
        # unless that caption was blank.
        if (
            prev is not None
            and prev["text"] != ""
            and caption["text"] != prev["text"]
        ):
            no_dupes.append(prev)
        prev = caption
    # The final run is never followed by a "different" caption, so it has to
    # be flushed explicitly; otherwise the last subtitle is silently lost.
    if prev is not None and prev["text"] != "":
        no_dupes.append(prev)
    return no_dupes
|
||||
|
||||
def resync(subtitles):
    """Shift caption text one slot earlier relative to the timestamps.

    The first output caption pairs a fixed placeholder timing with the text
    of the first input caption. Every following output caption pairs the
    ``"time"`` of input caption ``i`` with the ``"text"`` of input caption
    ``i + 1``. The ``"time"`` of the last input caption is therefore unused,
    and the output has the same length as the input.

    NOTE(review): presumably the parsed captions carry their text one cue
    late, which this shift compensates for -- confirm against the parser.

    Args:
        subtitles: Sequence of caption dicts with ``"time"`` and ``"text"``
            keys. The input is not modified.

    Returns:
        A new list of ``{"time": ..., "text": ...}`` dicts. An empty input
        yields an empty list.
    """
    if not subtitles:
        return []

    # Placeholder timing for the first caption, which has no preceding cue
    # to borrow a timestamp from. WebVTT timestamps need two-digit seconds.
    init = "00:00:00.000 --> 00:00:05.000 align:start position:0%"
    new_subs = [{"time": init, "text": subtitles[0]["text"]}]

    # Pair each caption's timing with the text of the caption after it.
    for curr, next_ in zip(subtitles, subtitles[1:]):
        new_subs.append({"time": curr["time"], "text": next_["text"]})
    return new_subs
|
||||
|
||||
def export_to_vtt(subtitles, output_file_path):
|
||||
with open(output_file_path, 'w', encoding='utf-8') as file:
|
||||
# Write the WebVTT header
|
||||
@ -55,9 +76,9 @@ if __name__ == "__main__":
|
||||
print(vtt_file_path)
|
||||
|
||||
export_to_vtt(
|
||||
remove_duplicates(
|
||||
resync(remove_duplicates(
|
||||
parse_vtt(vtt_file_path)
|
||||
), "output.vtt"
|
||||
)), "output.vtt"
|
||||
)
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user