Added resync function to align text with correct timestamps.
This commit is contained in:
parent
a538387345
commit
a2ebf77fcb
@ -26,14 +26,35 @@ def parse_vtt(file_path):
|
|||||||
def remove_duplicates(subtitles):
    """Collapse runs of consecutive captions that share the same text.

    Each caption is compared with the one that follows it. A caption is
    kept when its text is non-empty and the following caption's text is
    different, so only the last caption of a run of identical texts
    survives, and captions with empty text are dropped.

    Args:
        subtitles: Sequence of caption dicts, each with a "text" key.

    Returns:
        A new list holding the kept caption dicts (the same dict objects,
        not copies). An empty input yields an empty list.

    NOTE(review): the final caption is never emitted because there is no
    following caption to compare it against. This matches the previous
    behaviour; confirm it is intended before changing it.
    """
    # Pairing the sequence with itself shifted by one yields every
    # (caption, following caption) pair and is naturally empty for
    # zero or one captions, so no explicit empty-input guard is needed.
    return [
        prev
        for prev, caption in zip(subtitles, subtitles[1:])
        if prev["text"] != "" and caption["text"] != prev["text"]
    ]
|
||||||
|
|
||||||
|
def resync(subtitles):
    """Shift caption texts one slot later relative to their timestamps.

    The first output entry pairs a fixed placeholder timestamp with the
    first caption's text. Every following entry pairs caption i's "time"
    with caption i+1's "text". The last caption's timestamp is therefore
    unused, and the output has the same length as the input.

    Args:
        subtitles: Sequence of caption dicts with "time" and "text" keys.

    Returns:
        A new list of {"time": ..., "text": ...} dicts; the input is not
        modified. An empty input yields an empty list.
    """
    if not subtitles:
        return []

    # Placeholder cue timing for the first text. WebVTT timestamps need
    # two-digit seconds, hence "00:00:05.000" rather than "00:00:5.000".
    init = "00:00:00.000 --> 00:00:05.000 align:start position:0%"
    new_subs = [{"time": init, "text": subtitles[0]["text"]}]

    # The first caption's text is already emitted above; from here on each
    # caption contributes only its timestamp, paired with the next text.
    new_subs.extend(
        {"time": curr["time"], "text": next_["text"]}
        for curr, next_ in zip(subtitles, subtitles[1:])
    )
    return new_subs
|
||||||
|
|
||||||
def export_to_vtt(subtitles, output_file_path):
|
def export_to_vtt(subtitles, output_file_path):
|
||||||
with open(output_file_path, 'w', encoding='utf-8') as file:
|
with open(output_file_path, 'w', encoding='utf-8') as file:
|
||||||
# Write the WebVTT header
|
# Write the WebVTT header
|
||||||
@ -55,9 +76,9 @@ if __name__ == "__main__":
|
|||||||
print(vtt_file_path)
|
print(vtt_file_path)
|
||||||
|
|
||||||
export_to_vtt(
|
export_to_vtt(
|
||||||
remove_duplicates(
|
resync(remove_duplicates(
|
||||||
parse_vtt(vtt_file_path)
|
parse_vtt(vtt_file_path)
|
||||||
), "output.vtt"
|
)), "output.vtt"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user