# Timing line given to the very first caption, which has no earlier cue to
# borrow a timestamp from. WebVTT timestamps need two-digit seconds, hence
# "05" rather than "5".
_PLACEHOLDER_FIRST_TIME = (
    "00:00:00.000 --> 00:00:05.000 align:start position:0%"
)


def resync(subtitles, *, first_time=_PLACEHOLDER_FIRST_TIME):
    """Shift every caption's text one cue earlier on the timeline.

    The first caption's text is paired with ``first_time``; every later
    caption's text is paired with the timing line of the caption directly
    before it. The timing line of the last caption is therefore unused.

    Args:
        subtitles: List of dicts, each providing a ``"time"`` and a
            ``"text"`` entry. The list is not modified.
        first_time: Timing line assigned to the first caption. Defaults to
            a placeholder spanning the first five seconds.

    Returns:
        A new list of ``{"time": ..., "text": ...}`` dicts with the same
        length as ``subtitles``; an empty list when ``subtitles`` is empty.
    """
    # Nothing to shift; avoids an IndexError on subtitles[0].
    if not subtitles:
        return []

    resynced = [{"time": first_time, "text": subtitles[0]["text"]}]
    # Walk adjacent pairs: keep the earlier cue's timing, take the later
    # cue's text.
    resynced.extend(
        {"time": earlier["time"], "text": later["text"]}
        for earlier, later in zip(subtitles, subtitles[1:])
    )
    return resynced