From a5383873458e7601ca1af074ab8e285722402cc5 Mon Sep 17 00:00:00 2001
From: - <->
Date: Mon, 17 Feb 2025 23:34:36 -0800
Subject: [PATCH] Removed duplicates but the timing is off.

---
 parse-subs.py | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/parse-subs.py b/parse-subs.py
index ac08b06..fb77dbc 100644
--- a/parse-subs.py
+++ b/parse-subs.py
@@ -23,6 +23,30 @@ def parse_vtt(file_path):
 
     return parsed_captions
 
+def remove_duplicates(subtitles):
+    """Drop empty-text captions and collapse runs of adjacent identical text.
+
+    Keeps each run's first caption (original order); empty input gives []."""
+    no_dupes, prev_text = [], ""  # "" means no text is currently showing
+    for caption in subtitles:
+        if caption["text"] != "" and caption["text"] != prev_text:
+            no_dupes.append(caption)
+        prev_text = caption["text"]
+    return no_dupes
+
+def export_to_vtt(subtitles, output_file_path):
+    """Write captions to output_file_path as a UTF-8 WebVTT file.
+
+    Each caption must provide 'time' and 'text' keys; every cue is emitted as
+    the time line, the text line, then a blank separator line.
+    """
+    with open(output_file_path, 'w', encoding='utf-8') as out:
+        # The file starts with the WebVTT header followed by a blank line.
+        out.write("WEBVTT\n\n")
+        for cue in subtitles:
+            # One cue per caption, laid out as time / text / blank line.
+            out.write("{}\n{}\n\n".format(cue['time'], cue['text']))
+
 if __name__ == "__main__":
     import sys
 
@@ -30,7 +54,10 @@ if __name__ == "__main__":
     vtt_file_path = sys.argv[1]
     print(vtt_file_path)
 
-    parsed_data = parse_vtt(vtt_file_path)
-    for caption in parsed_data:
-        print(caption)
+    export_to_vtt(
+        remove_duplicates(
+            parse_vtt(vtt_file_path)
+        ), "output.vtt"
+    )
+