Initial: added basic example from AI.

2025-02-17 20:58:09 -08:00 · 2025-02-17 20:58:09 -08:00 · bfb0a135d1
commit bfb0a135d1
1 changed files with 36 additions and 0 deletions
--- a/parse-subs.py
+++ b/parse-subs.py
@@ -0,0 +1,36 @@
+
+def parse_vtt(file_path):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        content = file.read()
+
+    # Split the content into lines
+    lines = content.strip().split('\n')
+    parsed_captions = []
+    
+    # Skip the first line if it is the WebVTT header
+    if lines[0] == 'WEBVTT':
+        lines = lines[1:]
+
+    # Process each caption block
+    for i in range(len(lines)):
+        if '-->' in lines[i]:  # This line contains the timing
+            time = lines[i].strip()
+            text = lines[i + 1].strip() if i + 1 < len(lines) else ''
+            parsed_captions.append({
+                'time': time,
+                'text': text
+            })
+
+    return parsed_captions
+
+if __name__ == "__main__":
+    import sys
+
+
+    vtt_file_path = sys.argv[1]
+    print(vtt_file_path)
+
+    parsed_data = parse_vtt(vtt_file_path)
+    for caption in parsed_data:
+        print(caption)
+