Initial: added basic example from AI.

2025-02-17 20:58:09 -08:00 · 2025-02-17 20:58:09 -08:00 · bfb0a135d1
commit bfb0a135d1
1 changed files with 36 additions and 0 deletions
--- a/parse-subs.py
+++ b/parse-subs.py
@@ -0,0 +1,36 @@
 def parse_vtt(file_path):
     """Parse a WebVTT subtitle file into a list of caption dicts.

     Every line containing ``-->`` is treated as a cue timing line. The cue
     text is made of all following lines up to the next blank line, the next
     timing line, or the end of the file, so multi-line cues are kept whole.

     Args:
         file_path: Path of the ``.vtt`` file to read (UTF-8, BOM tolerated).

     Returns:
         A list of ``{'time': ..., 'text': ...}`` dicts in file order.
         ``time`` is the stripped timing line; ``text`` is the stripped cue
         lines joined with ``'\\n'`` (an empty string when the cue has none).
     """
     # utf-8-sig drops a leading byte-order mark, which str.strip() would
     # otherwise leave attached to the first line of the file.
     with open(file_path, 'r', encoding='utf-8-sig') as file:
         content = file.read()

     lines = content.strip().split('\n')

     # The header may carry trailing text ("WEBVTT - title"), so match on the
     # prefix rather than on the whole line.
     if lines[0].startswith('WEBVTT'):
         lines = lines[1:]

     parsed_captions = []
     total = len(lines)
     pos = 0
     while pos < total:
         line = lines[pos]
         pos += 1
         if '-->' not in line:
             # Cue identifiers, NOTE blocks and blank separators are skipped.
             continue

         # Gather the whole cue payload; a blank line or the next timing
         # line marks the end of this cue.
         text_lines = []
         while pos < total and lines[pos].strip() and '-->' not in lines[pos]:
             text_lines.append(lines[pos].strip())
             pos += 1

         parsed_captions.append({
             'time': line.strip(),
             'text': '\n'.join(text_lines),
         })
     return parsed_captions
 if __name__ == "__main__":
     import sys

     # Command-line entry point: echo the path given as the first argument,
     # then print each caption dict parsed from that file on its own line.
     if len(sys.argv) < 2:
         # Exit with a usage hint on stderr (status 1) instead of letting a
         # bare IndexError traceback reach the user.
         sys.exit(f"usage: {sys.argv[0]} <file.vtt>")

     vtt_file_path = sys.argv[1]
     print(vtt_file_path)
     for caption in parse_vtt(vtt_file_path):
         print(caption)