def parse_vtt(file_path):
    """Parse a WebVTT subtitle file into a list of caption dictionaries.

    Every line containing the ``-->`` separator is treated as a cue timing
    line. The cue text is made of all following lines up to (but excluding)
    the next blank line or the next timing line, so multi-line captions are
    kept whole.

    Args:
        file_path: Path of the ``.vtt`` file to read (decoded as UTF-8; a
            leading byte-order mark is tolerated).

    Returns:
        A list of ``{'time': ..., 'text': ...}`` dictionaries in file order.
        ``time`` is the stripped timing line (including any cue settings that
        follow the end timestamp). ``text`` is the cue payload with each line
        stripped and the lines joined by ``'\\n'``; it is ``''`` for a cue
        without payload.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' drops an optional BOM, which would otherwise stick to the
    # first line and defeat the header check below.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        content = file.read()

    # Text mode already normalised CRLF/CR to '\n'.
    lines = content.split('\n')
    total = len(lines)

    # Skip the header line; it may carry extra text after the magic word
    # (e.g. "WEBVTT - My captions").
    index = 1 if lines[0].startswith('WEBVTT') else 0

    parsed_captions = []
    while index < total:
        timing = lines[index].strip()
        index += 1
        if '-->' not in timing:
            # Cue identifiers, NOTE/STYLE blocks and blank lines are ignored.
            continue

        # Collect the whole payload: it ends at a blank line, or at another
        # timing line when the file omits the separating blank line.
        payload = []
        while index < total:
            candidate = lines[index].strip()
            if not candidate or '-->' in candidate:
                break
            payload.append(candidate)
            index += 1

        parsed_captions.append({
            'time': timing,
            'text': '\n'.join(payload),
        })

    return parsed_captions


if __name__ == "__main__":
    import sys

    # Fail with a readable message instead of an IndexError traceback when
    # the path argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <file.vtt>".format(sys.argv[0]))

    vtt_file_path = sys.argv[1]
    print(vtt_file_path)

    for caption in parse_vtt(vtt_file_path):
        print(caption)