def parse_vtt(file_path):
    """Parse a WebVTT file into a list of caption dictionaries.

    Every line containing ``-->`` is treated as a cue timing line. The
    lines that follow it, up to the next blank line or the next timing
    line, form the cue text.

    Args:
        file_path: Path to the ``.vtt`` file; read as UTF-8 (a leading
            byte-order mark is tolerated).

    Returns:
        A list of ``{'time': str, 'text': str}`` dictionaries in file
        order. ``time`` is the stripped timing line (including any cue
        settings that follow the timestamps). ``text`` is the cue payload
        with each line stripped and multiple lines joined by ``'\\n'``;
        it is ``''`` when the cue has no payload.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' drops an optional BOM so the header check below still matches.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        content = file.read()

    lines = [line.strip() for line in content.strip().split('\n')]

    # Skip the header line; it may carry trailing text ("WEBVTT - title").
    if lines and lines[0].startswith('WEBVTT'):
        lines = lines[1:]

    parsed_captions = []
    index = 0
    total = len(lines)
    while index < total:
        line = lines[index]
        index += 1
        if '-->' not in line:
            # Cue identifiers, NOTE blocks and blank lines are ignored.
            continue

        # Gather the whole payload, not just the first line. Stop at a
        # blank line (end of cue) or at another timing line, so a cue
        # with an empty payload never swallows its successor's timing.
        payload = []
        while index < total and lines[index] and '-->' not in lines[index]:
            payload.append(lines[index])
            index += 1

        parsed_captions.append({
            'time': line,
            'text': '\n'.join(payload),
        })

    return parsed_captions


if __name__ == "__main__":
    import sys

    # Fail with a usage message instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <file.vtt>")

    vtt_file_path = sys.argv[1]
    print(vtt_file_path)
    for caption in parse_vtt(vtt_file_path):
        print(caption)