Initial: added basic example from AI.

This commit is contained in:
- 2025-02-17 20:58:09 -08:00
commit bfb0a135d1

36
parse-subs.py Normal file
View File

@@ -0,0 +1,36 @@
def parse_vtt(file_path):
    """Parse a WebVTT subtitle file into a list of caption dicts.

    A line containing ``-->`` is treated as a cue timing line. Every
    following line up to the next blank line (or the next timing line, or
    the end of the file) is collected as that cue's text.

    Args:
        file_path: Path to the ``.vtt`` file to read (decoded as UTF-8).

    Returns:
        A list of ``{'time': str, 'text': str}`` dicts in file order.
        ``time`` is the stripped timing line; ``text`` is the cue's stripped
        payload lines joined with ``'\\n'`` (an empty string when the cue
        has no payload).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' drops an optional leading byte-order mark; text mode
    # already normalises CRLF / CR line endings to '\n'.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        lines = [raw_line.strip() for raw_line in file]

    parsed_captions = []
    line_iter = iter(lines)
    line = next(line_iter, None)
    while line is not None:
        # The header, cue identifiers and any other line without '-->' are
        # skipped here, so no dedicated "WEBVTT" header check is needed.
        if '-->' not in line:
            line = next(line_iter, None)
            continue

        time = line
        text_lines = []
        line = next(line_iter, None)
        # Gather the whole payload, not just its first line. Stop at a blank
        # line (end of cue) or at another timing line, so a cue with no
        # payload never swallows the following cue's timing as its text.
        while line and '-->' not in line:
            text_lines.append(line)
            line = next(line_iter, None)

        parsed_captions.append({
            'time': time,
            'text': '\n'.join(text_lines),
        })
    return parsed_captions
if __name__ == "__main__":
    # Command-line entry point: parse the .vtt file named by the first
    # argument and print each caption dict on its own line.
    import sys

    # Exit with a usage hint (written to stderr, non-zero status) rather
    # than an IndexError traceback when the path argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <file.vtt>".format(sys.argv[0]))

    vtt_file_path = sys.argv[1]
    print(vtt_file_path)
    parsed_data = parse_vtt(vtt_file_path)
    for caption in parsed_data:
        print(caption)