Initial: added basic example from AI.
This commit is contained in:
commit
bfb0a135d1
36
parse-subs.py
Normal file
36
parse-subs.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
|
||||||
|
def parse_vtt(file_path):
    """Parse a WebVTT subtitle file into a list of caption dictionaries.

    Every line containing the ``-->`` separator is treated as a cue timing
    line. The cue text is made of all following lines up to the next blank
    line (or the next timing line), so multi-line cues are kept whole.

    Args:
        file_path: Path to the ``.vtt`` file, read as UTF-8.

    Returns:
        A list of ``{'time': ..., 'text': ...}`` dictionaries in file order.
        ``time`` is the stripped timing line; ``text`` is the stripped cue
        lines joined with ``'\n'`` (an empty string when the cue has no
        text).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain UTF-8 as well, but drops a leading byte-order
    # mark so it cannot hide the header from the check below.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        lines = file.read().strip().split('\n')

    # Skip the header line; it may carry extra text after "WEBVTT".
    if lines[0].startswith('WEBVTT'):
        lines = lines[1:]

    parsed_captions = []
    index = 0
    total = len(lines)
    while index < total:
        line = lines[index].strip()
        index += 1
        if '-->' not in line:
            # Cue identifiers, notes and blank lines are not captions.
            continue

        # Gather every text line of this cue, not just the first one.
        payload = []
        while index < total:
            text_line = lines[index].strip()
            if not text_line or '-->' in text_line:
                break
            payload.append(text_line)
            index += 1

        parsed_captions.append({
            'time': line,
            'text': '\n'.join(payload),
        })

    return parsed_captions
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: parse the WebVTT file named on the command line
    # and print each parsed caption dictionary on its own line.
    import sys

    # Exit with a usage message instead of an IndexError traceback when no
    # file path is supplied. Extra arguments are still ignored, as before.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <file.vtt>".format(sys.argv[0]))

    vtt_file_path = sys.argv[1]
    print(vtt_file_path)

    parsed_data = parse_vtt(vtt_file_path)
    for caption in parsed_data:
        print(caption)
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user