Removed duplicates but the timing is off.
This commit is contained in:
		
							parent
							
								
									bfb0a135d1
								
							
						
					
					
						commit
						a538387345
					
				| @ -23,6 +23,30 @@ def parse_vtt(file_path): | ||||
| 
 | ||||
|     return parsed_captions | ||||
| 
 | ||||
def remove_duplicates(subtitles):
    """Collapse runs of consecutive captions that share the same text.

    For every run of adjacent captions whose ``"text"`` values are equal,
    exactly one caption is kept: the last entry of the run. Runs whose text
    is the empty string are dropped entirely.

    Args:
        subtitles: Iterable of caption mappings; each must have a ``"text"``
            key. May be empty.

    Returns:
        A new list of the surviving caption objects, in their original order.
        The input is not modified.

    Raises:
        KeyError: If a caption has no ``"text"`` key.
    """
    # NOTE(review): the surviving caption is the *last* one of its run, so it
    # carries that entry's timing rather than the timing of the first
    # occurrence -- presumably the cause of the "timing is off" symptom.
    # Merging the time span would require knowing the format of the "time"
    # value, which is not visible from this block; confirm before changing.
    no_dupes = []
    prev = None  # None until the first caption has been seen
    for caption in subtitles:
        # A change of text closes the previous run; emit it unless it was blank.
        if prev is not None and prev["text"] != "" and caption["text"] != prev["text"]:
            no_dupes.append(prev)
        prev = caption

    # The final run has no successor to trigger the check above, so flush it
    # here; otherwise the last caption of the file would be silently lost.
    if prev is not None and prev["text"] != "":
        no_dupes.append(prev)
    return no_dupes
| 
 | ||||
def export_to_vtt(subtitles, output_file_path):
    """Write captions to ``output_file_path`` as UTF-8 WebVTT text.

    The file starts with the ``WEBVTT`` header line followed by a blank line.
    Each caption is then written as its ``'time'`` value on one line, its
    ``'text'`` value on the next, and a blank separator line.

    Args:
        subtitles: Iterable of mappings providing ``'time'`` and ``'text'``.
        output_file_path: Destination path; opened in ``'w'`` mode, so an
            existing file is overwritten.
    """
    with open(output_file_path, 'w', encoding='utf-8') as out:
        # Header required at the top of the output.
        out.write("WEBVTT\n\n")

        for cue in subtitles:
            # Look up 'time' before 'text', one cue at a time.
            timing, body = cue['time'], cue['text']
            out.write(f"{timing}\n{body}\n\n")
| 
 | ||||
| if __name__ == "__main__": | ||||
|     import sys | ||||
| 
 | ||||
| @ -30,7 +54,10 @@ if __name__ == "__main__": | ||||
|     vtt_file_path = sys.argv[1] | ||||
|     print(vtt_file_path) | ||||
| 
 | ||||
|     parsed_data = parse_vtt(vtt_file_path) | ||||
|     for caption in parsed_data: | ||||
|         print(caption) | ||||
|     export_to_vtt( | ||||
|         remove_duplicates( | ||||
|             parse_vtt(vtt_file_path) | ||||
|         ), "output.vtt" | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user