Compare commits
	
		
			1 Commits
		
	
	
		
			master
			...
			readme-edi
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| edc7fa0550 | 
							
								
								
									
										14
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								Makefile
									
									
									
									
									
								
							| @ -1,20 +1,14 @@ | ||||
| 
 | ||||
| ORIGINAL_SUBS=autogen-subs | ||||
| VIDEO=1ere.mkv | ||||
| # VIDEO=1ere-introcut.mp4
 | ||||
| 
 | ||||
| test: getvideo | ||||
| 	ffmpeg -i $(VIDEO) -i mangen-subs.fr.vtt -vcodec copy -acodec copy -c:s mov_text test.mp4 -y | ||||
| 
 | ||||
| getvideo: | ||||
| 	yt-dlp https://www.youtube.com/watch?v=WRq2197FlMw -o $(VIDEO) | ||||
| VIDEO=1ere-introcut.mp4 | ||||
| 
 | ||||
| cleansubs: | ||||
| 	python scripts/parse-subs.py $(ORIGINAL_SUBS).fr.vtt | ||||
| # 	ffmpeg -i 1ere.mkv -i output.vtt -vcodec copy -acodec copy -c:s mov_text test.mp4 -y
 | ||||
| 
 | ||||
| getsubs: | ||||
| 	yt-dlp --write-auto-subs --sub-lang=fr --skip-download  https://www.youtube.com/watch?v=WRq2197FlMw -o autogen-subs | ||||
| 
 | ||||
| # omit the music
 | ||||
| # clip:
 | ||||
| # 	ffmpeg -ss 3:18 -i 1ere-combined.mp4 -vcodec copy -acodec copy 1ere-introcut.mp4
 | ||||
| clip: | ||||
| 	ffmpeg -ss 3:18 -i 1ere-combined.mp4 -vcodec copy -acodec copy $(VIDEO) | ||||
|  | ||||
| @ -9,18 +9,14 @@ The original video and the auto-generated captions in French can be sourced from | ||||
| 
 | ||||
| The automatically generated French transcript needs to be checked over by a human. The French transcript then needs to be translated to English, also with human oversight. | ||||
| 
 | ||||
| * Verify and edit the file `mangen-subs.fr.vtt` which contains the current draft of the human-edited auto-generated transcript. | ||||
| * Run `make test` to add subtitles to the video and ensure the format is correct. | ||||
| File `mangen-subs.fr.vtt` contains the current draft of the human-edited auto-generated transcript. | ||||
| 
 | ||||
| ### Contributors | ||||
| 
 | ||||
| * tuxmain, [https://txmn.tk/](https://txmn.tk/) | ||||
| * 1 anonymous contributor(s) | ||||
| 
 | ||||
| ### Getting the original auto-generated transcript | ||||
| 
 | ||||
| *To see how much a southern accent and speech quirks can throw off an AI.* | ||||
| 
 | ||||
| Run the following to download the auto-generated transcript from YouTube to a new file named `autogen-subs.fr.vtt`. | ||||
| 
 | ||||
|     yt-dlp --write-auto-subs --sub-lang=fr --skip-download  https://www.youtube.com/watch?v=WRq2197FlMw -o autogen-subs | ||||
|  | ||||
							
								
								
									
										3566
									
								
								mangen-subs.fr.vtt
									
									
									
									
									
								
							
							
						
						
									
										3566
									
								
								mangen-subs.fr.vtt
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -1,50 +0,0 @@ | ||||
| 
 | ||||
| import re | ||||
| import sys | ||||
| import argparse | ||||
| 
 | ||||
def offset_timestamp(timestamp, offset):
    """Shift a WebVTT timestamp by ``offset`` seconds.

    Args:
        timestamp: A timestamp of the form ``hh:mm:ss.ttt``; the hours
            field may be omitted (``mm:ss.ttt``).
        offset: Number of seconds to add (may be negative or fractional).

    Returns:
        The shifted timestamp formatted as ``hh:mm:ss.ttt`` with every
        field zero-padded. Results that would fall before zero are clamped
        to ``00:00:00.000``.

    Raises:
        ValueError: If ``timestamp`` does not have three or four numeric
            fields.
    """
    parts = re.split('[:.]', timestamp.strip())
    if len(parts) == 3:
        # Short form without an hours field.
        parts.insert(0, '0')
    if len(parts) != 4:
        raise ValueError(f"invalid WebVTT timestamp: {timestamp!r}")
    hours, minutes, seconds, millis = (int(part) for part in parts)

    # Work in integer milliseconds so that formatting can never round a
    # value such as 59.9996 s up to an out-of-range "60.000".
    total_ms = ((hours * 60 + minutes) * 60 + seconds) * 1000 + millis
    total_ms += round(offset * 1000)
    # A cue cannot start before the beginning of the media.
    total_ms = max(total_ms, 0)

    total_seconds, new_millis = divmod(total_ms, 1000)
    total_minutes, new_seconds = divmod(total_seconds, 60)
    new_hours, new_minutes = divmod(total_minutes, 60)
    return f"{new_hours:02}:{new_minutes:02}:{new_seconds:02}.{new_millis:03}"
| 
 | ||||
def update_webvtt(file_path, offset):
    """Write a copy of a WebVTT file with every cue timing shifted.

    Each line that starts with ``<start> --> <end>`` is rewritten as
    ``<new start> --> <new end>``; anything after the end timestamp on that
    line (cue settings such as ``align:start``) is dropped. All other lines
    are copied unchanged. The result is written next to the input, to a
    file whose name is the input's name prefixed with ``updated_``.

    Args:
        file_path: Path of the WebVTT file to read.
        offset: Seconds to add to each timestamp (negative shifts earlier).
    """
    # Local import so this function does not depend on edits to the
    # module-level import list.
    import os

    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # Match the two timestamps explicitly instead of slicing the line at
    # "align": that slice silently cut the last character when "align" was
    # absent and broke on lines carrying other cue settings.
    cue_timing = re.compile(r'^\s*(\S+)[ \t]+-->[ \t]+(\S+)')

    updated_lines = []
    for line in lines:
        match = cue_timing.match(line)
        if match:
            updated_start = offset_timestamp(match.group(1), offset)
            updated_end = offset_timestamp(match.group(2), offset)
            updated_lines.append(f"{updated_start} --> {updated_end}\n")
        else:
            updated_lines.append(line)

    # Prefix only the file name so that an input such as "subs/a.vtt" is
    # written to "subs/updated_a.vtt" rather than "updated_subs/a.vtt".
    directory, name = os.path.split(file_path)
    output_path = os.path.join(directory, 'updated_' + name)
    with open(output_path, 'w', encoding='utf-8') as file:
        file.writelines(updated_lines)
| 
 | ||||
def parse_time_offset(text):
    """Convert a command-line time offset to a number of seconds.

    Accepted forms:
        ``MM:SS`` or ``MM:SS.fff`` -- minutes and (fractional) seconds.
        ``MM.SS`` -- legacy form, read as minutes and whole seconds.
        ``SS`` -- plain seconds.

    Raises:
        ValueError: If ``text`` matches none of the forms above.
    """
    if ':' in text:
        minutes, seconds = text.split(':')
    elif text.count('.') == 1:
        # Kept for backward compatibility: "3.18" means 3 min 18 s.
        minutes, seconds = text.split('.')
    else:
        minutes, seconds = '0', text
    return float(minutes) * 60 + float(seconds)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Shift every cue of a WebVTT file earlier in time.')
    parser.add_argument('input_file', type=str, help='The input file name')
    parser.add_argument(
        'time_offset', type=str,
        help='How far to shift the cues back: MM:SS[.fff] or plain seconds')
    args = parser.parse_args()

    try:
        offset = parse_time_offset(args.time_offset)
    except ValueError:
        # Report bad input as a usage error instead of a traceback.
        parser.error("invalid time offset: %r" % args.time_offset)

    print("bumping timestamps in file %s by %f seconds" % (args.input_file, offset))
    # The offset is negated: the cues are moved back by the given amount.
    update_webvtt(args.input_file, -offset)
| 
 | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user