Update README

2025-02-22 07:27:58 -08:00
4 changed files with 1774 additions and 1862 deletions
--- a/14
+++ b/14
@@ -1,20 +1,14 @@

 ORIGINAL_SUBS=autogen-subs
-VIDEO=1ere.mkv
-# VIDEO=1ere-introcut.mp4
-
-test: getvideo
-	ffmpeg -i $(VIDEO) -i mangen-subs.fr.vtt -vcodec copy -acodec copy -c:s mov_text test.mp4 -y
-
-getvideo:
-	yt-dlp https://www.youtube.com/watch?v=WRq2197FlMw -o $(VIDEO)
+VIDEO=1ere-introcut.mp4

 cleansubs:
 	python scripts/parse-subs.py $(ORIGINAL_SUBS).fr.vtt
+# 	ffmpeg -i 1ere.mkv -i output.vtt -vcodec copy -acodec copy -c:s mov_text test.mp4 -y

 getsubs:
 	yt-dlp --write-auto-subs --sub-lang=fr --skip-download  https://www.youtube.com/watch?v=WRq2197FlMw -o autogen-subs

 # omit the music
-# clip:
-# 	ffmpeg -ss 3:18 -i 1ere-combined.mp4 -vcodec copy -acodec copy 1ere-introcut.mp4
+clip:
+	ffmpeg -ss 3:18 -i 1ere-combined.mp4 -vcodec copy -acodec copy $(VIDEO)
--- a/README.md
+++ b/README.md
@@ -9,18 +9,14 @@ The original video and the auto-generated captions in French can be sourced from

 The automatically generated French transcript needs to be checked over by a human. The French transcript then needs to be translated to English, also with human oversight.

-* Verify and edit the file `mangen-subs.fr.vtt` which contains the current draft of the human-edited auto-generated transcript.
-* Run `make test` to add subtitles to the video and ensure the format is correct.
+File `mangen-subs.fr.vtt` contains the current draft of the human-edited auto-generated transcript.

 ### Contributors

 * tuxmain, [https://txmn.tk/](https://txmn.tk/)
-* 1 anonymous contributor(s)

 ### Getting the original auto-generated transcript

-*To see how much a southern accent and speech quirks can throw off an AI.*
-
 Run the following to download the auto-generated transcript from YouTube to a new file named `autogen-subs.fr.vtt`.

    yt-dlp --write-auto-subs --sub-lang=fr --skip-download  https://www.youtube.com/watch?v=WRq2197FlMw -o autogen-subs
--- a/mangen-subs.fr.vtt
+++ b/mangen-subs.fr.vtt
--- a/scripts/update-subs.py
+++ b/scripts/update-subs.py
@@ -1,50 +0,0 @@
-
-import re
-import sys
-import argparse
-
-def offset_timestamp(timestamp, offset):
-    hours, minutes, seconds, microseconds = map(float, re.split('[:.]', timestamp))
-    total_seconds = hours * 3600 \
-        + minutes * 60 \
-        + seconds  \
-        + (microseconds * 0.001) \
-        + offset
-    new_hours = int(total_seconds // 3600)
-    new_minutes = int((total_seconds % 3600) // 60)
-    new_seconds = total_seconds % 60
-    return f"{new_hours:02}:{new_minutes:02}:{new_seconds:.3f}"
-
-def update_webvtt(file_path, offset):
-    with open(file_path, 'r', encoding='utf-8') as file:
-        lines = file.readlines()
-
-    updated_lines = []
-    for line in lines:
-        ts = line[:line.find("align")]
-        if '-->' in ts:
-            start, end = ts.split(' --> ')
-            updated_start = offset_timestamp(start.strip(), offset)
-            updated_end = offset_timestamp(end.strip(), offset)
-            updated_lines.append(f"{updated_start} --> {updated_end}\n")
-        else:
-            updated_lines.append(line)
-
-    with open('updated_' + file_path, 'w', encoding='utf-8') as file:
-        file.writelines(updated_lines)
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument('input_file', type=str, help='The input file name')
-    parser.add_argument('time_offset', type=str, help='The output file name')
-    args = parser.parse_args()
-
-    vtt = args.input_file
-    t   = args.time_offset
-    
-    minutes, seconds = map(float, re.split('[:.]', t))
-    offset = minutes * 60 + seconds
-
-    print("bumping timestamps in file %s by %f seconds" % (vtt,  offset))
-    update_webvtt(vtt, -offset)  # Use -40 to offset back by 40 seconds
-